房天下(Fang.com)の中古住宅仲介エージェントの電話情報


&&&&&&
# -*- coding: utf-8 -*
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook
import time
import re
import sys
# Python 2 idiom: reloading `sys` makes `sys.setdefaultencoding` available
# again (it is removed from the module after interpreter start-up), so the
# process-wide default for implicit str/unicode conversions can be set to
# UTF-8 instead of ASCII.
# NOTE(review): `reload` is not a builtin on Python 3 - this file presumably
# targets Python 2 only; confirm before porting.
reload(sys)
sys.setdefaultencoding('utf-8')


def fangTianXia(url):
    """Collect the agent-page URLs linked from one listing page.

    Downloads ``url`` and, for every ``dd`` element found with
    ``class_="info rel floatr"``, reads the ``href`` of the link inside its
    ``p`` element with ``class_='gray6 mt10'`` and prefixes it with
    ``http://esf.zz.fang.com``.

    Listings that have no such link are skipped.  Previously a missing link
    raised ``UnboundLocalError`` when it occurred on the first listing and
    otherwise appended the previous listing's URL a second time.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of absolute agent-page URLs in page order.  The list may
        contain duplicates; callers de-duplicate it.

    Raises:
        Whatever ``requests.get`` raises when the download fails; nothing is
        caught here.
    """
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'lxml')
    houseAgentPageUrlList = []
    for house in soup.find_all('dd', class_="info rel floatr"):
        try:
            # find() returns None when the <p> is absent (AttributeError on
            # .a), .a is None when there is no link (TypeError on [...]),
            # and a link without href raises KeyError.
            href = house.find('p', class_='gray6 mt10').a['href']
        except (AttributeError, TypeError, KeyError):
            continue
        houseAgentPageUrlList.append('http://esf.zz.fang.com' + href)
    return houseAgentPageUrlList

def houseAgent(url):
    """Scrape one agent profile page into a flat six-field record.

    The name, phone and business-circle fields are read from the
    ``ul`` element with ``class_='person'`` and are required.  The image
    URL, key-area text and house count are optional and fall back to the
    placeholder values ``' '``, ``'    '`` and ``'0'`` respectively.

    Args:
        url: Address of the agent profile page to download.

    Returns:
        ``[name, img, phone, businessCircle, key_area, numberOfHouses]``.
        ``phone`` and ``numberOfHouses`` come from ``.string`` and can
        therefore be ``None`` when the tag has no single string child.

    Raises:
        ValueError: if the page lacks the elements holding the required
            fields.  (The original swallowed the real error and then failed
            with ``UnboundLocalError`` at the ``return``; raising explicitly
            keeps the "failure raises" contract that callers retry on while
            naming the page that failed.)
        Whatever ``requests.get`` raises when the download fails.
    """
    # Errors produced by walking a tag chain that is partly missing:
    # None.attr -> AttributeError, None[...] -> TypeError, no attr -> KeyError.
    missing = (AttributeError, TypeError, KeyError)

    html = requests.get(url).text
    soup = BeautifulSoup(html, 'lxml')

    try:
        img = soup.find('div', class_='conltop clearfix').img['src']
    except missing:
        img = ' '

    personInfo = soup.find('ul', class_='person')
    try:
        name = personInfo.find('li', class_='name').b.text
        phone = personInfo.find('b', class_='redword').string
        businessCircle = personInfo.find('span', class_='grayword').text
    except AttributeError:
        raise ValueError('agent page %s has no usable contact block' % url)

    try:
        # NOTE(review): the lazy group sits at the end of the pattern, so it
        # captures only the single character following each '>' - confirm
        # that this is the intended extraction.
        key_area = ''.join(re.findall(
            r">(.+?)", soup.find('input', id='importantesfprojname')['value']))
    except missing:
        key_area = '    '

    try:
        numberOfHouses = soup.find('div', class_='sortLeft').span.string
    except missing:
        numberOfHouses = '0'

    return [name, img, phone, businessCircle, key_area, numberOfHouses]

if __name__ == '__main__':
    # Script entry point: crawl listing pages 1..99, gather the distinct
    # agent-page URLs, scrape each agent and write the records to an .xls
    # workbook (one header row followed by one row per agent).

    # Step 1: collect agent URLs, de-duplicated in first-seen order so the
    # spreadsheet row order is deterministic (a plain set() is not ordered).
    seenUrls = set()
    agentUrlList = []
    for page in range(1, 100):
        url = 'http://esf.zz.fang.com/house/i3%s/' % page
        try:
            pageUrls = fangTianXia(url)
        except Exception as exc:
            # One unreadable listing page must not abort the whole crawl.
            sys.stderr.write('skipping listing page %s: %s\n' % (url, exc))
            continue
        for agentUrl in pageUrls:
            if agentUrl not in seenUrls:
                seenUrls.add(agentUrl)
                agentUrlList.append(agentUrl)

    # Step 2: create the workbook and write the header row.
    book = Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('Sheet 1')
    headers = (u'   ', u'  ', u'  ', u'    ', u'    ', u'        ')
    for column, title in enumerate(headers):
        sheet1.write(0, column, title)

    # Step 3: scrape every agent page.  A failure is retried once after a
    # short pause; if the retry fails too the agent is skipped, so that the
    # rows gathered so far are still saved instead of being lost to a crash.
    datalist = []
    for agentUrl in agentUrlList:
        try:
            record = houseAgent(agentUrl)
        except Exception:
            time.sleep(3)
            try:
                record = houseAgent(agentUrl)
            except Exception as exc:
                sys.stderr.write('skipping agent %s: %s\n' % (agentUrl, exc))
                continue
        datalist.append(record)

    # Step 4: one spreadsheet row per record, starting below the header.
    # Each record is [name, img, phone, businessCircle, key_area,
    # numberOfHouses], written to columns 0..5 in that order.
    for rowIndex, record in enumerate(datalist, 1):
        for column, value in enumerate(record):
            sheet1.write(rowIndex, column, value)

    book.save(u"      .xls")

&&&&&&