房天下（fang.com）の中古住宅仲介エージェントの電話番号情報
&&&&&&
&&&&&&
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
from xlwt import Workbook
import time
import re
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
def fangTianXia(url, timeout=10):
    """Collect agent-page URLs from one esf.zz.fang.com listing page.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the scrape forever.

    Returns:
        A list of absolute agent-page URLs, one per listing entry that
        carries an agent link. The list may contain duplicates.

    Raises:
        Whatever ``requests.get`` raises on network failure or timeout.
    """
    html = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(html, 'lxml')
    houseAgentPageUrlList = []
    for house in soup.find_all('dd', class_="info rel floatr"):
        try:
            href = house.find('p', class_='gray6 mt10').a['href']
        except (AttributeError, TypeError, KeyError):
            # This listing has no agent link (missing <p>, <a> or href).
            # Skip it instead of appending a stale or undefined value.
            continue
        houseAgentPageUrlList.append('http://esf.zz.fang.com' + href)
    return houseAgentPageUrlList
def houseAgent(url, timeout=10):
    """Download one agent page and extract the agent's contact details.

    Args:
        url: Address of the agent page.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the scrape forever.

    Returns:
        ``[name, img, phone, businessCircle, key_area, numberOfHouses]``.
        ``img`` and ``key_area`` fall back to ``' '`` and ``numberOfHouses``
        to ``'0'`` when the corresponding element is absent from the page.

    Raises:
        ValueError: The page lacks the ``<ul class="person">`` details
            (name, phone, business circle). Previously this case surfaced
            as an ``UnboundLocalError`` from the ``return`` statement.
        Whatever ``requests.get`` raises on network failure or timeout.
    """
    html = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(html, 'lxml')
    # Errors produced when a find() returns None or an attribute is missing.
    lookup_errors = (AttributeError, TypeError, KeyError)

    # Required fields: without them the row is useless, so fail loudly and
    # let the caller decide whether to retry or skip.
    personInfo = soup.find('ul', class_='person')
    try:
        name = personInfo.find('li', class_='name').b.text
        # .string is None when the tag does not hold exactly one string.
        phone = personInfo.find('b', class_='redword').string
        businessCircle = personInfo.find('span', class_='grayword').text
    except lookup_errors:
        raise ValueError('agent details not found on page: %s' % url)

    # Optional fields: each falls back to a placeholder on its own.
    try:
        img = soup.find('div', class_='conltop clearfix').img['src']
    except lookup_errors:
        img = ' '

    try:
        raw_area = soup.find('input', id='importantesfprojname')['value']
        # NOTE(review): the lazy group has no terminator, so this keeps only
        # the single character following each '>'. The pattern may have lost
        # a closing delimiter -- confirm the intended expression.
        key_area = ''.join(re.findall(r">(.+?)", raw_area))
    except lookup_errors:
        key_area = ' '

    try:
        numberOfHouses = soup.find('div', class_='sortLeft').span.string
    except lookup_errors:
        numberOfHouses = '0'

    return [name, img, phone, businessCircle, key_area, numberOfHouses]
if __name__ == '__main__':
    # Step 1: walk listing pages 1..99 and gather agent-page URLs. A set
    # removes agents that show up on more than one listing.
    agentUrls = set()
    for page in range(1, 100):
        url = 'http://esf.zz.fang.com/house/i3%s/' % page
        try:
            agentUrls.update(fangTianXia(url))
        except Exception as err:
            # One unreachable or odd page must not abort the whole crawl.
            sys.stderr.write('skipping listing page %s: %r\n' % (url, err))
    agentUrlList = list(agentUrls)

    # Step 2: prepare the workbook and its header row. Columns follow the
    # order returned by houseAgent: name, image URL, phone, business circle,
    # key area, number of houses.
    # NOTE(review): every header cell is a single space -- the original
    # header text appears to be missing; confirm the intended labels.
    book = Workbook(encoding='utf-8')
    sheet1 = book.add_sheet('Sheet 1')
    headers = (u' ', u' ', u' ', u' ', u' ', u' ')
    for col, title in enumerate(headers):
        sheet1.write(0, col, title)

    # Step 3: fetch every agent page, retrying once after a short pause.
    # `except Exception` (not a bare except) lets Ctrl-C stop the run.
    datalist = []
    for agentUrl in agentUrlList:
        try:
            record = houseAgent(agentUrl)
        except Exception:
            time.sleep(3)
            try:
                record = houseAgent(agentUrl)
            except Exception as err:
                # Give up on this agent only; keep the rows collected so
                # far instead of crashing before the workbook is saved.
                sys.stderr.write('skipping agent %s: %r\n' % (agentUrl, err))
                continue
        datalist.append(record)

    # Step 4: one spreadsheet row per agent, starting below the header.
    for rowIndex, record in enumerate(datalist, 1):
        for col, value in enumerate(record):
            sheet1.write(rowIndex, col, value)
    book.save(u" .xls")
&&&&&&