05-サイトをクロールする練習をする
1907 ワード
5日目
学習遅延機能
安住客(コード上の対象サイトは xiaozhu.com)の賃貸情報の取得を練習する
from bs4 import BeautifulSoup
import requests
def sexual(n):
    """Map a landlord gender-icon element to a readable gender label.

    Args:
        n: An element-like object exposing ``get(attr)`` (for example a
            BeautifulSoup ``Tag``); only its ``class`` attribute is read.

    Returns:
        ``'female'`` when the element's classes include
        ``member_girl_ico``, otherwise ``'male'``.
    """
    # The ``class`` lookup yields a list of tokens, or None when the attribute
    # is absent. A membership test also matches icons that carry additional
    # classes, which an exact list comparison would miss.
    classes = n.get('class') or []
    return 'female' if 'member_girl_ico' in classes else 'male'
def get_rentinfo(url):
    """Download one listing detail page and print the fields scraped from it.

    The page is fetched with ``requests`` and parsed with BeautifulSoup using
    the ``lxml`` parser. One dict is printed per row of matched elements.

    Args:
        url: Address of the listing detail page to fetch.

    Returns:
        None. Each record is written to standard output with ``print``.
    """
    web_content = requests.get(url)
    soup = BeautifulSoup(web_content.text, 'lxml')

    # One CSS selection per field, in the order they are unpacked below.
    columns = (
        soup.select('div.pho_info > h4 > em'),       # listing title
        soup.select('div.pho_info > p'),             # address
        soup.select('div.day_l > span'),             # price
        soup.select('#curBigImage'),                 # main listing image
        soup.select('div.member_pic > a > img'),     # landlord avatar
        soup.select('div.w_240 > h6 > span'),        # landlord gender icon
        soup.select('div.w_240 > h6 > a'),           # landlord name
    )

    # zip() stops at the shortest selection, so a page on which any selector
    # matches nothing produces no record at all.
    for (title, address, price, image,
         landlord_pic, landlord_sexual, landlord_name) in zip(*columns):
        # Every field needs its own key: with identical keys a dict display
        # keeps only the last value, which would drop all but the name.
        data = {
            'title:': title.get_text(),
            'address:': address.get_text(),
            'price:': price.get_text(),
            'image:': image.get('src'),
            'landlord_pic:': landlord_pic.get('src'),
            'landlord_gender:': sexual(landlord_sexual),
            'landlord_name:': landlord_name.get_text(),
        }
        print(data)
# Driver: download the search-results page for the fixed date range, collect
# the listing anchors from it, then scrape each linked detail page in turn.
house_list = BeautifulSoup(
    requests.get('http://hz.xiaozhu.com/?startDate=2016-06-27&endDate=2016-07-31').text,
    'lxml',
).select('#page_list > ul > li > a')

for anchor in house_list:
    # The anchor's href is passed to get_rentinfo as the detail-page URL.
    get_rentinfo(anchor.get('href'))