05-サイトをクロールする練習をする

1907 ワード

5日目


学習遅延機能
安住客賃貸情報の取得を練習する

from bs4 import BeautifulSoup
import requests


def sexual(n):
    """Map a landlord's gender-icon element to a gender label.

    Args:
        n: Object exposing ``get('class')`` (for example a BeautifulSoup
            ``Tag``). The class value may be a list of class names, a
            single whitespace-separated string, or missing.

    Returns:
        ``'female'`` when the ``member_girl_ico`` class is present on the
        element, otherwise ``'male'``.
    """
    classes = n.get('class') or []
    if isinstance(classes, str):
        # The attribute may arrive as a raw string rather than a list.
        classes = classes.split()
    # Membership (not list equality) so extra classes on the element do not
    # hide the marker; the two outcomes must be distinguishable labels.
    return 'female' if 'member_girl_ico' in classes else 'male'


def get_rentinfo(url):
    """Download one rental detail page and print the fields scraped from it.

    Args:
        url: Address of the detail page to fetch.

    Returns:
        None. One dict per matched group of elements is printed to stdout.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    # A timeout keeps one unresponsive page from stalling the whole crawl.
    web_content = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_content.text, 'lxml')

    titles = soup.select('div.pho_info > h4 > em')
    addresses = soup.select('div.pho_info > p')
    prices = soup.select('div.day_l > span')
    images = soup.select('#curBigImage')
    landlord_pics = soup.select('div.member_pic > a > img')
    landlord_sexuals = soup.select('div.w_240 > h6 > span')
    landlord_names = soup.select('div.w_240 > h6 > a')

    # zip() stops at the shortest list, so a page missing any one of these
    # elements produces no output at all.
    rows = zip(titles, addresses, prices, images,
               landlord_pics, landlord_sexuals, landlord_names)

    for title, address, price, image, landlord_pic, landlord_sexual, landlord_name in rows:
        # Every field needs its own key: identical keys overwrite each other
        # and leave only the last value in the dict.
        data = {
            'title': title.get_text(),
            'address': address.get_text(),
            'price': price.get_text(),
            'image': image.get('src'),
            'landlord_pic': landlord_pic.get('src'),
            'landlord_sexual': sexual(landlord_sexual),
            'landlord_name': landlord_name.get_text(),
        }
        print(data)


# Search-results page to crawl; the query string carries a start and end date.
SEARCH_URL = 'http://hz.xiaozhu.com/?startDate=2016-06-27&endDate=2016-07-31'

search_page = requests.get(SEARCH_URL)
list_soup = BeautifulSoup(search_page.text, 'lxml')

# Anchors directly under each list item of the #page_list element.
house_list = list_soup.select('#page_list > ul > li > a')

# Scrape each linked page in the order the anchors appear.
for i in house_list:
    rent_url = i.get('href')
    get_rentinfo(rent_url)