Python実戦-第5節:MongoDBの使用開始

3165 ワード

メモ

  • データベースサービスへの接続:
    client = pymongo.MongoClient('localhost', 27017)
    
  • データベースの作成/アクセス:
    $dbName = client['$dbName']
    
  • データテーブルの作成/アクセス:
    $tableName = $dbName['$tableName']
    
  • データの挿入:
    $tableName.insert_one(data)
    
  • データのクエリ:
    $tableName.find()
    $tableName.find({'$columnName':$columnValue})
    # $lt/$lte/$gt/$gte/$ne   </<=/>/>=/!=
    $tableName.find({'$columnName':{'$lte':$value}})
    
  • 課題

  • 賃貸物件情報をデータベースに保存する
  • import pymongo
    from bs4 import BeautifulSoup
    import requests
    import time
    
    # Connect to the MongoDB server on localhost:27017 and pick the
    # 'duanzufang' collection inside the 'xiaozhu' database.
    client = pymongo.MongoClient('localhost', 27017)
    xiaozhu = client['xiaozhu']
    duanzufang = xiaozhu['duanzufang']
    
    # Search-result pages 1 through 3 that the crawler will visit.
    urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 4)]
    
    
    def parse_gender(gender_class):
        """Translate a CSS class name into the gender label stored per record.

        Args:
            gender_class: Class name to classify; ``'member_ico1'`` and
                ``'member_ico'`` are the two recognised values.

        Returns:
            The label string for the given class.

        NOTE(review): every branch currently returns the same single-space
        string, so the result does not depend on ``gender_class``. The
        distinct labels were presumably lost at some point -- TODO confirm
        the intended values.
        """
        # One guard clause per recognised class name.
        if gender_class == 'member_ico1':
            return ' '
        if gender_class == 'member_ico':
            return ' '
        # Anything else falls through to the default label.
        return ' '
    
    
    def parse_datail_page(url):
        """Scrape one detail page and insert the extracted record into MongoDB.

        Downloads ``url``, pulls the title, address, price, main picture,
        owner picture, owner name and gender class out of the HTML with CSS
        selectors, prints the resulting dict, inserts it into the
        ``duanzufang`` collection and then sleeps for one second.

        Args:
            url: Address of the detail page to download.

        Raises:
            IndexError: If one of the selectors matches nothing on the page.
            KeyError: If the gender element has no ``class`` attribute.
            ValueError: If the price text cannot be converted to ``int``.
            requests.exceptions.RequestException: If the request fails or
                does not complete within the timeout.
        """
        # A timeout keeps one unresponsive page from stalling the whole crawl.
        web_data = requests.get(url, timeout=10)
        soup = BeautifulSoup(web_data.text, 'lxml')

        titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
        addrs = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
        rants = soup.select('#pricePart > div.day_l > span')
        pics = soup.select('#curBigImage')
        owner_pics = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
        # First class name of the first matching <div>; parse_gender maps it to a label.
        gender_class = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')[0]['class'][0]
        owner_names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')

        # Only the first match of each selector is used.
        data = {
            'title': titles[0].get_text(),
            'addr': addrs[0].get_text().strip(),
            'rant': int(rants[0].get_text()),
            'pic': pics[0].get('src'),
            'owner_pic': owner_pics[0].get('src'),
            'owner_name': owner_names[0].get_text(),
            'gender': parse_gender(gender_class),
        }

        print(data)
        # Insert the record into MongoDB.
        duanzufang.insert_one(data)
        # Pause before the next request -- presumably to throttle the crawl.
        time.sleep(1)
    
    
    def parse_list_page(url):
        """Download a search-result page and process each detail link on it.

        Every ``<a>`` matched by ``#page_list > ul > li > a`` has its ``href``
        passed to ``parse_datail_page``.

        Args:
            url: Address of the search-result page to download.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                does not complete within the timeout.
        """
        # A timeout keeps one unresponsive page from stalling the whole crawl.
        web_data = requests.get(url, timeout=10)
        soup = BeautifulSoup(web_data.text, 'lxml')

        for link in soup.select('#page_list > ul > li > a'):
            parse_datail_page(link.get('href'))
    
    
    # Process the search-result pages one after another.
    for page_url in urls:
        parse_list_page(page_url)
    
    
  • 賃貸料が500以上の物件情報を検索(フィルタリング)する
    import pymongo
    
    # Open the 'duanzufang' collection of the 'xiaozhu' database on localhost.
    client = pymongo.MongoClient('localhost', 27017)
    xiaozhu = client['xiaozhu']
    duanzufang = xiaozhu['duanzufang']
    
    # Print every document whose 'rant' field is greater than or equal to 500.
    price_filter = {'rant': {'$gte': 500}}
    for listing in duanzufang.find(price_filter):
        print(listing)