Python実戦-第5節:MongoDBの使用開始
3165 ワード
メモ
client = pymongo.MongoClient('localhost', 27017)
$dbName = client['$dbName']
$tableName = $dbName['$tableName']
$tableName.insert_one(data)
$tableName.find()
$tableName.find({'$columnName':$columnValue})
# $lt/$lte/$gt/$gte/$ne: </<=/>/>=/!=
$tableName.find({'$columnName':{'$lte':$value}})
さぎょう
import pymongo
from bs4 import BeautifulSoup
import requests
import time
# Connect to the MongoDB server on localhost, port 27017.
client = pymongo.MongoClient('localhost', 27017)
# Database 'xiaozhu' and its 'duanzufang' collection, which the scraper
# functions below write into.
xiaozhu = client['xiaozhu']
duanzufang = xiaozhu['duanzufang']
# Search-result list pages 1 through 3 that will be crawled.
urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(1, 4)]
def parse_gender(gender_class):
    """Map a gender-icon CSS class name to the label stored in the record.

    Args:
        gender_class: CSS class name taken from the host's icon element.

    Returns:
        The label string for that class name.
    """
    # NOTE(review): all three branches return the same ' ' string, so the
    # result never depends on the input. The distinct labels were presumably
    # lost when this listing was extracted -- restore them from the original
    # source before relying on the 'gender' field.
    if gender_class == 'member_ico1':
        return ' '
    elif gender_class == 'member_ico':
        return ' '
    else:
        return ' '
def parse_datail_page(url):
    """Scrape one listing detail page and store the record in MongoDB.

    Downloads ``url``, pulls the title, address, price, main picture, host
    picture, host name and host gender icon class out of the HTML, prints
    the resulting dict, inserts it into the ``duanzufang`` collection and
    then sleeps for one second.

    Args:
        url: Address of the detail page to fetch.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        IndexError: If one of the expected elements is not on the page.
        ValueError: If the price text cannot be converted to ``int``.
    """
    # Without a timeout an unresponsive server would block the crawl forever.
    web_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addrs = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    rants = soup.select('#pricePart > div.day_l > span')
    pics = soup.select('#curBigImage')
    owner_pics = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    # First CSS class of the icon element next to the host picture.
    gender_class = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')[0]['class'][0]
    owner_names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')

    # Only the first match of each selector is used. The 'rant' key is kept
    # as spelled because stored documents are queried by that name.
    data = {
        'title': titles[0].get_text(),
        'addr': addrs[0].get_text().strip(),
        'rant': int(rants[0].get_text()),
        'pic': pics[0].get('src'),
        'owner_pic': owner_pics[0].get('src'),
        'owner_name': owner_names[0].get_text(),
        'gender': parse_gender(gender_class),
    }
    print(data)

    # insert to MongoDB
    duanzufang.insert_one(data)
    # Pause between detail requests (presumably to go easy on the site).
    time.sleep(1)
def parse_list_page(url):
    """Scrape every listing linked from one search-result page.

    Downloads ``url``, collects the anchors under ``#page_list`` and passes
    each anchor's ``href`` to ``parse_datail_page``.

    Args:
        url: Address of the search-result list page.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Without a timeout an unresponsive server would block the crawl forever.
    web_data = requests.get(url, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')
    for link in soup.select('#page_list > ul > li > a'):
        parse_datail_page(link.get('href'))
# Crawl each search-result list page in turn.
for url in urls:
    parse_list_page(url)
import pymongo
# Connect to the MongoDB server on localhost and open the collection.
client = pymongo.MongoClient('localhost', 27017)
xiaozhu = client['xiaozhu']
duanzufang = xiaozhu['duanzufang']
# Print every stored document whose 'rant' value is greater than or equal to 500.
for item in duanzufang.find({'rant': {'$gte': 500}}):
    print(item)