requestsモジュールで微博(Weibo)のホット検索ランキングをスクレイピングする
7607 ワード
微博(Weibo)のホット検索ランキングをスクレイピングする
, , , ,
requests lxml , 。
, txt 。
import requests
from lxml import etree
import datetime
def format_timestamp(moment):
    """Return *moment* as ``YYYY-MM-DD Weekday HH:MM:SS`` followed by a newline.

    Args:
        moment: A ``datetime.datetime`` to render.

    Returns:
        The formatted header line written at the top of the output file.
    """
    # '%Y-%m-%d' is spelled out instead of '%F': the two are equivalent where
    # '%F' exists, but '%F' is a platform extension of strftime.
    return moment.strftime('%Y-%m-%d %A %H:%M:%S') + '\n'


def format_entry(rank, title, hot):
    """Return one output line: ``<rank>.<title><TAB><hot>`` plus a newline.

    Args:
        rank: Text of the rank cell.
        title: Text of the topic link.
        hot: Text of the hotness counter.

    Returns:
        The line that is both printed and written to the output file.
    """
    return rank + '.' + title + '\t' + hot + '\n'


def main():
    """Fetch the Weibo hot-search page and dump its ranked rows to a text file.

    The page is downloaded with ``requests``, parsed with ``lxml``, and each
    table row after the first is written as one line (and echoed to stdout),
    preceded by a timestamp header line.
    """
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36 '
            'Edg/84.0.522.59'
        ),
    }
    url = 'https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6'
    # A timeout keeps the script from hanging forever on a stalled connection.
    page_text = requests.get(url=url, headers=headers, timeout=10).text
    tree = etree.HTML(page_text)
    rows = tree.xpath('//div[@class="data"]//tbody/tr')

    # The context manager guarantees the file is flushed and closed; UTF-8 is
    # set explicitly so the output does not depend on the platform default.
    with open(' .txt', 'w', encoding='utf-8') as fp:
        fp.write(format_timestamp(datetime.datetime.now()))

        # Row 0 is skipped, exactly as the original index arithmetic did.
        # NOTE(review): presumably row 0 is a pinned entry without a rank
        # cell -- confirm against the live page markup.
        # The XPath queries must run on each row element, not on the row list.
        for row in rows[1:]:
            rank = row.xpath('./td[@class="td-01 ranktop"]/text()')
            title = row.xpath('./td[@class="td-02"]/a/text()')
            hot = row.xpath('./td[@class="td-02"]/span/text()')
            # Skip rows missing any of the three cells instead of failing
            # with IndexError on an empty XPath result.
            if not (rank and title and hot):
                continue
            hot_search = format_entry(rank[0], title[0], hot[0])
            print(hot_search)
            fp.write(hot_search)


if __name__ == '__main__':
    main()