妹子図(meizitu)の画像をダウンロードする小さなクローラープログラム
3889 ワード
import requests
import os
from lxml import etree
from urllib import request
import random
#
# Seconds to wait on each HTTP request so one stalled connection cannot hang the crawl.
_REQUEST_TIMEOUT = 10


def _extract_images(page):
    """Return ``(image_urls, base_name)`` for a parsed album page.

    Two container layouts are tried in order (``div#picture`` first, then
    ``div.postContent``).  ``base_name`` is the ``alt`` text of the first
    image in the matching container.  Returns ``([], None)`` when neither
    container holds an image with an ``alt`` attribute.
    """
    for container in ('//div[@id="picture"]', '//div[@class="postContent"]'):
        srcs = page.xpath(container + '/p/img/@src')
        alts = page.xpath(container + '/p/img/@alt')
        if alts:
            return srcs, alts[0]
    return [], None


def meizitu(url):
    """Download the images of every album linked from a listing page.

    The listing page at ``url`` is fetched, each ``<li>`` of the
    ``wp-list clearfix`` list is followed to its album page, and every image
    found there is saved under ``downloads/`` as
    ``<alt text><random float>.jpg``.

    Args:
        url: Address of the listing page to crawl.

    Returns:
        None.  Results are written to the ``downloads`` directory and the
        name of each saved image is printed.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched.  Failures on individual album pages or images are
            reported on stdout and skipped.
    """
    headers = {
        'Cookie': 'UM_distinctid=1654601b2fc0-05766907b723fb-37664109-144000-1654601b2ff9a9; bdshare_firstime=1534477856447; safedog-flow-item=; CNZZDATA30056528=cnzz_eid%3D1545626971-1534477372-http%253A%252F%252Fwww.meizitu.com%252F%26ntime%3D1534593928',
        'Referer': 'http://www.meizitu.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('downloads', exist_ok=True)

    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    listing = etree.HTML(response.text)

    for item in listing.xpath('//ul[@class="wp-list clearfix"]/li'):
        links = item.xpath('./div/div/a/@href')
        if not links:
            # List item without an album link: nothing to follow.
            continue
        album_url = links[0]

        try:
            album_response = requests.get(
                album_url, headers=headers, timeout=_REQUEST_TIMEOUT
            )
        except requests.RequestException as err:
            print('skipping album', album_url, err)
            continue

        # Album pages are decoded as gb2312 before parsing.
        album_response.encoding = 'gb2312'
        album_page = etree.HTML(album_response.text)

        image_urls, base_name = _extract_images(album_page)
        if base_name is None:
            print('no images found in', album_url)
            continue

        # The alt text comes from the remote page; strip path separators so
        # it cannot point outside the downloads directory.
        base_name = base_name.replace('/', '_').replace('\\', '_')

        for image_url in image_urls:
            # Build each name from the unchanged base name so the random
            # suffixes do not pile up from one image to the next.
            file_name = base_name + str(random.random())
            print(file_name)
            try:
                request.urlretrieve(
                    image_url, os.path.join('downloads', file_name + '.jpg')
                )
            except (OSError, ValueError) as err:
                # One failed image should not abort the rest of the album.
                print('failed to download', image_url, err)
if __name__ == '__main__':
    # Script entry point: crawl a single, fixed tag listing page.
    meizitu('http://www.meizitu.com/tag/quanluo_4_2.html')