妹の絵
4033 ワード
import requests
from lxml import etree
import os
#
def geturl(page, timeout=30):
    """Crawl the first ``page`` listing pages and download each gallery found.

    For every listing page from 1 to ``page`` (inclusive) the page is fetched,
    each ``<li>`` under ``<ul id="pins">`` is read for a gallery link and a
    title, and ``get_img`` is called with that link and title.

    Args:
        page: Number of listing pages to crawl, starting at page 1. A value
            below 1 results in no requests being made.
        timeout: Seconds to wait for each listing-page request before giving
            up.

    Raises:
        requests.exceptions.RequestException: If a listing page cannot be
            fetched, including when ``timeout`` expires.
    """
    url = 'http://www.mzitu.com/page/%d'
    for i in range(1, page + 1):
        full_url = url % i
        # requests waits indefinitely by default; bound the wait so a stalled
        # connection cannot hang the whole crawl.
        response = requests.get(full_url, timeout=timeout)
        html_ele = etree.HTML(response.text)
        li_list = html_ele.xpath('//ul[@id="pins"]/li')
        for href_list in li_list:
            hrefs = href_list.xpath('./a/@href')
            titles = href_list.xpath('./span/a/text()')
            # Skip list items that carry no link or no title instead of
            # aborting the whole crawl with an IndexError on ``[0]``.
            if not hrefs or not titles:
                continue
            get_img(hrefs[0], titles[0])
        print(' %d ' % i)
#
def get_img(a_url, title, timeout=30):
    """Download every image of one gallery into ``mzitu/<title>/``.

    The gallery's first page is fetched to read the page count from the
    second-to-last ``<a>/<span>`` of the ``pagenavi`` block. Each page
    ``<a_url>/<n>`` is then fetched, the ``src`` of the image inside the
    ``main-image`` block is extracted, and the image is written to
    ``mzitu/<title>/<image base name>.jpg``.

    Args:
        a_url: URL of the gallery's first page.
        title: Gallery title; used as the output directory name and printed
            before and after the download.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        IndexError: If the pagination block has fewer than two entries.
        ValueError: If the page-count text is not an integer.
        requests.exceptions.RequestException: If a request fails, including
            when ``timeout`` expires.
        OSError: If the directory or an image file cannot be written.
    """
    path = 'mzitu/' + title
    # exist_ok replaces the exists()-then-makedirs() pair, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(path, exist_ok=True)

    response = requests.get(a_url, timeout=timeout)
    html_ele = etree.HTML(response.text)
    # The second-to-last pagination entry holds the number of the last page.
    max_page = html_ele.xpath('//div[@class="pagenavi"]/a/span')[-2].text
    max_page = int(max_page)
    print('%s ' % title)

    # Loop-invariant header value, built once instead of per image.
    user_agent = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
    )
    for i in range(1, max_page + 1):
        full_url = a_url + '/' + str(i)
        page_response = requests.get(full_url, timeout=timeout)
        img_ele = etree.HTML(page_response.text)
        img_srcs = img_ele.xpath('//div[@class="main-image"]/p/a/img/@src')
        if not img_srcs:
            # No main image on this page: skip it rather than abort the
            # rest of the gallery with an IndexError.
            continue
        img_src = img_srcs[0]
        # Base name of the image URL up to its first dot; always saved as .jpg.
        img_name = img_src.split('/')[-1].split('.')[0]
        filename = path + '/' + img_name + '.jpg'
        print(filename)
        headers = {
            # The image request names the page it came from as Referer;
            # presumably the image host requires this -- TODO confirm.
            'Referer': full_url,
            'User-Agent': user_agent,
        }
        img_response = requests.get(img_src, headers=headers, timeout=timeout)
        if img_response.status_code != 200:
            # Do not store an error page's body on disk as a .jpg file.
            print('skipped %s (HTTP %d)' % (filename, img_response.status_code))
            continue
        with open(filename, 'wb') as f:
            f.write(img_response.content)
    print('%s ' % title)
# Script entry point: when run directly, crawl the first two listing pages.
if __name__ == '__main__':
    geturl(page=2)