妹の絵

4033 ワード

import requests
from lxml import etree
import os
# Walk the listing pages and hand every gallery link to get_img
def geturl(page):
    """Crawl listing pages 1..``page`` and download every gallery they link to.

    For each listing page the ``<li>`` items under ``<ul id="pins">`` are
    read; the gallery link and title of each item are passed to ``get_img``.
    A progress line is printed after each listing page is finished.

    Args:
        page: Number of listing pages to crawl, starting from page 1.

    Raises:
        requests.exceptions.RequestException: If a listing page cannot be
            fetched (including when the 30 s timeout expires).
    """
    url = 'http://www.mzitu.com/page/%d'
    for i in range(1, page + 1):
        full_url = url % i
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(full_url, timeout=30)
        html_ele = etree.HTML(response.text)

        li_list = html_ele.xpath('//ul[@id="pins"]/li')
        for href_list in li_list:
            links = href_list.xpath('./a/@href')
            titles = href_list.xpath('./span/a/text()')
            # Items that lack a link or a title (e.g. non-gallery entries)
            # are skipped instead of aborting the whole crawl with IndexError.
            if not links or not titles:
                continue
            get_img(links[0], titles[0])
        print(' %d     ' % i)

# Download all images of one gallery into its own folder
def get_img(a_url, title):
    """Download the main image of every page of one gallery.

    The gallery page at ``a_url`` is fetched to read the page count from the
    ``pagenavi`` bar; then ``a_url/1`` .. ``a_url/<max_page>`` are visited and
    the image found in ``div.main-image`` on each page is written to
    ``mzitu/<title>/<image basename>.jpg``.

    Args:
        a_url: URL of the gallery.
        title: Gallery title, used as the directory name. Characters that are
            path separators or invalid in file names are replaced with ``_``.

    Raises:
        requests.exceptions.RequestException: If a page or image request
            fails at the transport level (including the 30 s timeout).
    """
    # The title is scraped text: neutralise path separators and characters
    # that are not allowed in file names so it cannot break or escape the
    # output directory.
    safe_title = ''.join('_' if ch in '\\/:*?"<>|' else ch for ch in title)
    if safe_title in ('', '.', '..'):
        safe_title = '_'
    path = 'mzitu/' + safe_title
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)

    response = requests.get(a_url, timeout=30)
    html_ele = etree.HTML(response.text)
    # The second-to-last pagination span is read as the page count
    # (presumably the last one is the "next page" link - confirm against the
    # site markup). Galleries without a usable pagination bar are treated as
    # having a single page.
    spans = html_ele.xpath('//div[@class="pagenavi"]/a/span')
    try:
        max_page = int(spans[-2].text)
    except (IndexError, TypeError, ValueError):
        max_page = 1
    print('%s     ' % title)

    # Avoid a double slash in page URLs when a_url already ends with '/'.
    base_url = a_url.rstrip('/')
    for i in range(1, max_page + 1):
        full_url = base_url + '/' + str(i)
        response = requests.get(full_url, timeout=30)
        img_ele = etree.HTML(response.text)
        sources = img_ele.xpath('//div[@class="main-image"]/p/a/img/@src')
        if not sources:
            # No main image on this page; move on instead of crashing.
            continue
        img_src = sources[0]
        img_name = img_src.split('/')[-1].split('.')[0]
        filename = path + '/' + img_name + '.jpg'
        print(filename)
        # The image request carries the page URL as Referer and a browser
        # User-Agent.
        headers = {
            'Referer': full_url,
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36',
        }
        response = requests.get(img_src, headers=headers, timeout=30)
        if response.status_code != 200:
            # Do not store an error page under an image file name.
            print('skipped %s (HTTP %d)' % (img_src, response.status_code))
            continue
        with open(filename, 'wb') as f:
            f.write(response.content)
    print('%s     ' % title)

if __name__ == '__main__':
    # Script entry point: crawl the first two listing pages.
    geturl(page=2)