requestsで画像をクロールして保存 -- 記録

3724 ワード

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import requests,time,os
from bs4 import BeautifulSoup

# User-Agent string sent with page requests so they look like a desktop browser.
_USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10'

# HTTP headers passed to requests.get() when fetching a listing page.
headers = {"User-Agent": _USER_AGENT}

class imgClass(object):
    '''Scrape one listing page and save every thumbnail image it references.

    Images are written into the local ``images`` directory and named after
    the numeric id found in each image URL.
    '''

    # Seconds to wait on a single HTTP request before giving up, so that a
    # stalled connection cannot hang the crawl forever.
    requestTimeout = 10

    def __init__(self,url):
        '''Remember the listing page to scrape.

        Args:
            url: Address of the page whose thumbnails should be downloaded.
        '''
        self.url = url

    def soupImg(self):
        '''Fetch ``self.url`` and return it parsed as a BeautifulSoup tree.

        The module-level ``headers`` dict is sent with the request.

        Raises:
            requests.RequestException: if the connection fails or times out.
        '''
        wb_data = requests.get(self.url,headers=headers,timeout=self.requestTimeout)
        return BeautifulSoup(wb_data.text,'lxml')

    def imgSpider(self):
        '''Download every ``img.entry-thumbnail`` image found on the page.'''
        soup = self.soupImg()
        for img in soup.select('img.entry-thumbnail'):
            # A tag without a src attribute has nothing to download.
            imgUrl = img.get('src')
            if imgUrl:
                self.__saveImg(imgUrl)

    @staticmethod
    def _targetPath(imgUrl):
        '''Return the local path an image URL should be saved to, or None.

        The URL must contain ``images/<digits>/`` and ``superthumb.<ext>``;
        the result is ``<digits>.<ext>`` inside the ``images`` directory,
        joined with the platform's own path separator.  None is returned
        when either part is missing from the URL.
        '''
        import re
        fileNum = re.search(r'images/(\d+)/',imgUrl)
        fileFix = re.search(r'superthumb(\.\w+)',imgUrl)
        if fileNum is None or fileFix is None:
            return None
        return os.path.join('images',fileNum.group(1) + fileFix.group(1))

    def __saveImg(self,imgUrl):
        '''Download one image and write it into the ``images`` directory.

        URLs that do not match the expected pattern are skipped with a
        message instead of aborting the crawl.  Nothing is written unless
        the server answers with status 200.

        Args:
            imgUrl: Address of the image to download.

        Raises:
            requests.RequestException: if the connection fails or times out.
        '''
        fileName = self._targetPath(imgUrl)
        if fileName is None:
            print('skipping unrecognized image URL: %s' % imgUrl)
            return
        if not os.path.isdir('images'):
            os.makedirs('images')
        images = requests.get(imgUrl,timeout=self.requestTimeout)
        if images.status_code == 200:
            # Recover "<digits>" and ".<ext>" from the path for the progress line.
            fileNum,fileFix = os.path.splitext(os.path.basename(fileName))
            print('  : %s%s     ..' % (fileNum,fileFix))
            with open(fileName,'wb') as fp:
                fp.write(images.content)

if __name__ == '__main__':
    # Walk listing pages 1 through 20, pausing two seconds after each page
    # so the crawl does not hammer the server.
    pageTemplate = 'https://weheartit.com/inspirations/taylorswift?scrolling=true&page=%s'
    for pageNo in range(1,21):
        imgClass(pageTemplate % pageNo).imgSpider()
        time.sleep(2)