requests で画像をクロールして保存 -- 記録
3724 ワード
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import requests,time,os
from bs4 import BeautifulSoup
# Request headers sent with the listing-page fetch; the User-Agent presents
# the script as a desktop Firefox browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) '
        'Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10'
    ),
}
class imgClass(object):
    '''Download every thumbnail image referenced by one listing page.

    The page at ``url`` is fetched and parsed, each ``img.entry-thumbnail``
    element is located, and the image behind its ``src`` attribute is saved
    into ``saveDir`` as ``<numeric id><extension>``.
    '''

    # Directory (relative to the working directory) that receives the images.
    saveDir = 'images'
    # Seconds to wait for the server before giving up; without a timeout a
    # stalled connection would block the script forever.
    timeout = 30

    def __init__(self, url):
        '''Remember the listing page to scrape.

        :param url: address of the page whose thumbnails are downloaded.
        '''
        self.url = url

    def soupImg(self):
        '''Fetch ``self.url`` and return it parsed as a BeautifulSoup tree.

        :returns: ``BeautifulSoup`` object built with the ``lxml`` parser.
        '''
        # ``headers`` is only read here, so no ``global`` statement is needed.
        wb_data = requests.get(self.url, headers=headers, timeout=self.timeout)
        return BeautifulSoup(wb_data.text, 'lxml')

    def imgSpider(self):
        '''Find every thumbnail on the page and save the image it points to.'''
        soup = self.soupImg()
        for img in soup.select('img.entry-thumbnail'):
            # A tag without ``src`` has nothing to download; skip it instead
            # of raising KeyError and aborting the rest of the page.
            src = img.get('src')
            if src:
                self.__saveImg(src)

    @staticmethod
    def _localPath(imgUrl, saveDir='images'):
        '''Map a thumbnail URL to the local path it should be saved under.

        The URL is expected to contain ``images/<digits>/`` and
        ``superthumb.<ext>``; the result is ``<saveDir>/<digits>.<ext>``.

        :param imgUrl: address of the thumbnail image.
        :param saveDir: directory the file will be placed in.
        :returns: the local path, or ``None`` when the URL does not have the
            expected shape.
        '''
        import re

        # Raw strings keep ``\d`` / ``\w`` from being read as (invalid)
        # string escapes; the dot is escaped so it only matches a literal '.'.
        fileNum = re.search(r'images/(\d+)/', imgUrl)
        fileFix = re.search(r'superthumb(\.\w+)', imgUrl)
        if fileNum is None or fileFix is None:
            return None
        # os.path.join uses the separator of the running platform; a
        # hard-coded backslash only forms a directory path on Windows.
        return os.path.join(saveDir, fileNum.group(1) + fileFix.group(1))

    def __saveImg(self, imgUrl):
        '''Download ``imgUrl`` and write it into ``saveDir``.

        URLs that do not match the expected thumbnail layout are skipped, and
        nothing is written unless the server answers with status 200.

        :param imgUrl: address of the thumbnail image.
        '''
        fileName = self._localPath(imgUrl, self.saveDir)
        if fileName is None:
            # Unexpected URL shape: skip this image, keep processing others.
            return
        if not os.path.isdir(self.saveDir):
            os.makedirs(self.saveDir)
        images = requests.get(imgUrl, timeout=self.timeout)
        if images.status_code != 200:
            return
        # Recover "<id>" and ".<ext>" from the path for the progress message.
        fileNum, fileFix = os.path.splitext(os.path.basename(fileName))
        print(' : %s%s ..' % (fileNum, fileFix))
        with open(fileName, 'wb') as fp:
            fp.write(images.content)
if __name__ == '__main__':
    # Walk listing pages 1 through 20, downloading the thumbnails of each and
    # pausing two seconds between pages.
    pageUrl = 'https://weheartit.com/inspirations/taylorswift?scrolling=true&page=%s'
    for pageNo in range(1, 21):
        imgClass(pageUrl % pageNo).imgSpider()
        time.sleep(2)