Webクローラーの勉強は、私の嫁よりも醜いです。
4030 ワード
import os
import re
import urllib
import urllib.request

import requests
# Directory that downloaded images are saved into.
dirName = 'Libs'
# exist_ok=True makes this a no-op when the directory is already present,
# replacing the separate os.path.exists() check followed by os.mkdir().
os.makedirs(dirName, exist_ok=True)
# Example of a full image URL on this site:
# http://www.521609.com/uploads/allimg/111019/11046303404-1-lp.jpg
url = 'http://www.521609.com/qingchunmeinv/'
# Send a desktop Chrome User-Agent string with the page request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
}
# Step 1: download the listing page.
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(url=url, headers=headers, timeout=10)
# Stop on an HTTP error status instead of going on to parse an error page.
response.raise_for_status()
# Decode the body using the encoding detected from its content.
response.encoding = response.apparent_encoding
page_text = response.text
# Step 2: extract the image paths from the page and download each image.
# The previous pattern ('.*? ') matched any run of text up to a space, so it
# never isolated an image path. This one captures the site-relative value of
# each <img> tag's src attribute.
# NOTE(review): written without sight of the live page markup -- confirm the
# attribute quoting/order against the actual HTML and narrow if needed.
ex = r'<img\b[^>]*?\bsrc="(/[^"]+)"'
img_src_list = re.findall(ex, page_text, re.S)
for src in img_src_list:
    # Captured paths start with '/', so prefix the host to form a full URL.
    src = 'http://www.521609.com' + src
    # Store the file under dirName, named after the URL's last path segment.
    imgPath = dirName + '/' + src.split('/')[-1]
    urllib.request.urlretrieve(src, imgPath)
    print(imgPath, " ")