クローラー（Webスクレイピング）の勉強：私の嫁よりも醜い「校花」画像のダウンロード

4030 ワード

爬虫学习,比我媳妇还丑的校花下载_第1张图片
import os
import re
import urllib
import urllib.request

import requests

# Directory (relative to the current working directory) that the script
# creates before downloading.
dirName = 'Libs'

# Create the directory if it is missing. exist_ok=True makes this a single
# call, avoiding the race between a separate existence check and mkdir.
os.makedirs(dirName, exist_ok=True)

# Sample image URL on this site, kept for reference:
# http://www.521609.com/uploads/allimg/111019/11046303404-1-lp.jpg

# Listing page to scrape.
url = 'http://www.521609.com/qingchunmeinv/'
# Send a desktop-browser User-Agent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'
}

# Step 1: fetch the listing page.
# Without a timeout requests can block indefinitely on an unresponsive
# server, so bound the wait explicitly.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
# Decode the body using the encoding detected from its content.
response.encoding = response.apparent_encoding
page_text = response.text


# Step 2: pull the image paths out of the page and download each image.
#
# NOTE(review): the original pattern was destroyed when this listing was
# extracted from HTML (its tags were rendered as list bullets and dropped).
# The pattern below is a reconstruction that captures the `src` attribute of
# an <img> inside each <li>; confirm it against the live page markup.
ex = r'<li>.*?<img src="(.*?)".*?</li>'
# re.S lets `.` match newlines, so one match can span a multi-line <li>.
img_src_list = re.findall(ex, page_text, re.S)
for src in img_src_list:
    # The captured value is prefixed with the host to form the full URL
    # (presumably the page uses site-relative paths; verify).
    src = 'http://www.521609.com' + src
    # Save under dirName, using the last URL path segment as the file name.
    imgPath = dirName + '/' + src.split('/')[-1]
    urllib.request.urlretrieve(src, imgPath)
    print(imgPath, " ")