クローラーで画像をダウンロードする
7117 ワード
クローラーを使って画像をダウンロードする
クローラー(Webスクレイピング)を学び始めてまだ数日しか経っていないので、皆さんからご意見をいただけると嬉しいです。私がクロールしたのは、ある掲示板です。URLは各自で設定してください。
import requests
from lxml import etree
"""
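1. Build the URL of the page to crawl.
2. Fetch that URL and collect the image links found on the page.
3. Look for a next-page link to decide whether another URL follows.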
"""
def url_names(n, base_url="https://tieba.baidu.com"):
    """Build the URL of the page to crawl.

    Args:
        n: Page identifier. It is converted with ``str`` and appended to
            ``base_url`` without any separator.
        base_url: Prefix the identifier is appended to. The default is only
            the site root, so pass the full prefix of the thread you want
            (for example one ending in ``?pn=``) to crawl a real thread.

    Returns:
        The concatenated URL string.
    """
    return base_url + str(n)
def next_url(next_url_if, n):
    """Advance the page counter, or signal that crawling is finished.

    Args:
        next_url_if: Collection that is searched for the next-page marker.
        n: Current page number.

    Returns:
        ``n + 1`` when the marker is present in ``next_url_if``, otherwise
        the sentinel string ``"end"``.
    """
    # NOTE(review): the marker is a single space, which looks like a stripped
    # placeholder for the pager's "next page" label -- confirm and set it to
    # the real link text before relying on pagination.
    if " " in next_url_if:
        return n + 1
    return "end"
def img_url_name(url_name, list):
    """Fetch one page and collect the image URLs found on it.

    Args:
        url_name: URL of the page to download.
        list: Accumulator that the image URLs are added to in place. The
            name shadows the builtin but is kept so existing callers keep
            working.

    Returns:
        A ``(next_if_else, list)`` tuple: the text of the pager links found
        on the page, and the same accumulator object that was passed in.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            the timeout.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url_name, timeout=10)
    html = etree.HTML(response.text)

    # "src" attribute of every post image on the page.
    img_path = html.xpath("//div//img[@class='BDE_Image']/@src")
    list.extend(img_path)

    # Text of each link inside the pager element.
    next_if_else = html.xpath("//li[@class='l_pager pager_theme_4 pb_list_pager']/a[@href]/text()")
    return next_if_else, list
def downloader(img_path):
    """Download every image URL and save it in the current directory.

    Files are named after the position of the URL in ``img_path``
    (``0.jpg``, ``1.jpg``, ...), so an existing file with the same name is
    overwritten.

    Args:
        img_path: Iterable of image URLs.

    Raises:
        requests.exceptions.Timeout: If a download does not respond within
            the timeout.
    """
    for index, img_data_url in enumerate(img_path):
        # Without a timeout, requests waits indefinitely on a stalled server.
        req = requests.get(img_data_url, timeout=10)
        with open(str(index) + ".jpg", "wb") as f:
            f.write(req.content)
        # NOTE(review): the source lost its indentation; printing once per
        # image is assumed here -- confirm the intended placement.
        print(" ")
def main():
    """Crawl pages starting at page 1, then download the collected images.

    Pages are fetched until ``next_url`` returns the ``"end"`` sentinel.
    """
    n = 1
    img_urls = []  # renamed from ``list`` to stop shadowing the builtin
    while n != "end":
        url_name = url_names(n)
        next_url_if, img_urls = img_url_name(url_name, img_urls)
        n = next_url(next_url_if, n)
    # NOTE(review): the source lost its indentation; downloading once after
    # all pages are collected is assumed (running it inside the loop would
    # re-download and overwrite earlier files) -- confirm.
    downloader(img_urls)
    print(" !")
# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    main()