作業ノート|クローラーの小さな実例:映画のタイトルからダウンロードリンクを一括取得する
7600 ワード
Data
|-- movie_list.txt(gbk符号化)|-- download.py
Python Program
import requests
from bs4 import BeautifulSoup
from urllib.request import quote
import json
# quote() percent-encodes the GBK-encoded title bytes so they can be appended to the search URL.
if __name__ == "__main__":
    # Batch lookup: for every title listed in movie_list.txt, query the search
    # endpoint, follow the first hit to its detail page, and record the first
    # download link found there as "<title>,<link>" in output_file.txt.
    # Both text files are read/written as GBK.
    base_url = 'http://s.ygdy8.com/plus/s0.php?typeid=1&keyword='
    # Seconds to wait on each HTTP request; without a timeout a single stalled
    # connection would block the whole batch indefinitely.
    request_timeout = 10

    # Context managers guarantee both files are flushed and closed, even if
    # the run is interrupted part-way through.
    with open("output_file.txt", "w", encoding="gbk") as output_file, \
            open("movie_list.txt", "r", encoding="gbk") as f:
        for line in f:
            movie_name = line.strip()
            if not movie_name:
                # Blank lines would otherwise trigger a search with an empty keyword.
                continue
            try:
                # The keyword is sent as percent-encoded GBK bytes.
                gbkmovie = movie_name.encode('gbk')
                url = base_url + quote(gbkmovie)
                res = requests.get(url, timeout=request_timeout)
                res.encoding = 'gbk'
                soup_movie = BeautifulSoup(res.text, 'html.parser')

                # Each search hit is rendered as a <table> inside the
                # "co_content8" container; only the first hit is used.
                urlpart = soup_movie.find(class_="co_content8").find_all('table')
                if urlpart:
                    urlpart = urlpart[0].find('a')['href']
                    urlmovie = 'https://www.ygdy8.com/' + urlpart
                    res1 = requests.get(urlmovie, timeout=request_timeout)
                    res1.encoding = 'gbk'
                    soup_movie1 = BeautifulSoup(res1.text, 'html.parser')
                    # First link of the first table inside div#Zoom > span.
                    urldownload = soup_movie1.find('div', id="Zoom").find('span').find('table').find('a')['href']
                    print(urldownload)
                    output_file.write(f"{movie_name},{urldownload}\n")
                else:
                    # The search page contained no result table for this title.
                    print(' ' + movie_name)
            except Exception as exc:
                # Best-effort batch: a network error or an unexpected page
                # layout (a find() returning None, a missing href) skips this
                # title only. Catching Exception rather than using a bare
                # except keeps Ctrl-C / SystemExit able to stop the run.
                print(f"failed to get download link for {movie_name}: {exc!r}")