"""Scrape the Maoyan Top-100 movie board with requests + BeautifulSoup.

Fetches as many pages of movie information from maoyan.com as the user
requests, appending each record as a JSON line and downloading each
movie's poster image into the local ``pcresult`` directory.
"""
import requests
from requests.exceptions import RequestException
from bs4 import BeautifulSoup
import json,os,sys
import lxml
from multiprocessing import Pool
# Absolute path of this script, e.g. D:\tools\JetBrains\wk\PC\pcfile\PcForMaoYanTop.py
filename = os.path.abspath(__file__)#D:\tools\JetBrains\wk\PC\pcfile\PcForMaoYanTop.py
# Project root two directory levels up, e.g. D:\tools\JetBrains\wk\PC;
# output files are written under <result_path>\pcresult.
result_path = os.path.dirname(os.path.dirname(filename))#D:\tools\JetBrains\wk\PC
def get_one_page(url):
    """Download one page and return its HTML text, or None on failure.

    Parameters
    ----------
    url : str
        The page URL to fetch.

    Returns
    -------
    str | None
        The response body when the server answers HTTP 200, else None.
    """
    # Browser-like User-Agent: Maoyan rejects the default requests UA.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    try:
        # Timeout keeps a stalled connection from hanging a pool worker.
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        # Best-effort semantics preserved: report the failure (the original
        # printed a blank string) and signal it to the caller with None.
        print('request failed: ' + url)
        return None
def parse_html(html):
    """Yield one dict per movie entry (``<dd>`` element) on a board page.

    Parameters
    ----------
    html : str
        Raw HTML of a Maoyan board page.

    Yields
    ------
    dict
        Keys: 'index', 'title', 'img', 'star', 'time', 'score'.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Debug dump of the full prettified page removed — it flooded stdout
    # for every page fetched by every pool worker.
    for item in soup.find_all('dd'):
        yield {
            'index': item.i.string,
            'title': item.find(class_='name').string,
            # Posters are lazy-loaded; the real URL lives in data-src.
            'img': item.find(class_='board-img')['data-src'],
            # [3:] / [5:] slice off label prefixes baked into the text
            # (presumably the cast / release-date labels — verify against
            # the live markup if Maoyan changes its template).
            'star': item.find(class_='star').string.strip()[3:],
            'time': item.find(class_='releasetime').string.strip()[5:],
            # The score is split into integer and fractional spans.
            'score': item.find(class_='integer').string.strip()
                     + item.find(class_='fraction').string.strip(),
        }
def write_to_file(content):
    """Append one movie record to ``pcresult/maoYan.txt`` as a JSON line.

    Parameters
    ----------
    content : dict
        A single movie dict produced by ``parse_html``.
    """
    # os.path.join replaces the fragile hand-built backslash path; the
    # original also had its '\n' literal split across two source lines,
    # which is a syntax error.
    path = os.path.join(result_path, 'pcresult', 'maoYan.txt')
    # ensure_ascii=False keeps CJK titles human-readable in the file.
    with open(path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
    # No explicit close(): the with-statement already closes the file.
def write_img(content):
    """Download one movie's poster into the ``pcresult`` directory.

    Parameters
    ----------
    content : dict
        A single movie dict from ``parse_html``; uses 'title' and 'img'.
    """
    # The original looped range(len(content)) and opened the file in 'ab'
    # mode, appending identical bytes once per dict key and corrupting
    # the JPEG. Download once, write once, overwrite stale copies.
    path = os.path.join(result_path, 'pcresult', str(content['title']) + '.jpg')
    print(content['img'])
    try:
        data = requests.get(content['img'], timeout=10).content
    except RequestException:
        # Best-effort: a failed poster download should not kill the worker.
        return
    with open(path, 'wb') as f:
        f.write(data)
def main(offset):
    """Scrape one board page at the given offset and persist its movies.

    Parameters
    ----------
    offset : int
        Pagination offset for the Maoyan Top-100 board (multiples of 10).
    """
    url = 'https://maoyan.com/board/4?offset=' + str(offset)
    html = get_one_page(url)
    if html is None:
        # get_one_page already reported the failure; passing None on to
        # BeautifulSoup would raise, so skip this page instead.
        return
    for movie in parse_html(html):
        write_to_file(movie)
        write_img(movie)
if __name__ == '__main__':
    # Each board page holds 10 movies; the user picks how many pages.
    # The original prompt was a single blank space, giving no hint at all.
    a = int(input('How many pages to scrape (10 movies per page)? '))
    pool = Pool()
    # One worker call per page offset: 0, 10, 20, ...
    pool.map(main, [i * 10 for i in range(a)])
    # Shut the pool down cleanly; the original leaked the worker processes.
    pool.close()
    pool.join()