猫眼映画(Maoyan)のTOP 100映画を取得してローカルCSVファイルに保存
3667 ワード
import requests
from bs4 import BeautifulSoup
import time
import csv
def getcontent(url, csv_path='E:/Python/Spider/MaoyanTOP100.csv'):
    """Scrape one board page and rewrite the CSV with every row gathered so far.

    Downloads ``url``, collects the text of every element whose CSS class is
    ``name``, ``star``, ``releasetime`` or ``score``, appends one
    ``[name, star, releasetime, score]`` row per entry to the module-level
    ``DATA`` list, and then writes the whole of ``DATA`` to ``csv_path``.

    Args:
        url: Address of the page to download.
        csv_path: Destination CSV file. Defaults to the path this script
            has always written to.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
        NameError: If the module-level ``DATA`` list has not been created
            before the call.
    """
    # Browser-like User-Agent; presumably the site rejects the default
    # python-requests agent -- TODO confirm.
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 OPR/49.0.2725.47'}

    # requests has no default timeout, so without one a stalled connection
    # would block the script forever.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    r.raise_for_status()

    soup = BeautifulSoup(r.text, 'lxml')
    div_name = soup.find_all(class_='name')
    div_star = soup.find_all(class_='star')
    div_time = soup.find_all(class_='releasetime')
    div_score = soup.find_all(class_='score')

    # zip() pairs the four result lists by position and stops at the shortest
    # one, so a page where one class matched fewer elements cannot raise
    # IndexError the way indexing by len(div_name) could.
    # DATA is only mutated (never rebound), so no `global` statement is needed.
    for name, star, released, score in zip(div_name, div_star, div_time, div_score):
        DATA.append([
            name.get_text(),
            star.get_text(),
            released.get_text(),
            score.get_text(),
        ])

    # Mode 'w' truncates the file, so each call rewrites it with all rows
    # accumulated in DATA up to now. newline='' lets the csv module control
    # line endings itself. The gb18030 encoding is presumably chosen so the
    # file opens cleanly in Excel on a Chinese-locale Windows -- TODO confirm.
    with open(csv_path, 'w', newline='', encoding='gb18030') as f:
        csv.writer(f).writerows(DATA)
# Module-level accumulator: getcontent() appends every scraped row to it.
DATA = []

# Walk the board page by page. The `offset` query parameter takes the values
# 0, 10, ..., 90, i.e. ten requests in total.
BOARD_URL = "http://maoyan.com/board/4?offset="
for offset in range(0, 100, 10):
    # Each call scrapes one page and rewrites the CSV with everything so far.
    getcontent(BOARD_URL + str(offset))