Pythonで豆瓣(Douban)の映画タイトルと評価スコアを取得・分析する

1685 ワード

Python(Selenium)を用いて豆瓣(Douban)のサイトからデータを抽出し、映画タイトルと評価スコアを取得して分析します。以下のコードはそのまま実行できます。
import datetime
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import WebDriverException

# Launch Chrome and open the Douban movie tag listing so that the driver ends
# up focused on a single window showing that listing.
# NOTE(review): the `executable_path` keyword is deprecated in Selenium 4 and
# removed in later 4.x releases; on those versions a Service object has to be
# passed instead. It is kept here so the script still runs on Selenium 3.
driver = webdriver.Chrome(executable_path="D:\\work\\python\\chromedriver.exe")
driver.maximize_window()
# `switch_to.window` replaces the removed `switch_to_window` method and is
# available in both Selenium 3 and Selenium 4.
driver.switch_to.window(driver.window_handles[0])

# The URL-encoded tags decode to: movie, 2010s, mainland China.
url = 'https://movie.douban.com/tag/#/?sort=U&range=2,10&tags=%E7%94%B5%E5%BD%B1,2010%E5%B9%B4%E4%BB%A3,%E4%B8%AD%E5%9B%BD%E5%A4%A7%E9%99%86'

# Open the listing in a new window, close the initial window, and focus the
# remaining one (the listing), which is then the only handle left.
js = 'window.open("' + url + '")'
driver.execute_script(js)
driver.close()
driver.switch_to.window(driver.window_handles[0])

# Repeatedly scroll to the bottom of the page and click the element with
# class "more" so that further results are appended to the listing.
#
# `max_clicks` caps the number of clicks: set it to a small integer (e.g. 3)
# for a quick trial run, or leave it as None to keep clicking until the
# button can no longer be found or clicked.
max_clicks = None
clicks_done = 0

# Scrolling far past the page height brings the "more" button into view.
js = "var q=document.documentElement.scrollTop=10000000"

while max_clicks is None or clicks_done < max_clicks:
    try:
        driver.execute_script(js)
        # Locator-string form works on both Selenium 3 and Selenium 4
        # (find_element_by_class_name was removed in Selenium 4.3).
        driver.find_element("class name", "more").click()
    except WebDriverException:
        # Any WebDriver failure (button missing, not clickable, ...) ends the
        # loading phase; presumably the button disappears once every result
        # has been loaded. Unlike a bare `except`, this does not swallow
        # KeyboardInterrupt or SystemExit.
        break
    clicks_done += 1
    time.sleep(2)  # give the newly requested results time to render

# Collect the text of every element with class "title" and "rate", and the
# href of every element with class "item", from the fully loaded page.
# The locator-string form of find_elements works on both Selenium 3 and
# Selenium 4 (find_elements_by_class_name was removed in Selenium 4.3).
try:
    name = [k.text for k in driver.find_elements("class name", "title")]
    score = [k.text for k in driver.find_elements("class name", "rate")]
    url = [k.get_attribute('href') for k in driver.find_elements("class name", "item")]
finally:
    # Always shut down the browser and the chromedriver process, even if
    # scraping fails, so they are not left running after the script ends.
    driver.quit()

# Write one row per movie to an Excel file. pandas raises ValueError if the
# three lists do not have the same length.
pd.DataFrame({'name': name, 'score': score, 'url': url}).to_excel('    .xlsx')

学習用のみ!