selenium+pythonクローラー実装 - 兼職猫アルバイト情報を取得
3753 ワード
学習用のみ。問題があれば削除します。
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time
URL = "https://guangzhou.jianzhimao.com/dbx_zbx_5/"
class main(object):
    """Scrape part-time job listings from jianzhimao.com with Selenium.

    Opens a visible Chrome session, walks every pagination page of the
    listing at URL, opens each job posting in a new tab, extracts its
    details, prints them, and returns to the listing page.
    """

    def __init__(self):
        # Launch Chrome and size the window so all listing elements render.
        self.driver = webdriver.Chrome()
        self.driver.set_window_size(1920, 1080)
        self.driver.get(URL)
        # Implicitly wait up to 5 s for elements before raising NoSuchElement.
        self.driver.implicitly_wait(5)

    def run(self):
        """Iterate the paginated listing and scrape every job posting."""
        # Count the pagination links at the bottom of the listing.
        page_links = self.driver.find_elements_by_xpath(
            "/html/body/section[1]/article/div[2]/div[1]/div/ul/li/a")
        page_count = len(page_links)
        print(page_count)
        # With a single page there is no pagination bar to click; pretend
        # there are two pages so the outer loop still runs exactly once.
        if page_count == 1:
            page_count = 2
        for page_index in range(2, page_count):
            # Collect the job links on the current listing page.
            job_links = self.driver.find_elements_by_xpath(
                "/html/body/section[1]/article/div[2]/div[1]/ul/li/a")
            job_count = len(job_links)
            print(job_count)
            for row in range(1, job_count + 1):
                print(row)
                if row >= 9:
                    # Rows below the fold must be scrolled into view before
                    # they can be clicked.
                    # BUG FIX: the original single-quoted the whole XPath, so
                    # '"+str(i)+"' was a literal substring (a truthy string
                    # predicate matching the FIRST <li>), never concatenation.
                    target = self.driver.find_element_by_xpath(
                        '/html/body/section[1]/article/div[2]/div[1]/ul/li['
                        + str(row) + ']/a')
                    self.driver.execute_script(
                        "arguments[0].scrollIntoView();", target)
                    time.sleep(1)
                self.driver.find_element_by_xpath(
                    "/html/body/section[1]/article/div[2]/div[1]/ul/li["
                    + str(row) + "]/a").click()
                time.sleep(2)
                # The posting opens in a new tab; switch to it.
                # (switch_to.window replaces the long-deprecated
                # switch_to_window, removed in Selenium 4.)
                self.driver.switch_to.window(self.driver.window_handles[1])
                # Job title.
                title = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[1]/h1').text
                # Employer name.
                company_name = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[1]/p').text
                time.sleep(0.5)
                # Number of people wanted.
                use_num = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[2]/ul[1]/li[1]/span[2]').text
                # Free-text job description.
                description = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[2]/div[1]/div[2]').text
                # Payment method.
                way = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[2]/ul[3]/li[1]/span[2]').text
                # Wage.
                wage = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[2]/ul[3]/li[2]/span[2]').text
                # Working time.
                timee = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[2]/ul[2]/li[2]/span[2]').text
                # Work location.
                place = self.driver.find_element_by_xpath(
                    '/html/body/section/article/div/div[1]/div[2]/div[2]/ul[1]/li[2]/span[2]').text
                # BUG FIX: the original scraped all eight fields and silently
                # discarded them; emit them so the scrape produces output.
                print(title, company_name, use_num, description,
                      way, wage, timee, place)
                time.sleep(1)
                # Close the posting tab and return to the listing tab.
                self.driver.close()
                time.sleep(2)
                self.driver.switch_to.window(self.driver.window_handles[0])
                # Scroll the pagination bar into view before clicking it.
                pager = self.driver.find_element_by_xpath(
                    '/html/body/section[1]/article/div[2]/div[1]/div/ul/li[1]')
                self.driver.execute_script(
                    "arguments[0].scrollIntoView();", pager)
            # Advance to the next listing page (skip when there is only one).
            if page_count != 2:
                self.driver.find_element_by_xpath(
                    '/html/body/section[1]/article/div[2]/div[1]/div/ul/li['
                    + str(page_index) + ']').click()
if __name__ == '__main__':
    # Entry point: build the scraper (opens Chrome) and walk the listings.
    main().run()