selenium+python爬虫類実装-兼職猫アルバイト情報を取得

3753 ワード

学習用にのみ使用してください（掲載に問題があれば削除します）
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import time
URL = "https://guangzhou.jianzhimao.com/dbx_zbx_5/"
class main(object):
    """Selenium scraper for part-time job ads on jianzhimao.com.

    Drives a visible Chrome browser: walks the pagination links on the
    listing page, clicks every job entry (each opens a detail tab),
    scrapes the detail fields, closes the tab, and accumulates the
    records as dicts in ``self.results``.
    """

    def __init__(self):
        # Launch Chrome and open the listing page.
        self.driver = webdriver.Chrome()
        # Fixed window size so element positions/visibility are stable.
        self.driver.set_window_size(1920, 1080)
        self.driver.get(URL)
        # Implicit wait: every find_element* call polls up to 5 s for the
        # element to appear before raising.
        self.driver.implicitly_wait(5)
        # Scraped job records accumulate here (list of dicts).
        self.results = []

    def run(self):
        """Scrape every job on every pagination page.

        Returns:
            list[dict]: one dict per scraped job (also kept on
            ``self.results``).
        """
        # Count the pagination links to learn how many pages exist.
        page_links = self.driver.find_elements_by_xpath(
            "/html/body/section[1]/article/div[2]/div[1]/div/ul/li/a")
        page_count = len(page_links)
        print(page_count)
        # A single link means only one page; bump the bound so the
        # range() below still runs exactly once.
        if page_count == 1:
            page_count = 2
        for page in range(2, page_count):
            # Re-query the job list on every page: the DOM is rebuilt
            # after pagination, so earlier references would be stale.
            job_links = self.driver.find_elements_by_xpath(
                "/html/body/section[1]/article/div[2]/div[1]/ul/li/a")
            job_count = len(job_links)
            print(job_count)
            # Job entries are addressed 1-based in the XPath index.
            for i in range(1, job_count + 1):
                print(i)
                self._scrape_job(i)
            # Scroll the pager into view before clicking the next page link.
            pager = self.driver.find_element_by_xpath(
                '/html/body/section[1]/article/div[2]/div[1]/div/ul/li[1]')
            self.driver.execute_script(
                "arguments[0].scrollIntoView();", pager)
            if page_count != 2:
                self.driver.find_element_by_xpath(
                    '/html/body/section[1]/article/div[2]/div[1]/div/ul/li['
                    + str(page) + ']').click()
        return self.results

    def _scrape_job(self, i):
        """Open the i-th (1-based) job entry, scrape its detail tab.

        Appends one record dict to ``self.results`` and switches the
        driver back to the listing tab before returning.
        """
        item_xpath = (
            "/html/body/section[1]/article/div[2]/div[1]/ul/li["
            + str(i) + "]/a")
        if i >= 9:
            # Entries far down the list may be off-screen; scroll them
            # into view so the click is not intercepted.
            # BUG FIX: the original passed the literal text
            # '...li["+str(i)+"]/a' (quoting mismatch), so the index was
            # never interpolated and the wrong element was targeted.
            target = self.driver.find_element_by_xpath(item_xpath)
            self.driver.execute_script(
                "arguments[0].scrollIntoView();", target)
        time.sleep(1)
        self.driver.find_element_by_xpath(item_xpath).click()
        time.sleep(2)
        # The job detail opens in a new tab; switch to it.
        self.driver.switch_to_window(self.driver.window_handles[1])
        # Common prefix of every detail-page XPath below.
        detail = "/html/body/section/article/div/div[1]/div[2]"
        # Field meanings are inferred from the original variable names
        # (use_num, way, wage, ...) — TODO confirm against the live page.
        record = {
            "title": self.driver.find_element_by_xpath(
                detail + "/div[1]/h1").text,
            "company_name": self.driver.find_element_by_xpath(
                detail + "/div[1]/p").text,
        }
        time.sleep(0.5)
        record.update({
            "use_num": self.driver.find_element_by_xpath(
                detail + "/div[2]/ul[1]/li[1]/span[2]").text,
            "description": self.driver.find_element_by_xpath(
                detail + "/div[2]/div[1]/div[2]").text,
            "way": self.driver.find_element_by_xpath(
                detail + "/div[2]/ul[3]/li[1]/span[2]").text,
            "wage": self.driver.find_element_by_xpath(
                detail + "/div[2]/ul[3]/li[2]/span[2]").text,
            "time": self.driver.find_element_by_xpath(
                detail + "/div[2]/ul[2]/li[2]/span[2]").text,
            "place": self.driver.find_element_by_xpath(
                detail + "/div[2]/ul[1]/li[2]/span[2]").text,
        })
        self.results.append(record)
        time.sleep(1)
        # Close the detail tab and return to the listing tab.
        self.driver.close()
        time.sleep(2)
        self.driver.switch_to_window(self.driver.window_handles[0])
if __name__ == '__main__':
    # Script entry point: build the scraper and start crawling.
    main().run()