CrowdWorks の募集中の案件数を取得 (BeautifulSoup4)


Selenium と BeautifulSoup4 のサンプルとして、CrowdWorks の募集中の案件数を取得してみました。
次のページをスクレイピングします。
システム開発 の仕事・依頼を探す

crowdworks_development.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
#   crowdworks_development.py
#
#                   Sep/02/2018
#
# ------------------------------------------------------------------
import sys
from selenium.webdriver import Firefox
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
#
from bs4 import BeautifulSoup
#
# ------------------------------------------------------------------
def file_write_proc(file_name,str_out):
    """Write ``str_out`` to ``file_name`` as UTF-8 text.

    The file is opened in ``'w'`` mode, so any existing content is
    replaced.

    Args:
        file_name: Path of the file to create or overwrite.
        str_out: Text to write.
    """
    # The context manager closes the handle even if write() raises,
    # which the explicit open()/close() pair did not guarantee.
    with open(file_name, mode='w', encoding='utf-8') as fp_out:
        fp_out.write(str_out)
#
# ------------------------------------------------------------------
def page_ready_wait_proc(driver):
    """Block until an element with class ``result_count`` is present.

    Polls through ``WebDriverWait`` with a 10 second limit; the
    exception raised by ``WebDriverWait.until`` on timeout is not caught
    here and propagates to the caller.

    Args:
        driver: The WebDriver instance whose current page is inspected.
    """
    locator = (By.CLASS_NAME, 'result_count')
    element_present = EC.presence_of_element_located(locator)
    waiter = WebDriverWait(driver, 10)
    waiter.until(element_present)
# ------------------------------------------------------------------
sys.stderr.write("*** 開始 ***\n")
url_target = "https://crowdworks.jp/public/jobs/group/development/u/professionals?order=new"
file_html = "tmp001.html"
#
# Run Firefox without opening a window.
options = Options()
options.add_argument('-headless')
# `options=` is the supported keyword; `firefox_options=` is deprecated.
driver = Firefox(executable_path='/usr/bin/geckodriver', options=options)
try:
    driver.get(url_target)
    #
    # Click the two filter controls in the same order as before.
    # find_element(By.ID, ...) replaces the deprecated find_element_by_id.
    for idx in ("filter_hide_expired", "filter_hide_budget_pending"):
        driver.find_element(By.ID, idx).click()
    #
    # NOTE(review): `result_count` may already be present before the
    # clicks above, so this wait might not guarantee that the filtered
    # count has been rendered -- confirm against the live page.
    page_ready_wait_proc(driver)
    html = driver.page_source
finally:
    # Always shut the browser down, even when a step above raised, so no
    # headless Firefox process is left behind.
    driver.quit()
#
# Keep a copy of the fetched page on disk.
file_write_proc(file_html,html)
#
soup = BeautifulSoup(html, "html.parser")
ccx = soup.find(class_="result_count")
ccy = ccx.find("span") if ccx is not None else None
if ccy is None:
    # Fail with a clear message instead of an AttributeError on None.
    sys.exit("*** error: 'result_count' span not found in " + file_html + " ***")
count = ccy.get_text()
sys.stderr.write("count = " + count + "\n")
#
sys.stderr.write("*** 終了 ***\n")
# ------------------------------------------------------------------

実行結果

$ ./crowdworks_development.py
*** 開始 ***
count = 182
*** 終了 ***