Headless で リストで与えられたページを取り込む (python3)
リストで示された URL を取り込む方法です。
fetch_html_list.py
#! /usr/bin/python
# ------------------------------------------------------------------
#
# fetch_html_list.py
#
# Aug/24/2018
# ------------------------------------------------------------------
import sys
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as expected
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.firefox.options import Options
# ------------------------------------------------------------------
def file_write_proc(file_name,str_out):
    """Write text to a file as UTF-8, replacing any existing content.

    Args:
        file_name: Path of the file to create or overwrite.
        str_out: Text to write to the file.
    """
    # The context manager closes the handle even if write() raises.
    with open(file_name,mode='w',encoding='utf-8') as fp_out:
        fp_out.write(str_out)
#
# ------------------------------------------------------------------
def fetch_single_proc(url_target,file_html,file_png="out.png"):
    """Load one URL in the shared browser and save its HTML source to a file.

    Relies on the module-level ``driver`` (a Selenium WebDriver created by
    the script body) and on ``file_write_proc`` from this file.

    Args:
        url_target: URL to load.
        file_html: Path of the file that receives the page source.
        file_png: Path of the screenshot taken after loading. With the
            default, every call overwrites the same "out.png"; pass a
            per-URL path to keep one screenshot per page.
    """
    sys.stderr.write("url_target = " + url_target + "\n")
    #
    driver.get(url_target)
    driver.save_screenshot(file_png)
    # page_source is read after get() returns; the visible code does no
    # explicit waiting for dynamically loaded content.
    html = driver.page_source
    file_write_proc(file_html,html)
#
# ------------------------------------------------------------------
# Script body: read a list file of "URL output-file" pairs and fetch each
# page with headless Firefox.
sys.stderr.write("*** 開始 ***\n")
if len(sys.argv) < 2:
    # Give a usage hint instead of failing with a bare IndexError.
    sys.exit("Usage: " + sys.argv[0] + " list_url.txt")
file_list = sys.argv[1]
with open(file_list,encoding='utf-8') as fp_in:
    lines = fp_in.readlines()
#
options = Options()
options.add_argument('-headless')
# `options=` replaces `firefox_options=`, which is deprecated in Selenium 3
# and no longer accepted by Selenium 4.
# NOTE(review): `executable_path` is itself deprecated in Selenium 4 in favor
# of a Service object -- confirm against the installed Selenium version.
driver = Firefox(executable_path='/usr/bin/geckodriver', options=options)
try:
    ttx = 100
    # Not used by the code visible here; kept in case other code relies on it.
    wait = WebDriverWait(driver, timeout=ttx)
    #
    for line in lines:
        print(line)
        # split() already discards the trailing newline; slicing with [:-1]
        # would cut a real character from a final line that has no newline.
        cols = line.split()
        print(len(cols))
        if len(cols) < 2:
            # Blank lines are skipped silently; other short lines are reported.
            if cols:
                sys.stderr.write("skipped (expected: URL file): " + line.strip() + "\n")
            continue
        fetch_single_proc(cols[0],cols[1])
finally:
    # Always shut the browser down so no headless Firefox process is left
    # running after an error.
    driver.quit()
#
sys.stderr.write("*** 終了 ***\n")
# ------------------------------------------------------------------
使い方
./fetch_html_list.py list_url.txt
list_url.txt
https://example.com/aaa/ a001.html
https://example.com/bbb/ a002.html
https://example.com/ccc/ a003.html
https://example.com/ddd/ a004.html
Author And Source
この問題について(Headless で リストで与えられたページを取り込む (python3))、より多くの情報はこちらで見つかります: https://qiita.com/ekzemplaro/items/6b01988e76f537b7a2c5 著者帰属: 元の著者の情報は、元の URL に含まれています。著作権は原作者に属します。
Content is automatically searched and collected through network algorithms. If there is a violation, please contact us. We will adjust (correct author information or delete content) as soon as possible.