Python+Selenium+PhantomJS/ChromeでHTTPステータスコードとResponse Headersを取得するには?

6246 ワード

SeleniumはHTTPステータスコードを取得するAPIを提供しておらず、今後もこの機能を提供する予定はないようです。ここではPython+Selenium+PhantomJSによる実装例を紹介します。参考にしてください。
# Python 2.7
from selenium import webdriver  
import json
from collections import OrderedDict

def getResponseHeaders(browser):
    """Return the first HAR entry's response headers, ordered by name.

    Takes the first record of the browser's 'har' log, decodes the JSON
    held in its 'message' field, and collects the name/value pairs of the
    first entry's response headers.

    Args:
        browser: object whose ``get_log('har')`` returns a sequence of
            records carrying a JSON string under 'message'.

    Returns:
        An OrderedDict mapping header name to header value, with keys in
        ascending name order.
    """
    first_record = browser.get_log('har')[0]
    archive = json.loads(first_record['message'])
    response = archive['log']['entries'][0]['response']
    pairs = []
    for item in response["headers"]:
        pairs.append((item["name"], item["value"]))
    # Compare on the name only, so headers sharing a name keep log order.
    pairs.sort(key=lambda pair: pair[0])
    return OrderedDict(pairs)

def getResponseStatus(browser):
    """Return the status of the first HAR entry's response.

    Takes the first record of the browser's 'har' log, decodes the JSON
    held in its 'message' field, and reads the first entry's response.

    Args:
        browser: object whose ``get_log('har')`` returns a sequence of
            records carrying a JSON string under 'message'.

    Returns:
        A ``(status, statusText)`` tuple; the status text is passed
        through ``str``.
    """
    archive = json.loads(browser.get_log('har')[0]['message'])
    response = archive['log']['entries'][0]['response']
    code = response["status"]
    reason = str(response["statusText"])
    return (code, reason)

browser = webdriver.PhantomJS()

# Simple Test
# Every print(...) call below takes exactly one argument, so the output is
# the same under Python 2.7 and Python 3.
try:
    print(">>>>> 404")
    browser.get("http://www.questionfish.cn/notfound.html")
    print("status:  %s" % (getResponseStatus(browser),))
    headers = getResponseHeaders(browser)
    for key in headers:
        print("%s => %s" % (key, headers[key]))
    print("")
finally:
    # Shut the PhantomJS process down even when the page load or the log
    # parsing above raises; otherwise the driver process is left running.
    browser.quit()
Python+Selenium+ChromeDriverの場合も、もちろん解決方法があります。
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import json

# Work on a copy: DesiredCapabilities.CHROME is a class-level dict shared by
# the whole process, so adding keys to it directly would leak this logging
# setting into every later user of the default Chrome capabilities.
d = DesiredCapabilities.CHROME.copy()
# Enable the 'performance' log, which the helpers in this snippet read via
# browser.get_log('performance').
# NOTE(review): newer ChromeDriver releases running in W3C mode reportedly
# expect the key 'goog:loggingPrefs' instead - confirm against the driver
# version in use.
d['loggingPrefs'] = { 'performance':'ALL' }

def getHttpStatus(browser):
    """Return the HTTP status of the response for the browser's current URL.

    Scans the browser's 'performance' log for the first entry whose decoded
    message carries a response with the same URL as ``browser.current_url``.

    Args:
        browser: object exposing ``get_log('performance')`` (records with a
            JSON string under 'message') and a ``current_url`` attribute.

    Returns:
        A ``(status, statusText)`` tuple taken from the first matching
        entry, or None when no entry matches.
    """
    # Read the URL once; nothing inside the loop navigates the browser.
    current_url = browser.current_url
    for responseReceived in browser.get_log('performance'):
        try:
            response = json.loads(responseReceived[u'message'])[u'message'][u'params'][u'response']
            if response[u'url'] == current_url:
                return (response[u'status'], response[u'statusText'])
        except (KeyError, TypeError, ValueError):
            # Entries without a usable response payload (missing keys,
            # unexpected shapes, undecodable JSON) are skipped; any other
            # error is a real problem and is allowed to propagate.
            continue
    return None

def getHttpResponseHeader(browser):
    """Return the response headers for the browser's current URL.

    Scans the browser's 'performance' log for the first entry whose decoded
    message carries a response with the same URL as ``browser.current_url``.

    Args:
        browser: object exposing ``get_log('performance')`` (records with a
            JSON string under 'message') and a ``current_url`` attribute.

    Returns:
        The 'headers' value of the first matching response, or None when
        no entry matches.
    """
    # Read the URL once; nothing inside the loop navigates the browser.
    current_url = browser.current_url
    for responseReceived in browser.get_log('performance'):
        try:
            response = json.loads(responseReceived[u'message'])[u'message'][u'params'][u'response']
            if response[u'url'] == current_url:
                return response[u'headers']
        except (KeyError, TypeError, ValueError):
            # Entries without a usable response payload (missing keys,
            # unexpected shapes, undecodable JSON) are skipped; any other
            # error is a real problem and is allowed to propagate.
            continue
    return None

browser = webdriver.Chrome(desired_capabilities=d)
url = 'http://www.questionfish.cn/notfound.html'
try:
    browser.get(url)
    # A single-argument print(...) prints the same text under Python 2.7
    # and Python 3.
    print(getHttpStatus(browser))
    # NOTE(review): the driver's log buffer appears to be emptied by each
    # get_log call, so a second helper call here would no longer see the
    # entries consumed by getHttpStatus above - confirm before enabling.
    # print(getHttpResponseHeader(browser))
finally:
    # Close Chrome even when the page load or the log lookup raises.
    browser.quit()
REF: How to get status code by using selenium.py (python code) - Stack Overflow