プロキシのクローリングと検査
3490 ワード
import requests
from lxml import etree
def get_all_porxy(url='http://www.xicidaili.com/nn/1', timeout=10):
    """Scrape a proxy listing page and return its entries as proxy URLs.

    The page is fetched with a desktop-browser User-Agent header. The text of
    the second and third cells of each row of the table with id ``ip_list``
    is read as the address and the port respectively.

    Args:
        url: Listing page to scrape. Defaults to the page the function
            originally had hard-coded.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the script indefinitely.

    Returns:
        A list of strings of the form ``'http://<address>:<port>'``. The list
        is empty when the page contains no matching cells.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)

    tree = etree.HTML(response.text)
    if tree is None:
        # NOTE(review): lxml appears to return None for an empty document;
        # treat that as "no proxies found" rather than failing on .xpath().
        return []

    addresses = tree.xpath('//table[@id="ip_list"]/tr/td[2]/text()')
    ports = tree.xpath('//table[@id="ip_list"]/tr/td[3]/text()')

    # zip() stops at the shorter column, so a missing port cell can no longer
    # raise IndexError the way index-based pairing did.
    return [
        'http://' + address.strip() + ':' + port.strip()
        for address, port in zip(addresses, ports)
    ]
# with open('xicidaili.html','wb') as f:
# f.write(response.content)
def check_all_proxy(porxy_list):
    """Return the proxies from ``porxy_list`` that successfully fetch a test URL.

    Every proxy is used for one GET request to ``http://fanyi.baidu.com/sug``
    with a 3 second timeout. A proxy counts as working when the response
    status code is 200. Each working proxy is also appended, one per line, to
    the file ``ip.txt`` (relative to the current working directory).

    Args:
        porxy_list: Iterable of proxy URL strings, e.g. the list returned by
            ``get_all_porxy``.

    Returns:
        A list of the proxies that answered with status 200, in input order.
    """
    # The probe target never changes, so define it once outside the loop.
    url = 'http://fanyi.baidu.com/sug'
    ip_list = []

    for proxy in porxy_list:
        # Only the 'http' scheme is routed through the proxy; the probe URL
        # itself is plain http.
        porxy_dict = {'http': proxy}
        try:
            response = requests.get(url, proxies=porxy_dict, timeout=3)
            if response.status_code == 200:
                ip_list.append(proxy)
                # Console output is kept exactly as the original produced it.
                print(type(proxy))
                with open('ip.txt', 'a') as f:
                    # One proxy per line. The original literal had been split
                    # across two physical lines, which is a syntax error.
                    f.write(proxy + '\n')
            else:
                print(' ' * 20)
        except Exception as e:
            # Best effort: a dead or misbehaving proxy is reported and
            # skipped so it cannot abort the scan of the remaining ones.
            print(e)

    return ip_list
if __name__ == '__main__':
    # Scrape the listing page, then keep only the proxies that pass the check.
    ip = check_all_proxy(get_all_porxy())