Pythonで省コードを取得
2639 ワード
import requests
import xml.etree.ElementTree as ET
from xml.parsers.expat import ParserCreate
In [2]:
class DefaultSaxHandler(object):
    """Expat callback holder that collects (title, href) pairs from start tags.

    The three methods match the signatures expected by the
    ``StartElementHandler``, ``EndElementHandler`` and
    ``CharacterDataHandler`` hooks of an expat parser.
    """

    def __init__(self, provinces):
        """Keep a reference to the caller-owned list that results are appended to.

        Args:
            provinces: A mutable list; it is extended in place with
                ``(title, href)`` tuples as elements are seen.
        """
        self.provinces = provinces

    def start_element(self, name, attrs):
        """Record the ``title`` and ``href`` attributes of a non-``map`` element.

        Args:
            name: Tag name of the element that was opened.
            attrs: Mapping of attribute name to attribute value.
        """
        # The enclosing <map> element carries no entry of its own.
        if name == 'map':
            return
        title = attrs.get('title')
        href = attrs.get('href')
        # Skip elements lacking either attribute instead of raising KeyError
        # and aborting the whole parse.
        if title is None or href is None:
            return
        self.provinces.append((title, href))

    def end_element(self, name):
        """Ignore closing tags; nothing needs to be finalized."""
        pass

    def char_data(self, text):
        """Ignore text content; only attributes are of interest."""
        pass
In [3]:
def get_province_entry(url):
    """Download a page and return the (title, href) pairs inside its map block.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of ``(title, href)`` tuples, in the order the parser reports
        them, as collected by ``DefaultSaxHandler``.

    Raises:
        ValueError: If no ``<map ...>...</map>`` block is found in the page
            (``UnicodeDecodeError`` from the gb2312 decode is also a
            ``ValueError`` subclass).
        xml.parsers.expat.ExpatError: If the extracted block is not
            well-formed XML.
    """
    # A timeout keeps an unresponsive server from hanging the call forever.
    content = requests.get(url, timeout=10).content.decode('gb2312')

    # NOTE(review): the original marker literals were empty in this copy of
    # the source and `end` was never assigned (NameError). '<map' / '</map>'
    # are reconstructed from the handler's special-casing of the 'map'
    # element -- confirm against the live page markup.
    start_tag = '<map'
    end_tag = '</map>'
    start = content.find(start_tag)
    end = content.find(end_tag, start) if start != -1 else -1
    if start == -1 or end == -1:
        raise ValueError('no <map>...</map> block found in page: %s' % url)

    # Keep only the map block so the parser never sees the surrounding HTML.
    content = content[start:end + len(end_tag)].strip()

    provinces = []
    # The handler appends into `provinces` as start tags are reported.
    handler = DefaultSaxHandler(provinces)

    parser = ParserCreate()
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.char_data

    # isfinal=True: this is the only chunk, so truncated input is reported.
    parser.Parse(content, True)

    return provinces
In [5]:
# Fetch the entry list from the ip138 postal-code page and display it.
provinces = get_province_entry(url='http://www.ip138.com/post')
print(provinces)
[(' ', '/83/'), (' ', '/85/'), (' ', '/81/'), (' ', '/73/'), (' ', '/61/'), (' ', '/65/'), (' ', '/75/'), (' ', '/01/'), (' ', '/15/'), (' ', '/13/'), (' ', '/11/'), (' ', '/50/'), (' ', '/10/'), (' ', '/30/'), (' ', '/71/'), (' ', '/03/'), (' ', '/25/'), (' ', '/45/'), (' ', '/40/'), (' ', '/43/'), (' ', '/23/'), (' ', '/21/'), (' ', '/20/'), (' ', '/55/'), (' ', '/53/'), (' ', '/41/'), (' ', '/33/'), (' ', '/31/'), (' ', '/35/'), (' ', '/51/'), (' ', '/57/'), (' ', '/taiwang/'), (' ', '/aomen/'), (' ', '/xianggang/')]
In [ ]: