Python 3で住所情報を自動認識する
1. 文字列から情報を抽出する(氏名、携帯電話番号、住所)
2. 住所の分かち書き(単語分割)
分かち書きライブラリ:jieba(「結巴」中国語分かち書き:最良のPython中国語単語分割コンポーネントを目指すライブラリ)
インストール方法と使い方の参考(GitHub):https://github.com/fxsjy/jieba
Pythonコードは以下の通りです.
query()は、筆者がラップしたデータベース問い合わせ用のメソッドです(各自で実装してください)
呼び出し例:
返される結果:
import re
def extract_info(list_value):
    """Pick the recipient name, mobile number and address out of split fields.

    Args:
        list_value: Sequence of string fragments, e.g. a delivery line that
            was split on whitespace/commas. Empty fragments are ignored.

    Returns:
        A ``(name, mobile, address)`` tuple of strings.

        * ``mobile`` is the last fragment that is all-numeric and exactly
          11 characters long, or ``""`` when there is none.
        * ``address`` is the longest of the remaining fragments.
        * ``name`` is the shortest of the other remaining fragments. When
          only one non-mobile fragment exists it is returned as both name
          and address; when none exists both are ``""``.
    """
    mobile = ""
    candidates = []
    for fragment in list_value:
        if fragment.isnumeric() and len(fragment) == 11:
            mobile = fragment
        elif fragment:
            # Skip empty fragments (a split on separators leaves them at the
            # ends) and never let the mobile number compete as name/address.
            candidates.append(fragment)

    if not candidates:
        return "", mobile, ""

    # The address is taken to be the longest free-text fragment; selecting by
    # position lets the name be chosen from the *other* fragments even when
    # two fragments have the same length.
    address_pos = max(range(len(candidates)), key=lambda pos: len(candidates[pos]))
    address = candidates[address_pos]
    others = candidates[:address_pos] + candidates[address_pos + 1:]
    name = min(others, key=len) if others else address
    return name, mobile, address
# Demo: split a raw delivery line into fragments and extract the fields.
# NOTE(review): the sample string appears to have lost its non-ASCII
# characters in this copy of the file; it is kept exactly as found.
address = " 18119990001 , 4 401 "
# Raw string: "\s" is not a valid escape in a normal string literal.
# \uff0c is the full-width comma; the original class listed "," twice, which
# suggests the second entry was a full-width comma that got normalised away.
# Empty fragments produced by leading/trailing separators are dropped.
delivery_address = [part for part in re.split(r'[\s,\uff0c]+', address) if part]
print(extract_info(delivery_address))
2. 住所の分かち書き(単語分割)
分かち書きライブラリ:jieba(「結巴」中国語分かち書き:最良のPython中国語単語分割コンポーネントを目指すライブラリ)
インストール方法と使い方の参考(GitHub):https://github.com/fxsjy/jieba
Pythonコードは以下の通りです.
query()は、筆者がラップしたデータベース問い合わせ用のメソッドです(各自で実装してください)
import jieba
def _first_row(result):
    """Normalise a query() result to one row; return {} when there is none."""
    if not result:
        return {}
    # Rows are used through ``.get`` throughout this file, so anything that
    # already offers ``.get`` is treated as a single row.
    if hasattr(result, "get"):
        return result
    return next(iter(result), {})


def _find_area(areas, tokens):
    """Return (area, token) for the first area whose name contains a token."""
    for area in areas:
        area_name = area.get("name") or ""
        for token in tokens:
            if token in area_name:
                return area, token
    return {}, None


def address_match(delivery_addresses):
    """Resolve a raw delivery line to province / city / county records.

    The line is split on whitespace and commas, ``extract_info`` picks the
    name, mobile number and address out of the fragments, the address is
    segmented with jieba, and the segments are matched against area names
    fetched through ``query()``.

    NOTE(review): ``query()`` is defined outside this block. It is assumed to
    return an iterable of dict-like rows with ``id``, ``pid`` and ``name``
    keys, because that is how its results are used here - confirm.

    Args:
        delivery_addresses: Raw delivery string, e.g. "name mobile address".

    Returns:
        ``{}`` when the input is empty or no county can be matched; otherwise
        a dict with the keys ``province``, ``province_id``, ``city``,
        ``city_id``, ``county``, ``county_id``, ``address`` (the text that
        follows the matched county token), ``mobile`` and ``name``.
    """
    if not delivery_addresses:
        return {}

    # Raw string: "\s" is not a valid escape in a normal string literal.
    # \uff0c is the full-width comma; the original class listed "," twice,
    # which suggests the second entry was a full-width comma that got lost.
    fragments = [part for part in re.split(r'[\s,\uff0c]+', delivery_addresses) if part]

    name, mobile, address = str(), str(), str()
    if fragments:
        try:
            name, mobile, address = extract_info(fragments)
        except (IndexError, ValueError):
            # Best effort: fall through with empty fields, which yields {}.
            pass

    # Drop whitespace-only segments. An empty segment would be "contained" in
    # every area name and match everything.
    # NOTE(review): the original filter compared ``key.encode()`` (bytes)
    # with a list of four str entries, so it never removed anything, and it
    # mutated the list while iterating over it. All four entries read as a
    # single space in this copy; if they were real stop words, restore them
    # here.
    seg_list = [seg for seg in jieba.lcut(address, cut_all=False) if seg.strip()]
    if not seg_list:
        return {}

    # province
    provinces = query('SELECT * FROM area WHERE display=1 AND level=1 AND pid=0;')
    province_dict, _ = _find_area(provinces, seg_list)

    # cities (restricted to the matched province when there is one)
    if not province_dict:
        cities = query('SELECT * FROM area WHERE display=1 AND level=2;')
    else:
        cities = query(f'SELECT * FROM area WHERE display=1 AND level=2 AND pid={province_dict.get("id")};')
    city_dict, _ = _find_area(cities, seg_list)

    # counties (restricted to the matched city when there is one)
    if not city_dict:
        counties = query('SELECT * FROM area WHERE display=1 AND level=3;')
    else:
        counties = query(f'SELECT * FROM area WHERE display=1 AND level=3 AND pid={city_dict.get("id")};')
    dis_dict, county_token = _find_area(counties, seg_list)
    if not dis_dict:
        return {}

    # Fill in parents that were not matched by name by walking up via pid.
    if not city_dict:
        city_dict = _first_row(query(f'SELECT * FROM area WHERE id={dis_dict.get("pid")};'))
    if not province_dict and city_dict:
        province_dict = _first_row(query(f'SELECT * FROM area WHERE id={city_dict.get("pid")};'))

    # The detail address is whatever follows the token that matched the
    # county. The token is a jieba segment of ``address``, so it is always
    # found by index().
    detail_start = address.index(county_token) + len(county_token)

    return {
        'province': province_dict.get('name'),
        'province_id': province_dict.get('id'),
        'city': city_dict.get('name'),
        'city_id': city_dict.get('id'),
        'county': dis_dict.get('name'),
        'county_id': dis_dict.get('id'),
        'address': address[detail_start:],
        'mobile': mobile,
        'name': name,
    }
呼び出し例:
address=" 18119990001 , 4 401 "
# address_match is a regular (non-async) function that returns a dict, so it
# is called directly; awaiting its result would fail.
address_dict = address_match(address)
返される結果:
{
"province": " ",
"province_id": 1,
"city": " ",
"city_id": 2,
"county": " ",
"county_id": 3,
"address": " 4 401 ",
"mobile": "18119990001",
"name": " "
}