Python 3で住所情報を自動認識する


1. 文字列から情報を抽出する(氏名、携帯電話番号、住所)
import re

def extract_info(list_value):
    """Pick the name, mobile number and address out of a list of text tokens.

    A token made of exactly 11 numeric characters is taken as the mobile
    number (if several qualify, the last one wins).  The remaining non-empty
    tokens are scanned in order: a token longer than the current name
    candidate becomes the address, any other token becomes the new name
    candidate.

    Args:
        list_value: Iterable of string tokens, e.g. the raw text split on
            whitespace and commas.

    Returns:
        A ``(name, mobile, address)`` tuple of strings.  Parts that cannot
        be found are returned as empty strings.
    """
    mobile = ""
    candidates = []
    for token in list_value:
        if token.isnumeric() and len(token) == 11:
            mobile = token
        elif token:
            # Empty tokens come from leading/trailing separators in the raw
            # text; they must not overwrite the name.
            candidates.append(token)

    if not candidates:
        return "", mobile, ""

    # Seed from the first real candidate rather than list_value[0], which may
    # be the mobile number and would otherwise leak into name/address.
    name = address = candidates[0]
    for token in candidates:
        if len(token) > len(name):
            address = token
        else:
            name = token
    return name, mobile, address

# Demo: collapse every run of whitespace/commas into a single comma, split the
# text into tokens and print the extracted (name, mobile, address) tuple.
# The pattern is a raw string so that "\s" is not treated as an (invalid)
# string escape.
# NOTE(review): the comma appears twice in the character class; presumably one
# of them was originally a full-width comma (U+FF0C) -- confirm.
address = "    18119990001 ,                 4 401 "
delivery_address = re.sub(r'[\s,,]+', ',', address).split(",")
print(extract_info(delivery_address))

2. 住所の単語分割(分かち書き)
単語分割ライブラリ:jieba(「結巴」中国語単語分割:最良の Python 中国語単語分割コンポーネントを目指すライブラリ)
インストール方法と使い方:GitHub https://github.com/fxsjy/jieba
Python コードは以下の通りです。
query() は、筆者が独自にラップして実装したデータベースクエリ用のメソッドです(自前の実装)。
import jieba

def _match_area(areas, segments):
    """Find the area row whose name contains one of the address segments.

    A row matches when any segment is a substring of the row's ``name``.
    When several rows (or several segments of one row) match, the last one
    scanned wins.

    Returns:
        ``(area_row, segment)`` for the match, or ``({}, None)`` if nothing
        matches.
    """
    matched_area, matched_segment = {}, None
    for area in areas or ():
        area_name = area.get("name") or ""
        for segment in segments:
            if segment in area_name:
                matched_area, matched_segment = area, segment
    return matched_area, matched_segment


def _single_row(result):
    """Return one row dict from a ``query()`` result.

    The return shape of ``query()`` is not visible here, so both a single
    dict and a sequence of row dicts are accepted.
    """
    if isinstance(result, dict):
        return result
    for row in result or ():
        return row
    return {}


def address_match(delivery_addresses):
    """Parse a free-text delivery string into a structured address.

    The text is split on whitespace/commas, ``extract_info`` separates name,
    mobile number and address, the address is segmented with jieba, and the
    segments are matched against the ``area`` table (level 1 = province,
    level 2 = city, level 3 = county) through ``query()``.

    Args:
        delivery_addresses: Raw text containing name, mobile and address.

    Returns:
        A dict with the keys ``province``, ``province_id``, ``city``,
        ``city_id``, ``county``, ``county_id``, ``address`` (the text that
        follows the county), ``mobile`` and ``name``.  An empty dict is
        returned when the input is empty or no county can be matched.
    """
    if not delivery_addresses:
        return {}
    # Raw string: "\s" must reach the regex engine, not the string parser.
    tokens = re.sub(r'[\s,,]+', ',', delivery_addresses).split(",")
    name, mobile, address = str(), str(), str()
    try:
        name, mobile, address = extract_info(tokens)
    except (IndexError, ValueError):
        # Best effort: carry on with empty fields if nothing can be extracted.
        pass

    # Drop ignorable tokens by building a new list; the old code removed items
    # from the list while iterating it and compared bytes against str, so it
    # never removed anything.
    # NOTE(review): these literals look like they lost their original
    # characters during extraction -- confirm the intended token list.
    ignored_tokens = (' ', ' ', ' ', ' ')
    seg_list = [
        segment
        for segment in jieba.lcut(address, cut_all=False)
        if segment not in ignored_tokens
    ]

    # NOTE(review): on multiple candidates the last matching row wins, as in
    # the original nested loops -- confirm that first-match was not intended.

    # province
    provinces = query('SELECT * FROM area WHERE display=1 AND level=1 AND pid=0;')
    province_dict, _ = _match_area(provinces, seg_list)

    # cities (restricted to the matched province when there is one)
    if not province_dict:
        cities = query('SELECT * FROM area WHERE display=1 AND level=2;')
    else:
        cities = query(f'SELECT * FROM area WHERE display=1 AND level=2 AND pid={province_dict.get("id")};')
    city_dict, _ = _match_area(cities, seg_list)

    # counties (restricted to the matched city when there is one)
    if not city_dict:
        counties = query('SELECT * FROM area WHERE display=1 AND level=3;')
    else:
        counties = query(f'SELECT * FROM area WHERE display=1 AND level=3 AND pid={city_dict.get("id")};')
    dis_dict, county_segment = _match_area(counties, seg_list)
    if not dis_dict:
        return {}

    # Fill in parents that were not present in the text from the county's
    # (and then the city's) parent id.
    if not city_dict:
        city_dict = _single_row(query(f'SELECT * FROM area WHERE id={dis_dict.get("pid")};'))
    if not province_dict:
        province_dict = _single_row(query(f'SELECT * FROM area WHERE id={city_dict.get("pid")};'))

    # The detail address is whatever follows the county in the text.  Use the
    # segment that actually matched (not a leftover loop variable), and skip
    # the full county name when the text spells it out at that position.
    detail_start = address.index(county_segment)
    county_name = dis_dict.get("name") or ""
    if county_name and address.startswith(county_name, detail_start):
        detail_start += len(county_name)
    else:
        detail_start += len(county_segment)

    address_dict = {
        'province': province_dict.get('name'),
        'province_id': province_dict.get('id'),
        'city': city_dict.get('name'),
        'city_id': city_dict.get('id'),
        'county': dis_dict.get('name'),
        'county_id': dis_dict.get('id'),
        'address': address[detail_start:],
        'mobile': mobile,
        'name': name
    }
    return address_dict

呼び出し例:
# address_match() is defined as a plain (synchronous) function, so it is
# called directly; `await` on it is invalid at module level.
address="    18119990001 ,                 4 401 "
address_dict = address_match(address)

返される結果:
{
    "province": "  ",
    "province_id": 1,
    "city": "   ",
    "city_id": 2,
    "county": "   ",
    "county_id": 3,
    "address": "       4 401 ",
    "mobile": "18119990001",
    "name": "  "
}