COTOHA API で構文解析


COTOHA API Portal の使用例です。

こちらのプログラムと同じ処理ですが、プログラムを見やすくしました。

COTOHA API で品詞分類

参考ページ

APIリファレンス

フォルダー構造

$ tree -a
.
├── .env
├── get_config.py
├── get_token.py
└── parsing.py
parsing.py
#! /usr/bin/python
# -*- coding:utf-8 -*-
#
#   parsing.py
#
#                       Feb/21/2020
# ----------------------------------------------------------------------
import sys
import json
import requests
# ----------------------------------------------------------------------
from get_config import get_config_proc
from get_token import get_token_proc
# ----------------------------------------------------------------------
# Script body: fetch an access token, POST one sentence to the "v1/parse"
# endpoint and print every token's surface form and part of speech.
sys.stderr.write("*** 開始 ***\n")
config = get_config_proc()
access_token = get_token_proc(config)

# Text to be analysed.
sentence = "特急はくたか"

headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + access_token,
}

data = {
    "sentence": sentence,
    "type": "default",
}

str_json = json.dumps(data)
url = config['DEVELOPER_API_BASE_URL'] + "v1/parse"
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    rr = requests.post(url, headers=headers, data=str_json, timeout=30)
    # Surface HTTP failures (bad token, wrong URL, ...) as an HTTP error
    # instead of a confusing KeyError on the missing "result" key.
    rr.raise_for_status()
    dict_aa = rr.json()
    units = dict_aa["result"]

    # First pass: report the token count of each result entry on stderr.
    for unit in units:
        llx = len(unit['tokens'])
        sys.stderr.write("llx(tokens) = %d\n" % llx)

    # Second pass: print "form pos" for every token on stdout.
    for unit in units:
        for token in unit['tokens']:
            print(token['form'], token['pos'])
except Exception as ee:
    # Top-level boundary of the script: report the problem and fall through
    # to the closing message rather than dumping a traceback.
    sys.stderr.write("*** error *** in requests.post ***\n")
    sys.stderr.write(str(ee) + "\n")

sys.stderr.write("*** 終了 ***\n")
# ----------------------------------------------------------------------
get_config.py
# ----------------------------------------------------------------------
#
#   get_config.py
#
#                       Feb/21/2020
# ----------------------------------------------------------------------
import os
from dotenv import load_dotenv
# ----------------------------------------------------------------------
def get_config_proc():
    """Load settings from the ``.env`` file and return them as a dict.

    ``load_dotenv`` is called on the relative path ``.env``; afterwards the
    four variables listed below are read from ``os.environ``.

    Returns:
        dict: Maps 'CLIENT_ID', 'CLIENT_SECRET', 'DEVELOPER_API_BASE_URL'
        and 'ACCESS_TOKEN_PUBLISH_URL' to their environment values. A
        variable that is not set maps to ``None`` (``os.environ.get``).
    """
    dotenv_path = '.env'
    load_dotenv(dotenv_path)

    keys = (
        "CLIENT_ID",
        "CLIENT_SECRET",
        "DEVELOPER_API_BASE_URL",
        "ACCESS_TOKEN_PUBLISH_URL",
    )
    # Same key name on both sides: the dict key equals the variable name.
    config = {key: os.environ.get(key) for key in keys}

    return config
#
# ----------------------------------------------------------------------
get_token.py
# -*- coding:utf-8 -*-
#
#   get_token.py
#
#                       Feb/21/2020
#
# ----------------------------------------------------------------------
import sys
import os
import json
import  requests
# ----------------------------------------------------------------------
def get_token_proc(config):
    """Request an access token from the token-publishing endpoint.

    Args:
        config: Mapping that provides 'CLIENT_ID', 'CLIENT_SECRET' and
            'ACCESS_TOKEN_PUBLISH_URL'.

    Returns:
        The "access_token" value of the JSON response, or an empty string
        when the request or the response handling fails (the error is
        written to stderr).

    Raises:
        KeyError: If ``config`` lacks one of the required keys; these
            lookups happen before the guarded request.
    """
    data = {
        "grantType": "client_credentials",
        "clientId": config['CLIENT_ID'],
        "clientSecret": config['CLIENT_SECRET'],
    }
    str_json = json.dumps(data)

    url = config['ACCESS_TOKEN_PUBLISH_URL']
    headers = {
        "Content-Type": "application/json",
    }

    # Bound before the try block: previously a failed request left the name
    # unassigned and the final return raised UnboundLocalError.
    access_token = ""
    try:
        # A timeout keeps the call from blocking forever on a dead connection.
        rr = requests.post(url, headers=headers, data=str_json, timeout=30)
        # Report 4xx/5xx responses as HTTP errors rather than as a KeyError
        # on the missing "access_token" field.
        rr.raise_for_status()
        access_token = rr.json()["access_token"]
    except Exception as ee:
        sys.stderr.write("*** error *** in requests.post ***\n")
        sys.stderr.write(str(ee) + "\n")

    return access_token
# ----------------------------------------------------------------------
.env
CLIENT_ID = AAAAAAAAAAAAAAAAAAAAAAAAAA
CLIENT_SECRET = aaaaaaaaaaaaaaaaaaaaaaaaaa
DEVELOPER_API_BASE_URL = https://api.ce-cotoha.com/api/dev/nlp/
ACCESS_TOKEN_PUBLISH_URL = https://api.ce-cotoha.com/v1/oauth/accesstokens

実行結果

$ ./parsing.py
*** 開始 ***
llx(tokens) = 3
llx(tokens) = 1
特急 名詞
は 動詞語幹
く 動詞接尾辞
たか 名詞
*** 終了 ***