Chinese Word Segmentation Tools
This article runs the same sentence through five Chinese word segmentation libraries for Python:

jieba
thulac
pkuseg
pyhanlp
snownlp
jieba

import jieba
import re

# Sample sentence (a placeholder; any Chinese text works here)
sentence = '今天天气很好,我们一起去公园散步吧。'
# Strip Chinese punctuation before segmenting
sentence = re.sub('[,。?!、]', '', sentence)
result = list(jieba.cut(sentence))
print(result)
# e.g. ['今天', '天气', '很', '好', '我们', '一起', '去', '公园', '散步', '吧'] (output may vary by version)
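jieba also has a full mode that lists every dictionary match, and it lets you register new words at runtime. A minimal sketch (the sentence and the added word are placeholder examples):

import jieba

sentence = '今天天气很好我们一起去公园散步吧'
# Full mode: list every word the dictionary can find, overlaps included
print(list(jieba.cut(sentence, cut_all=True)))
# Register a custom word so it is kept as a single token (placeholder entry)
jieba.add_word('公园散步')
print(jieba.lcut(sentence))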
thulac
import thulac
import re

# Sample sentence (a placeholder; any Chinese text works here)
sentence = '今天天气很好,我们一起去公园散步吧。'
sentence = re.sub('[,。?!、]', '', sentence)
# seg_only=True disables POS tagging and returns words only
thu = thulac.thulac(seg_only=True)
# text=True returns a space-delimited string instead of a list
result = thu.cut(sentence, text=True)
print(result.split())
# e.g. ['今天', '天气', '很', '好', ...] (output may vary)
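If you drop seg_only=True, thulac returns part-of-speech tags along with the words. A minimal sketch:

import thulac

# Default mode returns [word, pos_tag] pairs instead of words only
thu = thulac.thulac()
for word, tag in thu.cut('今天天气很好'):
    print(word, tag)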
pkuseg
import pkuseg
import re

# Sample sentence (a placeholder; any Chinese text works here)
sentence = '今天天气很好,我们一起去公园散步吧。'
sentence = re.sub('[,。?!、]', '', sentence)
# Default model trained on mixed-domain data
pk_useg = pkuseg.pkuseg()
result = pk_useg.cut(sentence)
print(result)
# e.g. ['今天', '天气', '很', '好', ...] (output may vary by model)
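pkuseg also ships pretrained domain models (news, web, medicine, tourism) selected via model_name. A minimal sketch, assuming the model can be downloaded on first use (the sentence is a placeholder):

import pkuseg

# model_name picks a pretrained domain model; pkuseg fetches it if not cached
seg = pkuseg.pkuseg(model_name='medicine')
print(seg.cut('患者出现发热症状'))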
pyhanlp
from pyhanlp import HanLP
import re

# Sample sentence (a placeholder; any Chinese text works here)
sentence = '今天天气很好,我们一起去公园散步吧。'
sentence = re.sub('[,。?!、]', '', sentence)
# HanLP.segment returns Term objects carrying the word and its POS tag
result = HanLP.segment(sentence)
print([term.word for term in result])
# e.g. ['今天', '天气', '很', '好', ...] (output may vary)
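Each Term returned by HanLP.segment also carries its part of speech in the .nature field, so one call gives both tokens and tags. A minimal sketch:

from pyhanlp import HanLP

# Print word/POS pairs from the same segment() call
for term in HanLP.segment('今天天气很好'):
    print(term.word, term.nature)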
snownlp
from snownlp import SnowNLP
import re

# Sample sentence (a placeholder; any Chinese text works here)
sentence = '今天天气很好,我们一起去公园散步吧。'
sentence = re.sub('[,。?!、]', '', sentence)
# SnowNLP segments on access; the tokens are exposed on the .words property
result = SnowNLP(sentence)
print(result.words)
# e.g. ['今天', '天气', '很', '好', ...] (output may vary)
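The same SnowNLP object also exposes POS tags, pinyin, and a sentiment score alongside .words. A minimal sketch:

from snownlp import SnowNLP

s = SnowNLP('今天天气很好')
print(list(s.tags))   # (word, POS tag) pairs
print(s.pinyin)       # pinyin reading of each token
print(s.sentiments)   # probability the text is positive, in [0, 1]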