python3で裁判の判例データからWord Cloudを生成する


(zsh)
brew install mecab mecab-ipadic
pip3.5 install mecab-python3

pip3.5 install wordcloud
pip3.5 install numpy Pillow matplotlib # wordcloudを使用するために必要なライブラリ
#brew install numpy # error
#brew install homebrew/python/numpy # something's wrong...
#sudo xcode-select --install # doesn't work...

### 新語が追加されたMeCab辞書"mecab-ipadic-neologd"を取得
cd /usr/local/lib/mecab/dic
git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git
cd mecab-ipadic-neologd
./bin/install-mecab-ipadic-neologd -n

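wordcloud.py（※注意: このファイル名のまま実行すると `from wordcloud import WordCloud` がライブラリではなくこのスクリプト自身を読み込み、ImportError になる。うまく動かない場合は hanrei_wordcloud.py など別名で保存して実行すること）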
import MeCab
from os import path
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Part-of-speech ids (compared against MeCab's ``node.posid``) whose morphemes
# are kept for the word cloud.
# NOTE(review): presumably these are IPA-dictionary ids covering adjectives,
# verbs, adverbs and nouns — verify against the dictionary's pos-id.def.
pos_list = [
    10, 11,
    31, 32,
    34,
    *range(36, 50),
    59, 60, 62, 67,
]
def create_mecab_list(text):
    """Tokenize *text* with MeCab and return the surfaces of selected morphemes.

    A morpheme is kept when its surface form is longer than one character and
    its ``posid`` appears in the module-level ``pos_list``. Surfaces are
    returned in the order MeCab yields them, duplicates included.
    """
    tagger = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
    # NOTE(review): parsing an empty string first looks like the common
    # mecab-python3 workaround for unreadable node surfaces — confirm it is
    # still needed with the installed binding version.
    tagger.parse("")

    surfaces = []
    current = tagger.parseToNode(text)
    while current:
        word = current.surface
        if len(word) > 1 and current.posid in pos_list:
            surfaces.append(word)
        current = current.next
    return surfaces

# Read the court-decision text. The encoding is passed explicitly so the UTF-8
# file is decoded the same way regardless of the platform's locale default.
with open("./086064_hanrei_utf8.txt", "r", encoding="utf-8") as file:
    hanrei = file.read()

# Join the extracted morphemes into one space-separated string, which is the
# text handed to WordCloud.generate() below.
string = " ".join(create_mecab_list(hanrei))


# Font file passed to WordCloud through font_path (a Hiragino font under
# /Library/Fonts); adjust the path if the font lives elsewhere.
fpath = "/Library/Fonts/ヒラギノ丸ゴ ProN W4.ttc"
wordcloud = WordCloud(
    # Optional tweaks, left disabled:
    # background_color="white",
    max_font_size=40,
    relative_scaling=.5,
    # width=900,
    # height=500,
    font_path=fpath
    ).generate(string)

# Show the generated cloud in a matplotlib window without axes.
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()

(zsh)
python3 wordcloud.py