StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - 11.2 混合正規分布


実行環境

インポート

import numpy as np
import pandas as pd
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
import seaborn as sns
%matplotlib inline

データ読み込み

mix1 = pd.read_csv('./data/data-mix1.txt')
mix2 = pd.read_csv('./data/data-mix2.txt')

11.2 混合正規分布

plt.figure(figsize=figaspect(3/4))
sns.distplot(mix1['Y'], bins=30, hist_kws=dict(facecolor='w', edgecolor='k'), kde_kws=dict(color='k', shade=True))
plt.show()

data = dict(
    N=mix1.index.size,
    Y=mix1['Y']
)
fit = pystan.stan('./stan/model11-5.stan', data=data, init=lambda: dict(
        a=0.5,
        mu=(0, 5),
        sigma=(1, 1)
), seed=123)
plt.figure(figsize=figaspect(3/4))
sns.distplot(mix2['Y'], bins=25, hist_kws=dict(facecolor='w', edgecolor='k'), kde_kws=dict(color='k', shade=True))
plt.show()

K = 5
data = dict(
    N=mix2.index.size,
    K=K,
    Y=mix2['Y']
)
def init():
    return dict(
        a=np.full(K, 1/K),
        mu=np.linspace(10, 40, K),
        sigma=np.ones(K),
        s_mu=20
    )
fit = pystan.stan('./stan/model11-6.stan', data=data, init=init, seed=1234)
fit_b = pystan.stan('./stan/model11-6b.stan', data=data, init=init, seed=1234)