StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - 7.9 外れ値


実行環境

インポート

import numpy as np
import pandas as pd
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
%matplotlib inline

データ読み込み

outlier = pd.read_csv('./data/data-outlier.txt')

7.9 外れ値

ax = outlier.plot.scatter('X', 'Y')
fig = plt.gcf()
plt.setp(fig, size_inches=figaspect(3/4))
plt.setp(ax, xlim=(-0.2, 11.2), ylim=(-25, 75))
plt.show()

X_new = np.linspace(0, 11, 100)
data = dict(
    N=outlier.index.size,
    X=outlier['X'],
    Y=outlier['Y'],
    N_new=X_new.size,
    X_new=X_new
)

fit1 = pystan.stan('./stan/model7-8.stan', data=data, seed=1234)

ms1 = fit1.extract()
d_est1 = np.percentile(ms1['y_new'], (2.5, 25, 50, 75, 97.5), axis=0)

fit2 = pystan.stan('./stan/model7-9.stan', data=data, seed=1234)

ms2 = fit2.extract()
d_est2 = np.percentile(ms2['y_new'], (2.5, 25, 50, 75, 97.5), axis=0)
_, axes = plt.subplots(1, 2, figsize=figaspect(3/8), sharex=True, sharey=True)
for ax, d_est in zip(axes, [d_est1, d_est2]):
    outlier.plot.scatter('X', 'Y', color='w', edgecolor='k', ax=ax)
    ax.fill_between(X_new, d_est[0], d_est[-1], color='k', alpha=0.2)
    ax.fill_between(X_new, d_est[1], d_est[-2], color='k', alpha=0.4)
    ax.plot(X_new, d_est[2], color='k')
plt.setp(ax, xlim=(-0.2, 11.2), ylim=(-25, 75))
plt.show()