StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - 7.5 交絡


実行環境

インポート

import pandas as pd
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
from matplotlib.markers import MarkerStyle
%matplotlib inline

データ読み込み

# Read the data file and convert the Age column to a pandas categorical
# dtype in one chained expression.
d = pd.read_csv('./data/data-50m.txt').astype({'Age': 'category'})

7.5 交絡

# Two side-by-side panels sharing both axes: all points in one series on the
# left, the same points split by Age category on the right.
fig, (ax1, ax2) = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=figaspect(3/8),
    sharex=True,
    sharey=True,
)

# Left panel: Weight vs. Y, looked up by column name in the data frame.
ax1.scatter(x='Weight', y='Y', data=d)

# Right panel: one scatter series per Age category, each with its own
# filled marker shape so the groups can be told apart.
markers = MarkerStyle.filled_markers
for i, age in enumerate(d['Age'].cat.categories):
    subset = d.query('Age==@age')
    ax2.scatter(x='Weight', y='Y', data=subset, marker=markers[i], label=age)
ax2.legend(title='Age')

# Label both panels identically.
for ax in (ax1, ax2):
    ax.set(xlabel='Weight', ylabel='Y')

plt.show()

# Build the data dictionary handed to Stan: one entry per data-frame column
# (keyed by column name), plus the number of rows under 'N'.
data = dict(d.items())
data.update(N=len(d))

# Run the model defined in the Stan file, with a fixed seed.
fit = pystan.stan(file='./stan/model7-5.stan', data=data, seed=1234)