Python による分散分析（ANOVA）

2497 ワード

pwd
'd:\\python\\exerise-df\\df-data-analysis'
from scipy import stats
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt

一元配置分散分析（one-way ANOVA）
# Load the one-way layout data set from the current working directory.
dat = pd.read_csv(filepath_or_buffer="one-way.csv")
# Preview the first five rows (five is also the default for head()).
dat.head(n=5)

  Variety rep     y
0       A  b1  15.3
1       B  b1  18.0
2       C  b1  16.6
3       D  b1  16.4
4       E  b1  13.7
# Fit an ordinary least squares model with Variety as the only term.
model = ols(formula='y ~ Variety', data=dat).fit()
# Build the ANOVA table; typ=1 (sequential sums of squares) is the
# library default, written out here so the choice is visible.
anovat = anova_lm(model, typ=1)
print(anovat)
            df     sum_sq    mean_sq          F        PR(>F)
Variety    5.0  52.378333  10.475667  40.334118  3.662157e-09
Residual  18.0   4.675000   0.259722        NaN           NaN

二元配置分散分析（two-way ANOVA）
# Rebind `dat` to the two-factor data set (this replaces the one-way data).
dat = pd.read_csv(filepath_or_buffer="anova.csv")
# Preview the first five rows (five is also the default for head()).
dat.head(n=5)

   loc   cul      y
0  Ann  BH93  4.460
1  Ari  BH93  4.417
2  Aug  BH93  4.669
3  Cas  BH93  4.732
4  Del  BH93  4.390
# Additive two-factor model: main effects of loc and cul, no interaction term.
formula = 'y~ loc + cul'
# Fit by OLS and build the ANOVA table in one expression; typ=1
# (sequential sums of squares) is the library default, written out here.
anova_results = anova_lm(
    ols(formula=formula, data=dat).fit(),
    typ=1,
)
print(anova_results)
             df      sum_sq    mean_sq          F        PR(>F)
loc        17.0   22.671174   1.333598   9.087496  2.327448e-15
cul         8.0  114.536224  14.317028  97.560054  1.611882e-52
Residual  136.0   19.958126   0.146751        NaN           NaN