# Statistical Principles in Experimental Design (Winer)



#statistical principles in experimental design

# p.31
# Two-sample t test computed from raw sums; hypothesis under test: mu_a = mu_b.
# The squared t statistic (t2) is compared with the squared two-sided critical t.
Xa <- c(3, 5, 2, 4, 6, 2, 7)
Xb <- c(6, 5, 7, 8, 9, 4, 7, 8, 9, 7)
na <- 7
nb <- 10

# L = n * sum(X^2) - (sum X)^2 for each group
La <- na * sum(Xa^2) - sum(Xa)^2
Lb <- nb * sum(Xb^2) - sum(Xb)^2

# squared t statistic on na + nb - 2 degrees of freedom
t2_numerator <- (na + nb - 2) * (nb * sum(Xa) - na * sum(Xb))^2
t2_denominator <- (na + nb) * (nb * La + na * Lb)
t2 <- t2_numerator / t2_denominator

alpha <- 0.05

# squared two-sided critical value of t at level alpha
qt(1 - alpha / 2, df = na + nb - 2)^2


# test for homogeneity of variance
# Sample variances of the two groups and the corresponding sample sizes.
sa2 <- 15.58
sb2 <- 28.2
na <- 10
nb <- 8

# observed variance ratio, group b over group a
F_obs <- sb2 / sa2

# upper 5% point of F with (nb - 1, na - 1) degrees of freedom
qf(1 - 0.05, df1 = nb - 1, df2 = na - 1)


# 2.6 testing hypotheses about the difference between two means
# small sample sizes
# hypothesis under test: mu_a = mu_b
na <- 16
nb <- 8
mean_Xa <- 30
mean_Xb <- 21
sa2 <- 32
sb2 <- 80
alpha <- 0.05

# per-group variance of the mean (V, W) and their sum (U)
V <- sa2 / na
W <- sb2 / nb
U <- V + W

# t statistic using separate (unpooled) variance estimates
t_obs <- (mean_Xa - mean_Xb) / sqrt(sa2 / na + sb2 / nb)

# share of the total variance of the difference that comes from group a
# (the numeric `c` does not break later calls such as c(1, 2): R skips
# non-function objects when resolving a function call)
c <- (sa2 / na) / (sa2 / na + sb2 / nb)

# degrees of freedom
fa <- na - 1
fb <- nb - 1

# (2) approximation based on n + 1
f1 <- U^2 / (V^2 / (na + 1) + W^2 / (nb + 1)) - 2

# (3) approximation based on c and the per-group degrees of freedom
f2 <- (fa * fb) / (fb * c^2 + fa * (1 - c)^2)

# use the smaller of the two approximations
f <- min(f1, f2)

# two-sided 5% critical value of t on f degrees of freedom
qt(1 - 0.05 / 2, df = f)



# 2.7 testing hypotheses about the difference between two means - correlated observations
# hypothesis under test: mu_a - mu_b = 0
data <- data.frame(
  person_number = 1:7,
  before_treatment = c(3, 8, 4, 6, 9, 2, 12),
  after_treatment = c(6, 14, 8, 4, 16, 7, 19)
)

# Per-person change. Base R is used here so the script does not depend on
# dplyr/magrittr being attached (no library() call is present in the script).
data$difference <- data$after_treatment - data$before_treatment

n_pairs <- nrow(data)

# L_d = n * sum(d^2) - (sum d)^2; dividing by n * (n - 1) gives the variance of d
L_d <- n_pairs * sum(data$difference^2) - sum(data$difference)^2
sd2 <- L_d / (n_pairs * (n_pairs - 1))

# paired t statistic on n - 1 degrees of freedom
t_obs <- mean(data$difference) / sqrt(sd2 / n_pairs)

# two-sided 5% critical value
qt(1 - 0.05 / 2, df = n_pairs - 1)

# Before treatment: variance of the before scores
B <- data$before_treatment
L_b <- n_pairs * sum(B^2) - sum(B)^2
sb2 <- L_b / (n_pairs * (n_pairs - 1))

# After treatment: variance of the after scores
A <- data$after_treatment
L_a <- n_pairs * sum(A^2) - sum(A)^2
sa2 <- L_a / (n_pairs * (n_pairs - 1))

# Cross product of before and after scores, and their correlation
L_ab <- n_pairs * sum(A * B) - sum(A) * sum(B)
rab <- L_ab / sqrt(L_a * L_b)

# table 2.8-1
# Combining the results of several independent one-sided t tests (each on 14 df).
t_obs <- c(0.87, 0.54, 1.1, 1.5, 1.3)

# upper-tail probability of each observed t
probability <- 1 - pt(t_obs, df = 14)

# Each test contributes -2 * ln(P); under the null hypothesis each term is
# chi-square with 2 df, so the sum is chi-square with 2 * (number of tests) df.
kai2 <- -2 * log(probability)

# combined chi-square statistic
chi2_obs <- sum(kai2)
chi2_obs

# Only large values of the statistic (small P values) count against the null
# hypothesis, so the whole 5% is placed in the upper tail.
qchisq(1 - 0.05, df = 2 * length(t_obs))



# 3.2 Definitions and numerical examples
# One-way analysis of variance for three methods with equal group sizes.
method1 <- c(3, 5, 2, 4, 8, 4, 3, 9)
method2 <- c(4, 4, 3, 8, 7, 4, 2, 5)
method3 <- c(6, 7, 8, 6, 7, 9, 10, 9)

# observations per method
n_per_method <- length(method1)

# method totals and grand total
t1 <- sum(method1)
t2 <- sum(method2)
t3 <- sum(method3)
G <- t1 + t2 + t3

# sum of squares within each method
SS1 <- sum(method1^2) - t1^2 / n_per_method
SS2 <- sum(method2^2) - t2^2 / n_per_method
SS3 <- sum(method3^2) - t3^2 / n_per_method

# within-method variation pooled over the three methods
SS_w <- sum(method1^2) + sum(method2^2) + sum(method3^2) -
  (t1^2 + t2^2 + t3^2) / n_per_method

# grand mean and method means
G_bar <- G / (3 * n_per_method)
method_means <- c(t1, t2, t3) / n_per_method

# between-method, total and error sums of squares
SS_method <- n_per_method * sum((method_means - G_bar)^2)
SS_total <- sum((c(method1, method2, method3) - G_bar)^2)
SS_error <- SS_total - SS_method

# mean squares
MS_method <- SS_method / (3 - 1)
MS_error <- SS_error / (3 * (n_per_method - 1))

# the same error mean square obtained as the average of the within-method variances
MS_error <- (var(method1) + var(method2) + var(method3)) / 3

# treatment mean square written out directly from the method means
MS_treat <- n_per_method * sum((method_means - G_bar)^2) / (3 - 1)

# hypothesis under test: population means are equal
f <- MS_method / MS_error
qf(1 - 0.05, 3 - 1, 3 * (n_per_method - 1))

# hypothesis under test: treatment effects are equal
f <- MS_treat / MS_error
qf(1 - 0.05, 3 - 1, 3 * (n_per_method - 1))

# comparisons among treatment means p.65
# hypothesis under test: sum(C * t) = 0
# t holds the treatment totals computed earlier in the script
t <- c(t1, t2, t3)

# contrast coefficients: method 1 versus method 2
C <- c(1, -1, 0)

# F statistic for the comparison, expressed in terms of treatment totals
f <- sum(C * t)^2 / (length(method1) * sum(C^2) * MS_error)

# number of treatments that enter the comparison (non-zero coefficients)
k <- length(C[abs(C) > 0])

# A single comparison always has 1 numerator degree of freedom, however many
# treatments it involves; k - 1 is only correct for pairwise contrasts (k = 2).
qf(1 - 0.05, 1, 3 * (length(method1) - 1))


# p.71
# There are six displays (k = 6) and each sample is observed (n = 10) under only one of the displays.
# Hypothesis under test: the mean reaction times for the displays are equal.
# For example, six different Web page segments (complexity).
k <- 6
n <- 10

# display totals and, per display, the sum of squared observations
t <- c(100, 110, 120, 180, 190, 210)
X2 <- c(1180, 1210, 1600, 3500, 3810, 4610)

G <- sum(t)

SS_displays <- sum(t^2) / n - G^2 / (k * n)
SS_error <- sum(X2) - sum(t^2) / n
SS_total <- sum(X2) - G^2 / (k * n)

# Error mean square of THIS experiment on k * (n - 1) df. Defined here so the
# trend tests below do not pick up an MS_error left over from another analysis.
MS_error <- SS_error / (k * (n - 1))

# overall F test: k - 1 numerator df, k * (n - 1) denominator df
f <- (SS_displays / (k - 1)) / MS_error
F_crit_overall <- qf(1 - 0.05, k - 1, k * (n - 1))
F_crit_overall

# linear (1st degree), quadratic (2nd degree), cubic (3rd degree) trend
# Each trend component is a single comparison, so it has 1 numerator df.
F_crit_trend <- qf(1 - 0.05, 1, k * (n - 1))

# linear
coef <- c(-5, -3, -1, 1, 3, 5)
D <- sum(coef^2) * n
C_lin <- sum(coef * t)
# direction of the linear trend, kept for the regression line below
slope_sign <- sign(C_lin)
C2D <- C_lin^2 / D
SS_lin <- C2D
f_lin <- C2D / MS_error
F_crit_trend

# quadratic
coef <- c(5, -1, -4, -4, -1, 5)
D <- sum(coef^2) * n
C_lin <- sum(coef * t)
C2D <- C_lin^2 / D
f_quad <- C2D / MS_error
F_crit_trend

# cubic
coef <- c(-5, 7, 4, -4, -7, 5)
D <- sum(coef^2) * n
C_lin <- sum(coef * t)
C2D <- C_lin^2 / D
f_cub <- C2D / MS_error
F_crit_trend

# non-linear: whatever between-display variation the linear trend leaves over,
# on k - 2 numerator df
SS_nonlinear <- SS_displays - SS_lin
f_nonlinear <- (SS_nonlinear / (k - 2)) / MS_error
qf(1 - 0.05, k - 2, k * (n - 1))

# linear equation X = a + b * K, with K = 1, ..., k
# SSK is n times the sum of squared deviations of 1..k from their mean
SSK <- n * (k^3 - k) / 12

# sqrt() alone would always give a non-negative slope; restore the direction
b <- slope_sign * sqrt(SS_lin / SSK)

K_bar <- (k + 1) / 2
X_bar <- G / (k * n)
a <- X_bar - b * K_bar

# correlation between K and the observations, carrying the sign of the slope
r <- slope_sign * sqrt(SS_lin / SS_total)


# p.77 Use of the Studentized Range Statistic
# hypothesis under test: treatment effects are equal
method1 <- c(3, 5, 2, 4, 8, 4, 3, 9)
method2 <- c(4, 4, 3, 8, 7, 4, 2, 5)
method3 <- c(6, 7, 8, 6, 7, 9, 10, 9)

# treatment totals
t1 <- sum(method1)
t2 <- sum(method2)
t3 <- sum(method3)
t <- c(t1, t2, t3)

# error mean square: average of the three within-method variances (equal n)
MS_error <- (var(method1) + var(method2) + var(method3)) / 3

# Newman-Keuls test
# (reference noted in the original script: biostatistical analysis p.215)
# The range is taken over treatment TOTALS, so it is scaled by the standard
# error of a total, sqrt(n * MS_error). sqrt(MS_error / n) is the standard error
# of a mean and would inflate q by a factor of n.
q <- (max(t) - min(t)) / sqrt(length(method1) * MS_error)