離散多変量データの解析 柳川尭先生 共立出版




#p.52

#データ
mat=matrix(c(230,73,65,32),ncol=2)

#期待値
E=matrix(c(224,80,72,24),ncol=2)

#適合度
X2=sum((mat-E)^2/E)

#p.49

#超過リスクを利用する場合

P1=0.008

f=0.015

P2=P1+(1-P1)*f

alpha=0.05;beta=0.1

n=(qnorm(1-alpha/2)+qnorm(1-beta))^2/(2*(asin(sqrt(P1))-asin(sqrt(P2)))^2)

#オッズ比を利用する場合

pthi=2

P1=0.008

P2=pthi*P1/(pthi*P1+(1-P1))

n=(qnorm(1-alpha/2)+qnorm(1-beta))^2/(2*(asin(sqrt(P1))-asin(sqrt(P2)))^2)

#事例対象研究

pthi=2

#P1:P(OC服用|非CHD)

P1=0.30

#P2:P(OC服用|CHD)

P2=pthi*P1/(pthi*P1+(1-P1))

n=(qnorm(1-alpha/2)+qnorm(1-beta))^2/(2*(asin(sqrt(P1))-asin(sqrt(P2)))^2)

#クロス分類研究の場合




#オッズ比の推定

a=mat[1,1];b=mat[2,1];c=mat[1,2];d=mat[2,2]

pthi=a*d/(b*c)

V=1/a+1/b+1/c+1/d

#信頼区間

alpha=0.05

und=log(pthi)-qnorm(1-alpha/2)*sqrt(V)

up=log(pthi)+qnorm(1-alpha/2)*sqrt(V)


#標本数が非常に小さいとき、および確率に偏りがあるとき

pthi_c=(a+0.5)*(d+0.5)/((c+0.5)*(b+0.5))

V_c=1/(a+0.5)+1/(b+0.5)+1/(c+0.5)+1/(d+0.5)

#直接法

n.1=apply(mat,2,sum)[1]

n.2=apply(mat,2,sum)[2]

n1.=apply(mat,1,sum)[1]

n2.=apply(mat,1,sum)[2]

#newton-method

f=function(p){

value=c()

for(i in 0:(n.1)){

value=c(value,choose(n.1,i)*choose(n.2,n1.-i)*p^(i))

}

Q=sum(value)

return(sum(c(0:n.1)*value)/Q-a)

}

ite=1000

eta=0.01

pthi=1

h=0.001

for(l in 1:ite){

f_val=f(pthi)

df_val=(f(pthi+h)-f(pthi))/h

pthi=pthi-eta*f_val/df_val

print(f_val)

}


#信頼区間の推定

alpha=0.05

#関数定義f
f=function(p,sig){

value=c()

for(i in 0:(n.1)){

value=c(value,choose(n.1,i)*choose(n.2,n1.-i)*p^(i))

}

Q=sum(value)

val1=sum(value[(a+1):(n.1+1)])/Q-alpha/2

val2=sum(value[1:(a+1)])/Q-alpha/2

return(ifelse(sig==1,val1,val2))

}

#信頼区間(下側)

ite=1000

eta=0.01

pthi=1

h=0.001

for(l in 1:ite){

f_val=f(pthi,1)

df_val=(f(pthi+h,1)-f(pthi,1))/h

pthi=pthi-eta*f_val/df_val

print(f_val)

}

pthi_und=pthi

#信頼区間(上側)

ite=1000

eta=0.01

pthi=1

h=0.001

for(l in 1:ite){

f_val=f(pthi,0)

df_val=(f(pthi+h,0)-f(pthi,0))/h

pthi=pthi-eta*f_val/df_val

print(f_val)

}

pthi_up=pthi

#p.66 MH test

#pthi=const

mat1=matrix(c(1,10,4,68),ncol=2)

mat2=matrix(c(3,5,5,22),ncol=2)

mat3=matrix(c(6,5,25,37),ncol=2)

mat4=matrix(c(8,7,17,23),ncol=2)

mat5=matrix(c(1,1,23,20),ncol=2)

mat6=matrix(c(1,0,7,5),ncol=2)

mat=list(mat1,mat2,mat3,mat4,mat5,mat6)

L=length(mat)

#勾配上昇法

pthi=1;sita=rep(1,L)

ite=10000

eta=0.001

ns1=c();ns2=c();xs1=c();xs2=c();Nl=c();pthi_MH1=c();pthi_MH2=c()

for(i in 1:L){

matrix=matrix(unlist(mat[i]),ncol=2)

ns1=c(ns1,apply(matrix,1,sum)[1])

ns2=c(ns2,apply(matrix,1,sum)[2])

xs1=c(xs1,matrix[1,1])

xs2=c(xs2,matrix[2,1])

Nl=c(Nl,sum(matrix))

pthi_MH1=c(pthi_MH1,matrix[1,1]*matrix[2,2]/sum(matrix))

pthi_MH2=c(pthi_MH2,matrix[1,2]*matrix[2,1]/sum(matrix))

}

x.1=xs1+xs2;x.2=Nl-x.1

for(l in 1:ite){

dpthi=-sum(ns1*exp(sita)/(1+pthi*exp(sita)))+sum(xs1)/pthi

dsitas=c()

for(j in 1:L){

dsita=-ns1[j]*pthi*exp(sita[j])/(1+pthi*exp(sita[j]))-ns2[j]*exp(sita[j])/(1+exp(sita[j]))+x.1[j]

dsitas=c(dsitas,dsita)

}

pthi=pthi+eta*dpthi;sita=sita+eta*dsitas

loglik=sum(-ns1*log(1+pthi*exp(sita)))-sum(ns2*log(1+exp(sita)))+log(pthi)*sum(xs1)+sum(sita*x.1)

print(loglik)

}

#MH statics
MH=(abs(sum(xs1)-sum(ns1*x.1/Nl)))^2/sum(ns1*ns2*x.1*x.2/(Nl*Nl*(Nl-1)))

#近似法
pthi_MH=sum(pthi_MH1)/sum(pthi_MH2)

#MH 条件
und=sum(ifelse(x.1-ns2>0,x.1-ns2,0))+5
value=sum(ns1*x.1/Nl)
upp=sum(apply(cbind(ns1,x.1),1,min))-5

und<value&value<upp

#信頼区間

alpha=0.05

conf_und=pthi_MH^(1-qnorm(1-alpha/2)/sqrt(MH))

conf_upp=pthi_MH^(1+qnorm(1-alpha/2)/sqrt(MH))

#ワンステップ改良推定量とBreslow-days test

f=function(e){

return(e*(ns2-x.1+e)-pthi_MH*((ns1-e)*(x.1-e)))

}

#newton-raphson

vec=apply(cbind(x.1,ns1),1,min)/2

eta=10^(-3)

ite=10^4

h=0.01

for(l in 1:ite){

f_val=f(vec)

df_val=(f(vec+h)-f(vec))/h

vec=vec-eta*f_val/df_val

print(f_val)

}

V=(1/vec+1/(ns1-vec)+1/(x.1-vec)+1/(ns2-x.1+vec))^(-1)

XBD=sum((xs1-vec)^2/V)

YBD=sum((xs1-vec)^2/V)-(sum(xs1)-sum(vec))^2/sum(V)

# 第4章 rxk 表の統計解析

mat=matrix(c(2892,983,2625,679,570,134),ncol=3)

n.=apply(mat,1,sum);x.=apply(mat,2,sum)

r=2;k=3;N=sum(n.)

eij=t(t(n.))%*%t(x.)/N

V=-t(t(x.[-1]))%*%t(x.[-1])

diag(V)=diag(V)+c(x.[-1]*N)

V=prod(n.)*V/((N-1)*N^2)

t=(N-1)*sum((mat-eij)^2/eij)/N

qchisq(1-0.05,df=(r-1)*(k-1))

#一方のカテゴリに順序があるrxk表の検定

mat=matrix(c(105,13,72,56,26,32),ncol=3)

n.=apply(mat,1,sum);x.=apply(mat,2,sum)

r=2;k=3;N=sum(n.)

eij=t(t(n.))%*%t(x.)/N

c_vec=c(0,1,2)

ei=eij%*%c_vec

c_bar=sum(c_vec*x./N)

S2=(N-1)*sum(((mat-eij)%*%c_vec)^2/n.)/sum(x.*(c_vec-c_bar)^2)

qchisq(1-0.05,df=r-1)

#ウィルコクスンスコアの方法

mat=matrix(c(32,38,27,21,42,40,42,76,82),ncol=3)

n.=apply(mat,1,sum);x.=apply(mat,2,sum)

r=nrow(mat);k=ncol(mat);c_vec=c()

for(j in 1:k){

if(j==1){

c_vec=c(c_vec,(1+x.[1])/2)

}else{

c_vec=c(c_vec,sum(x.[1:(j-1)])+(1+x.[j])/2)

}

}

N=sum(n.)

eij=t(t(n.))%*%t(x.)/N

ei=eij%*%c_vec

c_bar=sum(c_vec*x./N)

S2=(N-1)*sum(((mat-eij)%*%c_vec)^2/n.)/sum(x.*(c_vec-c_bar)^2)

qchisq(1-0.05,df=r-1)

# 4.6 用量・反応の検定 p.102

mat=matrix(c(4,11,3,30,2,33),ncol=3)

x=mat[1,]

x.=apply(mat,1,sum)[1]

n=apply(mat,2,sum);N=sum(n)

d=c(0,1,2)

res_fun=function(a,b,s){

if(s==1){

return(1-1/(1+exp(-(a+b*d))))

}

if(s==2){

return(pnorm(a+b*d))

}

if(s==3){

return(sin(a+b*d)^2)

}

}

loglik_fun=function(f){

return(sum(x*log(ifelse(f>0,f,1))+(n-x)*log(1-ifelse(f<1,f,0))))

}


ite=1000

eta=0.01

alpha=1;beta=1

h=0.01

k=1

for(l in 1:ite){

dp=c((loglik_fun(res_fun(alpha+h,beta,k))-loglik_fun(res_fun(alpha,beta,k)))/h,(loglik_fun(res_fun(alpha,beta+h,k))-loglik_fun(res_fun(alpha,beta,k)))/h)

ddp=array(0,dim=c(2,2))

ddp[1,1]=(loglik_fun(res_fun(alpha+2*h,beta,k))-2*loglik_fun(res_fun(alpha+h,beta,k))+loglik_fun(res_fun(alpha,beta,k)))/(h^2)

ddp[1,2]=ddp[2,1]=(loglik_fun(res_fun(alpha+h,beta+h,k))-loglik_fun(res_fun(alpha+h,beta,k))-loglik_fun(res_fun(alpha,beta+h,k))+loglik_fun(res_fun(alpha,beta,k)))/(h^2)

ddp[2,2]=(loglik_fun(res_fun(alpha,beta+2*h,k))-2*loglik_fun(res_fun(alpha,beta+h,k))+loglik_fun(res_fun(alpha,beta,k)))/(h^2)

param=c(alpha,beta)-eta*solve(ddp)%*%dp

alpha=param[1];beta=param[2]

print(loglik_fun(res_fun(alpha,beta,k)))

}

#cochran-armitage test

p_hat=sum(x)/N

S=(sum(x*d)-sum(n*d)*p_hat)^2/(p_hat*(1-p_hat)*(sum(n*d^2)-sum(n*d)^2/N))

qchisq(1-0.05,df=1)

#p_value

2*(1-pnorm(sqrt(S)))

#直接法(標本数がだいたい全体で50程度で反応率が0.1%~0.2%のとき)

k=length(d)

t0=sum(x*d)

e=p_hat*sum(n*d)

m=abs(t0-e)

library(gtools)

combn=permutations(n=length(c(0:sum(x))),r=k,v=c(0:sum(x)),repeats.allowed = F)

combn=t(combn)

sums=apply(combn,2,sum)

num=c(1:ncol(combn))

num=num[sums==sum(x)]

combn=combn[,num]

value=t(combn)%*%d

num=c(1:ncol(combn))

num_up=num[value>=e+m]

combn_up=combn[,num_up]

num_und=num[value<=e-m]

combn_und=combn[,num_und]

prob=function(X){

val=1

for(i in 1:length(n)){

val=val*choose(n[i],X[i])

}

return(val/choose(N,sum(X)))

}

p_up=0;p_und=0

for(i in 1:ncol(combn_up)){

p_up=p_up+prob(combn_up[,i])

}

for(i in 1:ncol(combn_und)){

p_und=p_und+prob(combn_und[,i])

}

p_up+p_und