離散多変量データの解析 柳川尭先生 共立出版
#p.52
#データ
mat=matrix(c(230,73,65,32),ncol=2)
#期待値
E=matrix(c(224,80,72,24),ncol=2)
#適合度
X2=sum((mat-E)^2/E)
#p.49
#超過リスクを利用する場合
P1=0.008
f=0.015
P2=P1+(1-P1)*f
alpha=0.05;beta=0.1
n=(qnorm(1-alpha/2)+qnorm(1-beta))^2/(2*(asin(sqrt(P1))-asin(sqrt(P2)))^2)
#オッズ比を利用する場合
pthi=2
P1=0.008
P2=pthi*P1/(pthi*P1+(1-P1))
n=(qnorm(1-alpha/2)+qnorm(1-beta))^2/(2*(asin(sqrt(P1))-asin(sqrt(P2)))^2)
#事例対象研究
pthi=2
#P1:P(OC服用|非CHD)
P1=0.30
#P2:P(OC服用|CHD)
P2=pthi*P1/(pthi*P1+(1-P1))
n=(qnorm(1-alpha/2)+qnorm(1-beta))^2/(2*(asin(sqrt(P1))-asin(sqrt(P2)))^2)
#クロス分類研究の場合
#オッズ比の推定
a=mat[1,1];b=mat[2,1];c=mat[1,2];d=mat[2,2]
pthi=a*d/(b*c)
V=1/a+1/b+1/c+1/d
#信頼区間
alpha=0.05
und=log(pthi)-qnorm(1-alpha/2)*sqrt(V)
up=log(pthi)+qnorm(1-alpha/2)*sqrt(V)
#標本数が非常に小さいとき、および確率に偏りがあるとき
pthi_c=(a+0.5)*(d+0.5)/((c+0.5)*(b+0.5))
V_c=1/(a+0.5)+1/(b+0.5)+1/(c+0.5)+1/(d+0.5)
#直接法
n.1=apply(mat,2,sum)[1]
n.2=apply(mat,2,sum)[2]
n1.=apply(mat,1,sum)[1]
n2.=apply(mat,1,sum)[2]
#newton-method
f=function(p){
value=c()
for(i in 0:(n.1)){
value=c(value,choose(n.1,i)*choose(n.2,n1.-i)*p^(i))
}
Q=sum(value)
return(sum(c(0:n.1)*value)/Q-a)
}
ite=1000
eta=0.01
pthi=1
h=0.001
for(l in 1:ite){
f_val=f(pthi)
df_val=(f(pthi+h)-f(pthi))/h
pthi=pthi-eta*f_val/df_val
print(f_val)
}
#信頼区間の推定
alpha=0.05
#関数定義f
f=function(p,sig){
value=c()
for(i in 0:(n.1)){
value=c(value,choose(n.1,i)*choose(n.2,n1.-i)*p^(i))
}
Q=sum(value)
val1=sum(value[(a+1):(n.1+1)])/Q-alpha/2
val2=sum(value[1:(a+1)])/Q-alpha/2
return(ifelse(sig==1,val1,val2))
}
#信頼区間(下側)
ite=1000
eta=0.01
pthi=1
h=0.001
for(l in 1:ite){
f_val=f(pthi,1)
df_val=(f(pthi+h,1)-f(pthi,1))/h
pthi=pthi-eta*f_val/df_val
print(f_val)
}
pthi_und=pthi
#信頼区間(上側)
ite=1000
eta=0.01
pthi=1
h=0.001
for(l in 1:ite){
f_val=f(pthi,0)
df_val=(f(pthi+h,0)-f(pthi,0))/h
pthi=pthi-eta*f_val/df_val
print(f_val)
}
pthi_up=pthi
#p.66 MH test
#pthi=const
mat1=matrix(c(1,10,4,68),ncol=2)
mat2=matrix(c(3,5,5,22),ncol=2)
mat3=matrix(c(6,5,25,37),ncol=2)
mat4=matrix(c(8,7,17,23),ncol=2)
mat5=matrix(c(1,1,23,20),ncol=2)
mat6=matrix(c(1,0,7,5),ncol=2)
mat=list(mat1,mat2,mat3,mat4,mat5,mat6)
L=length(mat)
#勾配上昇法
pthi=1;sita=rep(1,L)
ite=10000
eta=0.001
ns1=c();ns2=c();xs1=c();xs2=c();Nl=c();pthi_MH1=c();pthi_MH2=c()
for(i in 1:L){
matrix=matrix(unlist(mat[i]),ncol=2)
ns1=c(ns1,apply(matrix,1,sum)[1])
ns2=c(ns2,apply(matrix,1,sum)[2])
xs1=c(xs1,matrix[1,1])
xs2=c(xs2,matrix[2,1])
Nl=c(Nl,sum(matrix))
pthi_MH1=c(pthi_MH1,matrix[1,1]*matrix[2,2]/sum(matrix))
pthi_MH2=c(pthi_MH2,matrix[1,2]*matrix[2,1]/sum(matrix))
}
x.1=xs1+xs2;x.2=Nl-x.1
for(l in 1:ite){
dpthi=-sum(ns1*exp(sita)/(1+pthi*exp(sita)))+sum(xs1)/pthi
dsitas=c()
for(j in 1:L){
dsita=-ns1[j]*pthi*exp(sita[j])/(1+pthi*exp(sita[j]))-ns2[j]*exp(sita[j])/(1+exp(sita[j]))+x.1[j]
dsitas=c(dsitas,dsita)
}
pthi=pthi+eta*dpthi;sita=sita+eta*dsitas
loglik=sum(-ns1*log(1+pthi*exp(sita)))-sum(ns2*log(1+exp(sita)))+log(pthi)*sum(xs1)+sum(sita*x.1)
print(loglik)
}
#MH statics
MH=(abs(sum(xs1)-sum(ns1*x.1/Nl)))^2/sum(ns1*ns2*x.1*x.2/(Nl*Nl*(Nl-1)))
#近似法
pthi_MH=sum(pthi_MH1)/sum(pthi_MH2)
#MH 条件
und=sum(ifelse(x.1-ns2>0,x.1-ns2,0))+5
value=sum(ns1*x.1/Nl)
upp=sum(apply(cbind(ns1,x.1),1,min))-5
und<value&value<upp
#信頼区間
alpha=0.05
conf_und=pthi_MH^(1-qnorm(1-alpha/2)/sqrt(MH))
conf_upp=pthi_MH^(1+qnorm(1-alpha/2)/sqrt(MH))
#ワンステップ改良推定量とBreslow-days test
f=function(e){
return(e*(ns2-x.1+e)-pthi_MH*((ns1-e)*(x.1-e)))
}
#newton-raphson
vec=apply(cbind(x.1,ns1),1,min)/2
eta=10^(-3)
ite=10^4
h=0.01
for(l in 1:ite){
f_val=f(vec)
df_val=(f(vec+h)-f(vec))/h
vec=vec-eta*f_val/df_val
print(f_val)
}
V=(1/vec+1/(ns1-vec)+1/(x.1-vec)+1/(ns2-x.1+vec))^(-1)
XBD=sum((xs1-vec)^2/V)
YBD=sum((xs1-vec)^2/V)-(sum(xs1)-sum(vec))^2/sum(V)
# 第4章 rxk 表の統計解析
mat=matrix(c(2892,983,2625,679,570,134),ncol=3)
n.=apply(mat,1,sum);x.=apply(mat,2,sum)
r=2;k=3;N=sum(n.)
eij=t(t(n.))%*%t(x.)/N
V=-t(t(x.[-1]))%*%t(x.[-1])
diag(V)=diag(V)+c(x.[-1]*N)
V=prod(n.)*V/((N-1)*N^2)
t=(N-1)*sum((mat-eij)^2/eij)/N
qchisq(1-0.05,df=(r-1)*(k-1))
#一方のカテゴリに順序があるrxk表の検定
mat=matrix(c(105,13,72,56,26,32),ncol=3)
n.=apply(mat,1,sum);x.=apply(mat,2,sum)
r=2;k=3;N=sum(n.)
eij=t(t(n.))%*%t(x.)/N
c_vec=c(0,1,2)
ei=eij%*%c_vec
c_bar=sum(c_vec*x./N)
S2=(N-1)*sum(((mat-eij)%*%c_vec)^2/n.)/sum(x.*(c_vec-c_bar)^2)
qchisq(1-0.05,df=r-1)
#ウィルコクスンスコアの方法
mat=matrix(c(32,38,27,21,42,40,42,76,82),ncol=3)
n.=apply(mat,1,sum);x.=apply(mat,2,sum)
r=nrow(mat);k=ncol(mat);c_vec=c()
for(j in 1:k){
if(j==1){
c_vec=c(c_vec,(1+x.[1])/2)
}else{
c_vec=c(c_vec,sum(x.[1:(j-1)])+(1+x.[j])/2)
}
}
N=sum(n.)
eij=t(t(n.))%*%t(x.)/N
ei=eij%*%c_vec
c_bar=sum(c_vec*x./N)
S2=(N-1)*sum(((mat-eij)%*%c_vec)^2/n.)/sum(x.*(c_vec-c_bar)^2)
qchisq(1-0.05,df=r-1)
# 4.6 用量・反応の検定 p.102
mat=matrix(c(4,11,3,30,2,33),ncol=3)
x=mat[1,]
x.=apply(mat,1,sum)[1]
n=apply(mat,2,sum);N=sum(n)
d=c(0,1,2)
res_fun=function(a,b,s){
if(s==1){
return(1-1/(1+exp(-(a+b*d))))
}
if(s==2){
return(pnorm(a+b*d))
}
if(s==3){
return(sin(a+b*d)^2)
}
}
loglik_fun=function(f){
return(sum(x*log(ifelse(f>0,f,1))+(n-x)*log(1-ifelse(f<1,f,0))))
}
ite=1000
eta=0.01
alpha=1;beta=1
h=0.01
k=1
for(l in 1:ite){
dp=c((loglik_fun(res_fun(alpha+h,beta,k))-loglik_fun(res_fun(alpha,beta,k)))/h,(loglik_fun(res_fun(alpha,beta+h,k))-loglik_fun(res_fun(alpha,beta,k)))/h)
ddp=array(0,dim=c(2,2))
ddp[1,1]=(loglik_fun(res_fun(alpha+2*h,beta,k))-2*loglik_fun(res_fun(alpha+h,beta,k))+loglik_fun(res_fun(alpha,beta,k)))/(h^2)
ddp[1,2]=ddp[2,1]=(loglik_fun(res_fun(alpha+h,beta+h,k))-loglik_fun(res_fun(alpha+h,beta,k))-loglik_fun(res_fun(alpha,beta+h,k))+loglik_fun(res_fun(alpha,beta,k)))/(h^2)
ddp[2,2]=(loglik_fun(res_fun(alpha,beta+2*h,k))-2*loglik_fun(res_fun(alpha,beta+h,k))+loglik_fun(res_fun(alpha,beta,k)))/(h^2)
param=c(alpha,beta)-eta*solve(ddp)%*%dp
alpha=param[1];beta=param[2]
print(loglik_fun(res_fun(alpha,beta,k)))
}
#cochran-armitage test
p_hat=sum(x)/N
S=(sum(x*d)-sum(n*d)*p_hat)^2/(p_hat*(1-p_hat)*(sum(n*d^2)-sum(n*d)^2/N))
qchisq(1-0.05,df=1)
#p_value
2*(1-pnorm(sqrt(S)))
#直接法(標本数がだいたい全体で50程度で反応率が0.1%~0.2%のとき)
k=length(d)
t0=sum(x*d)
e=p_hat*sum(n*d)
m=abs(t0-e)
library(gtools)
combn=permutations(n=length(c(0:sum(x))),r=k,v=c(0:sum(x)),repeats.allowed = F)
combn=t(combn)
sums=apply(combn,2,sum)
num=c(1:ncol(combn))
num=num[sums==sum(x)]
combn=combn[,num]
value=t(combn)%*%d
num=c(1:ncol(combn))
num_up=num[value>=e+m]
combn_up=combn[,num_up]
num_und=num[value<=e-m]
combn_und=combn[,num_und]
prob=function(X){
val=1
for(i in 1:length(n)){
val=val*choose(n[i],X[i])
}
return(val/choose(N,sum(X)))
}
p_up=0;p_und=0
for(i in 1:ncol(combn_up)){
p_up=p_up+prob(combn_up[,i])
}
for(i in 1:ncol(combn_und)){
p_und=p_und+prob(combn_und[,i])
}
p_up+p_und
Author And Source
この問題について(離散多変量データの解析 柳川尭先生 共立出版), 我々は、より多くの情報をここで見つけました https://qiita.com/kozakai-ryouta/items/fcf5160649b8be4c5590著者帰属:元の著者の情報は、元のURLに含まれています。著作権は原作者に属する。
Content is automatically searched and collected through network algorithms . If there is a violation . Please contact us . We will adjust (correct author information ,or delete content ) as soon as possible .