質的情報の多変量解析 松田紀之先生 朝倉出版



#p.66 質的情報の多変量解析

library(dplyr)

data=data.frame(num=1:20,y=c(167,167.5,168.4,172,155.3,151.4,163,174,168,160.4,164.7,171,162.6,164.8,163.3,167.6,169.2,168,167.4,172),x1=c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82),x2=c(61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56))

Y=round(data$y);X=as.matrix(cbind(data$x1,data$x2));X=cbind(rep(1,nrow(X)),X)

n=Y

ite=10^5

eta=0.001

params=rep(0.1,ncol(X))

error=c()

log_lik_pre=-10^7

log_lik=-100

log_lik_vec=c()

for(l in 1:ite){

if(abs(log_lik-log_lik_pre)>0.01){

log_lik_pre=log_lik

dL=c()

for(j in 1:ncol(X)){

dL=c(dL,sum(n*X[,j])-sum(exp(X%*%params)*X[,j]))

}

dLL=array(0,dim=c(ncol(X),ncol(X)))


for(i in 1:ncol(X)){
for(j in 1:ncol(X)){

dLL[i,j]=-sum(exp(X%*%params)*X[,i]*X[,j])

}}

params=params-eta*(svd(dLL)$u%*%diag(1/svd(dLL)$d)%*%t(svd(dLL)$v)%*%dL)

log_lik=sum((X%*%params)*n-exp(X%*%params))

print(log_lik)

log_lik_vec=c(log_lik_vec,log_lik)

}

}  


plot(log_lik_vec)



#p.149

library(dplyr)

mat=matrix(c(50,28,11,14,0.5,45,174,78,150,42,8,84,110,185,72,18,154,223,714,320,8,55,96,447,411),ncol=5)

a=nrow(mat);b=ncol(mat)



log_lik=function(p,u_ab){

u=p[1];p=p[-1];u_a=p[1:a];p=p[-c(1:a)];u_b=p[1:b];p=p[-c(1:b)];

m_mat=array(0,dim=c(a,b))

n_mat=0

for(i in 1:a){
for(j in 1:b){

m_mat[i,j]=u+u_a[i]+u_b[j]+u_ab[i,j]

n_mat=n_mat+sum(log(c(1:mat[i,j])))

}}

val=sum(mat*log(m_mat)-m_mat)-n_mat

return(val)

}

param=rep(0.1,a+b+1)

uab=array(0.1,dim=c(a,b))


#iteration

ite=100000

eta=0.01

h=0.01

for(l in 1:ite){

vec=param

for(j in 1:length(param)){

vec_sub=vec;vec_sub[j]=vec_sub[j]+h

param[j]=param[j]+eta*(log_lik(vec_sub,uab)-log_lik(vec,uab))/h

}

vec=uab

for(i in 1:a){
for(j in 1:b){

vec_sub=vec;vec_sub[i,j]=vec_sub[i,j]+h

uab[i,j]=uab[i,j]+eta*(log_lik(param,vec_sub)-log_lik(param,vec))/h

}}

print(log_lik(param,uab))

}







#p.173



mat=matrix(c(118,58,20,16,74,34,14,20,20,14,30,80,18,22,66,120),ncol=4)

a=nrow(mat);b=ncol(mat);t=2

pij=mat/sum(mat)

pi_A_B1=pij

mat=matrix(c(18,78,20,36,74,84,14,20,50,14,30,80,18,22,66,120),ncol=4)

a=nrow(mat);b=ncol(mat);t=2

pij=mat/sum(mat)

pi_A_B2=pij


pi_x=sample(1:9,t,replace=T);pi_x=pi_x/sum(pi_x)

pi_A=matrix(sample(1:9,a*t,replace=T),nrow=a,ncol=t)

pi_A=t(t(pi_A)/apply(pi_A,2,sum))

pi_B=matrix(sample(1:9,b*t,replace =T),nrow=b,ncol=t)

pi_B=t(t(pi_B)/apply(pi_B,2,sum))



ite=210

for(l in 1:ite){

pi_A_B_sum=array(0,dim=c(a,b))

for(s in 1:t){

pi_Xt=pi_x[s]

pi_A_Xt=pi_A[,s]

pi_B_Xt=pi_B[,s]

pi_A_B_sum=pi_A_B_sum+pi_Xt*t(t(c(pi_A_Xt)))%*%t(c(pi_B_Xt))  

}

df_vec=c()  

dp_vec=c()

for(s in 1:t){ 

if(s==1){

pi_A_B=pi_A_B1

}else{

pi_A_B=pi_A_B2

}

pi_Xt=pi_x[s]

pi_A_Xt=pi_A[,s]

pi_B_Xt=pi_B[,s]

pi_Xt_pre=pi_Xt

pi_A_Xt_pre=pi_A_Xt

pi_B_Xt_pre=pi_B_Xt

pi_A_B_Xt=pi_Xt*t(t(c(pi_A_Xt)))%*%t(c(pi_B_Xt))

pi_Xt_A_B=pi_A_B_Xt/pi_A_B_sum

pi_Xt=sum(pi_A_B*pi_Xt_A_B)

pi_A_Xt=apply(pi_A_B*pi_Xt_A_B,1,sum)/pi_x[s]

pi_B_Xt=apply(pi_A_B*pi_Xt_A_B,2,sum)/pi_x[s]

df=sum(abs(pi_Xt-pi_Xt_pre))+sum(abs(pi_A_Xt-pi_A_Xt_pre))+sum(abs(pi_B_Xt-pi_B_Xt_pre))

dp=sum(abs(pi_A_B-pi_A_B_Xt))

pi_x[s]=pi_Xt

pi_A[,s]=pi_A_Xt

pi_B[,s]=pi_B_Xt

df_vec=c(df_vec,df)

dp_vec=c(dp_vec,dp)

}

#print(sum(df_vec))

print(sum(dp_vec))

}