多次元尺度解析法 --その有効性と問題点--




#多次元尺度解析法 --その有効性と問題点-- 林知己夫、飽戸弘著

#p.39 3.4 個人差の問題と非対称の問題

library(plm);library(dplyr)

data("EmplUK",package="plm")

data=EmplUK%>%filter(firm %in% c(1:4))%>%select(-c("year","sector"))

for(j in 2:ncol(data)){

data[,j]=(data[,j]-mean(data[,j]))/sd(data[,j])

}

K=4;S=4;w=array(1/K,dim=c(K,S))

summary=data%>%group_by(firm)%>%summarise(n=n())


ite=100

x=array(0,dim=c(summary$n[1],S))

for(l in 1:ite){

x_pre=x;w_pre=w

mat=array(0,dim=c(summary$n[1],S))

for(k in 1:K){

sub=data%>%filter(firm==k)%>%select(-firm)

sub=as.matrix(sub)

y=t(t(sub)*w[k,])

w[k,]=apply(y^2,2,sum)

mat=mat+y^2

}

x=sqrt(mat)

w=t(t(w)/apply(mat,2,sum))

print(sum(abs(x-x_pre))+sum(abs(w-w_pre)))

}





#多次元尺度解析法 --その有効性と問題点-- 林知己夫、飽戸弘著

#p.77 kruscal method

m=3

mat=matrix(0,nrow=10,ncol=10)

mat[1,]=c(0,8.89,10.16,15.84,16.84,33.01,32.69,43.47,37.78,39.31)
mat[2,]=c(0,0,9.64,15.37,20.45,30.75,34.09,41.61,39.26,41.47)
mat[3,]=c(0,0,0,6.12,11.2,22.98,24.56,33.61,29.75,31.86)
mat[4,]=c(0,0,0,0,8.48,17.20,18.75,27.64,23.91,26.23)
mat[5,]=c(0,0,0,0,0,20.77,16.25,29.52,21.17,22.49)
mat[6,]=c(0,0,0,0,0,0,13.01,10.86,15.61,19.06)
mat[7,]=c(0,0,0,0,0,0,0,15.99,5.19,7.69)
mat[8,]=c(rep(0,8),14.84,17.68)
mat[9,]=c(rep(0,9),3.45)

mat=mat+t(mat)

d=mat

n=nrow(d)

XX=array(0,dim=c(n,m))

for(j in 1:ncol(XX)){

XX[,j]=rpois(n,lambda=sample(1000,1))  

}

d_hat=array(1,dim=c(n,n))

X_hat=XX

ite=10^6

for(s in 1:ite){

A=sum((d-d_hat)^2)

d_bar=sum(d_hat)/(nrow(d_hat)*(nrow(d_hat)-1))

B=sum((d-d_bar)^2)

eta=sqrt(A/B)

d_eta=2*eta

det=function(i,a){

z=0

num=c(1:n);num=num[num!=i]

for(k in num){

z=z+((d[i,k]-d_hat[i,k])/A-(d[i,k]-d_bar)/B)*(X_hat[i,a]-X_hat[k,a])/d[i,k]  

}

return(2*eta*z)

}

X_hat_sub=X_hat

deta_mat=array(0,dim=dim(X_hat))

for(i in 1:nrow(X_hat)){
for(a in 1:ncol(X_hat)){  

X_hat_sub[i,a]=X_hat[i,a]+det(i,a) 

deta_mat[i,a]=det(i,a)

}
}

X_hat=X_hat_sub

d_hat=array(0,dim=c(n,n))

for(j in 1:n){
for(i in 1:n){  

d_hat[i,j]=sqrt(sum(abs(X_hat[i,]-X_hat[j,])^2) )  

}
}

print(eta)

}




#MDA-UO

data=array("0",dim=c(9,9))

data[1,1:9]=c("A","AB","G","DA","A","G","G","A","A")
data[2,2:9]=c("B","BC","G","B","B","G","G","B")
data[3,3:9]=c("C","CD","G","C","C","G","C")
data[4,4:9]=c("D","G","G","D","D","D")
data[5,5:9]=c("AB","B","G","A","AB")
data[6,6:9]=c("BC","C","G","BC")
data[7,7:9]=c("CD","D","CD")
data[8,8:9]=c("DA","DA")
data[9,9:9]=c("G")

for(j in 1:8){

data[(j+1),1:j]=t(data)[(j+1),1:j]  

}

alphabets=unique(as.character(c(data)))

n=c()

for(j in 1:length(alphabets)){

n=c(n,length(data[data==alphabets[j]]))  

}

n_vec=data.frame(alphabets=alphabets,n_g=n)

N=sqrt(sum(n))

H=array(0,dim=c(length(alphabets),length(alphabets)))

for(i in 1:nrow(H)){
for(j in 1:ncol(H)){  

vec1=as.character(data[i,])

vec2=as.character(data[j,])

value=0

for(l in 1:length(alphabets)){

value=value+(1/N)*(length(vec1[vec1==alphabets[l]])*length(vec2[vec2==alphabets[l]]))/n_vec$n_g[n_vec$alphabets==alphabets[l]]  

}

H[i,j]=value   

}
}

eta2=eigen(H)$values

eta2=eta2[2:3]

X=eigen(H)$vectors[,2:3]

apply(X,2,mean)

apply(X,2,sd)

plot(X[,1],X[,2])

n_vec=n_vec %>% mutate(x_g_bar1=0,x_g_bar2=0)

x1=X[,1];x2=X[,2]

for(l in 1:length(alphabets)){

x_bar_g=c();

for(i in 1:9){

vec=data[i,]

x_bar_g=c(x_bar_g,length(vec[vec==alphabets[l]])) 


}

n_vec$x_g_bar1[n_vec$alphabets==alphabets[l]]=sum(x1*x_bar_g)/n_vec$n_g[n_vec$alphabets==alphabets[l]]

n_vec$x_g_bar2[n_vec$alphabets==alphabets[l]]=sum(x2*x_bar_g)/n_vec$n_g[n_vec$alphabets==alphabets[l]]

}

sigma_b1=sum(n_vec$n_g*n_vec$x_g_bar1^2)/sum(n)

sigma_b2=sum(n_vec$n_g*n_vec$x_g_bar2^2)/sum(n)

sigma1=sum(x1^2)/sqrt(sum(n))-mean(x1)^2

sigma2=sum(x2^2)/sqrt(sum(n))-mean(x2)^2

sigma_b1/sigma1

sigma_b2/sigma2