数量化理論とデータ処理 駒沢勉先生 朝倉書店
#数量化1類 p.10~
library(dummies)
income=c(1,1,2,3,1,2,2,1,3,1,3,2,2,1,3,2,1,2,3,2)
income=dummy(income)
colnames(income)=c("low","normal","high")
job=c(1,3,1,4,4,3,1,3,2,1,1,3,1,3,2,4,2,1,1,2)
job=dummy(job)
colnames(job)=c("office_worker","management","labor","others")
new_or_not=c(1,2,1,2,1,2,1,1,2,2,1,1,2,2,1,1,2,2,1,1)
new_or_not=dummy(new_or_not)
colnames(new_or_not)=c("new","not")
ad=c(2,1,1,2,1,2,1,2,2,1,1,2,2,2,1,1,1,1,2,1)
ad=dummy(ad)
colnames(ad)=c("look","not")
mass=c(1,2,1,2,1,2,1,2,3,2,3,1,2,1,3,3,1,3,1,2)
mass=dummy(mass)
colnames(mass)=c("news_paper","TV","others")
price=c(2,1,5,6,3,2,6,3,5,2,01,4,3,1,9,10,3,4,6,6)
pocket=c(10,8,18,30,15,12,22,12,25,8,20,10,14,6,28,8,10,25,28,28)
Y=cbind(price,pocket)
X=cbind(income,job[,-4],ad[,-1],mass[,-3])
A=t(X)%*%X
beta=solve(A)%*%t(X)%*%Y
Y_hat=X%*%beta
cor=diag(cor(Y,Y_hat))
#t-testによる特徴量の確認 p.36
X=cbind(income,job,ad,mass)
x_ave=(t(X)%*%Y)*c(1/apply(X,2,sum))
A=array(0,dim=c(ncol(X),2))
n=apply(X,2,sum)
for(j in 1:ncol(X)){
A[j,]=(apply(t(Y*X[,j])^2,1,sum)-n[j]*x_ave[j,]^2)
}
t_test=function(X_ave,A2,N){
mat=array(0,dim=c(length(X_ave),length(X_ave)))
pthi=mat;no=mat;qts=mat
for(i in 1:dim(mat)[1]){
for(j in 1:dim(mat)[2]){
mat[i,j]=abs(X_ave[i]-X_ave[j])/sqrt(A2[i]/(N[i]*(N[i]-1))+A2[j]/(N[j]*(N[j]-1)))
C=(A2[i]/(N[i]*(N[i]-1)))/(A2[i]/(N[i]*(N[i]-1))+A2[j]/(N[j]*(N[j]-1)))
pthi[i,j]=C^2/(N[i]-1)+(1-C)^2/(N[j]-1)
no[i,j]=paste0("(",i,",",j,")")
qts[i,j]=qt(1-0.05/2,1/(C^2/(N[i]-1)+(1-C)^2/(N[j]-1)))
}
}
pthi=1/pthi
res=cbind(c(no),c(mat),c(pthi),c(qts))
colnames(res)=c("elements","t","dim","qt_0.05/2")
return(res)
}
t_test(x_ave[4:7,2],A[4:7,2],n[4:7])
#ミニマックス的中率
library(dplyr)
data=data.frame(group1=c("G1","G1","G1","G2","G2"),group2=c("G11","G12","G13","G21","G22"),C11=c(4,3,0,4,3),C12=c(3,3,2,5,3),C13=c(1,1,3,2,3),C21=c(4,2,2,5,3),C22=c(1,1,2,3,1),C23=c(2,3,0,1,4),C24=c(1,1,1,2,1),C31=c(6,2,3,7,4),C32=c(2,5,2,4,5))
n_G1=c(8,7,5);n_G2=c(11,9);N=sum(n_G1,n_G2)
pis=c(sum(n_G1)/N,sum(n_G2)/N)
library(stringr)
mu1=data %>% filter(group1=="G1");mu1=mu1[,str_count(colnames(mu1),"group")==0]
f1=mu1
for(j in 1:3){
no=paste0("C",j)
f1=f1[,!(colnames(f1) %in% paste0(no,"1"))]
}
no_s=apply(mu1,2,sum)
sigma1=sd(as.matrix(mu1))
mu1=mean(as.matrix(mu1))
mu2=data %>% filter(group1=="G2");mu2=mu2[,str_count(colnames(mu2),"group")==0]
f2=mu2
for(j in 1:3){
no=paste0("C",j)
f2=f2[,!(colnames(f2) %in% paste0(no,"1"))]
}
sigma2=sd(as.matrix(mu2))
mu2=mean(as.matrix(mu2))
sigma_t=sd(as.matrix(data[,str_count(colnames(data),"group")==0]))
alpha_dash=abs(mu1-mu2)/(sigma1+sigma2)
p_star=pnorm(alpha_dash)-pnorm(0)+0.5
sigma_B=sqrt(prod(pis)*(mu2-mu1)^2)
eta2=(sigma_B^2)/(sigma_t^2)
#alpha_dash
(sigma_t/(sigma1+sigma2))*sqrt(eta2/prod(pis))
data2=data.frame(group1=c("X1","X1","X1","X2","X2","X2","X2","X3","X3"),group2=c("C11","C12","C13","C21","C22","C23","C24","C31","C32"),C11=c(7,0,0,2,1,3,1,3,4),C12=c(0,8,0,4,1,2,1,5,3),C13=c(0,0,5,2,2,0,1,3,2),C21=c(2,4,2,8,0,0,0,5,3),C22=c(1,1,2,0,4,0,0,2,2),C23=c(3,2,0,0,0,5,0,2,3),C24=c(1,1,1,0,0,0,3,2,1),C31=c(3,5,3,5,2,2,2,11,0),C32=c(4,3,2,3,2,3,1,0,9))
H=as.matrix(data2[,str_count(colnames(data2),"group")==0])
no_s_data=data.frame(num=c(1:length(no_s)),no_s=names(no_s))
no_num=c()
for(j in 1:nrow(no_s_data)){
no=no_s_data$no_s[j]
if(substring(no,3)=="1"){
no_num=c(no_num,j)
}
}
H=H[-no_num,-no_num]
A1=array(0,dim=c(ncol(f1),ncol(f1)))
B1=array(0,dim=c(ncol(f1),ncol(f1)))
for(i in 1:ncol(f1)){
p_i=c(f1[,i]/n_G1,-diag(H)[i]/sum(n_G1))
a_vec=c();b_vec=c()
for(j in 1:ncol(f1)){
q_j=c(f1[,j],diag(H)[j])
a=sum(p_i*q_j);b=H[i,j]-diag(H)[i]*diag(H)[j]/sum(n_G1)
a_vec=c(a_vec,a);b_vec=c(b_vec,b)
}
A1[i,]=a_vec
B1[i,]=b_vec
}
P=c(1/n_G2);P[length(P)]=P[length(P)]*(-1)
d=apply(f2*P,2,sum)
solve(B1,d)
#3.7 的中率pを求める数量化 p.54~p.68
#mu2>=mu1
n1=80;n2=100;n=n1+n2
x1=rnorm(n1,-1,2)
x2=rnorm(n2,8,3)
mu1=mean(x1);mu2=mean(x2)
sd1=sd(x1);sd2=sd(x2);sd_t=sd(c(x1,x2))
f=function(a){
return(pnorm(a,mu1,sd1)-(1-pnorm(a,mu2,sd2)))
}
ite=100
eta=0.1
x=1
h=0.01
for(l in 1:ite){
df=(f(x+h)-f(x))/h
x=x-eta*f(x)/df
print(f(x))
}
x0=(sd2*mu1+sd1*mu2)/(sd1+sd2)
#ミニマックス的中率
x0_seiki=abs(mu1-mu2)/(sd1+sd2)
p=pnorm(abs(mu1-mu2)/(sd1+sd2))
alpha1=(mu2-mu1)/(sd1+sd2)
alpha2=(mu1-mu2)/(sd1+sd2)
pi1=n1/n;pi2=n2/n
#sd_Bは分離度を表す。
sd_B=sqrt(pi1*pi2*(mu1-mu2)^2)
eta2=sd_B^2/(sd_t^2)
plot(c(1:length(x1)),x1,xlim=c(1,max(c(n1,n2))),ylim=c(min(c(x1,x2)),max(c(x1,x2))),xlab="index",ylab="values",col=2)
par(new=T)
plot(c(1:length(x2)),x2,xlim=c(1,max(c(n1,n2))),ylim=c(min(c(x1,x2)),max(c(x1,x2))),xlab="index",ylab="values",col=3,main=paste0("ミニマックス的中率:",round(p,3),",","相関比:",round(eta2,3)))
#数量化理論とデータ処理 p.71 グラフプロットでの確認
library(dplyr)
L=9
p=sample(1:9,L);p=p/sum(p)
q=sample(1:9,L);q=q/sum(q)
c_vec=seq(1,10,0.0001)
plot_data=data.frame(c=c_vec,tau=0,uv=0)
for(l in 1:length(c_vec)){
c=c_vec[l]
u=((sum((p+q)*p/(p-q*c))*(1+c)/2)/(1-sum((p-q*c)*p/(p+q*c))))
if(u<0){
#u=sqrt(u)
plot_data$tau[l]=NaN
plot_data$uv[l]=NaN
}else{
u=sqrt(u)
x=((p-q)*(1+c)/(2*u)+u*(p-q*c))/(p+q*c)
x=x-mean(x)
sig_p=sum(p*(x-u)^2)
sig_q=sum(q*(x-(-u))^2)
tau=sig_p/sig_q
lam=2*u/(1+c)
mu=2*u*c/(1+c)
plot_data$tau[l]=tau
plot_data$uv[l]=2*u
}
#c=mu/lam
#print(tau)
}
plot_data=plot_data%>%filter(is.nan(tau)!=T)
plot(plot_data$c,plot_data$tau,col=2,xlab="c",ylab="tau",main="")
c=plot_data$c[abs(plot_data$tau-1)==min(abs(plot_data$tau-1))]
u=sqrt((sum((p+q)*p/(p-q*c))*(1+c)/2)/(1-sum((p-q*c)*p/(p+q*c))))
x=((p-q)*(1+c)/(2*u)+u*(p-q*c))/(p+q*c)
x=x-mean(x)
sig_p=sum(p*(x-u)^2)
sig_q=sum(q*(x-(-u))^2)
tau=sig_p/sig_q
lam=2*u/(1+c)
mu=2*u*c/(1+c)
mean(x)
#数量化理論とデータ処理 準ニュートン法
library(dplyr)
L=5
p=sample(1:9,L);p=p/sum(p)
q=sample(1:9,L);q=q/sum(q)
#Broyden Quasi-Newton Algorithm p.213
f=function(x){
lambda=x[1];mus=x[2];x=x[-c(1:2)]
c=mus/lambda
f1=sum(p*(x-sum(p*x))^2)-1
f2=sum(q*(x-sum(q*x))^2)-1
U=(lambda+mus)/2
f3=(p-q)-lambda*(p*x-p*U)-mus*(q*x-q*(-U))
return(c(f1,f2,f3))
}
X=rep(1,L+2)
ite=10^5
eta=0.001
H=diag(f(X))
for(l in 1:ite){
if(sum(abs(f(X)))>10^(-9)){
X_pre=X
X=X-eta*H%*%f(X)
#X[-c(1:2)]=X[-c(1:2)]-mean(X[-c(1:2)])
s=X-X_pre
y=f(X)-f(X_pre)
H=H+((s-H%*%y)/as.numeric(t(s)%*%H%*%y))%*%t(s)%*%H
#print(paste0(sum(X[1:2]),",",sum(abs(f(X)))))
#print(sum(abs(f(X))))
}
}
#数量化3類 p.90~
data=data.frame(パターン=1:13,個数=c(1,3,2,5,3,2,4,3,1,1,3,1,1),無口である=c(0,0,0,0,0,0,0,1,0,1,0,0,0),生真面目である=c(1,0,0,0,1,0,0,0,0,1,0,0,0),空想を好む=c(1,1,0,0,0,0,1,0,0,0,1,0,1),活発である=c(1,0,0,1,0,0,1,0,0,0,1,1,1),冗談をいう=c(1,1,0,1,0,1,1,0,1,0,0,0,0),きれい好きである=c(0,0,1,0,1,0,0,1,1,0,0,0,0),粘り強い=c(0,0,0,0,1,0,0,1,1,1,0,1,1))
delta=as.matrix(data[,-c(1:2)])
t=sum(delta)
n=nrow(delta);m=ncol(delta)
c=apply(delta,2,sum)
d=apply(delta,1,sum)
C=diag(c);D=diag(d)
X=t(delta)
A=solve(sqrt(C))%*%X%*%solve(D)%*%t(X)%*%solve(sqrt(C))
lam=eigen(A)$values
z=eigen(A)$vectors
#カテゴリウエイト
u=solve(sqrt(C))%*%z[,2]*sqrt(t)
#個体ウエイト
v=lam[2]*sqrt(t)*solve(D)%*%t(X)%*%solve(sqrt(C))%*%z[,2]
#カテゴリ得点
y=X%*%v
#個体得点
w=t(X)%*%u
#寄与率
lam[-1]/sum(lam[-1])
Author And Source
この問題について(数量化理論とデータ処理 駒沢勉先生 朝倉書店), 我々は、より多くの情報をここで見つけました https://qiita.com/kozakai-ryouta/items/d8af603638fe7b7ae9aa著者帰属:元の著者の情報は、元のURLに含まれています。著作権は原作者に属する。
Content is automatically searched and collected through network algorithms . If there is a violation . Please contact us . We will adjust (correct author information ,or delete content ) as soon as possible .