# スパース性に基づく機械学習 富岡亮太先生 講談社 (Tomioka, "Sparsity-Based Machine Learning", Kodansha)
#6.3 繰り返し重み付き縮小法 p.72
# Iteratively reweighted shrinkage: alternate gradient descent on the
# eta-weighted ridge objective ||Y - Xw||^2 + (lam/2)*sum(w^2/eta) with the
# closed-form reweighting eta = |w|, which approximates the lasso solution.
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y
eta <- rep(1, ncol(X))          # per-coefficient reweighting factors
mu <- 10^(-8)                   # gradient step size
lam <- 1                        # regularization strength
w <- rep(1, ncol(X))
ite <- 10^5
for (l in seq_len(ite)) {
  w_pre <- w
  cost <- 100; cost_pre <- 0
  # Inner loop: plain gradient descent on the weighted ridge objective,
  # stopping once the cost change falls below 0.1.
  while (abs(cost - cost_pre) > 10^(-1)) {
    cost_pre <- cost
    w <- w - mu * (-2 * t(X) %*% (Y - X %*% w) + lam * w / eta)
    cost <- sum((Y - X %*% w)^2) + lam * sum(w^2 / eta) / 2
  }
  cost2 <- cost + sum(eta) * lam / 2  # full objective including the eta term
  # Closed-form eta update, floored at a tiny value so the w/eta term above
  # never divides by zero when a coefficient is driven exactly to 0.
  eta <- pmax(abs(w), 10^(-8))
  print(cost2)
}
#lasso 最急降下法 (proximal gradient) ISTA
# Lasso via proximal gradient descent (ISTA): gradient step on the squared
# loss followed by soft-thresholding (the prox of lam*mu*||.||_1).
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y
mu <- 10^(-8)   # step size
lam <- 1        # l1 penalty weight
w <- rep(1, ncol(X))
ite <- 10^2
cost_vec <- numeric(ite)  # preallocated; growing with c() in a loop is O(n^2)
for (l in seq_len(ite)) {
  dw <- w - mu * (-2 * t(X) %*% (Y - X %*% w))  # gradient step
  # soft-thresholding at lam*mu
  w <- ifelse(abs(dw) <= lam * mu, 0,
              ifelse(dw > lam * mu, dw - lam * mu, dw + lam * mu))
  cost <- sum((Y - X %*% w)^2) + lam * sum(abs(w))
  cost_vec[l] <- cost
  print(cost)
}
cost_vec_ISTA <- cost_vec
#lasso 最急降下法 (accelerated proximal gradient) FISTA
# Lasso via accelerated proximal gradient (FISTA): ISTA plus a Nesterov
# momentum/extrapolation step driven by the eta (t_k) sequence.
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y
mu <- 10^(-8)   # step size
lam <- 1        # l1 penalty weight
w <- rep(1, ncol(X))
ite <- 10^2
eta <- 0        # momentum sequence t_k; first update yields t_1 = 1
cost_vec <- numeric(ite)  # preallocated; growing with c() in a loop is O(n^2)
for (l in seq_len(ite)) {
  # NOTE(review): textbook FISTA extrapolates from the previous prox point b,
  # not from the extrapolated w — kept as the original to preserve output;
  # confirm against the reference.
  b_pre <- w
  dw <- w - mu * (-2 * t(X) %*% (Y - X %*% w))  # gradient step
  # soft-thresholding at lam*mu
  b <- ifelse(abs(dw) <= lam * mu, 0,
              ifelse(dw > lam * mu, dw - lam * mu, dw + lam * mu))
  eta_pre <- eta
  eta <- (1 + sqrt(1 + 4 * eta^2)) / 2          # Nesterov schedule
  w <- b + ((eta_pre - 1) / eta) * (b - b_pre)  # extrapolation step
  cost <- sum((Y - X %*% w)^2) + lam * sum(abs(w))
  print(cost)
  cost_vec[l] <- cost
}
cost_vec_FISTA <- cost_vec
#costの減少具合の違い
# Overlay the ISTA and FISTA cost curves on shared axes. Using points() for
# the second series avoids the par(new=T) hack, which redraws axes and axis
# labels on top of the first plot.
cost_range <- range(c(cost_vec_ISTA, cost_vec_FISTA))
plot(seq_len(ite), cost_vec_ISTA, xlim = c(1, ite), ylim = cost_range,
     col = 2, xlab = "index", ylab = "cost")
points(seq_len(ite), cost_vec_FISTA, col = 3)
#カーネルを用いたlasso回帰(近似的にSVM lasso?)
# Build the Gaussian (RBF, bandwidth 1) Gram matrix for kernel lasso:
# K[j, i] = exp(-||x_j - x_i||^2 / 2).
data <- data.frame(
  num = 1:20,
  y = c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22, 0.71, 0.40),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  sign = 0
)
Y <- data$y * 100
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2", "x3")])
kernel <- array(0, dim = c(nrow(X), nrow(X)))
for (j in seq_len(nrow(X))) {
  # squared distances from x_j to every row of X (colSums over coordinates)
  kernel[j, ] <- exp(-colSums((t(X) - c(X[j, ]))^2) / 2)
}
# The Gram matrix is exactly symmetric, so the original G = cbind(kernel);
# K = t(G) round-trip was a no-op and is dropped.
K <- kernel
#lasso 再急降下法 ISTA
# ISTA lasso in the kernel feature space: regress Y on the Gram matrix K,
# so each coefficient weights one training point's kernel column.
X <- K          # design matrix is the 20x20 Gram matrix
mu <- 10^(-2)   # step size
lam <- 1        # l1 penalty weight
w <- rep(1, ncol(X))
ite <- 10^5
cost_vec <- numeric(ite)  # preallocated; growing with c() in a loop is O(n^2)
for (l in seq_len(ite)) {
  dw <- w - mu * (-2 * t(X) %*% (Y - X %*% w))  # gradient step
  # soft-thresholding at lam*mu
  w <- ifelse(abs(dw) <= lam * mu, 0,
              ifelse(dw > lam * mu, dw - lam * mu, dw + lam * mu))
  cost <- sum((Y - X %*% w)^2) + lam * sum(abs(w))
  cost_vec[l] <- cost
  print(cost)
}
cost_vec_ISTA <- cost_vec
#L1 logistic(lasso)
# L1-regularized (lasso) logistic regression via proximal gradient descent.
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y; Y <- ifelse(Y - mean(Y) > 0, 1, 0)  # binarize at the mean
lam <- 5
# Penalized negative log-likelihood; the ifelse() guards keep log(0) out.
obs <- function(v) {
  p <- 1 / (1 + exp(-X %*% v))
  sum(-Y * log(ifelse(p > 0, p, 1)) - (1 - Y) * log(ifelse(1 - p > 0, 1 - p, 1))) +
    lam * sum(abs(v))
}
gamma <- 1 / (max(svd(X)$d^2))  # step size from the largest singular value
w <- rep(0.01, ncol(X))
ite <- 10^5
for (l in seq_len(ite)) {
  p <- 1 / (1 + exp(-X %*% w))
  dw <- w - gamma * t(X) %*% (p - Y)  # gradient step on the NLL
  thresh <- gamma * lam
  # Soft-thresholding in one shot. The original shrank the large entries
  # first and THEN zeroed |dw| < thresh, which wrongly wiped out entries
  # that started in (thresh, 2*thresh); sign()*pmax() has no ordering bug.
  w <- sign(dw) * pmax(abs(dw) - thresh, 0)
  observation <- obs(w)
  print(observation)
}
#L2 logistic(ridge)
# L2-regularized (ridge) logistic regression via proximal gradient descent.
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y; Y <- ifelse(Y - mean(Y) > 0, 1, 0)  # binarize at the mean
lam <- 1
# Penalized negative log-likelihood; the ifelse() guards keep log(0) out.
obs <- function(v) {
  p <- 1 / (1 + exp(-X %*% v))
  sum(-Y * log(ifelse(p > 0, p, 1)) - (1 - Y) * log(ifelse(1 - p > 0, 1 - p, 1))) +
    lam * sum(v^2)  # abs() around a square was redundant
}
gamma <- 1 / (max(svd(X)$d^2))  # step size from the largest singular value
w <- rep(0.01, ncol(X))
ite <- 10^5
for (l in seq_len(ite)) {
  p <- 1 / (1 + exp(-X %*% w))
  dw <- w - gamma * t(X) %*% (p - Y)  # gradient step on the NLL
  # prox of gamma*lam*||.||^2 is uniform multiplicative shrinkage
  w <- dw / (1 + 2 * gamma * lam)
}
# The objective was recomputed and discarded every iteration in the original
# (its print was commented out); evaluate it once after convergence instead.
observation <- obs(w)
#print(observation)
# logistic(ElasticNet)
# Elastic-net logistic regression (L1 + L2 penalties) via proximal gradient:
# the combined prox is soft-thresholding followed by uniform shrinkage.
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y; Y <- ifelse(Y - mean(Y) > 0, 1, 0)  # binarize at the mean
lam1 <- 1; lam2 <- 1  # L1 / L2 penalty weights
# Penalized negative log-likelihood; the ifelse() guards keep log(0) out.
obs <- function(v) {
  p <- 1 / (1 + exp(-X %*% v))
  sum(-Y * log(ifelse(p > 0, p, 1)) - (1 - Y) * log(ifelse(1 - p > 0, 1 - p, 1))) +
    lam1 * sum(abs(v)) + lam2 * sum(v^2)  # abs() around a square was redundant
}
gamma <- 1 / (max(svd(X)$d^2))  # step size from the largest singular value
w <- rep(0.01, ncol(X))
ite <- 10^5
for (l in seq_len(ite)) {
  p <- 1 / (1 + exp(-X %*% w))
  dw <- w - gamma * t(X) %*% (p - Y)  # gradient step on the NLL
  # L1 prox: one-shot soft-threshold. The original shrank large entries
  # first and then zeroed |dw| < thresh, which wrongly wiped out entries
  # that started in (thresh, 2*thresh).
  dw <- sign(dw) * pmax(abs(dw) - gamma * lam1, 0)
  # L2 prox: uniform multiplicative shrinkage
  w <- dw / (1 + 2 * gamma * lam2)
  observation <- obs(w)
  print(observation)
}
#L2 logistic(ridge) Fisher-score method
# Ridge logistic regression via Fisher scoring (penalized IRLS / Newton).
data <- data.frame(
  num = 1:20,
  y = c(167, 167.5, 168.4, 172, 155.3, 151.4, 163, 174, 168, 160.4, 164.7, 171, 162.6, 164.8, 163.3, 167.6, 169.2, 168, 167.4, 172),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56)
)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2")])
X <- cbind(rep(1, nrow(X)), X)  # prepend intercept column
Y <- data$y; Y <- ifelse(Y - mean(Y) > 0, 1, 0)  # binarize at the mean
n <- length(Y); m <- ncol(X)
lam <- 0.01; K <- diag(rep(1, m))  # penalty matrix (identity -> plain ridge)
# Penalized log-likelihood (maximized); the ifelse() guards keep log(0) out.
obs <- function(v) {
  p <- 1 / (1 + exp(-X %*% v))
  sum(Y * log(ifelse(p > 0, p, 1)) + (1 - Y) * log(ifelse(1 - p > 0, 1 - p, 1))) -
    lam * n * as.numeric(t(c(v)) %*% K %*% t(t(c(v)))) / 2
}
gamma <- 1 / (max(svd(X)$d^2))  # kept from the original; Fisher scoring needs no step size
w <- rep(0.01, ncol(X))
ite <- 10^5
for (l in seq_len(ite)) {
  p <- c(1 / (1 + exp(-X %*% w)))
  wts <- p * (1 - p)  # Fisher information weights, diag of W
  # Penalized IRLS normal equations:
  #   (X'WX + n*lam*K) w_new = X'W(Xw + W^{-1}(Y - p)) = X'WXw + X'(Y - p)
  # W is diagonal, so scale rows of X by wts instead of building/inverting
  # full diagonal matrices as the original did (its `pi` also shadowed
  # base::pi, and its `lamda` matrix was computed but never used).
  w <- solve(t(X * wts) %*% X + n * lam * K,
             t(X * wts) %*% (X %*% w) + t(X) %*% (Y - p))
  print(obs(w))
}
# Author And Source
# この問題について(スパース性に基づく機械学習 富岡亮太先生 講談社)、我々は、より多くの情報をここで見つけました: https://qiita.com/kozakai-ryouta/items/0454ceed1bdccd79c92e
# 著者帰属: 元の著者の情報は、元のURLに含まれています。著作権は原作者に属する。
# Content is automatically searched and collected through network algorithms. If there is a violation, please contact us. We will adjust (correct author information, or delete content) as soon as possible.