Denoising Autoencoders (dA) and Stacked Denoising Autoencoders (SdA)
The idea behind denoising autoencoders is simple: to force the hidden units to discover more robust features, and to prevent the network from merely learning the identity function, the autoencoder is trained to reconstruct the original input from a corrupted copy of it. The denoising autoencoder is thus a stochastic version of the autoencoder. Intuitively, it tries to do two things: encode the input (that is, preserve the information it carries), and undo the effect of the corruption process that was randomly applied to it. The latter can only be achieved by capturing the statistical dependencies between the inputs. Denoising autoencoders can also be understood from several other angles.

To turn the autoencoder class into a denoising autoencoder class, all we need to do is add a stochastic corruption step that operates on the input.
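For reference, the forward pass implemented below can be written compactly. With $\tilde{x}$ the corrupted input, $s(\cdot)$ the sigmoid, $b$ the hidden bias (hbias), $b'$ the visible bias (vbias), and tied weights $W^{T}$ in the decoder, as in the code, training minimizes the cross-entropy between the clean input $x$ and the reconstruction $z$:

$$\tilde{x} \sim q_D(\tilde{x} \mid x), \qquad y = s(W\tilde{x} + b), \qquad z = s(W^{T}y + b')$$

$$L(x, z) = -\sum_{k=1}^{d} \left[ x_k \log z_k + (1 - x_k) \log(1 - z_k) \right]$$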
//dA.h
class dA {
public:
int N;
int n_visible;
int n_hidden;
double **W;
double *hbias;
double *vbias;
dA(int, int, int , double**, double*, double*);
~dA();
void get_corrupted_input(int*, int*, double);
void get_hidden_values(int*, double*);
void get_reconstructed_input(double*, double*);
void train(int*, double, double);
void reconstruct(int*, double*);
};
//dA.cpp
#include <iostream>
#include <stdio.h>  // printf
#include <stdlib.h> // rand, srand, RAND_MAX
#include <math.h>
#include "dA.h"
using namespace std;
// uniform random double in [min, max)
double uniform(double min, double max) {
return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}
// draw from Binomial(n, p); binomial(1, p) is a single Bernoulli trial
int binomial(int n, double p) {
if(p < 0 || p > 1) return 0;
int c = 0;
double r;
for(int i=0; i<n; i++) {
r = rand() / (RAND_MAX + 1.0);
if (r < p) c++;
}
return c;
}
double sigmoid(double x) {
return 1.0 / (1.0 + exp(-x));
}
dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
N = size;
n_visible = n_v;
n_hidden = n_h;
if(w == NULL) {
W = new double*[n_hidden];
for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
double a = 1.0 / n_visible;
for(int i=0; i<n_hidden; i++) {
for(int j=0; j<n_visible; j++) {
W[i][j] = uniform(-a, a);
}
}
} else {
W = w;
}
if(hb == NULL) {
hbias = new double[n_hidden];
for(int i=0; i<n_hidden; i++) hbias[i] = 0;
} else {
hbias = hb;
}
if(vb == NULL) {
vbias = new double[n_visible];
for(int i=0; i<n_visible; i++) vbias[i] = 0;
} else {
vbias = vb;
}
}
dA::~dA() {
for(int i=0; i<n_hidden; i++) delete[] W[i];
delete[] W;
delete[] hbias;
delete[] vbias;
}
// masking corruption: units that are 0 stay 0; each active unit is kept with probability p
void dA::get_corrupted_input(int *x, int *tilde_x, double p) {
for(int i=0; i<n_visible; i++) {
if(x[i] == 0) {
tilde_x[i] = 0;
} else {
tilde_x[i] = binomial(1, p);
}
}
}
// Encode
void dA::get_hidden_values(int *x, double *y) {
for(int i=0; i<n_hidden; i++) {
y[i] = 0;
for(int j=0; j<n_visible; j++) {
y[i] += W[i][j] * x[j];
}
y[i] += hbias[i];
y[i] = sigmoid(y[i]);
}
}
// Decode
void dA::get_reconstructed_input(double *y, double *z) {
for(int i=0; i<n_visible; i++) {
z[i] = 0;
for(int j=0; j<n_hidden; j++) {
z[i] += W[j][i] * y[j];
}
z[i] += vbias[i];
z[i] = sigmoid(z[i]);
}
}
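// train() below takes one stochastic-gradient step on the cross-entropy
// reconstruction error L(x, z). Because the weights are tied between the
// encoder and the decoder, the (negative) gradients reduce to:
//   L_vbias[j] = x[j] - z[j]
//   L_hbias[i] = (sum_j W[i][j] * L_vbias[j]) * y[i] * (1 - y[i])
//   dW[i][j]   = L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]
// W gets two terms because it appears in both the encoder and the decoder.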
void dA::train(int *x, double lr, double corruption_level) {
int *tilde_x = new int[n_visible];
double *y = new double[n_hidden];
double *z = new double[n_visible];
double *L_vbias = new double[n_visible];
double *L_hbias = new double[n_hidden];
double p = 1 - corruption_level;
get_corrupted_input(x, tilde_x, p);
get_hidden_values(tilde_x, y);
get_reconstructed_input(y, z);
// vbias
for(int i=0; i<n_visible; i++) {
L_vbias[i] = x[i] - z[i];
vbias[i] += lr * L_vbias[i] / N;
}
// hbias
for(int i=0; i<n_hidden; i++) {
L_hbias[i] = 0;
for(int j=0; j<n_visible; j++) {
L_hbias[i] += W[i][j] * L_vbias[j];
}
L_hbias[i] *= y[i] * (1 - y[i]);
hbias[i] += lr * L_hbias[i] / N;
}
// W
for(int i=0; i<n_hidden; i++) {
for(int j=0; j<n_visible; j++) {
W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
}
}
delete[] L_hbias;
delete[] L_vbias;
delete[] z;
delete[] y;
delete[] tilde_x;
}
void dA::reconstruct(int *x, double *z) {
double *y = new double[n_hidden];
get_hidden_values(x, y);
get_reconstructed_input(y, z);
delete[] y;
}
void test_dA() {
srand(0);
double learning_rate = 0.1;
double corruption_level = 0.3;
int training_epochs = 100;
int train_N = 10;
int test_N = 2;
int n_visible = 20;
int n_hidden = 5;
// training data
int train_X[10][20] = {
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0}
};
// construct dA
dA da(train_N, n_visible, n_hidden, NULL, NULL, NULL);
// train
for(int epoch=0; epoch<training_epochs; epoch++) {
for(int i=0; i<train_N; i++) {
da.train(train_X[i], learning_rate, corruption_level);
}
}
// test data
int test_X[2][20] = {
{1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0}
};
double reconstructed_X[2][20];
// test
for(int i=0; i<test_N; i++) {
da.reconstruct(test_X[i], reconstructed_X[i]);
for(int j=0; j<n_visible; j++) {
printf("%.5f ", reconstructed_X[i][j]);
}
cout << endl;
}
cout << endl;
}
int main() {
test_dA();
return 0;
}
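The dA program above is self-contained, so with a standard toolchain it should build with something like g++ dA.cpp -o dA (assuming the class declaration is saved next to it as dA.h). Each printed row is the reconstruction of one noisy test vector: values near 1 in the first ten positions of the first row, and in the last ten positions of the second, indicate the model has denoised the inputs toward the two training patterns.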
//SdA.h
class SdA {
public:
int N;
int n_ins;
int *hidden_layer_sizes;
int n_outs;
int n_layers;
HiddenLayer **sigmoid_layers;
dA **dA_layers;
LogisticRegression *log_layer;
SdA(int, int, int*, int, int);
~SdA();
void pretrain(int*, double, double, int);
void finetune(int*, int*, double, int);
void predict(int*, double*);
};
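SdA.cpp below includes HiddenLayer.h and LogisticRegression.h, which are not listed in this post. Minimal declarations inferred from how the two classes are used further down (a sketch only; the original headers may differ in detail):
//HiddenLayer.h (sketch)
class HiddenLayer {
public:
int N;
int n_in;
int n_out;
double **W;
double *b;
HiddenLayer(int, int, int, double**, double*);
~HiddenLayer();
double output(int*, double*, double);
void sample_h_given_v(int*, int*);
};
//LogisticRegression.h (sketch)
class LogisticRegression {
public:
int N;
int n_in;
int n_out;
double **W;
double *b;
LogisticRegression(int, int, int);
~LogisticRegression();
void train(int*, int*, double);
void softmax(double*);
void predict(int*, double*);
};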
//SdA.cpp
#include <iostream>
#include <stdio.h>  // printf
#include <stdlib.h> // rand, srand, RAND_MAX
#include <math.h>
#include "HiddenLayer.h"
#include "dA.h"
#include "LogisticRegression.h"
#include "SdA.h"
using namespace std;
double uniform(double min, double max) {
return rand() / (RAND_MAX + 1.0) * (max - min) + min;
}
int binomial(int n, double p) {
if(p < 0 || p > 1) return 0;
int c = 0;
double r;
for(int i=0; i<n; i++) {
r = rand() / (RAND_MAX + 1.0);
if (r < p) c++;
}
return c;
}
double sigmoid(double x) {
return 1.0 / (1.0 + exp(-x));
}
// SdA
SdA::SdA(int size, int n_i, int *hls, int n_o, int n_l) {
int input_size;
N = size;
n_ins = n_i;
hidden_layer_sizes = hls;
n_outs = n_o;
n_layers = n_l;
sigmoid_layers = new HiddenLayer*[n_layers];
dA_layers = new dA*[n_layers];
// construct multi-layer
for(int i=0; i<n_layers; i++) {
if(i == 0) {
input_size = n_ins;
} else {
input_size = hidden_layer_sizes[i-1];
}
// construct sigmoid_layer
sigmoid_layers[i] = new HiddenLayer(N, input_size, hidden_layer_sizes[i], NULL, NULL);
// construct dA_layer
dA_layers[i] = new dA(N, input_size, hidden_layer_sizes[i],\
sigmoid_layers[i]->W, sigmoid_layers[i]->b, NULL);
}
// layer for output using LogisticRegression
log_layer = new LogisticRegression(N, hidden_layer_sizes[n_layers-1], n_outs);
}
SdA::~SdA() {
delete log_layer;
for(int i=0; i<n_layers; i++) {
delete sigmoid_layers[i];
delete dA_layers[i];
}
delete[] sigmoid_layers;
delete[] dA_layers;
}
void SdA::pretrain(int *input, double lr, double corruption_level, int epochs) {
int *layer_input;
int prev_layer_input_size;
int *prev_layer_input;
int *train_X = new int[n_ins];
for(int i=0; i<n_layers; i++) { // greedy layer-wise pretraining: layer i is trained on input propagated through layers 0..i-1
for(int epoch=0; epoch<epochs; epoch++) { // training epochs
for(int n=0; n<N; n++) { // input x1...xN
// initial input
for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];
// build layer i's input by propagating through the layers below
for(int l=0; l<=i; l++) {
if(l == 0) {
layer_input = new int[n_ins];
for(int j=0; j<n_ins; j++) layer_input[j] = train_X[j];
} else {
if(l == 1) prev_layer_input_size = n_ins;
else prev_layer_input_size = hidden_layer_sizes[l-2];
prev_layer_input = new int[prev_layer_input_size];
for(int j=0; j<prev_layer_input_size; j++) prev_layer_input[j] = layer_input[j];
delete[] layer_input;
layer_input = new int[hidden_layer_sizes[l-1]];
sigmoid_layers[l-1]->sample_h_given_v(prev_layer_input, layer_input);
delete[] prev_layer_input;
}
}
dA_layers[i]->train(layer_input, lr, corruption_level);
delete[] layer_input; // free the per-sample buffer built in the l-loop above
}
}
}
delete[] train_X;
}
void SdA::finetune(int *input, int *label, double lr, int epochs) {
int *layer_input;
int *prev_layer_input;
int *train_X = new int[n_ins];
int *train_Y = new int[n_outs];
for(int epoch=0; epoch<epochs; epoch++) {
for(int n=0; n<N; n++) { // input x1...xN
// initial input
for(int m=0; m<n_ins; m++) train_X[m] = input[n * n_ins + m];
for(int m=0; m<n_outs; m++) train_Y[m] = label[n * n_outs + m];
// layer input
for(int i=0; i<n_layers; i++) {
if(i == 0) {
prev_layer_input = new int[n_ins];
for(int j=0; j<n_ins; j++) prev_layer_input[j] = train_X[j];
} else {
prev_layer_input = new int[hidden_layer_sizes[i-1]];
for(int j=0; j<hidden_layer_sizes[i-1]; j++) prev_layer_input[j] = layer_input[j];
delete[] layer_input;
}
layer_input = new int[hidden_layer_sizes[i]];
sigmoid_layers[i]->sample_h_given_v(prev_layer_input, layer_input);
delete[] prev_layer_input;
}
log_layer->train(layer_input, train_Y, lr);
delete[] layer_input; // free the per-sample buffer built in the i-loop above
}
// lr *= 0.95;
}
delete[] train_X;
delete[] train_Y;
}
void SdA::predict(int *x, double *y) {
double *layer_input;
double *prev_layer_input;
double linear_output;
prev_layer_input = new double[n_ins];
for(int j=0; j<n_ins; j++) prev_layer_input[j] = x[j];
// layer activation
for(int i=0; i<n_layers; i++) {
layer_input = new double[sigmoid_layers[i]->n_out];
for(int k=0; k<sigmoid_layers[i]->n_out; k++) {
linear_output = 0.0;
for(int j=0; j<sigmoid_layers[i]->n_in; j++) {
linear_output += sigmoid_layers[i]->W[k][j] * prev_layer_input[j];
}
linear_output += sigmoid_layers[i]->b[k];
layer_input[k] = sigmoid(linear_output);
}
delete[] prev_layer_input;
if(i < n_layers-1) {
prev_layer_input = new double[sigmoid_layers[i]->n_out];
for(int j=0; j<sigmoid_layers[i]->n_out; j++) prev_layer_input[j] = layer_input[j];
delete[] layer_input;
}
}
for(int i=0; i<log_layer->n_out; i++) {
y[i] = 0;
for(int j=0; j<log_layer->n_in; j++) {
y[i] += log_layer->W[i][j] * layer_input[j];
}
y[i] += log_layer->b[i];
}
log_layer->softmax(y);
delete[] layer_input;
}
// HiddenLayer
HiddenLayer::HiddenLayer(int size, int in, int out, double **w, double *bp) {
N = size;
n_in = in;
n_out = out;
if(w == NULL) {
W = new double*[n_out];
for(int i=0; i<n_out; i++) W[i] = new double[n_in];
double a = 1.0 / n_in;
for(int i=0; i<n_out; i++) {
for(int j=0; j<n_in; j++) {
W[i][j] = uniform(-a, a);
}
}
} else {
W = w;
}
if(bp == NULL) {
b = new double[n_out];
for(int i=0; i<n_out; i++) b[i] = 0; // zero-initialize the biases
} else {
b = bp;
}
}
HiddenLayer::~HiddenLayer() {
for(int i=0; i<n_out; i++) delete[] W[i];
delete[] W;
delete[] b;
}
double HiddenLayer::output(int *input, double *w, double b) {
double linear_output = 0.0;
for(int j=0; j<n_in; j++) {
linear_output += w[j] * input[j];
}
linear_output += b;
return sigmoid(linear_output);
}
void HiddenLayer::sample_h_given_v(int *input, int *sample) {
for(int i=0; i<n_out; i++) {
sample[i] = binomial(1, output(input, W[i], b[i]));
}
}
// dA
dA::dA(int size, int n_v, int n_h, double **w, double *hb, double *vb) {
N = size;
n_visible = n_v;
n_hidden = n_h;
if(w == NULL) {
W = new double*[n_hidden];
for(int i=0; i<n_hidden; i++) W[i] = new double[n_visible];
double a = 1.0 / n_visible;
for(int i=0; i<n_hidden; i++) {
for(int j=0; j<n_visible; j++) {
W[i][j] = uniform(-a, a);
}
}
} else {
W = w;
}
if(hb == NULL) {
hbias = new double[n_hidden];
for(int i=0; i<n_hidden; i++) hbias[i] = 0;
} else {
hbias = hb;
}
if(vb == NULL) {
vbias = new double[n_visible];
for(int i=0; i<n_visible; i++) vbias[i] = 0;
} else {
vbias = vb;
}
}
dA::~dA() {
// W and hbias are shared with the corresponding HiddenLayer (see the SdA
// constructor) and are freed by its destructor; the dA only owns vbias.
delete[] vbias;
}
void dA::get_corrupted_input(int *x, int *tilde_x, double p) {
for(int i=0; i<n_visible; i++) {
if(x[i] == 0) {
tilde_x[i] = 0;
} else {
tilde_x[i] = binomial(1, p);
}
}
}
// Encode
void dA::get_hidden_values(int *x, double *y) {
for(int i=0; i<n_hidden; i++) {
y[i] = 0;
for(int j=0; j<n_visible; j++) {
y[i] += W[i][j] * x[j];
}
y[i] += hbias[i];
y[i] = sigmoid(y[i]);
}
}
// Decode
void dA::get_reconstructed_input(double *y, double *z) {
for(int i=0; i<n_visible; i++) {
z[i] = 0;
for(int j=0; j<n_hidden; j++) {
z[i] += W[j][i] * y[j];
}
z[i] += vbias[i];
z[i] = sigmoid(z[i]);
}
}
void dA::train(int *x, double lr, double corruption_level) {
int *tilde_x = new int[n_visible];
double *y = new double[n_hidden];
double *z = new double[n_visible];
double *L_vbias = new double[n_visible];
double *L_hbias = new double[n_hidden];
double p = 1 - corruption_level;
get_corrupted_input(x, tilde_x, p);
get_hidden_values(tilde_x, y);
get_reconstructed_input(y, z);
// vbias
for(int i=0; i<n_visible; i++) {
L_vbias[i] = x[i] - z[i];
vbias[i] += lr * L_vbias[i] / N;
}
// hbias
for(int i=0; i<n_hidden; i++) {
L_hbias[i] = 0;
for(int j=0; j<n_visible; j++) {
L_hbias[i] += W[i][j] * L_vbias[j];
}
L_hbias[i] *= y[i] * (1 - y[i]);
hbias[i] += lr * L_hbias[i] / N;
}
// W
for(int i=0; i<n_hidden; i++) {
for(int j=0; j<n_visible; j++) {
W[i][j] += lr * (L_hbias[i] * tilde_x[j] + L_vbias[j] * y[i]) / N;
}
}
delete[] L_hbias;
delete[] L_vbias;
delete[] z;
delete[] y;
delete[] tilde_x;
}
void dA::reconstruct(int *x, double *z) {
double *y = new double[n_hidden];
get_hidden_values(x, y);
get_reconstructed_input(y, z);
delete[] y;
}
// LogisticRegression
LogisticRegression::LogisticRegression(int size, int in, int out) {
N = size;
n_in = in;
n_out = out;
W = new double*[n_out];
for(int i=0; i<n_out; i++) W[i] = new double[n_in];
b = new double[n_out];
for(int i=0; i<n_out; i++) {
for(int j=0; j<n_in; j++) {
W[i][j] = 0;
}
b[i] = 0;
}
}
LogisticRegression::~LogisticRegression() {
for(int i=0; i<n_out; i++) delete[] W[i];
delete[] W;
delete[] b;
}
void LogisticRegression::train(int *x, int *y, double lr) {
double *p_y_given_x = new double[n_out];
double *dy = new double[n_out];
for(int i=0; i<n_out; i++) {
p_y_given_x[i] = 0;
for(int j=0; j<n_in; j++) {
p_y_given_x[i] += W[i][j] * x[j];
}
p_y_given_x[i] += b[i];
}
softmax(p_y_given_x);
for(int i=0; i<n_out; i++) {
dy[i] = y[i] - p_y_given_x[i];
for(int j=0; j<n_in; j++) {
W[i][j] += lr * dy[i] * x[j] / N;
}
b[i] += lr * dy[i] / N;
}
delete[] p_y_given_x;
delete[] dy;
}
void LogisticRegression::softmax(double *x) {
double max = x[0]; // subtract the max before exponentiating, for numerical stability
double sum = 0.0;
for(int i=1; i<n_out; i++) if(max < x[i]) max = x[i];
for(int i=0; i<n_out; i++) {
x[i] = exp(x[i] - max);
sum += x[i];
}
for(int i=0; i<n_out; i++) x[i] /= sum;
}
void LogisticRegression::predict(int *x, double *y) {
for(int i=0; i<n_out; i++) {
y[i] = 0;
for(int j=0; j<n_in; j++) {
y[i] += W[i][j] * x[j];
}
y[i] += b[i];
}
softmax(y);
}
void test_sda() {
srand(0);
double pretrain_lr = 0.1;
double corruption_level = 0.3;
int pretraining_epochs = 1000;
double finetune_lr = 0.1;
int finetune_epochs = 500;
int train_N = 10;
int test_N = 4;
int n_ins = 28;
int n_outs = 2;
int hidden_layer_sizes[] = {15, 15};
int n_layers = sizeof(hidden_layer_sizes) / sizeof(hidden_layer_sizes[0]);
// training data
int train_X[10][28] = {
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}
};
int train_Y[10][2] = {
{1, 0},
{1, 0},
{1, 0},
{1, 0},
{1, 0},
{0, 1},
{0, 1},
{0, 1},
{0, 1},
{0, 1}
};
// construct SdA
SdA sda(train_N, n_ins, hidden_layer_sizes, n_outs, n_layers);
// pretrain
sda.pretrain(*train_X, pretrain_lr, corruption_level, pretraining_epochs);
// finetune
sda.finetune(*train_X, *train_Y, finetune_lr, finetune_epochs);
// test data
int test_X[4][28] = {
{1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}
};
double test_Y[4][2];
// test
for(int i=0; i<test_N; i++) {
sda.predict(test_X[i], test_Y[i]);
for(int j=0; j<n_outs; j++) {
printf("%.5f ", test_Y[i][j]);
}
cout << endl;
}
}
int main() {
test_sda();
return 0;
}
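SdA.cpp is likewise self-contained apart from the headers, since the HiddenLayer, dA, and LogisticRegression implementations are pasted into the same file, so a build along the lines of g++ SdA.cpp -o SdA should work. Each output row holds the softmax probabilities of the two classes for one test vector; the first two rows should put most of their mass on the first class and the last two rows on the second.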