DeepLearning to digit recognizer in kaggle

Recently I have been studying deep learning, and I picked the Kaggle digit recognizer competition for practice. I solved it with two toolboxes and compared the results: DeepLearnToolbox and Caffe. For an analysis of the DeepLearnToolbox source code, see: http://blog.csdn.net/lu597203933/article/details/46576017
For learning Caffe, see: http://caffe.berkeleyvision.org/
 
Part 1: DeepLearnToolbox
DeepLearnToolbox is MATLAB-based and very simple; reading its source is a great help for understanding how convolutional neural networks work. The main task here is to preprocess the dataset provided by the digit recognizer competition so that it fits DeepLearnToolbox. This involves two .m files, predeal.m and cnntest.m; all you need to change are the addpath paths. The code comments are fairly detailed, so please read through them.
Code:
predeal.m
 
% use the DeepLearnToolbox to solve the digit recognizer in Kaggle!
clear; clc
trainFile = 'train.csv';
testFile = 'test.csv';

M = csvread(trainFile, 1);       % read the training csv, skipping the header row
train_x = M(:, 2:end);           % columns 2..end hold the pixel data
label = M(:, 1)';                % column 1 holds the digit labels
label(label == 0) = 10;          % map digit 0 to class 10 (MATLAB indices start at 1)
train_y = full(sparse(label, 1:size(train_x, 1), 1));   % one-hot encode the labels (10 x N)

train_x = double(reshape(train_x', 28, 28, size(train_x, 1))) / 255;   % 28x28xN images scaled to [0,1]

M = csvread(testFile, 1);        % read the test csv (no label column), skipping the header row
test_x = double(reshape(M', 28, 28, size(M, 1))) / 255;
clear label M testFile trainFile

addpath D:\DeepLearning\DeepLearnToolbox-master\data\      % change these paths to your own installation
addpath D:\DeepLearning\DeepLearnToolbox-master\CNN\
addpath D:\DeepLearning\DeepLearnToolbox-master\util\

rand('state', 0)
cnn.layers = {                   % network structure: number of feature maps and kernel sizes
    struct('type', 'i') %input layer
    struct('type', 'c', 'outputmaps', 6, 'kernelsize', 5) %convolution layer
    struct('type', 's', 'scale', 2) %sub sampling layer
    struct('type', 'c', 'outputmaps', 12, 'kernelsize', 5) %convolution layer
    struct('type', 's', 'scale', 2) %subsampling layer
};

opts.alpha = 0.01;     % learning rate
opts.batchsize = 50;   % mini-batch size: weights are updated after every 50 images
opts.numepochs = 25;   % number of training epochs
cnn = cnnsetup(cnn, train_x, train_y);        % initialize the weights and biases of each layer
cnn = cnntrain(cnn, train_x, train_y, opts);  % train the network with backpropagation

test_y = cnntest(cnn, test_x);   % predict the class of each test image
test_y(test_y == 10) = 0;        % map class 10 back to digit 0
test_y = test_y';
M = [(1:length(test_y))' test_y(:)];
csvwrite('test_y.csv', M);       % write the submission file: ImageId, Label
figure; plot(cnn.rL);            % plot the training loss curve

cnntest.m
 
function [test_y] = cnntest(net, x)
    % feedforward only: run the network and return the most probable class index per sample
    net = cnnff(net, x);
    [~, test_y] = max(net.o);
end
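
Since test.csv has no labels, the score below comes from submitting to Kaggle. For a rough local estimate before submitting, one could hold out part of train.csv as a validation set. A minimal sketch, reusing the variables and opts from predeal.m (the 2000-image split is arbitrary):

% hold out the last 2000 training images, train on the rest, and compare
% the predictions of the custom cnntest against the held-out labels
val_x = train_x(:, :, end-1999:end);
val_y = train_y(:, end-1999:end);
tr_x  = train_x(:, :, 1:end-2000);     % 40000 images, still divisible by batchsize 50
tr_y  = train_y(:, 1:end-2000);

cnn = cnnsetup(cnn, tr_x, tr_y);
cnn = cnntrain(cnn, tr_x, tr_y, opts);

pred = cnntest(cnn, val_x);            % predicted class indices (1..10)
[~, truth] = max(val_y);               % true class indices from the one-hot labels
fprintf('held-out accuracy: %.4f\n', mean(pred == truth));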

Result: the score obtained with DeepLearnToolbox was not great, only 0.94586.
 
Part 2: Caffe to digit recognizer
Caffe ships with an MNIST example, but the data it uses are the binary files from the official site, and the example only reports an overall accuracy, so it cannot be applied here as-is. The steps are as follows.
1: Convert the given CSV data to LMDB format
Here I wrote a program, convert_data_to_lmdb.cpp, under the mnist folder to process the data.
The code is as follows:
 
#include <cstdio>    // for snprintf
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>


#include "boost/scoped_ptr.hpp"
#include "gflags/gflags.h"
#include "glog/logging.h"

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/io.hpp"
#include "caffe/util/rng.hpp"

using namespace caffe;
using namespace std;
using std::pair;
using boost::scoped_ptr;

/* edited by Zack
 * argv[1] the input file, argv[2] the output file*/

DEFINE_string(backend, "lmdb", "The backend for storing the result");  // get Flags_backend == lmdb

int main(int argc, char **argv){
	::google::InitGoogleLogging(argv[0]);

	#ifndef GFLAGS_GFLAGS_H_
	  namespace gflags = google;
	#endif

	if(argc < 3){
		LOG(ERROR)<< "please check the input arguments!";
		return 1;
	}
	ifstream infile(argv[1]);
	if(!infile){
		LOG(ERROR)<< "please check the input arguments!";
		return 1;
	}
	string str;
	int count = 0;
	int rows = 28;
	int cols = 28;
	unsigned char *buffer = new  unsigned char[rows*cols];
	stringstream ss;

	Datum datum;             // this data structure store the data and label
	datum.set_channels(1);    // the channels
	datum.set_height(rows);    // rows
	datum.set_width(cols);     // cols

	scoped_ptr<db::DB> db(db::GetDB(FLAGS_backend));         // new DB object
	db->Open(argv[2], db::NEW);                    // open the lmdb file to store the data
	scoped_ptr<db::Transaction> txn(db->NewTransaction());   // new Transaction object to put and commit the data

	const int kMaxKeyLength = 256;           // to save the key
	char key_cstr[kMaxKeyLength];

	bool flag= false;
	while(getline(infile, str)){
		if(flag == false){
			flag = true;
			continue;
		}
		std::string::size_type beg = 0;
		std::string::size_type end = 0;
		int str_index = 0;
		// for the test set (test.csv has no label column), uncomment this line ---- adjustment (1)
		//datum.set_label(0);
		while((end = str.find_first_of(',', beg)) != string::npos){
			//cout << end << endl;
			string dig_str = str.substr(beg, end - beg);
			int pixes;
			ss.clear();
			ss << dig_str;
			ss >> pixes;
			// for the test set, delete this if-block (there is no label column) ---- adjustment (2)
			if(beg == 0){
				datum.set_label(pixes);
				beg = ++ end;
				continue;
			}
			buffer[str_index++] = (unsigned char)pixes;
			beg = ++end;
		}
		string dig_str = str.substr(beg);
		int pixes;
		ss.clear();
		ss << dig_str;
		ss >> pixes;
		buffer[str_index++] = (unsigned char)pixes;
		datum.set_data(buffer, rows*cols);

		int length = snprintf(key_cstr, kMaxKeyLength, "%08d", count);

		    // Put in db
		string out;
		CHECK(datum.SerializeToString(&out));              // serialize to string
		txn->Put(string(key_cstr, length), out);        // put it, both the key and value

		if (++count % 1000 == 0) {       // to commit every 1000 iteration
		  // Commit db
		  txn->Commit();
		  txn.reset(db->NewTransaction());
		  LOG(ERROR) << "Processed " << count << " files.";
		}

	}
	// write the last batch
	  if (count % 1000 != 0) {            // commit the last batch
		txn->Commit();
		LOG(ERROR) << "Processed " << count << " files.";
	  }

	delete[] buffer;   // release the pixel buffer
	return 0;
}

Next, run make all -j8 to compile the code; this generates the corresponding binary under the build folder.
Next, run:
./build/examples/mnist/convert_data_to_lmdb.bin examples/mnist/kaggle/data/train.csv examples/mnist/kaggle/mnist_train_lmdb --backend=lmdb
This produces the training data as an LMDB file. For the test set, test.csv has no label column, so the code needs two small adjustments; both places are marked in the code above.
 
Next, run make all -j8 again and execute:
./build/examples/mnist/convert_data_to_lmdb.bin examples/mnist/kaggle/data/test.csv examples/mnist/kaggle/mnist_test_lmdb --backend=lmdb
This gives the corresponding LMDB file for the test data.
 
 
2: Train on the training data to obtain a model
While training a model, Caffe periodically evaluates on a test dataset (every test_interval iterations, test_iter batches at a time), so I took the first 1000 rows of train.csv and built a cross-validation LMDB dataset from them in the same way as above.
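
One simple way to produce that subset is to copy the header line plus the first 1000 data rows of train.csv into a new file and convert it with the same convert_data_to_lmdb.bin tool. A minimal MATLAB sketch (the file name train_val.csv is only illustrative):

% write the header plus the first 1000 data rows of train.csv to train_val.csv
fin  = fopen('train.csv', 'r');
fout = fopen('train_val.csv', 'w');
for i = 1:1001                       % 1 header line + 1000 samples
    row = fgetl(fin);
    if ~ischar(row), break; end      % stop early if the file is shorter
    fprintf(fout, '%s\n', row);
end
fclose(fin);
fclose(fout);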
 
Copy lenet_solver.prototxt and lenet_train_test.prototxt from the mnist directory into the kaggle directory, and change the directories of the corresponding data files and the corresponding batch sizes. For the exact files, see the download link at the end.
Next, run ./build/tools/caffe train --solver=examples/mnist/kaggle/lenet_solver.prototxt; this produces our lenet_iter_10000.caffemodel.
 
3: Extract the prob-layer features of the test set
Here I use the extract_features.cpp source file under the tools folder, but it stores its results in LMDB format, so I modified the source code as follows.
 
#include <stdio.h>  // for snprintf
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

#include "boost/algorithm/string.hpp"
#include "google/protobuf/text_format.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/net.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/util/db.hpp"
#include "caffe/util/io.hpp"
#include "caffe/vision_layers.hpp"

using caffe::Blob;
using caffe::Caffe;
using caffe::Datum;
using caffe::Net;
using boost::shared_ptr;
using std::string;
namespace db = caffe::db;

template<typename Dtype>
int feature_extraction_pipeline(int argc, char** argv);

int main(int argc, char** argv) {
  return feature_extraction_pipeline<float>(argc, argv);
//  return feature_extraction_pipeline<double>(argc, argv);
}

template<typename Dtype>
int feature_extraction_pipeline(int argc, char** argv) {
  ::google::InitGoogleLogging(argv[0]);
  const int num_required_args = 7;     // at least 7 arguments are required
  if (argc < num_required_args) {
    LOG(ERROR)<<
    "This program takes in a trained network and an input data layer, and then"
    " extract features of the input data produced by the net.
" "Usage: extract_features pretrained_net_param" " feature_extraction_proto_file extract_feature_blob_name1[,name2,...]" " save_feature_dataset_name1[,name2,...] num_mini_batches db_type" " [CPU/GPU] [DEVICE_ID=0]
" "Note: you can extract multiple features in one pass by specifying" " multiple feature blob names and dataset names seperated by ','." " The names cannot contain white space characters and the number of blobs" " and datasets must be equal."; return 1; } int arg_pos = num_required_args; //the necessary nums of parameters arg_pos = num_required_args; if (argc > arg_pos && strcmp(argv[arg_pos], "GPU") == 0) { // whether use GPU------ -gpu 0 LOG(ERROR)<< "Using GPU"; uint device_id = 0; if (argc > arg_pos + 1) { device_id = atoi(argv[arg_pos + 1]); CHECK_GE(device_id, 0); } LOG(ERROR) << "Using Device_id=" << device_id; Caffe::SetDevice(device_id); Caffe::set_mode(Caffe::GPU); } else { LOG(ERROR) << "Using CPU"; Caffe::set_mode(Caffe::CPU); } arg_pos = 0; // the name of the executable std::string pretrained_binary_proto(argv[++arg_pos]); // the mode had been trained // Expected prototxt contains at least one data layer such as // the layer data_layer_name and one feature blob such as the // fc7 top blob to extract features. /* layers { name: "data_layer_name" type: DATA data_param { source: "/path/to/your/images/to/extract/feature/images_leveldb" mean_file: "/path/to/your/image_mean.binaryproto" batch_size: 128 crop_size: 227 mirror: false } top: "data_blob_name" top: "label_blob_name" } layers { name: "drop7" type: DROPOUT dropout_param { dropout_ratio: 0.5 } bottom: "fc7" top: "fc7" } */ std::string feature_extraction_proto(argv[++arg_pos]); // get the net structure shared_ptr > feature_extraction_net( new Net(feature_extraction_proto, caffe::TEST)); //new net object and set each layers------feature_extraction_net feature_extraction_net->CopyTrainedLayersFrom(pretrained_binary_proto); // init the weights std::string extract_feature_blob_names(argv[++arg_pos]); //exact which blob's feature std::vector<:string> blob_names; boost::split(blob_names, extract_feature_blob_names, boost::is_any_of(",")); //you can exact many blobs' features and to store them in different dirname std::string save_feature_dataset_names(argv[++arg_pos]); // to store the features std::vector<:string> dataset_names; boost::split(dataset_names, save_feature_dataset_names, // each dataset_names to store one blob's feature boost::is_any_of(",")); CHECK_EQ(blob_names.size(), dataset_names.size()) << " the number of blob names and dataset names must be equal"; size_t num_features = blob_names.size(); // how many features you exact for (size_t i = 0; i < num_features; i++) { CHECK(feature_extraction_net->has_blob(blob_names[i])) << "Unknown feature blob name " << blob_names[i] << " in the network " << feature_extraction_proto; } int num_mini_batches = atoi(argv[++arg_pos]); // each exact num_mini_batches of images // init the DB and Transaction for all blobs you want to extract features std::vector > feature_dbs; // new DB object, is a vector maybe has many blogs' feature std::vector > txns; // new Transaction object, is a vectore maybe has many blob's feature // edit by Zack //std::string strfile = "/home/hadoop/caffe/textileImage/features/probTest"; std::string strfile = argv[argc-1]; std::vector<:ofstream> vec(num_features, 0); const char* db_type = argv[++arg_pos]; //the data to store style == lmdb for (size_t i = 0; i < num_features; ++i) { LOG(INFO)<< "Opening dataset " << dataset_names[i]; // dataset_name[i] to store the feature which type is lmdb shared_ptr<:db> db(db::GetDB(db_type)); // the type of the db db->Open(dataset_names.at(i), db::NEW); // open the dir to store the feature feature_dbs.push_back(db); // put 
the db to the vector shared_ptr<:transaction> txn(db->NewTransaction()); // the transaction to the db txns.push_back(txn); // put the transaction to the vector // edit by Zack std::stringstream ss; ss.clear(); string index; ss << i; ss >> index; std::string str = strfile + index + ".txt"; vec[i] = new std::ofstream(str.c_str()); } LOG(ERROR)<< "Extacting Features"; Datum datum; const int kMaxKeyStrLength = 100; char key_str[kMaxKeyStrLength]; // to store the key std::vector*> input_vec; std::vector image_indices(num_features, 0); /// how many blogs' feature you exact for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) { feature_extraction_net->Forward(input_vec); for (int i = 0; i < num_features; ++i) { // to exact the blobs' name maybe fc7 fc8 const shared_ptr > feature_blob = feature_extraction_net ->blob_by_name(blob_names[i]); int batch_size = feature_blob->num(); // the nums of images-------batch size int dim_features = feature_blob->count() / batch_size; // this dim of this feature of each image in this blob const Dtype* feature_blob_data; // float is the features for (int n = 0; n < batch_size; ++n) { datum.set_height(feature_blob->height()); // set the height datum.set_width(feature_blob->width()); // set the width datum.set_channels(feature_blob->channels()); // set the channel datum.clear_data(); // clear data datum.clear_float_data(); // clear float_data feature_blob_data = feature_blob->cpu_data() + feature_blob->offset(n); //the features of which image for (int d = 0; d < dim_features; ++d) { datum.add_float_data(feature_blob_data[d]); (*vec[i]) << feature_blob_data[d] << " "; // save the features } (*vec[i]) << std::endl; //LOG(ERROR)<< "dim" << dim_features; int length = snprintf(key_str, kMaxKeyStrLength, "%010d", image_indices[i]); // key di ji ge tupian string out; CHECK(datum.SerializeToString(&out)); // serialize to string txns.at(i)->Put(std::string(key_str, length), out); // put to transaction ++image_indices[i]; // key++ if (image_indices[i] % 1000 == 0) { // when it reach to 1000 ,we commit it txns.at(i)->Commit(); txns.at(i).reset(feature_dbs.at(i)->NewTransaction()); LOG(ERROR)<< "Extracted features of " << image_indices[i] << " query images for feature blob " << blob_names[i]; } } // for (int n = 0; n < batch_size; ++n) } // for (int i = 0; i < num_features; ++i) } // for (int batch_index = 0; batch_index < num_mini_batches; ++batch_index) // write the last batch for (int i = 0; i < num_features; ++i) { if (image_indices[i] % 1000 != 0) { // commit the last path images txns.at(i)->Commit(); } // edit by Zack vec[i]->close(); delete vec[i]; LOG(ERROR)<< "Extracted features of " << image_indices[i] << " query images for feature blob " << blob_names[i]; feature_dbs.at(i)->Close(); } LOG(ERROR)<< "Successfully extracted the features!"; return 0; }

Finally, the prob-layer output (that is, the class probabilities) is stored in a txt file.
 
In addition, the network structure needs adjusting: since we only run prediction, the training-related parameters inside the layers can be removed.
The deploy.prototxt code is as follows:
 
name: "LeNet"
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "examples/mnist/kaggle/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}

layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
 
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
   
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"

  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
   
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  
  inner_product_param {
    num_output: 500
    
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"

  inner_product_param {
    num_output: 10
   
  }
}
layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "prob"
  bottom: "label"
  top: "accuracy"
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}

Then run:
./build/tools/extract_features.bin examples/mnist/kaggle/lenet_iter_10000.caffemodel examples/mnist/kaggle/deploy.prototxt prob examples/mnist/kaggle/features 280 lmdb /home/hadoop/caffe/caffe-master/examples/mnist/kaggle/feature
Here 280 is the number of mini-batches: batch_size is set to 100 in deploy.prototxt, and 280 x 100 = 28000 is the size of the test set. /home/hadoop/caffe/caffe-master/examples/mnist/kaggle/feature is the path prefix under which the extracted features are written as txt files. examples/mnist/kaggle/lenet_iter_10000.caffemodel holds the trained weights, and examples/mnist/kaggle/deploy.prototxt is the network definition.
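
As a quick sanity check before post-processing (assuming the extracted file feature0.txt is in the current MATLAB directory, as in the script below), the matrix should have one row per test image and one column per class, and each row should sum to roughly 1 because prob is a softmax output:

% verify the extracted prob-layer features: 28000 test images x 10 classes
feature = load('feature0.txt');
assert(isequal(size(feature), [28000 10]));
disp(max(abs(sum(feature, 2) - 1)));   % should be close to 0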
 
4: Post-process the resulting txt file
The three steps above produce feature0.txt, which stores a 28000 x 10 matrix whose entries are the probability of each sample belonging to each class. Running the MATLAB code below then produces the submission file Kaggle requires. The final accuracy is 0.98986, which placed around 400th on the leaderboard. Great!
 
% caffe toolbox: post-processing of the extracted prob-layer features
clear; clc;
feature = load('feature0.txt');   % 28000 x 10 matrix of class probabilities
feature = feature';
[~, test_y] = max(feature);       % most probable class index (1..10) per sample
test_y = test_y - 1;              % shift indices 1..10 back to digits 0..9
test_y = test_y';
M = [(1:length(test_y))' test_y(:)];   % ImageId, Label
csvwrite('test_y3.csv', M);

For all the files and code, see the download at: https://github.com/zack6514/zackcoding