TensorFlowによるCIFAR-10分類ネットワークの構築

25972 ワード

一、データの準備:CIFAR-10データセットは32×32ピクセルの画像が全部で6万枚あり、10クラスに分けられ、各クラスは6000枚です。そのうち5万枚は訓練用、1万枚はテスト用です。データセットは5つの訓練バッチと1つのテストバッチに分かれており、各バッチ内の画像はランダムな順序で並んでいます。公式サイトでは、Python版、Matlab版、バイナリ版の3種類のダウンロードリンクが提供されています。このうちPython版については、データを読み込むコードが公式サイトに掲載されており、Matlab版のデータフォーマットはPython版とほとんど変わりません。バイナリ版のデータは、訓練用の5つのバッチ data_batch_1.bin ~ data_batch_5.bin と、テスト用の test_batch.bin から成ります。TensorFlowは固定長フォーマットのデータを(tf.FixedLengthRecordReaderで)読み取れるので、ここではバイナリ版をダウンロードします。フォルダ /home/your_name/TensorFlow/cifar10/data を新規作成し、CIFAR-10公式サイトからバイナリ形式の圧縮ファイルをダウンロードしてこのフォルダに解凍すると、cifar-10-batches-bin フォルダが得られます。中には8つのファイルがあり、内訳は6つの .bin ファイル、readme、そしてクラス名を説明する .txt ファイル1つです。
from __future__ import absolute_import        #     
from __future__ import division                #     ,/    ,//    
from __future__ import print_function        #     

import os
import tensorflow as tf




# cifar10_data reads one fixed-length record from the filename queue and exposes its label and image tensors.
class cifar10_data(object):
    """One CIFAR-10 record decoded from the binary batch files.

    Each record is ``label_bytes`` of label followed by ``image_bytes`` of
    pixel data.  After construction, ``label`` and ``image`` hold the tensors
    returned by :meth:`read_cifar10`.
    """

    def __init__(self, filename_queue):
        """Set the record geometry and read one record from *filename_queue*."""
        # Image geometry: 32 rows, 32 columns, 3 channels.
        self.height, self.width, self.depth = 32, 32, 3
        # The label occupies a single byte at the start of the record.
        self.label_bytes = 1
        # 32 * 32 * 3 = 3072 bytes of pixel data per image.
        self.image_bytes = self.depth * self.width * self.height
        # A full record is label + image = 3073 bytes.
        self.record_bytes = self.image_bytes + self.label_bytes
        decoded = self.read_cifar10(filename_queue)
        self.label, self.image = decoded

    def read_cifar10(self, filename_queue):
        """Return ``(label, image)`` tensors for the next record in the queue.

        ``label`` is the first ``label_bytes`` of the record cast to int32;
        ``image`` is the remaining bytes reshaped to [depth, height, width],
        transposed to [height, width, depth] and cast to float32.
        """
        # Reader that yields exactly one record_bytes-long record per read.
        record_reader = tf.FixedLengthRecordReader(
            record_bytes=self.record_bytes)
        _, serialized = record_reader.read(filename_queue)
        raw = tf.decode_raw(serialized, tf.uint8)

        # Leading byte(s): the class label.
        label_slice = tf.slice(raw, [0], [self.label_bytes])
        label = tf.cast(label_slice, tf.int32)

        # Everything after the label: image_bytes of pixel data.
        pixels = tf.slice(raw, [self.label_bytes], [self.image_bytes])
        # Stored as depth x height x width ...
        channels_first = tf.reshape(
            pixels, [self.depth, self.height, self.width])
        # ... and rearranged to height x width x depth.
        channels_last = tf.transpose(channels_first, (1, 2, 0))
        return label, tf.cast(channels_last, tf.float32)


def inputs(data_dir, batch_size, train=True, name='input', standardize=False):
    """Build the queue-based CIFAR-10 input pipeline and return one batch.

    Args:
        data_dir: Directory containing ``data_batch_1.bin`` ...
            ``data_batch_5.bin`` (training) and ``test_batch.bin`` (test).
        batch_size: Number of examples in each returned batch.
        train: If True, read the five training files and return shuffled
            batches.  If False, read ``test_batch.bin`` and return batches
            without shuffling.
        name: Name scope under which all ops are created.
        standardize: If True, apply ``tf.image.per_image_standardization`` to
            every image.  The same setting is used for training and test data
            so a model is always evaluated on inputs preprocessed the way it
            was trained.  Defaults to False, matching the training path,
            which has never standardized its images.

    Returns:
        A tuple ``(images, labels)``: ``images`` is the batched float32 image
        tensor and ``labels`` is the label tensor reshaped to
        ``[batch_size]``.

    Raises:
        ValueError: If any of the expected data files does not exist.
    """
    # Everything lives in one name scope to keep the graph visualisation tidy.
    with tf.name_scope(name):
        if train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                         for ii in range(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        # Fail early with a clear message instead of inside a queue thread.
        for f in filenames:
            if not tf.gfile.Exists(f):
                raise ValueError('Failed to find file: ' + f)

        # Queue of file names feeding the record reader.
        filename_queue = tf.train.string_input_producer(filenames)
        read_input = cifar10_data(filename_queue)
        images = read_input.image
        if standardize:
            # tf.image.per_image_whitening no longer exists in TF 1.x;
            # per_image_standardization is its replacement.
            images = tf.image.per_image_standardization(images)
        labels = read_input.label

        num_preprocess_threads = 16
        if train:
            # A large min_after_dequeue gives better mixing at the cost of
            # start-up time and memory; the recommended capacity is
            # min_after_dequeue + 3 * batch_size (20192 for batch_size 64).
            min_after_dequeue = 20000
            image, label = tf.train.shuffle_batch(
                [images, labels], batch_size=batch_size,
                num_threads=num_preprocess_threads,
                min_after_dequeue=min_after_dequeue,
                capacity=min_after_dequeue + 3 * batch_size)
        else:
            # No shuffling for evaluation: a 20000-example shuffle buffer is
            # larger than the 10000-image test set, so shuffling would sample
            # from a mixed multi-epoch pool rather than one pass over the
            # data.
            image, label = tf.train.batch(
                [images, labels], batch_size=batch_size,
                num_threads=num_preprocess_threads,
                capacity=3 * batch_size)

        return image, tf.reshape(label, [batch_size])

二、構想:以下のネットワーク構造でCIFAR-10の分類を行います。毎回1バッチ64枚の画像を入力し、64×32×32×3の4次元テンソルに変換します。ストライド1、カーネルサイズ5×5、特徴マップ数64の畳み込み演算により64×32×32×64の4次元テンソルとなり、次にストライド2のmax_poolプーリング層を通って64×16×16×64の4次元テンソルになります。同様の畳み込みとプーリングをもう一度行って64×8×8×64の4次元テンソルとし、さらに2つの全結合層を経て64×192の2次元テンソルに写像します。その後softmax層を通って64×10のテンソルとなり、最後にラベルlabelとの交差エントロピーを損失関数として計算します。
ステップ1:ウェイトとバイアスの定義
def variable_on_cpu(name, shape, initializer = tf.constant_initializer(0.1)):
    """Create a float32 variable through ``tf.get_variable`` on '/cpu:0'.

    Args:
        name: Variable name passed to ``tf.get_variable``.
        shape: Variable shape.
        initializer: Initializer for the variable; defaults to the constant
            0.1.

    Returns:
        The variable returned by ``tf.get_variable``.
    """
    # Pin the variable itself to the CPU regardless of where it is used.
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer,
                               dtype=tf.float32)

# Weight helper: a truncated-normal-initialised variable created via variable_on_cpu (tf.get_variable on the CPU).
def variables(name, shape, stddev):
    """Create a CPU-resident float32 variable with truncated-normal init.

    Args:
        name: Variable name.
        shape: Variable shape.
        stddev: Standard deviation of the truncated normal initializer.

    Returns:
        The variable produced by ``variable_on_cpu``.
    """
    truncated_normal = tf.truncated_normal_initializer(stddev=stddev,
                                                       dtype=tf.float32)
    return variable_on_cpu(name, shape, truncated_normal)

ステップ2、ネットワーク構造の定義
# Network definition: builds the forward pass from an image batch to the class logits.
def inference(images):
    """Build the forward pass of the CIFAR-10 CNN and return the logits.

    Layers, in order: conv1 (5x5 kernels, 64 feature maps, ReLU) ->
    3x3 max-pool with stride 2 -> local response normalisation ->
    conv2 (5x5 kernels, 64 feature maps, ReLU) -> local response
    normalisation -> 3x3 max-pool with stride 2 -> fully connected 384
    (ReLU) -> fully connected 192 (ReLU) -> linear layer with 10 outputs.

    Args:
        images: 4-D image batch with 3 channels in the last dimension.  The
            height and width must be statically known so the flattened size
            can be computed at graph-construction time; the batch size may
            be anything.

    Returns:
        A 2-D tensor with 10 columns holding the unnormalised class scores.
        No softmax is applied here.
    """
    # First convolutional layer.
    with tf.variable_scope('conv1') as scope:
        # 5x5 kernels over 3 input channels producing 64 feature maps.
        weights = variables('weights', [5, 5, 3, 64], 5e-2)
        # Stride 1 in every dimension; SAME padding keeps the spatial size.
        conv = tf.nn.conv2d(images, weights, [1, 1, 1, 1], padding='SAME')
        biases = variable_on_cpu('biases', [64])
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope.name)
        # Histogram summary of the activations.
        tf.summary.histogram(scope.name + '/activations', conv1)

    with tf.variable_scope('pooling1_lrn'):
        # 3x3 max-pooling window moved with a 2x2 stride.
        pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        # Local response normalisation.
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm1')

    # Second convolutional layer.
    with tf.variable_scope('conv2') as scope:
        weights = variables('weights', [5, 5, 64, 64], 5e-2)
        conv = tf.nn.conv2d(norm1, weights, [1, 1, 1, 1], padding='SAME')
        biases = variable_on_cpu('biases', [64])
        bias = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(bias, name=scope.name)
        tf.summary.histogram(scope.name + '/activations', conv2)

    with tf.variable_scope('pooling2_lrn'):
        # Normalise first, then pool (the reverse of the first stage).  The
        # op names now say norm2/pool2 instead of the copy-pasted
        # norm1/pool1; these ops hold no variables, so checkpoints are not
        # affected.
        norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0,
                          beta=0.75, name='norm2')
        pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME', name='pool2')

    with tf.variable_scope('local3') as scope:
        # First fully connected layer.  Flatten using the static shape of
        # pool2 rather than a module-level batch-size constant, so a batch of
        # any size is reshaped correctly.
        static_shape = pool2.get_shape().as_list()
        dim = static_shape[1] * static_shape[2] * static_shape[3]
        reshape = tf.reshape(pool2, [-1, dim])
        weights = variables('weights', shape=[dim, 384], stddev=0.004)
        biases = variable_on_cpu('biases', [384])
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                            name=scope.name)
        tf.summary.histogram(scope.name + '/activations', local3)

    with tf.variable_scope('local4') as scope:
        # Second fully connected layer.
        weights = variables('weights', shape=[384, 192], stddev=0.004)
        biases = variable_on_cpu('biases', [192])
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                            name=scope.name)
        tf.summary.histogram(scope.name + '/activations', local4)

    with tf.variable_scope('softmax_linear') as scope:
        # Linear projection to the 10 class scores; the softmax itself is
        # left to the loss.
        weights = variables('weights', [192, 10], stddev=1 / 192.0)
        biases = variable_on_cpu('biases', [10])
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name=scope.name)
        tf.summary.histogram(scope.name + '/activations', softmax_linear)

    return softmax_linear


def losses(logits, labels):
    """Return the total loss: every tensor in the 'losses' collection summed.

    The mean sparse softmax cross-entropy between *logits* and *labels* is
    computed, added to the 'losses' graph collection, and the sum of the
    whole collection is returned under the name 'total_loss'.

    Args:
        logits: Unnormalised class scores, one row per example.
        labels: Integer class ids, one per example; cast to int64 here.

    Returns:
        A scalar tensor named 'total_loss'.
    """
    with tf.variable_scope('loss'):
        int64_labels = tf.cast(labels, tf.int64)
        # Takes integer class ids directly, so no one-hot encoding is needed.
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=int64_labels, logits=logits,
            name='cross_entropy_per_example')
        mean_loss = tf.reduce_mean(per_example, name='loss')
        tf.add_to_collection('losses', mean_loss)

    collected = tf.get_collection('losses')
    return tf.add_n(collected, name='total_loss')

ステップ3、トレーニング開始
BATCH_SIZE = 64      # number of images requested from the input pipeline per batch
LEARNING_RATE = 0.1    # learning rate handed to the gradient-descent optimizer
MAX_STEP = 50000    # number of iterations of the training loop
def train():
    """Train the network with plain gradient descent.

    Builds the input pipeline, model, loss and optimizer, then runs up to
    MAX_STEP training steps.  Progress is printed every 30 steps, summaries
    are written every 100 steps, and a checkpoint is saved every 1000 steps
    and after the final step.

    NOTE(review): relies on ``time``, ``np``, ``datetime`` and
    ``cifar10_input`` being imported at module level; those imports are not
    visible in this excerpt.

    Raises:
        Any exception raised inside the training loop (including the
        AssertionError for a NaN loss) is handed to the coordinator and
        re-raised by ``coord.join`` after the queue threads have stopped.
    """
    # Counter incremented by the optimizer on every training step.
    global_step = tf.Variable(0, name='global_step', trainable=False)
    # Directory holding the CIFAR-10 binary files.
    data_dir = 'F:/python3.5/Machine Learning/tensorflow/CNN/cifar10/data/cifar-10-batches-bin/'
    # Directory that receives the event logs and checkpoints.
    train_dir = 'F:/python3.5/Machine Learning/tensorflow/CNN/cifar10/data/cifar-10-batches-bin/'
    # One training batch of images and labels.
    images, labels = cifar10_input.inputs(data_dir, BATCH_SIZE)

    # Total loss of the model on the batch.
    loss = losses(inference(images), labels)
    # Plain SGD with a fixed learning rate.
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    # minimize() also increments global_step.
    train_op = optimizer.minimize(loss, global_step=global_step)
    # tf.global_variables / tf.global_variables_initializer replace the
    # deprecated tf.all_variables / tf.initialize_all_variables.
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    os.environ['CUDA_VISIBLE_DEVICES'] = str(0)
    config = tf.ConfigProto()
    # Let this process use 20% of the GPU memory.
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    sess = tf.InteractiveSession(config=config)
    sess.run(init)

    # Coordinator plus queue-runner threads that keep the input queues full.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)

    try:
        # range() works on both Python 2 and 3; xrange is Python-2-only.
        for step in range(MAX_STEP):
            if coord.should_stop():
                break
            start_time = time.time()
            # Run one optimisation step and fetch the loss.
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time
            # Stop as soon as the model diverges.
            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            if step % 30 == 0:
                # Throughput report for the step just executed.
                num_examples_per_step = BATCH_SIZE
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print (format_str % (datetime.now(), step, loss_value,
                                     examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                # Evaluate the merged summaries and write them to the log.
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            if step % 1000 == 0 or (step + 1) == MAX_STEP:
                # Save a checkpoint tagged with the current step.
                checkpoint_path = os.path.join(train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

    except Exception as e:
        # The previous handler named an undefined exception class, which
        # turned every failure into a NameError.  Report the real error to
        # the coordinator; coord.join() re-raises it below.
        coord.request_stop(e)
    finally:
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            # Release resources even when coord.join() re-raises.
            summary_writer.close()
            sess.close()

ステップ4、評価
def evaluate():
    """Restore the latest checkpoint and print precision@1 on the test data.

    Runs enough batches of BATCH_SIZE to cover ``num_examples`` test images
    and reports the fraction of evaluated examples whose top prediction
    matches the label.  If no checkpoint is found in ``train_dir`` a message
    is printed and the function returns without evaluating.

    NOTE(review): relies on ``np``, ``datetime`` and ``cifar10_input`` being
    imported at module level; those imports are not visible in this excerpt.
    """
    # NOTE(review): both paths contain blank segments, presumably where
    # non-ASCII directory names were lost - set them to the real directories.
    data_dir = '/Machine Learning/tensorflow/      /  /data/cifar-10-batches-bin/'
    train_dir = '/Machine Learning/tensorflow/      /  /cifar10_train/'
    # Number of test images to cover.
    num_examples = 10000
    images, labels = cifar10_input.inputs(data_dir, BATCH_SIZE, train=False)

    logits = inference(images)
    # True where the highest-scoring class equals the label.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)
    # tf.global_variables replaces the deprecated tf.all_variables.
    saver = tf.train.Saver(tf.global_variables())

    os.environ['CUDA_VISIBLE_DEVICES'] = str(0)
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.2
    sess = tf.InteractiveSession(config=config)

    # Restore the trained weights before any batch is evaluated.
    print("Reading checkpoints...")
    ckpt = tf.train.get_checkpoint_state(train_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        # Without a checkpoint the variables are uninitialised and running
        # the graph would fail, so stop here.
        print('No checkpoint file found in %s' % train_dir)
        sess.close()
        return
    ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
    # Checkpoint files are named '<prefix>-<global_step>'.
    global_step = ckpt_name.split('-')[-1]
    saver.restore(sess, os.path.join(train_dir, ckpt_name))
    print('Loading success, global_step is %s' % global_step)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        # Ceiling division: 157 batches for 10000 examples at batch size 64.
        num_iter = (num_examples + BATCH_SIZE - 1) // BATCH_SIZE
        true_count = 0
        step = 0
        while step < num_iter and not coord.should_stop():
            predictions = sess.run(top_k_op)
            true_count += np.sum(predictions)
            step += 1

        # Divide by the number of examples actually scored (step * BATCH_SIZE,
        # e.g. 10048) rather than a fixed 10000, so precision is a true ratio.
        total_sample_count = step * BATCH_SIZE
        if total_sample_count > 0:
            precision = float(true_count) / total_sample_count
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
    except Exception as e:
        # Hand the error to the coordinator; coord.join() re-raises anything
        # other than a normal end-of-input.
        coord.request_stop(e)
    finally:
        try:
            coord.request_stop()
            coord.join(threads)
        finally:
            # Close the session even when coord.join() re-raises.
            sess.close()

終了:詳細コードWebサイト