DCGANs:DCGANで画像を生成する(MNIST)


次は医用画像を対象にした研究を行う予定なので、上司はまず私にTensorFlow(TF)を動かして経験を積ませてくれました。コンピュータサイエンス専攻の出身ではない私にとって、コードを書く力とコードを読み解く力は特に重要です。本記事は参考ブログに基づいて(もちろん、元のブログには強調されていない詳細や誤りも少なくありません)コードの修正と理解を進めたもので、その過程で多くの収穫がありました。
強調:本ブログで学ぶ際は、元のブログの詳細な手順の解説を基礎とし、あわせて私のコード中のコメントを読むことで、最も高い学習効果が得られます。
ポイント:
  • generatorの理解については、DCGANの元のarXiv論文を参照してください。特に注目すべきは、U-Netにも登場した「deconvolution layer」が、実際には「fractionally-strided convolution layer」と呼ばれていることです。
  • 参考ブログのgeneratorに存在する問題を修正しました。主な修正点は tf.add(H_conv1, b_conv1) の箇所です。
  • reuseでよく発生する問題を深く掘り下げた結果、自分が「reuse」の使い方、すなわち if (reuse): tf.get_variable_scope().reuse_variables() に慣れていなかったことが分かりました。discriminatorを生成画像に適用する際には、必ず既存のパラメータを使う必要があるため、reuse=Trueを保証する目的で Dg = model.discriminator(Gz, reuse=True) というコードになっています。
  • 補足をもう1つ:tf.variable_scope()のスコープではreuse=Falseがデフォルトであり、しかもreuseは継承される性質を持っています。詳細はこの簡書(Jianshu)の記事が参考になります。とてもよく書かれています!
  • tf.nn.conv2d_transpose()の理解については、なぜoutput_shapeが必要なのかをよく理解してください。指定しないと、出力のshapeが一意に定まりません。
  • プーリング(pool)でのサイズ変換を理解しておく必要があります。これは畳み込みだけでなく、「逆畳み込み」のサイズ変換にも関係します。
  • コードは2つの部分に分かれています。1つ目のmodel.pyはモデル構築に重点を置き、2つ目のtrain.pyは学習(訓練)に重点を置いています。
  • 本ブログのコードは元のブログのコードとは異なり、筆者自身の「考え」を反映しています。
    「刃を研ぐ時間は薪割りの妨げにならない」と言われるように、まず元のブログと照らし合わせてTFプログラミングの考え方やDCGANの実装の詳細をよく理解してこそ、肝心な問題がどこにあるのかが分かります。実際には、自分のデータセットで試してみて初めて問題の所在が分かるものです。今後、DCGANで医用画像を生成するコードも更新し、初心者の成長の道のりを記録していく予定です。
    # -*- coding: utf-8 -*-
    """
    Created on Tue Jul 24 20:33:14 2018
    E-mail: [email protected]
    @author: DidiLv
    File name: model.py
    """
    
    
    import tensorflow as tf
    import numpy as np
    
    
    # Load the MNIST dataset, using the "MNIST_data/" directory.
    # NOTE(review): `mnist` is not referenced by any function visible in this
    # module -- confirm whether this module-level load is still needed here.
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/") # MNIST dataset
    
    # pooling and convolution definition
    def conv2d(x, W):
        """Convolve `x` with kernel `W` using stride 1 and 'SAME' padding."""
        unit_strides = [1, 1, 1, 1]
        return tf.nn.conv2d(input=x, filter=W, strides=unit_strides, padding='SAME')
    
    def avg_pool_2x2(x):
        """Average-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
        window = [1, 2, 2, 1]
        step = [1, 2, 2, 1]
        return tf.nn.avg_pool(x, ksize=window, strides=step, padding='SAME')
    
    def xavier_init(size):
        """Return a random-normal tensor whose stddev is scaled by the fan-in.

        The standard deviation is ``1 / sqrt(fan_in / 2)``.

        Args:
            size: Shape of the tensor to create, as a list or tuple of ints.
                The last entry is treated as the output dimension.

        Returns:
            The result of `tf.random_normal` with shape `size`.
        """
        dims = list(size)
        # Fan-in is the product of every dimension except the last one, so a
        # conv kernel [height, width, in_channels, out_channels] is scaled by
        # height * width * in_channels instead of by its height alone.
        # For 1-D and 2-D shapes this is still just size[0].
        fan_in = dims[0]
        for dim in dims[1:-1]:
            fan_in *= dim
        xavier_stddev = 1. / tf.sqrt(fan_in / 2.)
        return tf.random_normal(shape=size, stddev=xavier_stddev)
    
    # discriminator
    def discriminator(x_image, reuse=False):
        """Build the discriminator network and return its raw logits.

        The network is two (5x5 conv -> ReLU -> 2x2 average pool) stages followed
        by two fully connected layers. The flatten to 7*7*16 features means it
        is sized for 28x28 single-channel images.

        Args:
            x_image: Input image batch fed to the first convolution
                (kernel shape [5, 5, 1, 8], i.e. one input channel).
            reuse: When true, the variables of the 'discriminator' scope are
                reused instead of created.

        Returns:
            The output of the last fully connected layer (one unit, no
            activation applied).
        """
        weight_init = tf.truncated_normal_initializer(stddev=0.02)
        zero_init = tf.constant_initializer(0)

        with tf.variable_scope('discriminator'):
            # Reuse is off by default inside a fresh variable scope, so it only
            # has to be switched on explicitly.
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # Stage 1: conv (1 -> 8 channels) + average pooling.
            kernel1 = tf.get_variable('d_wconv1', shape=[5, 5, 1, 8], initializer=weight_init)
            bias1 = tf.get_variable('d_bconv1', shape=[8], initializer=zero_init)
            act1 = tf.nn.relu(conv2d(x_image, kernel1) + bias1)
            pooled1 = avg_pool_2x2(act1)

            # Stage 2: conv (8 -> 16 channels) + average pooling.
            kernel2 = tf.get_variable('d_wconv2', shape=[5, 5, 8, 16], initializer=weight_init)
            bias2 = tf.get_variable('d_bconv2', shape=[16], initializer=zero_init)
            act2 = tf.nn.relu(conv2d(pooled1, kernel2) + bias2)
            pooled2 = avg_pool_2x2(act2)

            # Dense layer 1: flatten to 7*7*16 features, project to 32 units.
            flat_units = 7 * 7 * 16
            dense1_w = tf.get_variable('d_wfc1', [flat_units, 32], initializer=weight_init)
            dense1_b = tf.get_variable('d_bfc1', [32], initializer=zero_init)
            flattened = tf.reshape(pooled2, [-1, flat_units])
            hidden = tf.nn.relu(tf.matmul(flattened, dense1_w) + dense1_b)

            # Dense layer 2: a single output unit, left as a logit.
            dense2_w = tf.get_variable('d_wfc2', [32, 1], initializer=weight_init)
            dense2_b = tf.get_variable('d_bfc2', [1], initializer=zero_init)
            logits = tf.matmul(hidden, dense2_w) + dense2_b
        return logits
    
    
    # generator from DCGAN, take a d-dimensional vector as input and upsample it to become a 28*28 image
    # the structure is from https://arxiv.org/pdf/1511.06434v2.pdf
    def generator(z, batch_size, z_dim, reuse = False):
        """Build the DCGAN generator: upsample a latent batch to 28x28x1 images.

        The latent input is reshaped to [batch_size, 2, 2, 25] and passed through
        four fractionally-strided ("transposed") convolutions with stride 2:
        2x2x25 -> 3x3x256 -> 6x6x128 -> 12x12x64 -> 28x28x1.
        The first three stages are followed by batch normalisation and ReLU,
        the last one by tanh.

        Args:
            z: Latent input; must hold batch_size * 2 * 2 * 25 values so that it
                can be reshaped to [batch_size, 2, 2, 25].
            batch_size: Batch size used in the reshape and in every
                `output_shape` passed to `tf.nn.conv2d_transpose`.
            z_dim: Accepted for interface compatibility; not used in the body.
            reuse: When true, the variables of the 'generator' scope are reused
                instead of created.

        Returns:
            The tanh-activated output of the last transposed convolution, with
            output shape [batch_size, 28, 28, 1].
        """
        ## number of filters for the first layer of generator
        g_dim = 64
        ## color dimension of output
        c_dim = 1
        ## size of output image
        s = 28
        s2, s4, s8, s16 = s // 2, s // 4, s // 8, s // 16

        def _deconv(inputs, index, output_shape, padding):
            """Apply the stride-2 5x5 transposed convolution number `index`, plus bias."""
            in_channels = int(inputs.get_shape()[-1])
            out_channels = output_shape[-1]
            ## conv2d_transpose filter shape = [height, width, out_channels, in_channels]
            weights = tf.get_variable('g_wconv%d' % index,
                                      shape = [5, 5, out_channels, in_channels],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
            # Every bias starts at the constant 0.1. The second layer previously
            # used truncated_normal_initializer(0.1), whose first positional
            # argument is the mean, so it was drawn with stddev 1.0 instead.
            bias = tf.get_variable('g_bconv%d' % index, shape = [out_channels],
                                   initializer=tf.constant_initializer(.1))
            # output_shape is required: for a given input size and stride,
            # more than one output size is valid.
            upsampled = tf.nn.conv2d_transpose(inputs, weights, output_shape = output_shape,
                                               strides = [1, 2, 2, 1], padding = padding)
            return tf.add(upsampled, bias)

        with tf.variable_scope('generator'):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # h0 dimension is [batch_size, z_width, z_height, z_channel] = batch_size x 2 x 2 x 25
            h0 = tf.reshape(z, [batch_size, s16 + 1, s16 + 1, 25])
            h0 = tf.nn.relu(h0)

            hidden_shapes = [
                [batch_size, s8, s8, g_dim * 4],          # batch_size x 3 x 3 x 256
                [batch_size, s4 - 1, s4 - 1, g_dim * 2],  # batch_size x 6 x 6 x 128
                [batch_size, s2 - 2, s2 - 2, g_dim * 1],  # batch_size x 12 x 12 x 64
            ]
            net = h0
            for index, shape in enumerate(hidden_shapes, start=1):
                net = _deconv(net, index, shape, 'SAME')
                net = tf.contrib.layers.batch_norm(inputs = net, center=True, scale=True,
                                                   is_training=True, scope="g_bn%d" % index)
                net = tf.nn.relu(net)

            # Final layer: 'VALID' padding is what lets a 12x12 input map onto the
            # 28x28 output with a 5x5 kernel and stride 2; no batch norm here.
            net = _deconv(net, 4, [batch_size, s, s, c_dim], 'VALID')
            #Dimensions of the result = batch_size x 28 x 28 x 1
            return tf.nn.tanh(net)
    
    
    
    # -*- coding: utf-8 -*-
    """
    Created on Wed Jul 25 09:42:35 2018
    E-mail: [email protected]
    @author: DidiLv
    File name: train.py
    """
    import model
    
    import tensorflow as tf
    import numpy as np
    import matplotlib.pyplot as plt
    import random
    
    # Load the MNIST dataset, using the "MNIST_data/" directory.
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/") # MNIST dataset


    # Reset the default graph so variables from earlier experiments do not linger.
    tf.reset_default_graph()

    batch_size = 16
    z_dimensions = 2*2*25 # must match the reshape of h0 in model.generator


    # Placeholders: real images for the discriminator, latent noise for the generator.
    x_placeholder = tf.placeholder(dtype = tf.float32, shape = [None, 28, 28, 1])
    z_placeholder = tf.placeholder(dtype = tf.float32, shape = [None,z_dimensions])

    Dx = model.discriminator(x_placeholder) # logits for real training data
    Gz = model.generator(z_placeholder, batch_size, z_dimensions)
    # The discriminator must score generated images with the SAME weights,
    # hence reuse=True on the second call.
    Dg = model.discriminator(Gz, reuse=True)


    # Generator wants the discriminator to label its samples as real (1).
    g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Dg, labels=tf.ones_like(Dg)))
    # Discriminator wants real -> 1 and generated -> 0.
    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Dx, labels = tf.ones_like(Dx)))
    # Labels are built from Dg so they always match the shape of the logits.
    d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=Dg, labels = tf.zeros_like(Dg)))
    d_loss = d_loss_real + d_loss_fake

    # Partition the trainable variables by the variable scope model.py creates
    # them in, rather than by a substring that could match unrelated names.
    tvars = tf.trainable_variables()
    d_vars = [var for var in tvars if var.name.startswith('discriminator/')]
    g_vars = [var for var in tvars if var.name.startswith('generator/')]

    with tf.variable_scope(tf.get_variable_scope(), reuse = False):
        # var_list: the tf.Variables each optimizer is allowed to update
        trainerD = tf.train.AdadeltaOptimizer(learning_rate = 1e-3).minimize(d_loss, var_list = d_vars)
        trainerG = tf.train.AdadeltaOptimizer(learning_rate = 1e-3).minimize(g_loss, var_list = g_vars)



    iterations = 3000
    # The context manager closes the session when training ends or fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(iterations):
            # Zero-mean, unit-variance latent noise (the first argument is the mean).
            z_batch = np.random.normal(0, 1, size=[batch_size, z_dimensions])
            real_image_batch = mnist.train.next_batch(batch_size)
            real_image_batch = np.reshape(real_image_batch[0],[batch_size,28,28,1])
            # MNIST pixels are in [0, 1]; rescale to [-1, 1] to match the range
            # of the generator's tanh output.
            real_image_batch = real_image_batch * 2. - 1.
            _,dLoss = sess.run([trainerD, d_loss],feed_dict={z_placeholder:z_batch,x_placeholder:real_image_batch}) #Update the discriminator
            _,gLoss = sess.run([trainerG, g_loss],feed_dict={z_placeholder:z_batch}) #Update the generator
            print((dLoss+gLoss))
    
    

    以上がDCGANのコードのすべてです。次のtest.pyは、筆者がtf.Variable()で変数を構築する方法を試すために書いたテストコードで、こちらも問題なく動作します。注意点:
  • 新たに定義した初期化関数xavier_init()を用いて初期化を行っています。
  • tf.nn.conv2d()に渡すデータは通常tf.float32形式である必要があるため、x_imageの型を変換しています。
  • TensorFlowの変数の値を表示するには、sessionを作成してから表示する必要があります。例:import tensorflow as tf
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    print(sess.run(D_noise))
    これで、次のtest.pyファイルのD_noiseの数値を見ることができます。そうしないと、そのshapeと型しか見えません。
    # -*- coding: utf-8 -*-
    """
    Created on Wed Jul 25 16:02:22 2018
    E-mail: [email protected]
    @author: DidiLv
    """
    
    
    import tensorflow as tf
    import numpy as np
    
    # pooling and convolution definition
    def conv2d(x, W):
        """Convolve `x` with kernel `W` using stride 1 and 'SAME' padding."""
        unit_strides = [1, 1, 1, 1]
        return tf.nn.conv2d(input=x, filter=W, strides=unit_strides, padding='SAME')
    
    def avg_pool_2x2(x):
        """Average-pool `x` over 2x2 windows with stride 2 and 'SAME' padding."""
        window = [1, 2, 2, 1]
        step = [1, 2, 2, 1]
        return tf.nn.avg_pool(x, ksize=window, strides=step, padding='SAME')
    
    def xavier_init(size):
        """Return a random-normal tensor whose stddev is scaled by the fan-in.

        The standard deviation is ``1 / sqrt(fan_in / 2)``.

        Args:
            size: Shape of the tensor to create, as a list or tuple of ints.
                The last entry is treated as the output dimension.

        Returns:
            The result of `tf.random_normal` with shape `size`.
        """
        dims = list(size)
        # Fan-in is the product of every dimension except the last one, so a
        # conv kernel [height, width, in_channels, out_channels] is scaled by
        # height * width * in_channels instead of by its height alone.
        # For 1-D and 2-D shapes this is still just size[0].
        fan_in = dims[0]
        for dim in dims[1:-1]:
            fan_in *= dim
        xavier_stddev = 1. / tf.sqrt(fan_in / 2.)
        return tf.random_normal(shape=size, stddev=xavier_stddev)
    def sample_z(shape):
        """Draw an array of the given shape, uniformly distributed over [-1, 1)."""
        lower, upper = -1., 1.
        return np.random.uniform(low=lower, high=upper, size=shape)
    
    
    def discriminator(x_image):
        """Build a discriminator from plain `tf.Variable`s and return its logits.

        The layer layout is two (5x5 conv -> ReLU -> 2x2 average pool) stages
        followed by two fully connected layers. All weights and biases are
        created with `tf.Variable(xavier_init(...))`, so there is no variable
        scope and no reuse: each call creates a new set of variables.

        Args:
            x_image: Input image batch fed to the first convolution
                (kernel shape [5, 5, 1, 8], i.e. one input channel).

        Returns:
            The output of the last fully connected layer (one unit, no
            activation applied).
        """
        # Stage 1: conv + pool.
        ## kernel shape: [filter_height, filter_width, in_channels, out_channels]
        ## bias shape:   [out_channels]
        kernel1 = tf.Variable(xavier_init([5, 5, 1, 8]))
        bias1 = tf.Variable(xavier_init([8]))
        act1 = tf.nn.relu(conv2d(x_image, kernel1) + bias1)
        pooled1 = avg_pool_2x2(act1)

        # Stage 2: conv + pool; the input now has the 8 channels of stage 1.
        kernel2 = tf.Variable(xavier_init([5, 5, 8, 16]))
        bias2 = tf.Variable(xavier_init([16]))
        act2 = tf.nn.relu(conv2d(pooled1, kernel2) + bias2)
        pooled2 = avg_pool_2x2(act2)

        # Dense layer 1.
        ## Two stride-2 poolings shrink a 28x28 input to 7x7 (28 / 2 / 2), and
        ## stage 2 leaves 16 channels, hence 7 * 7 * 16 input features.
        dense1_w = tf.Variable(xavier_init([7 * 7 * 16, 32]))
        dense1_b = tf.Variable(xavier_init([32]))
        ## A fully connected layer needs one feature vector per example.
        flattened = tf.reshape(pooled2, [-1, 7 * 7 * 16])
        hidden = tf.nn.relu(tf.matmul(flattened, dense1_w) + dense1_b)

        # Dense layer 2: a single output unit, left as a logit.
        dense2_w = tf.Variable(xavier_init([32, 1]))
        dense2_b = tf.Variable(xavier_init([1]))

        logits = tf.add(tf.matmul(hidden, dense2_w), dense2_b)
        return logits
    
    # Create a random 1x28x28x1 "image" (in fact pure noise) as a float32 variable,
    # since tf.nn.conv2d is fed float32 data here.
    x_image = tf.Variable(initial_value=sample_z(shape=[1, 28, 28, 1]), dtype=tf.float32)

    # Discriminator output for the noise image.
    D_noise = discriminator(x_image)