Imitating a GAN to generate images myself


Most of the GAN code you find online uses the MNIST dataset and generates quite convincing digits. If you read through that code, you'll notice the operations are all basic fully-connected layers. So if you want to generate three-channel images yourself, are you stuck with fully-connected layers? Not that it wouldn't work, but I still wanted to use the transposed-convolution (deconvolution) operation. While building the network I kept running into dimension-mismatch problems, mainly because my understanding of transposed convolution was shaky, so I confirmed the tensor dimensions through small experiments before wiring up the model. My modified code is recorded below.
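
Concretely, the rule those experiments confirm: for tf.nn.conv2d_transpose the kernel shape is [height, width, output_channels, input_channels] (the channel order is reversed compared with conv2d), and with padding='SAME' and stride 2 a 14x14 input becomes a 28x28 output. A minimal shape check of this kind (TensorFlow 1.x; the names here are only illustrative):

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[16, 14, 14, 3])
# kernel: [height, width, out_channels, in_channels] -- reversed vs. conv2d
k = tf.Variable(tf.random_normal(shape=[3, 3, 3, 3]))
# stride 2 with 'SAME' padding doubles the spatial size: 14 -> 28
y = tf.nn.conv2d_transpose(x, k, output_shape=[16, 28, 28, 3],
                           strides=[1, 2, 2, 1], padding='SAME')
print(y.get_shape())  # (16, 28, 28, 3)

With that confirmed, here is the full script.
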
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from PIL import Image


def xavier_init(size):
    # Xavier-style initialization: stddev = 1 / sqrt(fan_in / 2)
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)

def get_image_matrix():
    # load every .tif image, resize it to 28x28, and scale it to [-1, 1]
    image_directory = '....'  # path to the training images (set this yourself)
    files = os.listdir(image_directory)
    image_matrix = []
    for file in files:
        if file.endswith('.tif'):
            print(file)
            img = Image.open(os.path.join(image_directory, file))
            img_resize = img.resize((28, 28))
            img_array = np.asarray(img_resize)
            img_norm = (img_array/255.0)*2 - 1  # match the generator's tanh range
            image_matrix.append(img_norm)
    return np.array(image_matrix)


def get_next_batch(x, k):
    # return the k-th mini-batch of 16 images
    return x[16*k:16*(k+1), :]

X = tf.placeholder(tf.float32, shape=[None, 28, 28, 3])   # real images: 28x28 RGB

def get_dim_weight(dim, dim21):
    # dense-layer weight and bias for a dim -> dim21 mapping
    D_W1 = tf.Variable(xavier_init([dim, dim21]))
    D_b1 = tf.Variable(tf.zeros(shape=[dim21]))
    return D_W1, D_b1

# The discriminator's dense weights are created further down with
# get_dim_weight(3136, 1), once the conv feature size (7*7*64) is known;
# the MNIST-style 784 -> 128 -> 1 dense stack is no longer used.


Z = tf.placeholder(tf.float32, shape=[16, 16])   # noise: batch 16, dim 16
c = tf.placeholder(tf.float32, shape=[16, 21])   # latent code: 21 categories

# 37 = Z_dim (16) + code dim (21); 588 = 14 * 14 * 3, reshaped in the generator
G_W1 = tf.Variable(xavier_init([37, 588]))
G_b1 = tf.Variable(tf.zeros(shape=[588]))


# Q's dense weights are likewise created below with get_dim_weight(3136, 21).


def sample_Z(m, n):
    # m rows of uniform noise in [-1, 1]
    return np.random.uniform(-1., 1., size=[m, n])


def sample_c(m):
    # m one-hot latent codes drawn uniformly over the 21 categories
    return np.random.multinomial(1, 21*[1/21.], size=m)

# Transposed-convolution kernel; only one is needed. The shape order is
# [height, width, output_channels, input_channels] -- the reverse of conv2d.
trans_kernel = tf.Variable(tf.random_normal(shape=[3, 3, 3, 3]))
def generator(z, c):
    # concatenate noise (16) and latent code (21) into a 37-dim input
    inputs = tf.concat(axis=1, values=[z, c])
    G_h1 = tf.nn.leaky_relu(tf.matmul(inputs, G_W1) + G_b1)
    # 588 = 14 * 14 * 3 -> feature map of shape [batch, 14, 14, 3]
    reshape = tf.reshape(G_h1, [-1, 14, 14, 3])
    # stride-2 transposed conv upsamples 14x14 -> 28x28
    trans = tf.nn.conv2d_transpose(reshape, trans_kernel, [16, 28, 28, 3], [1, 2, 2, 1], padding='SAME')
    return tf.nn.tanh(trans)
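# A quick way to sanity-check the output shape (the dimension-mismatch
# hotspot mentioned above); uncomment to verify before training:
# print(generator(Z, c).get_shape())   # expect (16, 28, 28, 3)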

filter_spec = tf.Variable(tf.random_normal(shape=[5, 5, 3, 64]))
filter_bias = tf.Variable(tf.zeros(shape=[64]))

filter_spec2 = tf.Variable(tf.random_normal(shape=[5, 5, 64, 64]))
filter_bias2 = tf.Variable(tf.zeros(shape=[64]))

D_W1, D_b1 = get_dim_weight(3136, 1)
Q_W1, Q_b1 = get_dim_weight(3136, 21)

def conv_features(x):
    # two conv / max-pool / LRN stages shared by the discriminator and Q
    conv_1 = tf.nn.conv2d(x, filter_spec, strides=[1, 1, 1, 1], padding='SAME')
    conv1_relu = tf.nn.relu(tf.nn.bias_add(conv_1, filter_bias))
    pool1 = tf.nn.max_pool(conv1_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')   # 28 -> 14
    norm1 = tf.nn.lrn(pool1, 4, 1.0, 0.001/9.0, 0.75)
    conv_2 = tf.nn.conv2d(norm1, filter_spec2, strides=[1, 1, 1, 1], padding='SAME')
    conv2_relu = tf.nn.relu(tf.nn.bias_add(conv_2, filter_bias2))
    pool2 = tf.nn.max_pool(conv2_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')   # 14 -> 7
    norm2 = tf.nn.lrn(pool2, 4, 1.0, 0.001/9.0, 0.75)
    return tf.reshape(norm2, [-1, 3136])   # 7 * 7 * 64 = 3136

def discriminator(x):
    return tf.nn.sigmoid(tf.matmul(conv_features(x), D_W1) + D_b1)

def Q(x):
    return tf.nn.softmax(tf.matmul(conv_features(x), Q_W1) + Q_b1)
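# Note: the discriminator and Q deliberately share the convolutional trunk
# (the same filter variables), as in InfoGAN, where Q reuses most of D's layers.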


def plot(samples):
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        sample = (sample + 1)/2   # map tanh output from [-1, 1] back to [0, 1]
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28, 3))

    return fig


G_sample = generator(Z, c)
D_real = discriminator(X)
D_fake = discriminator(G_sample)
Q_c_given_x = Q(G_sample)

# standard (non-saturating) GAN losses; the 1e-8 terms guard against log(0)
D_loss = -tf.reduce_mean(tf.log(D_real + 1e-8) + tf.log(1 - D_fake + 1e-8))
G_loss = -tf.reduce_mean(tf.log(D_fake + 1e-8))

# InfoGAN mutual-information term: cross-entropy between the sampled code c
# and Q's prediction Q(c|x); 'ent' is constant w.r.t. the trainable variables
cross_ent = tf.reduce_mean(-tf.reduce_sum(tf.log(Q_c_given_x + 1e-8) * c, 1))
ent = tf.reduce_mean(-tf.reduce_sum(tf.log(c + 1e-8) * c, 1))
Q_loss = cross_ent + ent

# Each optimizer must update only its own sub-network; without var_list,
# minimize() would adjust every variable in the graph for every loss.
theta_D = [filter_spec, filter_bias, filter_spec2, filter_bias2, D_W1, D_b1]
theta_G = [G_W1, G_b1, trans_kernel]
theta_Q = theta_G + [Q_W1, Q_b1]

D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
Q_solver = tf.train.AdamOptimizer().minimize(Q_loss, var_list=theta_Q)

mb_size = 16   # batch size is fixed at 16 throughout (placeholders, output_shape)
Z_dim = 16

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out2/'):
    os.makedirs('out2/')
i = 0   # counter for saved preview grids
X_all = get_image_matrix()   # load the whole training set once
for it in range(1000000):
    if it % 1000 == 0:
        # every 1000 iterations, save a 4x4 grid of generated samples to out2/
        Z_noise = sample_Z(16, Z_dim)

        idx = 1   # fix the latent code to category 1 for the preview grid
        c_noise = np.zeros([16, 21])   # 21-way one-hot code; 16 + 21 = 37 generator inputs
        c_noise[range(16), idx] = 1

        samples = sess.run(G_sample,
                           feed_dict={Z: Z_noise, c: c_noise})

        fig = plot(samples)
        plt.savefig('out2/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)
    for b_i in range(int(2100/16)):   # assumes 2100 training images -> 131 mini-batches of 16
        X_mb = get_next_batch(X_all, b_i)
        Z_noise = sample_Z(mb_size, Z_dim)
        c_noise = sample_c(mb_size)

        _, D_loss_curr = sess.run([D_solver, D_loss],
                              feed_dict={X: X_mb, Z: Z_noise, c: c_noise})

        _, G_loss_curr = sess.run([G_solver, G_loss],
                              feed_dict={Z: Z_noise, c: c_noise})

        sess.run([Q_solver], feed_dict={Z: Z_noise, c: c_noise})

    if it % 100 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'.format(D_loss_curr))
        print('G loss: {:.4}'.format(G_loss_curr))
        print()