Generating Images by Imitating a GAN Myself
Most of the GAN code you find online uses the MNIST dataset and produces good results, but reading through it you can see that the networks are built almost entirely from fully connected layers. If you want to generate three-channel images yourself, does it really have to be fully connected? Not necessarily, but I still wanted to use transposed convolution (deconvolution). While building the network I ran into dimension-mismatch problems, mostly because my understanding of transposed convolution was shaky, so I confirmed the tensor sizes experimentally before wiring everything up. The code I modified is recorded below.
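For reference, here is a minimal sketch of that kind of dimension experiment (the probe_* names are just for illustration; the shapes are the ones the generator below actually uses). The two key points are that tf.nn.conv2d_transpose takes its kernel as [height, width, output_channels, input_channels], the reverse of tf.nn.conv2d, and that the output shape must be passed in explicitly:

import tensorflow as tf

# A batch of 16 feature maps of size 14x14 with 3 channels.
probe_in = tf.zeros([16, 14, 14, 3])
# Kernel layout is [height, width, OUTPUT_channels, INPUT_channels].
probe_kernel = tf.Variable(tf.random_normal([3, 3, 3, 3]))
# Stride 2 with SAME padding upsamples 14x14 to the requested 28x28.
probe_out = tf.nn.conv2d_transpose(probe_in, probe_kernel,
                                   output_shape=[16, 28, 28, 3],
                                   strides=[1, 2, 2, 1], padding='SAME')
print(probe_out.get_shape())  # (16, 28, 28, 3)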
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from PIL import Image

def xavier_init(size):
    # Xavier/Glorot-style initialization scaled by the fan-in.
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)

def get_image_matrix():
    # Load every .tif image in the directory, resize to 28x28, and
    # normalize pixel values to [-1, 1] to match the generator's tanh output.
    image_directory = '....'  # path to the training images
    files = os.listdir(image_directory)
    image_matrix = []
    for file in files:
        if file.endswith('.tif'):
            print(file)
            img = Image.open(os.path.join(image_directory, file))
            img_resize = img.resize((28, 28))
            img_array = np.asarray(img_resize)
            img_norm = (img_array / 255.0) * 2 - 1
            image_matrix.append(img_norm)
    return np.array(image_matrix)

def get_next_batch(x, k):
    # Return the k-th sequential mini-batch of 16 images.
    image_batch = x[16 * k:16 * (k + 1), :]
    return image_batch

# Real images: 28x28 RGB.
X = tf.placeholder(tf.float32, shape=[None, 28, 28, 3])


def get_dim_weight(dim, dim21):
    # Build a fully connected weight/bias pair of shape [dim, dim21].
    D_W1 = tf.Variable(xavier_init([dim, dim21]))
    D_b1 = tf.Variable(tf.zeros(shape=[dim21]))
    return D_W1, D_b1

# Noise z (16-dim) and categorical code c (21-dim); concatenated they form
# the generator's 37-dim input.
Z = tf.placeholder(tf.float32, shape=[16, 16])
c = tf.placeholder(tf.float32, shape=[16, 21])
# Generator projection: 37 -> 588, where 588 = 14 * 14 * 3.
G_W1 = tf.Variable(xavier_init([37, 588]))
G_b1 = tf.Variable(tf.zeros(shape=[588]))

def sample_Z(m, n):
    # Uniform noise in [-1, 1].
    return np.random.uniform(-1., 1., size=[m, n])


def sample_c(m):
    # One-hot samples from a uniform 21-way categorical prior.
    return np.random.multinomial(1, 21 * [1 / 21.], size=m)

# Transposed-convolution kernel: [height, width, output_channels, input_channels].
trans_kernel6 = tf.Variable(tf.random_normal(shape=[3, 3, 3, 3]))

def generator(z, c):
    # Concatenate noise and code: [16, 16] + [16, 21] -> [16, 37].
    inputs = tf.concat(axis=1, values=[z, c])
    G_h1 = tf.nn.leaky_relu(tf.matmul(inputs, G_W1) + G_b1)
    # 588 units reshape into a 14x14x3 feature map.
    reshape = tf.reshape(G_h1, [-1, 14, 14, 3])
    # Stride-2 transposed convolution upsamples 14x14 -> 28x28.
    trans6 = tf.nn.conv2d_transpose(reshape, trans_kernel6, [16, 28, 28, 3],
                                    [1, 2, 2, 1], padding='SAME')
    trans7 = tf.nn.tanh(trans6)
    return trans7

# Convolution filters shared by the discriminator and the Q network.
filter_spec = tf.Variable(tf.random_normal(shape=[5, 5, 3, 64]))
filter_bias = tf.Variable(tf.zeros(shape=[64]))
filter_spec2 = tf.Variable(tf.random_normal(shape=[5, 5, 64, 64]))
filter_bias2 = tf.Variable(tf.zeros(shape=[64]))
# Two stride-2 poolings shrink 28 -> 14 -> 7, so the flattened size is 7 * 7 * 64 = 3136.
D_W1, D_b1 = get_dim_weight(3136, 1)

def discriminator(x):
    # First conv block: conv -> ReLU -> stride-2 max-pool -> local response norm.
    conv_1 = tf.nn.conv2d(x, filter_spec, strides=[1, 1, 1, 1], padding='SAME')
    pre_activate = tf.nn.bias_add(conv_1, filter_bias)
    conv1_relu = tf.nn.relu(pre_activate)
    pool1 = tf.nn.max_pool(conv1_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    norm1 = tf.nn.lrn(pool1, 4, 1.0, 0.001 / 9.0, 0.75)
    # Second conv block.
    conv_2 = tf.nn.conv2d(norm1, filter_spec2, strides=[1, 1, 1, 1], padding='SAME')
    pre_activate2 = tf.nn.bias_add(conv_2, filter_bias2)
    conv1_relu2 = tf.nn.relu(pre_activate2)
    pool2 = tf.nn.max_pool(conv1_relu2, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    norm2 = tf.nn.lrn(pool2, 4, 1.0, 0.001 / 9.0, 0.75)
    # Flatten (16, 7, 7, 64) -> (16, 3136) and map to a single probability.
    reshape = tf.reshape(norm2, [-1, 3136])
    D_prob = tf.nn.sigmoid(tf.matmul(reshape, D_W1) + D_b1)
    return D_prob
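
# A quick standalone shape check (a sketch; the `probe` name is only for
# illustration) of where 3136 comes from: the SAME-padded stride-1
# convolutions keep the spatial size, while each stride-2 max-pool halves it,
# 28 -> 14 -> 7, leaving 7 * 7 * 64 = 3136 features after flattening.
probe = tf.zeros([16, 28, 28, 3])
probe = tf.nn.conv2d(probe, tf.zeros([5, 5, 3, 64]), strides=[1, 1, 1, 1], padding='SAME')
probe = tf.nn.max_pool(probe, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
probe = tf.nn.max_pool(probe, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
print(probe.get_shape())  # (16, 7, 7, 64)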

# Q head: 3136 -> 21 logits over the categorical code.
Q_W1, Q_b1 = get_dim_weight(3136, 21)


def Q(x):
    # Q shares the convolutional trunk with the discriminator and predicts
    # the categorical code from a generated image.
    conv_1 = tf.nn.conv2d(x, filter_spec, strides=[1, 1, 1, 1], padding='SAME')
    pre_activate = tf.nn.bias_add(conv_1, filter_bias)
    conv1_relu = tf.nn.relu(pre_activate)
    pool1 = tf.nn.max_pool(conv1_relu, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    norm1 = tf.nn.lrn(pool1, 4, 1.0, 0.001 / 9.0, 0.75)
    # Second conv block.
    conv_2 = tf.nn.conv2d(norm1, filter_spec2, strides=[1, 1, 1, 1], padding='SAME')
    pre_activate2 = tf.nn.bias_add(conv_2, filter_bias2)
    conv1_relu2 = tf.nn.relu(pre_activate2)
    pool2 = tf.nn.max_pool(conv1_relu2, [1, 3, 3, 1], [1, 2, 2, 1], padding='SAME')
    norm2 = tf.nn.lrn(pool2, 4, 1.0, 0.001 / 9.0, 0.75)
    reshape = tf.reshape(norm2, [-1, 3136])
    Q_prob = tf.nn.softmax(tf.matmul(reshape, Q_W1) + Q_b1)
    return Q_prob

def plot(samples):
    # Draw a 4x4 grid of samples, mapping values from [-1, 1] back to [0, 1].
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        sample = (sample + 1) / 2
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28, 3))
    return fig

G_sample = generator(Z, c)
D_real = discriminator(X)
D_fake = discriminator(G_sample)
Q_c_given_x = Q(G_sample)

# Standard GAN losses plus the InfoGAN mutual-information term: Q_loss is the
# cross-entropy between the sampled code and Q's prediction, plus the
# (constant) entropy of the code prior.
D_loss = -tf.reduce_mean(tf.log(D_real + 1e-8) + tf.log(1 - D_fake + 1e-8))
G_loss = -tf.reduce_mean(tf.log(D_fake + 1e-8))
cross_ent = tf.reduce_mean(-tf.reduce_sum(tf.log(Q_c_given_x + 1e-8) * c, 1))
ent = tf.reduce_mean(-tf.reduce_sum(tf.log(c + 1e-8) * c, 1))
Q_loss = cross_ent + ent

# Restrict each optimizer to its own variables; without var_list, minimizing
# D_loss would also update the generator's weights, and vice versa.
theta_D = [filter_spec, filter_bias, filter_spec2, filter_bias2, D_W1, D_b1]
theta_G = [G_W1, G_b1, trans_kernel6]
theta_Q = [filter_spec, filter_bias, filter_spec2, filter_bias2, Q_W1, Q_b1]
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
Q_solver = tf.train.AdamOptimizer().minimize(Q_loss, var_list=theta_G + theta_Q)

mb_size = 16
Z_dim = 16

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out2/'):
    os.makedirs('out2/')

i = 0
X_all = get_image_matrix()
for it in range(1000000):
    # Every 1000 iterations, render a sample grid for a fixed code index.
    if it % 1000 == 0:
        Z_noise = sample_Z(16, Z_dim)
        idx = 1  # np.random.randint(0, 10)
        c_noise = np.zeros([16, 21])  # 21 + 16 = 37
        c_noise[range(16), idx] = 1
        samples = sess.run(G_sample, feed_dict={Z: Z_noise, c: c_noise})
        fig = plot(samples)
        plt.savefig('out2/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)
    # One pass over the 2100 training images in batches of 16.
    for b_i in range(int(2100 / 16)):
        X_mb = get_next_batch(X_all, b_i)
        Z_noise = sample_Z(mb_size, Z_dim)
        c_noise = sample_c(mb_size)
        _, D_loss_curr = sess.run([D_solver, D_loss],
                                  feed_dict={X: X_mb, Z: Z_noise, c: c_noise})
        _, G_loss_curr = sess.run([G_solver, G_loss],
                                  feed_dict={Z: Z_noise, c: c_noise})
        sess.run([Q_solver], feed_dict={Z: Z_noise, c: c_noise})
    if it % 100 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'.format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()