25513 ワード
注意事項：コードが要求する VGG-19 モデルとトレーニングセットは、あらかじめ自分でダウンロードしておいてください（実行時の自動ダウンロードは遅いです）。 MODEL_URL = 'http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat' DATA_URL = 'http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip' コードは修正済みで、TensorFlow 1.4 以上で実行できます。モデルの訓練は python FCN.py を実行するだけです。学習率は 1e-5 からさらに小さく修正しないと、loss が 3 前後で変動したまま下がりません。debug フラグをトレーニング中に設定すると、活性化関数、勾配、変数などの情報をサマリーに追加できます。



# coding=utf-8
from __future__ import print_function
import tensorflow as tf
import numpy as np
import TensorflowUtils as utils
import read_MITSceneParsingData as scene_parsing
import datetime
import BatchDatsetReader as dataset
from six.moves import xrange
# Command-line flags (TensorFlow 1.x flags API). The defaults are given as
# strings; the flags library parses them into the declared type.
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_integer("batch_size", "2", "batch size for training")
tf.flags.DEFINE_string("logs_dir", "logs/", "path to logs directory")
tf.flags.DEFINE_string("data_dir", "Data_zoo/MIT_SceneParsing/", "path to dataset")
tf.flags.DEFINE_float("learning_rate", "1e-6", "Learning rate for Adam Optimizer")
tf.flags.DEFINE_string("model_dir", "Model_zoo/", "Path to vgg model mat")
tf.flags.DEFINE_bool('debug', "True", "Debug mode: True/ False")
tf.flags.DEFINE_string('mode', "train", "Mode train/ test/ visualize")

# Pretrained VGG-19 weights in MatConvNet format.
MODEL_URL = 'http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat'

# Number of training iterations. The training loop in main() reads this name,
# which was previously undefined (NameError as soon as training started).
# NOTE(review): value chosen as a generous default -- tune for your run.
MAX_ITERATION = int(1e5 + 1)
# Number of output channels of the segmentation head.
NUM_OF_CLASSESS = 151
# Side length (pixels) of the square input images and annotations.
IMAGE_SIZE = 224
# When True, main() restores the latest checkpoint found in FLAGS.logs_dir.
fine_tuning = False
# Build the VGG network from the pretrained weights and the input image.
def vgg_net(weights, image):
    """Build the VGG-19 layer stack from pretrained weights.

    :param weights: per-layer weight records, indexed in the same order as
        the layer names listed below (presumably the squeezed ``layers``
        array of the MatConvNet .mat file -- confirm against the caller).
    :param image: input tensor fed to the first layer.
    :return: dict mapping each layer name to that layer's output tensor.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        # The first four characters of the name identify the layer type.
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            # Presumably a 2x2 average pool that halves height and width, so
            # pool1..pool4 sit at 1/2, 1/4, 1/8 and 1/16 of the input size
            # -- confirm in TensorflowUtils.
            current = utils.avg_pool_2x2(current)
        # Keep every intermediate output so callers can tap any layer.
        net[name] = current

    return net
# Segmentation network: takes the input image and keep_prob, the dropout keep probability.
def inference(image, keep_prob):
    """Semantic segmentation network definition.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability passed to the two dropout layers.
    :return: tuple ``(prediction, logits)``. ``logits`` is the output of the
        last transposed convolution with NUM_OF_CLASSESS channels;
        ``prediction`` is the argmax of ``logits`` over axis 3 with that
        axis re-added as a size-1 dimension.
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    # Per-channel mean of the normalization image stored with the model.
    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer)

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        # Dropout must run regardless of the debug flag; only the summary
        # above is debug-only.
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # 1x1 convolution producing one score map per class.
        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        # now to upscale to actual image size
        # Step 1: upsample conv8 to the shape of pool4 and add pool4.
        # Transposed-convolution filters are [height, width, out_ch, in_ch].
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        # Step 2: upsample fuse_1 to the shape of pool3 and add pool3.
        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        # Step 3: upsample fuse_2 (stride 8) to the input image's height and
        # width, with one channel per class.
        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # Index of the highest-scoring class along the channel axis.
        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    # Re-add the channel axis so the prediction is [b, h, w, 1].
    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def train(loss_val, var_list):
    """Create the training op that minimizes ``loss_val`` with Adam.

    :param loss_val: scalar loss tensor to minimize.
    :param var_list: variables to compute gradients for and update.
    :return: the op returned by ``optimizer.apply_gradients``.
    """
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads = optimizer.compute_gradients(loss_val, var_list=var_list)
    if FLAGS.debug:
        # print(len(var_list))
        for grad, var in grads:
            utils.add_gradient_summary(grad, var)
    return optimizer.apply_gradients(grads)
def main(argv=None):
    """Build the graph, then train or visualize according to FLAGS.mode.

    :param argv: unused; accepted so the function can be the entry point of
        ``tf.app.run()``.
    """
    # Dropout keep probability, fed per run (0.85 for training, 1.0 otherwise).
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

    # pred_annotation: [b, h, w, 1]; logits: [b, h, w, NUM_OF_CLASSESS]
    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
    # The sparse loss takes labels of shape [b, h, w], so drop the channel axis.
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                          labels=tf.squeeze(annotation, squeeze_dims=[3]),
                                                                          name="entropy")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            # Record a histogram of every trainable variable for TensorBoard.
            tf.summary.histogram(var.op.name, var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    # Variables must be initialized before the first sess.run on the model;
    # a checkpoint restore below then overwrites the initial values.
    sess.run(tf.global_variables_initializer())
    if fine_tuning:
        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")

    if FLAGS.mode == "train":
        for itr in range(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                # Every 10 steps: report the training loss and write summaries.
                train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                # Every 500 steps: evaluate one validation batch and checkpoint.
                valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
                valid_loss = sess.run(loss, feed_dict={image: valid_images, annotation: valid_annotations,
                                                       keep_probability: 1.0})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
        pred = sess.run(pred_annotation, feed_dict={image: valid_images, annotation: valid_annotations,
                                                    keep_probability: 1.0})
        # Drop the size-1 channel axis before saving as images.
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
            utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr))
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + str(5+itr))
            print("Saved image: %d" % itr)
if __name__ == "__main__":
    # tf.app.run() parses the command-line flags and then calls main(argv).
    tf.app.run()
# coding=utf-8
__author__ = 'charlie'
import numpy as np
import os
import random
from six.moves import cPickle as pickle
from tensorflow.python.platform import gfile
import glob
import TensorflowUtils as utils
# Download location of the ADEChallengeData2016 archive used by read_dataset().
# Previous URL, kept for reference:
# DATA_URL = 'http://sceneparsing.csail.mit.edu/data/ADEChallengeData2016.zip'
DATA_URL = 'http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip'
def read_dataset(data_dir):
    """Return the training and validation record lists for the dataset.

    The records are cached in ``<data_dir>/MITSceneParsing.pickle``. If that
    file does not exist, the dataset archive is downloaded and extracted, the
    image/annotation pairs are listed, and the cache is written.

    :param data_dir: directory that holds (or will hold) the dataset and the
        pickle cache, e.g. ``Data_zoo/MIT_SceneParsing/``.
    :return: tuple ``(training_records, validation_records)``; each is a list
        of ``{'image': ..., 'annotation': ..., 'filename': ...}`` dicts.
    :raises IOError: if the extracted dataset directory cannot be found.
    """
    pickle_filename = "MITSceneParsing.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)
    if not os.path.exists(pickle_filepath):
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        # Archive name without extension, e.g. "ADEChallengeData2016".
        scene_parsing_folder = os.path.splitext(DATA_URL.split("/")[-1])[0]
        image_dir = os.path.join(data_dir, scene_parsing_folder)
        result = create_image_lists(image_dir)
        if result is None:
            # Fail before pickling so a useless cache file is not left behind
            # to be picked up by every later run.
            raise IOError("Dataset directory '" + image_dir + "' not found.")
        print("Pickling ...")
        with open(pickle_filepath, 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)
    else:
        print("Found pickle file!")

    # NOTE: pickle.load executes arbitrary code from the file; only point
    # data_dir at caches written by this function.
    with open(pickle_filepath, 'rb') as f:
        result = pickle.load(f)

    return result['training'], result['validation']
def create_image_lists(image_dir):
    """List image/annotation pairs for the training and validation splits.

    For every ``<image_dir>/images/<split>/*.jpg`` the matching annotation is
    expected at ``<image_dir>/annotations/<split>/<name>.png``; images with
    no annotation file are reported and skipped.

    :param image_dir: dataset root, e.g.
        ``Data_zoo/MIT_SceneParsing/ADEChallengeData2016``.
    :return: ``{'training': [...], 'validation': [...]}`` where each entry is
        ``{'image': path, 'annotation': path, 'filename': name}`` and each
        list is shuffled; ``None`` if ``image_dir`` does not exist.
    """
    # os.path is used here (rather than gfile) to match the glob/os.path
    # calls below, which only handle local paths anyway.
    if not os.path.exists(image_dir):
        print("Image directory '" + image_dir + "' not found.")
        return None
    directories = ['training', 'validation']
    image_list = {}

    for directory in directories:
        image_list[directory] = []
        file_glob = os.path.join(image_dir, "images", directory, '*.' + 'jpg')
        file_list = glob.glob(file_glob)

        if not file_list:
            print('No files found')
        else:
            for f in file_list:
                # basename handles the platform's path separator, unlike
                # splitting on "/".
                filename = os.path.splitext(os.path.basename(f))[0]
                annotation_file = os.path.join(image_dir, "annotations", directory, filename + '.png')
                if os.path.exists(annotation_file):
                    record = {'image': f, 'annotation': annotation_file, 'filename': filename}
                    image_list[directory].append(record)
                else:
                    print("Annotation file not found for %s - Skipping" % filename)

        random.shuffle(image_list[directory])
        no_of_images = len(image_list[directory])
        print('No. of %s files: %d' % (directory, no_of_images))

    return image_list
# coding=utf-8
__author__ = 'Charlie'
# Utils used with tensorflow implemetation
import tensorflow as tf
import numpy as np
import scipy.misc as misc
import os, sys
from six.moves import urllib
import tarfile
import zipfile
import scipy.io
def get_model_data(dir_path, model_url):
    """Fetch the model file if needed and load it as a MATLAB .mat file.

    :param dir_path: directory where the model file is stored, e.g.
        ``Model_zoo/``.
    :param model_url: URL of the model; its last path component is used as
        the local file name.
    :return: the object returned by ``scipy.io.loadmat`` for the file.
    :raises IOError: if the model file is still missing after the download
        step.
    """
    maybe_download_and_extract(dir_path, model_url)
    mat_path = os.path.join(dir_path, model_url.split("/")[-1])
    if os.path.exists(mat_path):
        return scipy.io.loadmat(mat_path)
    raise IOError("VGG Model not found!")
def maybe_download_and_extract(dir_path, url_name, is_tarfile=False, is_zipfile=False):
    """Download ``url_name`` into ``dir_path`` unless the file already exists.

    The local file name is the last path component of the URL. Extraction
    only happens right after a fresh download.

    :param dir_path: target directory; created if missing.
    :param url_name: URL of the file to download.
    :param is_tarfile: extract the download as a gzip-compressed tar archive.
    :param is_zipfile: extract the download as a zip archive (only checked
        when ``is_tarfile`` is false).
    """
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    filename = url_name.split('/')[-1]
    filepath = os.path.join(dir_path, filename)
    if os.path.exists(filepath):
        # Already downloaded on an earlier run; nothing to do.
        return

    def _progress(count, block_size, total_size):
        # total_size is not positive when the server reports no length.
        if total_size > 0:
            percent = min(float(count * block_size) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename, percent))
        else:
            sys.stdout.write('\r>> Downloading %s %d bytes' % (filename, count * block_size))
        sys.stdout.flush()

    downloaded = False
    try:
        filepath, _ = urllib.request.urlretrieve(url_name, filepath, reporthook=_progress)
        downloaded = True
    finally:
        # Remove a partial file so the next run does not mistake it for a
        # complete download.
        if not downloaded and os.path.exists(filepath):
            os.remove(filepath)
    sys.stdout.write('\n')
    statinfo = os.stat(filepath)
    print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')

    # NOTE(review): extractall trusts the member paths inside the archive;
    # only use this with archives from trusted sources.
    if is_tarfile:
        with tarfile.open(filepath, 'r:gz') as archive:
            archive.extractall(dir_path)
    elif is_zipfile:
        with zipfile.ZipFile(filepath) as zf:
            zf.extractall(dir_path)
# coding=utf-8
# Code ideas from https://github.com/Newmu/dcgan and tensorflow mnist dataset reader
import numpy as np
import scipy.misc as misc
class BatchDatset:
    """In-memory reader that serves (image, annotation) batches.

    All files listed in the records are read (and optionally resized) once at
    construction time and kept as two NumPy arrays.
    """

    # Class-level defaults; every instance rebinds its own values in __init__.
    files = []
    images = []
    annotations = []
    image_options = {}
    batch_offset = 0
    epochs_completed = 0

    def __init__(self, records_list, image_options=None):
        """Initialize a generic file reader with batching for a list of files.

        :param records_list: list of file records to read -
            sample record: {'image': f, 'annotation': annotation_file, 'filename': filename}
        :param image_options: a dictionary of options for modifying the
            output image. Available options:
            resize = True/ False
            resize_size = size of the (square) output image
        """
        print("Initializing Batch Dataset Reader...")
        self.files = records_list
        # None instead of a shared mutable {} default.
        self.image_options = {} if image_options is None else image_options
        self.batch_offset = 0
        self.epochs_completed = 0
        self.__channels = True
        self._read_images()

    def _read_images(self):
        """Load every image and annotation listed in ``self.files``."""
        # Images are forced to three channels; annotations are left as read.
        self.__channels = True
        self.images = np.array([self._transform(record['image']) for record in self.files])
        self.__channels = False
        # Append a trailing channel axis to each annotation. axis=-1 is used
        # because axis=3 is out of range for a 2-D mask.
        self.annotations = np.array(
            [np.expand_dims(self._transform(record['annotation']), axis=-1) for record in self.files])
        print(self.images.shape)
        print(self.annotations.shape)

    @staticmethod
    def _ensure_three_channels(image):
        """Return ``image`` as (h, w, 3), replicating a 2-D image on a new last axis."""
        if len(image.shape) < 3:
            return np.stack([image, image, image], axis=-1)
        return image

    def _transform(self, filename):
        """Read one file and apply the channel and resize options.

        NOTE(review): relies on ``misc.imread`` / ``misc.imresize`` from the
        module imported as ``misc``; confirm the installed SciPy still
        provides them.
        """
        image = misc.imread(filename)
        if self.__channels:
            # make sure images are of shape(h,w,3)
            image = self._ensure_three_channels(image)

        if self.image_options.get("resize", False):
            resize_size = int(self.image_options["resize_size"])
            resize_image = misc.imresize(image,
                                         [resize_size, resize_size], interp='nearest')
        else:
            resize_image = image

        return np.array(resize_image)

    def get_records(self):
        """Return the full ``(images, annotations)`` arrays."""
        return self.images, self.annotations

    def reset_batch_offset(self, offset=0):
        """Set the position from which the next batch starts.

        :param offset: index of the first sample of the next batch.
        """
        self.batch_offset = offset

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` samples in order.

        When the requested batch would run past the end of the data, the
        epoch counter is incremented, images and annotations are shuffled
        with one shared permutation, and the batch is taken from the start
        of the shuffled data.
        """
        start = self.batch_offset
        self.batch_offset += batch_size
        if self.batch_offset > self.images.shape[0]:
            # Finished epoch
            self.epochs_completed += 1
            print("****************** Epochs completed: " + str(self.epochs_completed) + "******************")
            # Shuffle the data
            perm = np.arange(self.images.shape[0])
            np.random.shuffle(perm)
            self.images = self.images[perm]
            self.annotations = self.annotations[perm]
            # Start next epoch
            start = 0
            self.batch_offset = batch_size

        end = self.batch_offset
        return self.images[start:end], self.annotations[start:end]

    def get_random_batch(self, batch_size):
        """Return ``batch_size`` samples drawn at random with replacement."""
        indexes = np.random.randint(0, self.images.shape[0], size=[batch_size]).tolist()
        return self.images[indexes], self.annotations[indexes]