コード学習(2):Unsupervised Monocular Depth Estimation with Left-Right Consistency
29298 ワード
コード学習ノート
Unsupervised Monocular Depth Estimation with Left-Right Consistency: monodepth_dataloader.py — source: monodepth
"""
song
stay hungry stay foolish
"""
from __future__ import absolute_import, division, print_function
import tensorflow as tf
def string_length_tf(t):
    """Wrap Python's len() as a graph op; returns a one-element list holding
    the int64 length of the string tensor `t`."""
    length_op = tf.py_func(len, [t], [tf.int64])
    return length_op
class MonodepthDataloader(object):
    """Monodepth data loader.

    Builds a TF1 input pipeline (queue runners) that reads left/right image
    path pairs from a text file, decodes and resizes the images, and exposes
    them as the `left_image_batch` / `right_image_batch` tensors.
    """

    def __init__(self, data_path, filenames_file, params, dataset, mode):
        # data_path: root directory prepended to every relative path in the list file
        # filenames_file: text file with one "left_path right_path" pair per line
        # params: config object; uses height, width, batch_size, num_threads, do_stereo
        # dataset: dataset name; 'cityscapes' triggers a bottom crop in read_image
        # mode: 'train' or 'test'
        self.data_path = data_path
        self.params = params
        self.dataset = dataset
        self.mode = mode
        self.left_image_batch = None
        self.right_image_batch = None

        input_queue = tf.train.string_input_producer([filenames_file], shuffle=False)
        """
        Queue over the (e.g. KITTI) file-list file itself.
        Tip: shuffle=False keeps the line order deterministic here; shuffling
        happens later in shuffle_batch for training.
        """
        line_reader = tf.TextLineReader()
        _, line = line_reader.read(input_queue)  # reads one line per invocation
        """
        Example of what the reader yields:
        key:
        b'kitti_train_files.txt:11987'
        value: one left/right path pair,
        b'2011_09_30/2011_09_30_drive_0033_sync/image_02/data/0000001585.jpg 2011_09_30/2011_09_30_drive_0033_sync/image_03/data/0000001585.jpg'
        """
        split_line = tf.string_split([line]).values  # split on whitespace -> [left_path, right_path]

        # we load only one image for test, except if we trained a stereo model
        if mode == 'test' and not self.params.do_stereo:  # mono test: left image only
            left_image_path = tf.string_join([self.data_path, split_line[0]])
            left_image_o = self.read_image(left_image_path)  # decode + convert + resize
        else:  # training, or stereo test: load both views
            left_image_path = tf.string_join([self.data_path, split_line[0]])
            right_image_path = tf.string_join([self.data_path, split_line[1]])
            left_image_o = self.read_image(left_image_path)
            right_image_o = self.read_image(right_image_path)

        if mode == 'train':
            # randomly flip images #
            # NOTE: a horizontal flip swaps the roles of the left and right
            # cameras, hence the cross-over (flipped right becomes the new left).
            do_flip = tf.random_uniform([], 0, 1)
            left_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(right_image_o), lambda: left_image_o)
            right_image = tf.cond(do_flip > 0.5, lambda: tf.image.flip_left_right(left_image_o), lambda: right_image_o)

            # randomly augment images # (same jitter applied to both views)
            do_augment = tf.random_uniform([], 0, 1)
            left_image, right_image = tf.cond(do_augment > 0.5, lambda: self.augment_image_pair(left_image, right_image), lambda: (left_image, right_image))

            left_image.set_shape( [None, None, 3])
            right_image.set_shape([None, None, 3])
            """
            set_shape vs reshape:
            set_shape only records static shape information on the tensor
            (tf.cond loses it), similar to annotating a placeholder's shape;
            reshape would insert a new op that actually changes the shape.
            """

            # capacity = min_after_dequeue + (num_threads + a small safety margin) * batch_size
            min_after_dequeue = 2048
            capacity = min_after_dequeue + 4 * params.batch_size
            self.left_image_batch, self.right_image_batch = tf.train.shuffle_batch([left_image, right_image],
                    params.batch_size, capacity, min_after_dequeue, params.num_threads)
            # shuffled batch tensors of paired left/right images
            """
            def shuffle_batch(tensors,            # list of tensors to batch
                              batch_size,         # number of examples per batch tensor
                              capacity,           # max elements in the queue:
                              capacity = (min_after_dequeue + (num_threads + a small safety margin) * batch_size)
                              min_after_dequeue,  # min elements left in the queue after a
                                                  # dequeue, to guarantee good shuffling
                              num_threads=1,      # enqueue threads
                              seed=None,
                              enqueue_many=False,
                              shapes=None,
                              allow_smaller_final_batch=False,
                              shared_name=None,
                              name=None)
            """
        elif mode == 'test':  # batch of 2: the image plus its horizontal flip
            self.left_image_batch = tf.stack([left_image_o, tf.image.flip_left_right(left_image_o)], 0)
            """
            Stacks left_image_o with its flipped copy along a new axis 0,
            e.g. producing a tensor of shape [2, H, W, 3].
            """
            self.left_image_batch.set_shape( [2, None, None, 3])  # record static batch shape

            if self.params.do_stereo:  # stereo test also needs the right view
                self.right_image_batch = tf.stack([right_image_o, tf.image.flip_left_right(right_image_o)], 0)
                self.right_image_batch.set_shape( [2, None, None, 3])

    def augment_image_pair(self, left_image, right_image):
        """Apply the same random gamma / brightness / color jitter to both images."""
        # randomly shift gamma
        random_gamma = tf.random_uniform([], 0.8, 1.2)
        left_image_aug = left_image ** random_gamma  # gamma drawn from (0.8, 1.2)
        right_image_aug = right_image ** random_gamma

        # randomly shift brightness
        random_brightness = tf.random_uniform([], 0.5, 2.0)
        left_image_aug = left_image_aug * random_brightness  # scale both views identically
        right_image_aug = right_image_aug * random_brightness

        # randomly shift color
        random_colors = tf.random_uniform([3], 0.8, 1.2)  # one scale per RGB channel
        white = tf.ones([tf.shape(left_image)[0], tf.shape(left_image)[1]])  # H x W plane of ones
        color_image = tf.stack([white * random_colors[i] for i in range(3)], axis=2)  # per-channel scale map
        left_image_aug *= color_image
        right_image_aug *= color_image  # same color shift for both views

        # saturate
        left_image_aug = tf.clip_by_value(left_image_aug, 0, 1)
        right_image_aug = tf.clip_by_value(right_image_aug, 0, 1)  # keep values in [0, 1]

        return left_image_aug, right_image_aug

    def read_image(self, image_path):
        """Decode the image at `image_path`, optionally crop (cityscapes),
        convert to float32 and resize to (params.height, params.width)."""
        # tf.decode_image does not return the image size, this is an ugly workaround to handle both jpeg and png
        path_length = string_length_tf(image_path)[0]
        file_extension = tf.substr(image_path, path_length - 3, 3)  # last 3 chars of the path
        file_cond = tf.equal(file_extension, 'jpg')  # True when the extension is 'jpg'
        image = tf.cond(file_cond, lambda: tf.image.decode_jpeg(tf.read_file(image_path)), lambda: tf.image.decode_png(tf.read_file(image_path)))

        # if the dataset is cityscapes, we crop the last fifth to remove the car hood
        if self.dataset == 'cityscapes':
            o_height = tf.shape(image)[0]
            crop_height = (o_height * 4) // 5
            image = image[:crop_height,:,:]  # keep the top 4/5 of the image

        image = tf.image.convert_image_dtype(image, tf.float32)  # uint8 -> float in [0, 1]
        image = tf.image.resize_images(image, [self.params.height, self.params.width], tf.image.ResizeMethod.AREA)
        # resize to params.height x params.width (e.g. 256 x 512)
        return image
"""
monodepthload.py :
Train :
== left_image_o == left_image( ) ( set_shape, ) == left_image_batch( )
Test:
== left_image_batch ( , , )
"""