pix2pixHD Summary

The DeepLab training part references deeplab_demo.ipynb from a DeepLab source-code analysis, adapted with some modifications.
Paper: pix2pixHD / Code: GitHub
 
1. Test on the sample data, using the downloaded G network; see the scripts under ./scripts.
The datasets folder contains example Cityscapes test images.
#!./scripts/test_1024p.sh

python test.py --name label2city_1024p --netG local --ngf 32 --resize_or_crop none

Test results are saved to an HTML file: ./results/label2city_1024p/test_latest/index.html
 
2. Train on the sample data; see the scripts under ./scripts.
Train a model at 1024x512 resolution:
#!./scripts/train_512p.sh

python train.py --name label2city_512p

To view intermediate training results, open ./checkpoints/label2city_512p/web/index.html.
If TensorFlow is installed, add --tf_log to the training script to write TensorBoard logs to ./checkpoints/label2city_512p/logs,
then run tensorboard --logdir=logs under that directory to open TensorBoard.
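
For example (assuming the default --checkpoints_dir):

python train.py --name label2city_512p --tf_log
tensorboard --logdir=./checkpoints/label2city_512p/logs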
 
Training with multiple GPUs
#!./scripts/train_512p_multigpu.sh

python train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7

--batchSize must match the number of GPU ids in --gpu_ids.
 
3. Building your own dataset (including DeepLab semantic segmentation of the images)
Code: GitHub
models: Checkpoints and frozen inference graphs.
Download the source code and the models from the links above, then run:
# deeplab_demo_test.py

import os
from io import BytesIO
import tarfile
import tempfile
from six.moves import urllib

from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import datetime

import tensorflow as tf
from deeplab_demo import *

LABEL_NAMES = np.asarray([
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
])

FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)

pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_pascal_trainval/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_mnv2_dm05_pascal_trainval/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_mnv2_dm05_pascal_trainaug/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_mnv2_ade20k_train/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_xception_ade20k_train/frozen_inference_graph.pb'

MODEL = DeepLabModel(pb_path)
print('model loaded successfully!')

# global starttime

for num in range(0,2):
	starttime = datetime.datetime.now()

	# IMAGE_PATH = 'E:/data/img/20190522/img/image%d.jpg' % num
	# OUT_PATH = 'E:/data/img/20190522/seg_img/seg_image%d.png' % num
	IMAGE_PATH = 'E:/data/img/test/img/image%d.jpg' % num
	OUT_PATH = 'E:/data/img/test/seg_map/seg_image%d.png' % num
	# print(IMAGE_PATH)
	path = IMAGE_PATH

	try:
		original_im = Image.open(path)
		print('running deeplab on image %s...' % path)
		# starttime = datetime.datetime.now()
		resized_im, seg_map = MODEL.run(original_im)
	except IOError:
		print('Cannot retrieve image. Please check path: ' + path)
		continue  # skip this image; seg_map would be undefined below

	# endtime = datetime.datetime.now()
	# print (endtime - starttime)

	seg_image = label_to_color_image(seg_map).astype(np.uint8)  # color visualization (not saved)
	# im = Image.fromarray(seg_image)
	im = Image.fromarray(seg_map.astype(np.uint8))  # save the raw label map
	im.save(OUT_PATH)
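
A quick sanity check of the saved label maps (a minimal sketch; it assumes the PNGs store raw class indices, as written by the loop above):

# check_seg_map.py (illustrative)
import numpy as np
from PIL import Image

seg = np.array(Image.open('E:/data/img/test/seg_map/seg_image0.png'))
print(seg.shape)       # (height, width) of the resized image
print(np.unique(seg))  # class indices present, e.g. 0 = background, 15 = person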

and the modified deeplab_demo module it imports:
# -*- coding: utf-8 -*-
"""
DeepLab Demo.ipynb
https://blog.csdn.net/lifengcai_/article/details/80270409
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/github/tensorflow/models/blob/master/research/deeplab/deeplab_demo.ipynb
# DeepLab Demo
This demo demonstrates the steps to run a DeepLab semantic segmentation model on sample input images.
"""

#@title Imports
import os
from io import BytesIO
import tarfile
import tempfile
from six.moves import urllib

from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import datetime

import tensorflow as tf


#@title Helper methods
global starttime

class DeepLabModel(object):
	"""Class to load deeplab model and run inference."""

	INPUT_TENSOR_NAME = 'ImageTensor:0'
	OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
	INPUT_SIZE = 512
	FROZEN_GRAPH_NAME = 'frozen_inference_graph'

	def __init__(self, pb_path):
		"""Creates and loads pretrained deeplab model."""
		self.graph = tf.Graph()
		graph_def = None
		# change 1: load frozen_inference_graph.pb directly
		graph_def = tf.GraphDef.FromString(open(pb_path, 'rb').read())
		if graph_def is None:
			raise RuntimeError('Cannot load the inference graph.')
		with self.graph.as_default():
			tf.import_graph_def(graph_def, name='')
		self.sess = tf.Session(graph=self.graph)

	def run(self, image):
		"""
		Runs inference on a single image.

		Args:
			image: A PIL.Image object, raw input image.
		Returns:
			resized_image: RGB image resized from original input image.
			seg_map: Segmentation map of `resized_image`.
		"""
		# resize (modified 2019-05-21): use a fixed target size instead of the ratio-based one
		width, height = image.size
		# print(width, height)
		resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)
		# print(resize_ratio)
		# target_size = (int(resize_ratio * width), int(resize_ratio * height))
		target_size = (512, 256)  # fixed size; overrides the ratio-based size above
		# print(target_size)
		resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS)
		# resized_image = image.convert('RGB')
		batch_seg_map = self.sess.run(
			self.OUTPUT_TENSOR_NAME,
			feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})
		seg_map = batch_seg_map[0]
		return resized_image, seg_map

def create_pascal_label_colormap():
	"""
	Creates a label colormap used in PASCAL VOC segmentation benchmark.
	Returns:
		A Colormap for visualizing segmentation results.
	"""
	colormap = np.zeros((256, 3), dtype=int)
	ind = np.arange(256, dtype=int)

	for shift in reversed(range(8)):
		for channel in range(3):
			colormap[:, channel] |= ((ind >> channel) & 1) << shift
		ind >>= 3

	return colormap

def label_to_color_image(label):
	"""
	Adds color defined by the dataset colormap to the label.
	Args:
		label: A 2D array with integer type, storing the segmentation label.
	Returns:
		result: A 2D array with floating type. The element of the array
		is the color indexed by the corresponding element in the input label
		to the PASCAL color map.
	Raises:
		ValueError: If label is not of rank 2 or its value is larger than color
		map maximum entry.
	"""
	if label.ndim != 2:
		raise ValueError('Expect 2-D input label')

	colormap = create_pascal_label_colormap()

	if np.max(label) >= len(colormap):
		raise ValueError('label value too large.')

	return colormap[label]

def vis_segmentation(image, seg_map):
	"""Visualizes input image, segmentation map and overlay view."""
	plt.figure(figsize=(15, 5))
	grid_spec = gridspec.GridSpec(1, 4, width_ratios=[6, 6, 6, 1])

	plt.subplot(grid_spec[0])
	plt.imshow(image)
	plt.axis('off')
	plt.title('input image')

	plt.subplot(grid_spec[1])
	seg_image = label_to_color_image(seg_map).astype(np.uint8)
	plt.imshow(seg_image)
	plt.axis('off')
	plt.title('segmentation map')

	plt.subplot(grid_spec[2])
	plt.imshow(image)
	plt.imshow(seg_image, alpha=0.7)
	plt.axis('off')
	plt.title('segmentation overlay')

	unique_labels = np.unique(seg_map)
	ax = plt.subplot(grid_spec[3])
	plt.imshow(FULL_COLOR_MAP[unique_labels].astype(np.uint8), interpolation='nearest')
	ax.yaxis.tick_right()
	plt.yticks(range(len(unique_labels)), LABEL_NAMES[unique_labels])
	plt.xticks([], [])
	ax.tick_params(width=0.0)
	plt.grid('off')  # turn off the grid
	plt.show()
	#image.save('C:/image1.png')
	im = Image.fromarray(seg_image)
	im.save('E:/data/img/seg_img/seg_image1.png')

# LABEL_NAMES = np.asarray([
#     'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
#     'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
#     'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
# ])

# FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
# FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)

# # pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_pascal_trainval/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_pascal_trainval/frozen_inference_graph.pb'
# MODEL = DeepLabModel(pb_path)
# print('model loaded successfully!')


"""
Run on sample images
Select one of sample images (leave `IMAGE_URL` empty) or feed any internet image url for inference.
Note that we are using single scale inference in the demo for fast computation, so the results may slightly differ from the visualizations in
[README](https://github.com/tensorflow/models/blob/master/research/deeplab/README.md),
which uses multi-scale and left-right flipped inputs.
"""

def run_visualization(path):
	"""Inferences DeepLab model and visualizes result."""
	global starttime
	try:
		original_im = Image.open(path)
		print('running deeplab on image %s...' % path)
		# starttime = datetime.datetime.now()
		resized_im, seg_map = MODEL.run(original_im)
	except IOError:
		print('Cannot retrieve image. Please check path: ' + path)
		return

	vis_segmentation(resized_im, seg_map)

# IMAGE_PATH = 'E:/data/img/img/image126.jpg'
# run_visualization(IMAGE_PATH)
# endtime = datetime.datetime.now()
# print (endtime - starttime)

PS: An instance map can also be created to distinguish different instances of the same class.
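
One possible way to build such an instance map is per-class connected components; a minimal sketch (this is an assumption on my part, not the pipeline used above, and it requires scipy):

# make_instance_map.py (hypothetical helper)
import numpy as np
from PIL import Image
from scipy import ndimage

seg_map = np.array(Image.open('E:/data/img/test/seg_map/seg_image0.png'))
instance_map = np.zeros(seg_map.shape, dtype=np.uint16)
next_id = 1
for cls in np.unique(seg_map):
    if cls == 0:  # skip background
        continue
    # label connected regions of this class; touching instances will merge
    components, n = ndimage.label(seg_map == cls)
    for comp in range(1, n + 1):
        instance_map[components == comp] = next_id  # unique id per region
        next_id += 1
Image.fromarray(instance_map).save('E:/data/img/test/inst_map/inst_image0.png')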
 
4. Encoding features with encode_features
Cluster the predicted feature maps and generate a .npy file for later reading:
python encode_features.py --name butel_data20190516_feat_20190523 --dataroot /home/yangd/work/python/pix2pixHD_yangd/datasets/butel_data20190516_feat_20190523
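
The resulting file is what --cluster_path (default features_clustered_010.npy, see the appendix) points to at test time. A minimal sketch for inspecting it, assuming it sits under the experiment's checkpoints directory and stores a dict of per-label cluster centers (my reading of the pix2pixHD code):

import numpy as np

path = 'checkpoints/butel_data20190516_feat_20190523/features_clustered_010.npy'
features = np.load(path, allow_pickle=True).item()  # allow_pickle is required with recent NumPy
for label, centers in features.items():
    print(label, centers.shape)  # cluster centers for this label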

5. Precomputing feature maps with precompute_feature_maps
Predict the feature maps and save them:
python precompute_feature_maps.py --name butel_20190522_feat --dataroot ./datasets/train_20190520
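
The saved maps can then be consumed during training via --load_features (an illustrative command; this flag combination is my assumption):

python train.py --name butel_20190522_feat --dataroot ./datasets/train_20190520 --instance_feat --load_features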

 
6. Training and testing on the self-built dataset
Training:
python train.py --name butel_data20190522_feat_20190524 --instance_feat --dataroot /home/yangd/work/python/pix2pixHD_yangd/datasets/butel_data20190522_feat_20190524 --gpu_ids 0,1 --batchSize 2 --tf_log --load_pretrain /home/yangd/work/python/pix2pixHD_yangd/checkpoints/butel_data20190516_feat_20190523 --niter 300 --niter_decay 300

    
Testing:
python test.py --name butel_data20190522_feat_20190524 --instance_feat --dataroot /home/yangd/work/python/pix2pixHD_yangd/datasets/butel_data20190522_feat_20190524 --use_encoded_image


Common parameters:
--name

--gpu_ids

--checkpoints_dir

--batchSize

--label_nc

--dataroot

--tf_log

--no_instance

--instance_feat

--results_dir

--how_many

--use_encoded_image
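
An illustrative training command combining several of these flags (the experiment name and paths are placeholders):

python train.py --name my_experiment --dataroot ./datasets/my_data --checkpoints_dir ./checkpoints --gpu_ids 0,1 --batchSize 2 --label_nc 35 --tf_log --no_instance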

 
Appendix

Parameter summary:
  • base_options

        # experiment specifics
    
        '--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models'        
    
        '--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0  0,1,2, 0,2. use -1 for CPU'
    
        '--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here'
    
        '--model', type=str, default='pix2pixHD', help='which model to use'
    
        '--norm', type=str, default='instance', help='instance normalization or batch normalization'        
    
        '--use_dropout', action='store_true', help='use dropout for the generator'
    
        '--data_type', default=32, type=int, choices=[8, 16, 32], help="Supported data type i.e. 8, 16, 32 bit"
    
        '--verbose', action='store_true', default=False, help='toggles verbose'
    
        '--fp16', action='store_true', default=False, help='train with AMP'
    
        '--local_rank', type=int, default=0, help='local rank for distributed training'
    
    
    
    
    
            # input/output sizes       
    
        '--batchSize', type=int, default=1, help='input batch size'
    
        '--loadSize', type=int, default=1024, help='scale images to this size'
    
        '--fineSize', type=int, default=512, help='then crop to this size'
    
        '--label_nc', type=int, default=35, help='# of input label channels'
    
        '--input_nc', type=int, default=3, help='# of input image channels'
    
        '--output_nc', type=int, default=3, help='# of output image channels'
    
    
    
    
    
            # for setting inputs
    
        '--dataroot', type=str, default='./datasets/cityscapes/'
    
        '--resize_or_crop', type=str, default='scale_width', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]'
    
        '--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly'        
    
        '--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation'
    
        '--nThreads', default=2, type=int, help='# threads for loading data'                
    
        '--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.'
    
    
    
    
    
            # for displays
    
        '--display_winsize', type=int, default=512,  help='display window size'
    
        '--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed'
    
    
    
    
    
            # for generator
    
        '--netG', type=str, default='global', help='selects model to use for netG'
    
        '--ngf', type=int, default=64, help='# of gen filters in first conv layer'
    
        '--n_downsample_global', type=int, default=4, help='number of downsampling layers in netG'
    
        '--n_blocks_global', type=int, default=9, help='number of residual blocks in the global generator network'
    
        '--n_blocks_local', type=int, default=3, help='number of residual blocks in the local enhancer network'
    
        '--n_local_enhancers', type=int, default=1, help='number of local enhancers to use'        
    
        '--niter_fix_global', type=int, default=0, help='number of epochs that we only train the outmost local enhancer'  
    
    
    
          
    
            # for instance-wise features
    
        '--no_instance', action='store_true', help='if specified, do *not* add instance map as input'        
    
        '--instance_feat', action='store_true', help='if specified, add encoded instance features as input'
    
        '--label_feat', action='store_true', help='if specified, add encoded label features as input'        
    
        '--feat_num', type=int, default=3, help='vector length for encoded features'        
    
        '--load_features', action='store_true', help='if specified, load precomputed feature maps'
    
        '--n_downsample_E', type=int, default=4, help='# of downsampling layers in encoder'
    
        '--nef', type=int, default=16, help='# of encoder filters in the first conv layer'        
    
        '--n_clusters', type=int, default=10, help='number of clusters for features'   
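
Each entry above maps to an argparse add_argument call in options/base_options.py; a minimal sketch of the pattern (abbreviated to three options; the repo wraps this in an options class):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models')
parser.add_argument('--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0  0,1,2, 0,2. use -1 for CPU')
parser.add_argument('--batchSize', type=int, default=1, help='input batch size')
opt = parser.parse_args()
print(opt.name, opt.gpu_ids, opt.batchSize)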

         
  • test_options

        '--ntest', type=int, default=float("inf"), help='# of test examples.'
    
            '--results_dir', type=str, default='./results/', help='saves results here.'
    
            '--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images'
    
            '--phase', type=str, default='test', help='train, val, test, etc'
    
            '--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model'
    
            '--how_many', type=int, default=50, help='how many test images to run'       
    
            '--cluster_path', type=str, default='features_clustered_010.npy', help='the path for clustered results of encoded features'
    
            '--use_encoded_image', action='store_true', help='if specified, encode the real image to get the feature map'
    
            "--export_onnx", type=str, help="export ONNX model to a given file"
    
            "--engine", type=str, help="run serialized TRT engine"
    
            "--onnx", type=str, help="run ONNX model via TRT"        
    
            
    
            isTrain = False
    
    
  • train_options

        # for displays
    
            '--display_freq', type=int, default=100, help='frequency of showing training results on screen'
    
            '--print_freq', type=int, default=100, help='frequency of showing training results on console'
    
            '--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results'
    
            '--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs'        
    
            '--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/'
    
            '--debug', action='store_true', help='only do one epoch and displays at each iteration'
    
    
    
    
    
            # for training
    
            '--continue_train', action='store_true', help='continue training: load the latest model'
    
            '--load_pretrain', type=str, default='', help='load the pretrained model from the specified location'
    
            '--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model'
    
            '--phase', type=str, default='train', help='train, val, test, etc'
    
            '--niter', type=int, default=100, help='# of iter at starting learning rate'
    
            '--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero'
    
            '--beta1', type=float, default=0.5, help='momentum term of adam'
    
            '--lr', type=float, default=0.0002, help='initial learning rate for adam'
    
    
    
    
    
            # for discriminators        
    
            '--num_D', type=int, default=2, help='number of discriminators to use'
    
            '--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers'
    
            '--ndf', type=int, default=64, help='# of discrim filters in first conv layer'    
    
            '--lambda_feat', type=float, default=10.0, help='weight for feature matching loss'                
    
            '--no_ganFeat_loss', action='store_true', help='if specified, do *not* use discriminator feature matching loss'
    
            '--no_vgg_loss', action='store_true', help='if specified, do *not* use VGG feature matching loss'        
    
            '--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN'
    
            '--pool_size', type=int, default=0, help='the size of image buffer that stores previously generated images'
    
            
    
            isTrain = True