tensorflow最適化

7961 ワード

ソースの場所:
tensorflow/python/tools
tensorflow/tools/quantization
bin位置:
bazel-bin/tensorflow/python/tools
bazel-bin/tensorflow/tools/
出力pbタイプgraph:

from tensorflow.python.framework import graph_io
# Write the session's graph to the file "input_graph.pb" under "./tests/".
# NOTE(review): as_text is not passed here, so the library default applies --
# confirm whether a text or a binary file is expected by the next tool.
graph_io.write_graph(self.sess.graph,"./tests/", "input_graph.pb")

グラフの凍結(freeze_graph):

bazel build tensorflow/python/tools:freeze_graph && \
bazel-bin/tensorflow/python/tools/freeze_graph \
--input_graph=some_graph_def.pb \
--input_checkpoint=model.ckpt-8361242 \
--output_graph=/tmp/frozen_graph.pb \
--output_node_names=softmax

例:

bazel-bin/tensorflow/python/tools/freeze_graph \
--input_graph=./input_graph.pb \
--input_checkpoint=./VGGnet_fast_rcnn_iter_52700.ckpt \
--output_graph=./froze_graph1.pb \
--output_node_names=rois/Reshape,rois/PyFunc

最適化optimize_for_inference:

bazel build tensorflow/python/tools:optimize_for_inference&& \
bazel-bin/tensorflow/python/tools/optimize_for_inference \
--input=frozen_inception_graph.pb \
--output=optimized_inception_graph.pb \
--frozen_graph=True \
--input_names=Mul \
--output_names=softmax

例:

bazel-bin/tensorflow/python/tools/optimize_for_inference \
--input=froze_ctc.pb \
--output=optimized_ctc.pb \
--frozen_graph=True \
--input_names=Placeholder,seq_len \
--output_names=CTCGreedyDecoder \
--placeholder_type_enum=1,3

placeholder_type_enumの「1,3」は、input_namesに指定した各プレースホルダのデータ型のenum値である(下記のとおり、float32は1、int32は3).

from tensorflow.python.framework import dtypes

# Show the DataType enum value of each dtype, one value per line.
for dtype in (dtypes.float32, dtypes.int32):
    print(dtype.as_datatype_enum)  # 1 for float32, then 3 for int32

量子化quantize_graph:

bazel build tensorflow/tools/quantization:quantize_graph && \
bazel-bin/tensorflow/tools/quantization/quantize_graph \
--input=tensorflow_inception_graph.pb \
--output_node_names="softmax2" \
--print_nodes \
--output=/tmp/quantized_graph.pb \
--mode=eightbit \
--logtostderr

例:

bazel-bin/tensorflow/tools/quantization/quantize_graph \
--input=./optimized_ctc.pb \
--output_node_names=CTCGreedyDecoder \
--print_nodes \
--output=./quantized_graph.pb \
--mode=eightbit \
--logtostderr

Tensorboardはpbファイルを表示します:

tensorflow/python/tools/import_pb_to_tensorboard.py

例:

python import_pb_to_tensorboard.py --model_dir=./input.pb --log_dir=./log

ノード計算時間profileを表示するには:

bazel build -c opt tensorflow/tools/benchmark:benchmark_model && \
bazel-bin/tensorflow/tools/benchmark/benchmark_model \
--graph=/tmp/tensorflow_inception_graph.pb \
--input_layer="Mul" \
--input_layer_shape="1,299,299,3" \
--input_layer_type="float" \
--output_layer="softmax:0" \
--show_run_order=false \
--show_time=false \
--show_memory=false \
--show_summary=true \
--show_flops=true \
--logtostderr

ckptモデルを読み込むプログラムを、pbモデルを読み込むように変更する:

def load_graph(self, model_file):
    """Read a serialized GraphDef from ``model_file`` and import it into a new graph.

    Args:
        model_file: Path of the file to read; it is opened in binary mode and
            parsed with ``GraphDef.ParseFromString``.

    Returns:
        A freshly created ``tf.Graph`` into which the parsed GraphDef has
        been imported.
    """
    with open(model_file, "rb") as pb_file:
        serialized = pb_file.read()

    parsed_def = tf.GraphDef()
    parsed_def.ParseFromString(serialized)

    imported = tf.Graph()
    # Import while `imported` is the default graph so the nodes land in it.
    with imported.as_default():
        tf.import_graph_def(parsed_def)
    return imported

def __init__(self):
    """Load ./input.pb and keep handles to its input/output tensors and a session.

    Sets ``self.graph``, ``self.inputs``, ``self.output`` and ``self.session``.
    """
    ...  # other initialization elided in the original notes
    model_file = "./input.pb"
    self.graph = self.load_graph(model_file)
    # The tensor lookups form the body of the `with` block, so they must be
    # indented under it (the original snippet left them at the outer level).
    with self.graph.as_default():
        # import/input_1:0
        self.inputs = self.graph.get_tensor_by_name("import/input_1:0")
        # import/output_node0:0
        self.output = self.graph.get_tensor_by_name("import/output_node0:0")
    # The session is bound to the loaded graph explicitly via `graph=`.
    self.session = tf.Session(graph=self.graph)

def proc(self, input_image):
    """Feed ``input_image`` to ``self.inputs`` and evaluate ``self.output``.

    Args:
        input_image: Value fed to the input tensor.
            NOTE(review): expected shape/dtype are not visible here -- confirm
            against the model.
    """
    ...  # other code elided in the original notes
    test_feed = {self.inputs: input_image}
    predictions = self.session.run(self.output, test_feed)
    # NOTE(review): the snippet ends here; `predictions` is not returned or
    # stored by the visible code.

Kerasモデルをpbに変換:
https://github.com/amir-abdi/keras_to_tensorflow
テスト方法:

python3 keras_to_tensorflow.py -input_model_file model.h5 -output_model_file model.pb

keras_to_tensorflow.py

# coding: utf-8
# # Set parameters

"""
Copyright (c) 2017, by the Authors: Amir H. Abdi
This software is freely available under the MIT Public License. 
Please see the License file in the root for details.

The following code snippet will convert the keras model file,
which is saved using model.save('kerasmodel_weight_file'),
to the frozen .pb tensorflow weight file which holds both the 
network architecture and its associated weights.
""";

# setting input arguments
import argparse


def _str2bool(value):
    """Convert a command-line string such as 'true', 'False', '1' or 'no' to a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, which is True
    for every non-empty value, so ``-graph_def False`` would enable the option.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognized boolean.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    # The empty string stays False, as it was under bool('').
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


parser = argparse.ArgumentParser(description='set input arguments')
parser.add_argument('-input_fld', action="store",
                    dest='input_fld', type=str, default='.')

parser.add_argument('-output_fld', action="store",
                    dest='output_fld', type=str, default='.')

parser.add_argument('-input_model_file', action="store",
                    dest='input_model_file', type=str, default='model.h5')

parser.add_argument('-output_model_file', action="store",
                    dest='output_model_file', type=str, default='model.pb')

parser.add_argument('-output_graphdef_file', action="store",
                    dest='output_graphdef_file', type=str, default='model.ascii')

parser.add_argument('-num_outputs', action="store",
                    dest='num_outputs', type=int, default=1)

# Parsed with _str2bool so that "-graph_def False" really means False.
parser.add_argument('-graph_def', action="store",
                    dest='graph_def', type=_str2bool, default=False)

parser.add_argument('-output_node_prefix', action="store",
                    dest='output_node_prefix', type=str, default='output_node')

# NOTE(review): presumably absorbs the "-f <file>" argument passed when this
# runs inside a notebook kernel -- confirm before removing.
parser.add_argument('-f')
args = parser.parse_args()
print('input args: ', args)

# uncomment the following lines to alter the default values set above
# args.input_fld = '.'
# args.output_fld = '.'
# args.input_model_file = 'model.h5'
# args.output_model_file = 'model.pb'

# num_output: this value has nothing to do with the number of classes, batch_size, etc.,
# and it is mostly equal to 1.
# If you have a multi-stream network (forked network with multiple outputs),
# set the value to the number of outputs.
num_output = args.num_outputs


# # initialize
from keras.models import load_model
import tensorflow as tf
import os
import os.path as osp
from keras import backend as K

# Resolve the output folder and the path of the Keras model to load.
output_fld = args.output_fld
# makedirs(exist_ok=True) also creates missing parent folders and avoids the
# check-then-create race of the isdir()/mkdir() pair.
os.makedirs(output_fld, exist_ok=True)
weight_file_path = osp.join(args.input_fld, args.input_model_file)

# # Load keras model and rename output
# The learning phase is set to 0 before the model is loaded.
K.set_learning_phase(0)
net_model = load_model(weight_file_path)

# One name per model output: <output_node_prefix>0, <output_node_prefix>1, ...
pred_node_names = [args.output_node_prefix + str(i) for i in range(num_output)]
# Wrap each model output in an identity op carrying the matching name.
pred = [
    tf.identity(net_model.outputs[idx], name=node_name)
    for idx, node_name in enumerate(pred_node_names)
]
print('output nodes names are: ', pred_node_names)


# #### [optional] write graph definition in ascii
sess = K.get_session()

# Only written when -graph_def was enabled on the command line.
if args.graph_def:
    f = args.output_graphdef_file
    ascii_graph_def = sess.graph.as_graph_def()
    tf.train.write_graph(ascii_graph_def, output_fld, f, as_text=True)
    ascii_path = osp.join(output_fld, f)
    print('saved the graph definition in ascii format at: ', ascii_path)


# #### convert variables to constants and save
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io

# Replace the variables with constants, keeping the named output nodes.
constant_graph = graph_util.convert_variables_to_constants(
    sess,
    sess.graph.as_graph_def(),
    pred_node_names,
)
# Written with as_text=False.
graph_io.write_graph(
    constant_graph,
    output_fld,
    args.output_model_file,
    as_text=False,
)
print(
    'saved the freezed graph (ready for inference) at: ',
    osp.join(output_fld, args.output_model_file),
)

pythonでコードがWebサイトにアクセス

メモ html.template