TensorFlowで可変長シーケンスを保存する例

20400 ワード

TensorFlow 可変長シーケンスの保存

問題
問題は、次のような（行ごとに長さの異なる）配列をTFRecordに保存し、それを読み戻すことです。


a = np.array([[0, 54, 91, 153, 177,1],
  [0, 50, 89, 147, 196],
  [0, 38, 79, 157],
  [0, 49, 89, 147, 177],
  [0, 32, 73, 145]])

画像でさえTFRecordに保存できたのですから、これくらいは簡単なはずです。


import tensorflow as tf
import numpy as np

def _int64_feature(value):
    """Wrap an int or a list of ints in an int64 ``tf.train.Feature``.

    Args:
        value: A single value or a ``list`` of values.  Anything that is not
            a ``list`` (a ``numpy.ndarray`` included) is treated as ONE value
            and wrapped in a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds the value(s).
    """
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

# Write an array to TFrecord.
# a is an array which contains lists of variant length.
# dtype=object is required: NumPy >= 1.24 raises ValueError when asked to
# build an array from ragged nested lists without it.  Each a[i] is still a
# plain Python list.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)

# The context manager closes the file even if building an Example fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for i, row in enumerate(a):
        feature = {'i': _int64_feature(i),
                   'data': _int64_feature(row)}

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())


# Use Dataset API to read the TFRecord file.
filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_proto):
 """Parse one serialized Example into the pair (i, data).

 Both features are declared as scalar int64 (shape []).
 NOTE: the writer stores a whole list of int64 values under 'data', so the
 scalar declaration for 'data' is what produces the "Values size" error
 discussed right after this listing.
 """
 keys_to_features = {'i':tf.FixedLenFeature([],tf.int64),
   'data':tf.FixedLenFeature([],tf.int64)}
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 return parsed_features['i'], parsed_features['data']

# Parse every record, then repeat the data indefinitely in batches of one.
dataset = dataset.map(_parse_function)
dataset = dataset.shuffle(buffer_size=1)
dataset = dataset.repeat() 
dataset = dataset.batch(1)
iterator = dataset.make_one_shot_iterator()
i, data = iterator.get_next()
with tf.Session() as sess:
 # Each run() pulls the next batch from the iterator.
 print(sess.run([i, data]))
 print(sess.run([i, data]))
 print(sess.run([i, data]))

おかしなエラーが報告されました。`Name: <unknown>, Key: data, Index: 0. Number of int64 values != expected. Values size: 6 but output shape: []` これは、データの長さは6なのに、読み取り側で指定したshapeが[]になっている、という意味です。いったいどこが間違っているのでしょうか。まず読み取り側のコードをコメントアウトして、TFRecordが正しく書き込まれているかを確認しました。書き込みは問題なく、原因は読み取り側にありました。書き込むデータの長さが可変だからでしょうか。しかし、画像はサイズが全部違っていても読み込めています。画像を保存するときは img.tobytes() で配列をbytesに変換し、bytes型のフィーチャーとして保存していました。TensorFlowはこのbytesを1つの要素として扱うのでしょうか。実際、tobytes() した後のデータはTensorFlowに1つの要素として認識され、読み取るときに (height, width, channel) に従って画像へ復元しています。
そこで、int64ではなくbytesとして保存してみます。さっそくやってみましょう。
データをbytesに変換


# -*- coding: utf-8 -*-

import tensorflow as tf
import numpy as np

def _byte_feature(value):
    """Return a ``tf.train.Feature`` that stores ``value`` as one bytes entry."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

def _int64_feature(value):
    """Wrap an int or a list of ints in an int64 ``tf.train.Feature``.

    Args:
        value: A single value or a ``list`` of values.  Anything that is not
            a ``list`` (a ``numpy.ndarray`` included) is treated as ONE value
            and wrapped in a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds the value(s).
    """
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
# Write an array to TFrecord.
# a is an array which contains lists of variant length.
# dtype=object is required: NumPy >= 1.24 raises ValueError when asked to
# build an array from ragged nested lists without it.  Each row is still a
# plain Python list.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)

# The context manager closes the file even if building an Example fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for row in a:
        feature = {
            # Store the row length instead of the row index, so the reader
            # knows how many values the byte string contains.
            'len': _int64_feature(len(row)),
            # row is a list, so go through numpy to obtain its raw bytes.
            # NOTE: tobytes() uses NumPy's default integer dtype, which is
            # platform dependent (int32 on Windows with NumPy < 2.0, int64 on
            # most other 64-bit platforms); the reader must decode the bytes
            # with that same dtype.
            'data': _byte_feature(np.array(row).tobytes()),
        }

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())

#
# Use Dataset API to read the TFRecord file.
def _parse_function(example_proto):
    """Parse one serialized Example and return its (len, data) features."""
    schema = {
        'len': tf.FixedLenFeature([], tf.int64),
        # the byte string is read back as one scalar string tensor
        'data': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(example_proto, schema)
    return parsed['len'], parsed['data']


filenames = ["file"]
dataset = (tf.data.TFRecordDataset(filenames)
           .map(_parse_function)
           .shuffle(buffer_size=1)
           .repeat()
           .batch(1))
iterator = dataset.make_one_shot_iterator()
i, data = iterator.get_next()
with tf.Session() as sess:
    # Pull and print three consecutive batches.
    for _ in range(3):
        print(sess.run([i, data]))


"""
[array([6], dtype=int64), array([b'\x00\x00\x00\x006\x00\x00\x00[\x00\x00\x00\x99\x00\x00\x00\xb1\x00\x00\x00\x01\x00\x00\x00'],
 dtype=object)]
[array([5], dtype=int64), array([b'\x00\x00\x00\x002\x00\x00\x00Y\x00\x00\x00\x93\x00\x00\x00\xc4\x00\x00\x00'],
 dtype=object)]
[array([4], dtype=int64), array([b'\x00\x00\x00\x00&\x00\x00\x00O\x00\x00\x00\x9d\x00\x00\x00'],
 dtype=object)]
"""

bytesデータのデコード
期待どおりに出力されましたが、このbytesはどうやってデコードすればよいのでしょうか。
方法1：自分でデコードする。


 a,b= sess.run([i,data])
 # np.int was removed in NumPy 1.24; the builtin int selects the same
 # (platform default) integer dtype, so the decoding is unchanged.
 c = np.frombuffer(b[0],dtype=int,count=a[0])

方法2：TensorFlowのデコード関数を使う。


def _parse_function(example_proto):
 """Parse one serialized Example and decode its 'data' bytes into integers.

 Returns the scalar 'len' feature and the tensor produced by decode_raw.
 """
 keys_to_features = {'len':tf.FixedLenFeature([],tf.int64),
   'data':tf.FixedLenFeature([],tf.string)} # the byte string is read back as a scalar string
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 dat = tf.decode_raw(parsed_features['data'],tf.int64) # reinterpret the raw bytes as int64; NOTE(review): this must be the dtype the bytes were written with - confirm against the writer
 return parsed_features['len'], dat
"""
[array([6]), array([[ 0, 54, 91, 153, 177, 1]])]
[array([5]), array([[ 0, 50, 89, 147, 196]])]
[array([4]), array([[ 0, 38, 79, 157]])]
"""

二次元配列になっているのが分かります。これはbatchで出力しているためで、batch sizeが1でも二次元のリスト形式で出力されます。次に、コードをもう少し変更してみます。


def _parse_function(example_proto):
 """Parse one serialized Example, declaring both features with shape [1].

 With shape [1] instead of [] each parsed feature carries one extra
 dimension, which is visible in the sample output after this listing.
 """
 keys_to_features = {'len':tf.FixedLenFeature([1],tf.int64),
   'data':tf.FixedLenFeature([1],tf.string)} 
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 dat = tf.decode_raw(parsed_features['data'],tf.int64)
 return parsed_features['len'], dat

"""
[array([[6]]), array([[[ 0, 54, 91, 153, 177, 1]]])]
[array([[5]]), array([[[ 0, 50, 89, 147, 196]]])]
[array([[4]]), array([[[ 0, 38, 79, 157]]])]
"""

おっと、今度は3次元になりました。次は、わざとエラーを起こしてみます。


def _parse_function(example_proto):
 """Parse one serialized Example with a deliberately wrong shape for 'len'.

 Each record stores a single 'len' value, so declaring shape [2] makes
 parsing fail with the InvalidArgumentError quoted after this listing.
 """
 keys_to_features = {'len':tf.FixedLenFeature([2],tf.int64), # shape changed from [1] to [2] to provoke the error
   'data':tf.FixedLenFeature([1],tf.string)} # the byte string is read back as a string
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 return parsed_features['len'], parsed_features['data']

"""
InvalidArgumentError: Key: len. Can't parse serialized Example.
 [[Node: ParseSingleExample/ParseSingleExample = ParseSingleExample[Tdense=[DT_STRING, DT_INT64], dense_keys=["data", "len"], dense_shapes=[[1], [2]], num_sparse=0, sparse_keys=[], sparse_types=[]](arg0, ParseSingleExample/Const, ParseSingleExample/Const_1)]]
 [[Node: IteratorGetNext_22 = IteratorGetNext[output_shapes=[[?,2], [?,1]], output_types=[DT_INT64, DT_STRING], _device="/job:localhost/replica:0/task:0/device:CPU:0"](OneShotIterator_22)]]
"""

エラーメッセージに dense_keys=["data", "len"]、dense_shapes=[[1], [2]] と出ているのが分かります。tf.FixedLenFeature は固定長のデータを読み取ります。つまり、shapeに[1]を指定すると1件のデータを読み取りますが、その1件のデータ自体は [1,2]、[3,3,4]、[2] のように複数の値を含んでいる場合があります。
TensorFlowでの可変長配列の保存
ともかく読み込むことはできました。しかし、可変長の配列を扱うたびに自分でデコードするのは面倒です。そこでTensorFlowには可変長データ用のパース方法が用意されています。tf.VarLenFeature を使えば、可変長の配列をbytesに変換してからデコードする必要はありません。さっそく試してみます。


import tensorflow as tf
import numpy as np

def _int64_feature(value):
    """Wrap an int or a list of ints in an int64 ``tf.train.Feature``.

    Args:
        value: A single value or a ``list`` of values.  Anything that is not
            a ``list`` (a ``numpy.ndarray`` included) is treated as ONE value
            and wrapped in a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds the value(s).
    """
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

# Write an array to TFrecord.
# a is an array which contains lists of variant length.
# dtype=object is required: NumPy >= 1.24 raises ValueError when asked to
# build an array from ragged nested lists without it.  Each a[i] is still a
# plain Python list.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)

# The context manager closes the file even if building an Example fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for i, row in enumerate(a):  # i = 0 ~ 4
        feature = {'i': _int64_feature(i),
                   'data': _int64_feature(row)}

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())


# Use Dataset API to read the TFRecord file.
def _parse_function(example_proto):
    """Parse one serialized Example into (i, data).

    'data' is declared as a variable-length int64 feature; it is parsed as a
    sparse tensor and converted to a dense one before being returned.
    """
    schema = {
        'i': tf.FixedLenFeature([], tf.int64),
        'data': tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_single_example(example_proto, schema)
    dense_data = tf.sparse_tensor_to_dense(parsed['data'])
    return parsed['i'], dense_data


filenames = ["file"]
dataset = (tf.data.TFRecordDataset(filenames)
           .map(_parse_function)
           .shuffle(buffer_size=1)
           .repeat()
           .batch(1))
iterator = dataset.make_one_shot_iterator()
i, data = iterator.get_next()
with tf.Session() as sess:
    # Pull and print three consecutive batches.
    for _ in range(3):
        print(sess.run([i, data]))

"""
[array([0], dtype=int64), array([[ 0, 54, 91, 153, 177, 1]], dtype=int64)]
[array([1], dtype=int64), array([[ 0, 50, 89, 147, 196]], dtype=int64)]
[array([2], dtype=int64), array([[ 0, 38, 79, 157]], dtype=int64)]
"""

batch出力
出力は行列になっていますね。では、もう一つ試してみましょう。


dataset = dataset.batch(2)
"""
Cannot batch tensors with different shapes in component 1. First element had shape [6] and element 1 had shape [5].
"""

これは、1つのbatch内のデータのshapeが一致していなければならないためです。1つ目の要素の長さは6、2つ目の要素の長さは5なので、エラーになります。対処法は長さが揃うようにパディングすることですが、その前に別のことを確認しておきます。


# dtype=object is required: NumPy >= 1.24 raises ValueError when asked to
# build an array from ragged nested lists without it.  The result is a 1-D
# array whose elements are the original Python lists.
a = np.array([[0, 54, 91, 153, 177, 1],
              [0, 50, 89, 147, 196],
              [0, 38, 79, 157],
              [0, 49, 89, 147, 177],
              [0, 32, 73, 145]], dtype=object)


# Show the Python type of every row.
for i in range(a.shape[0]):
    print(type(a[i]))

"""
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>
"""

行ごとに長さの異なるarrayでは、各要素がlistであることが分かります。次に、パディングして長さを揃えてみます。


# Zero-padded, rectangular version of the data: every row has six entries.
a = np.array([
    [0, 54, 91, 153, 177, 1],
    [0, 50, 89, 147, 196, 0],
    [0, 38, 79, 157, 0, 0],
    [0, 49, 89, 147, 177, 0],
    [0, 32, 73, 145, 0, 0],
])


# Show the Python type of every row.
for row in a:
    print(type(row))

"""
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
"""

今度返ってきたのはnumpy.ndarrayです。なぜこれを確認したのでしょうか。


def _int64_feature(value):
    """Wrap an int or a list of ints in an int64 ``tf.train.Feature``.

    Args:
        value: A single value or a ``list`` of values.  Anything that is not
            a ``list`` (a ``numpy.ndarray`` included) is treated as ONE value
            and wrapped in a one-element list.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds the value(s).
    """
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

TensorFlowに渡す値はlistでなければならないのでしょうか、それともnumpy.ndarrayを直接渡せるのでしょうか。listの中にnumpy.ndarrayが入っている（[numpy.ndarray]）とエラーになります。先ほどの可変長の配列では各行がlistで返ってきたので、何の問題もありませんでした。今度はパディングした配列で試してみます。


a = np.array([[0, 54, 91, 153, 177,1],
  [0, 50, 89, 147, 196,0],
  [0, 38, 79, 157,0,0],
  [0, 49, 89, 147, 177,0],
  [0, 32, 73, 145,0,0]])

"""
TypeError: only size-1 arrays can be converted to Python scalars
"""

これは、各行がlistではなくnumpy.ndarrayとして返ってくるためです。_int64_feature 関数ではnumpy.ndarrayは「listではない」と判定されて [value] に包まれるため、エラーになります。修正方法はいくつかありますが、その一つはnumpy.ndarrayをlistに変換することです。


for i in range(a.shape[0]): # i = 0 ~ 4
 # tolist() converts the row to a plain Python list before it is passed to _int64_feature
 feature = {'i' : _int64_feature(i), 
  'data': _int64_feature(a[i].tolist())}

このようにパディングしておけば、batchサイズを変更できます。


dataset = dataset.batch(2)

"""
[array([0, 2], dtype=int64), array([[ 0, 54, 91, 153, 177, 1],
 [ 0, 38, 79, 157, 0, 0]], dtype=int64)]
[array([1, 3], dtype=int64), array([[ 0, 50, 89, 147, 196, 0],
 [ 0, 49, 89, 147, 177, 0]], dtype=int64)]
[array([4, 0], dtype=int64), array([[ 0, 32, 73, 145, 0, 0],
 [ 0, 54, 91, 153, 177, 1]], dtype=int64)]
"""

もちろん、自分でパディングする必要はありません。TensorFlowがパディング用の関数を用意しています。


# -*- coding: utf-8 -*-

import tensorflow as tf

def _int64_feature(value):
    """Wrap one integer or a sequence of integers in an int64 ``tf.train.Feature``.

    Args:
        value: A single integer, a ``list``/``tuple`` of integers, or a NumPy
            scalar / array (anything exposing ``tolist()``).

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds the value(s).
    """
    # NumPy arrays and scalars expose tolist(); converting first yields plain
    # Python ints / lists instead of wrapping a whole array as one "value".
    if hasattr(value, 'tolist'):
        value = value.tolist()
    if not isinstance(value, (list, tuple)):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

# Plain nested list with rows of different lengths (no NumPy needed here).
a = [[0, 54, 91, 153, 177, 1],
     [0, 50, 89, 147, 196],
     [0, 38, 79, 157],
     [0, 49, 89, 147, 177],
     [0, 32, 73, 145]]

# The context manager closes the file even if building an Example fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for v in a:  # v is one variable-length row
        feature = {'data': _int64_feature(v)}

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())


# Use Dataset API to read the TFRecord file.
def _parse_function(example_proto):
    """Parse one serialized Example and return its 'data' feature densified."""
    schema = {'data': tf.VarLenFeature(tf.int64)}
    parsed = tf.parse_single_example(example_proto, schema)
    sparse_data = parsed['data']
    return tf.sparse_tensor_to_dense(sparse_data)


filenames = ["file"]
dataset = (tf.data.TFRecordDataset(filenames)
           .map(_parse_function)
           .shuffle(buffer_size=1)
           .repeat()
           .padded_batch(2, padded_shapes=([None])))
iterator = dataset.make_one_shot_iterator()
data = iterator.get_next()
with tf.Session() as sess:
    # Pull and print three consecutive padded batches.
    for _ in range(3):
        print(sess.run([data]))


"""
[array([[ 0, 54, 91, 153, 177, 1],
 [ 0, 50, 89, 147, 196, 0]])]
[array([[ 0, 38, 79, 157, 0],
 [ 0, 49, 89, 147, 177]])]
[array([[ 0, 32, 73, 145, 0, 0],
 [ 0, 54, 91, 153, 177, 1]])]
"""

確かに自動でパディングされています。
画像のbatch
次は、実際の画像データで試してみます。


# -*- coding: utf-8 -*-

import tensorflow as tf
import matplotlib.pyplot as plt
def _byte_feature(value):
    """Return a ``tf.train.Feature`` that stores ``value`` as one bytes entry."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

files = tf.gfile.Glob('*.jpeg')
# The context manager closes the TFRecord file even if one image fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for path in files:
        # Store the encoded JPEG bytes unchanged; decoding happens at read time.
        with tf.gfile.FastGFile(path, 'rb') as f:
            img_buff = f.read()
        feature = {'img': _byte_feature(tf.compat.as_bytes(img_buff))}
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())


filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_proto):
 """Parse one serialized Example and decode its 'img' bytes as a JPEG image."""
 keys_to_features = {'img':tf.FixedLenFeature([], tf.string)}
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 image = tf.image.decode_jpeg(parsed_features['img'])
 return image

dataset = dataset.map(_parse_function)
dataset = dataset.shuffle(buffer_size=1)
dataset = dataset.repeat() 
# NOTE: the decoded images keep their own sizes, so batching two of them is
# what raises the "Cannot batch tensors with different shapes" error quoted
# after this listing.
dataset = dataset.batch(2)
iterator = dataset.make_one_shot_iterator()
image = iterator.get_next()

with tf.Session() as sess:
 # run() is given a one-element list, so img is a list holding one batch
 img = sess.run([image])
 print(len(img))
 print(img[0].shape)
 # display the first image of the batch
 plt.imshow(img[0][0])

"""
Cannot batch tensors with different shapes in component 0. First element had shape [440,440,3] and element 1 had shape [415,438,3].
"""

見てのとおり、batch内の画像のサイズが異なっています。同じbatchの画像は同じサイズにしなければなりません。


def _parse_function(example_proto):
 """Parse one serialized Example, decode the JPEG and resize it to 224x224."""
 keys_to_features = {'img':tf.FixedLenFeature([], tf.string)}
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 image = tf.image.decode_jpeg(parsed_features['img'])
 image = tf.image.convert_image_dtype(image,tf.float32)# convert uint8 to float before resizing. NOTE(review): reconstructed from a garbled comment - the intent appears to be that resize yields floats and plt.imshow accepts uint8 or 0-1 floats, so the image is converted to 0-1 floats here instead of dividing by 255.0 by hand; confirm
 image = tf.image.resize_images(image,(224,224))
 return image

しかし、画像を元のサイズのまま入力したく、resizeしたくない場合もあります。その場合はbatch_size=1にするしかありません。1つのbatch内の画像のshapeは同じでなければならないからです。折衷案として、TensorFlowが提供する動的パディングのインターフェースを使い、1つのbatch内の画像を同じshapeになるようパディングして学習することもできます。


dataset = dataset.padded_batch(2,padded_shapes=([None,None,3]))

画像のファイル名をラベルとして保存したい場合は、どうすればよいでしょうか。


# -*- coding: utf-8 -*-

import tensorflow as tf
import matplotlib.pyplot as plt
import os

out_charset="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"

def _byte_feature(value):
    """Return a ``tf.train.Feature`` that stores ``value`` as one bytes entry."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

def _int64_feature(values):
    """Wrap one integer or a sequence of integers in an int64 ``tf.train.Feature``.

    Args:
        values: A single integer, a ``list``/``tuple`` of integers, or a
            NumPy scalar / array (anything exposing ``tolist()``).

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds the value(s).
    """
    # NumPy arrays and scalars expose tolist(); converting first yields plain
    # Python ints / lists instead of wrapping a whole array as one "value".
    if hasattr(values, 'tolist'):
        values = values.tolist()
    if not isinstance(values, (list, tuple)):
        values = [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

files = tf.gfile.Glob('*.jpg')
# The context manager closes the TFRecord file even if one image fails.
with tf.python_io.TFRecordWriter('file') as writer:
    for path in files:
        # Store the encoded JPEG bytes unchanged; decoding happens at read time.
        with tf.gfile.FastGFile(path, 'rb') as f:
            img_buff = f.read()
        # The label text is the file name up to its first '.'.
        filename = os.path.basename(path).split('.')[0]
        # Encode each character as its index in out_charset; index() raises
        # ValueError for a character that is not in the charset.
        label = [out_charset.index(ch) for ch in filename]
        feature = {'label': _int64_feature(label),
                   'filename': _byte_feature(tf.compat.as_bytes(filename)),
                   'img': _byte_feature(tf.compat.as_bytes(img_buff))}
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())


filenames = ["file"]
dataset = tf.data.TFRecordDataset(filenames)
def _parse_function(example_proto):
 """Parse one serialized Example into (image, label, filename).

 'label' is a variable-length int64 feature converted to a dense tensor,
 'filename' is returned as the parsed scalar string, and 'img' is decoded
 as a JPEG image.
 """
 keys_to_features = {
  'label':tf.VarLenFeature(tf.int64),
  'filename':tf.FixedLenFeature([],tf.string),
  'img':tf.FixedLenFeature([], tf.string)}
 parsed_features = tf.parse_single_example(example_proto, keys_to_features)
 label = tf.sparse_tensor_to_dense(parsed_features['label'])
 filename = parsed_features['filename']
 image = tf.image.decode_jpeg(parsed_features['img'])
 return image,label,filename

dataset = dataset.map(_parse_function)
dataset = dataset.shuffle(buffer_size=1)
dataset = dataset.repeat() 
dataset = dataset.padded_batch(3,padded_shapes=([None,None,3],[None],[]))
# padded_shapes gives one shape per returned component, in the order
# (image, label, filename). Dimensions given as None are padded for image and
# label; filename is a scalar byte string, so its shape is [] and it needs no
# padding.
iterator = dataset.make_one_shot_iterator()
image,label,filename = iterator.get_next()

with tf.Session() as sess:
 # evaluate and print the label batch only
 print(label.eval())

手探りの実験
書き込むデータが多次元（リストのリスト）だったらどうなるでしょうか。


a = np.arange(16).reshape(2,4,2)

"""
TypeError: [0, 1] has type list, but expected one of: int, long
"""

しかし考えてみると、tf.train.Feature(int64_list=tf.train.Int64List(value=value)) が保存できるのは、int64を要素とする一次元のリストです。では、単語ベクトルの列を保存したい場合はどうすればよいでしょうか。例えば、サンプル s1='愛しています' をone-hotで符号化して、私=[0,0,1]、愛=[0,1,0]、あなた=[1,0,0] とすると、s1=[[0,0,1],[0,1,0],[1,0,0]] になります。このようなサンプルはどのように保存すればよいのでしょうか。
以上が、TensorFlowで可変長シーケンスを保存する例として共有する内容のすべてです。皆さんの参考になれば幸いです。今後ともよろしくお願いします。

PHP IF ELESE簡略/三元一回式の使用

PHP用SAX解析XMLの実現コードと問題分析