# LSTM prediction test version
# 9963 words
#!/bin/env python
# -*- coding: utf-8 -*-
# auth: kailkaka
# data: 2019-3-3
#############################
from __future__ import print_function

import sys
import warnings

import matplotlib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from tensorflow import keras

matplotlib.use('qt4agg')
# import matplotlib.pyplot as plt
# plt.switch_backend('agg')
# Plot font configuration.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['axes.unicode_minus'] = False

# The first command-line argument selects which section of the ``config``
# file is used (see ZcSummary.read_csv).
try:
    parameter = sys.argv[1]
except IndexError:
    print("Please Enter parameter...")
    sys.exit(1)

warnings.filterwarnings("ignore")
if sys.version_info[0] == 2:
    # reload()/setdefaultencoding() only exist on Python 2.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('UTF-8')


class ZcSummary:
    """Load the dataset named on the command line and prepare it for training."""

    def read_csv(self):
        """Find the ``parameter`` section in ``config`` and load its CSV file.

        The file ``config`` is scanned for the first non-empty line that
        equals ``parameter`` once trailing ``:`` characters are removed.  The
        three lines after it are split on ``=`` and their right-hand sides are
        taken as the data file path, the attribute (feature column) list and
        the tag (target column), in that order.

        Returns:
            tuple: ``(v_dataframe, attribute, tag)`` -- the DataFrame read
            from the data file plus the raw attribute and tag strings.

        Exits with status 2 when the section or one of its three lines is
        missing or has no ``=``.
        """
        with open('config', 'r') as fileline:
            # Strip the newline up front; otherwise "name:\n" never equals
            # the command-line parameter.
            list1 = [line.rstrip('\n') for line in fileline]

        for i, line in enumerate(list1):
            if line and line.rstrip(':') == parameter:
                print("----- %d" % i)
                break
        else:
            print("Please check parameter...,system is not support")
            sys.exit(2)

        try:
            datafile = list1[i + 1].split("=")[1].replace('\'', '').strip()
            attribute = list1[i + 2].split("=")[1]
            tag = list1[i + 3].split("=")[1]
        except IndexError:
            print("Please check parameter...,system is not support")
            sys.exit(2)

        v_dataframe = pd.read_csv(datafile, encoding='UTF-8')
        return v_dataframe, attribute, tag

    def preprocess_features(self, california_housing_dataframe, attribute, tag):
        """Split the frame into train/test parts and standardise both.

        The last 1000 rows become the test set and all rows before them the
        training set.  ``attribute`` and ``tag`` are the raw strings returned
        by :meth:`read_csv`; quotes, brackets and a trailing newline are
        removed, and ``attribute`` is split on ``,`` into column names.

        Features are scaled with a ``StandardScaler`` fitted on the training
        rows only; the targets of each part are scaled independently with
        ``preprocessing.scale`` and reshaped to a single column.

        Returns:
            tuple: ``(train_x, train_y, test_x, test_y, num)`` where ``num``
            is the number of predictor columns.
        """
        df = california_housing_dataframe
        split_at = df.shape[0] - 1000
        train_ = df[0:split_at]
        test_ = df[split_at:]

        predictors = (attribute.rstrip('\n')
                      .replace('\'', '').replace('[', '').replace(']', ''))
        predictors = predictors.split(',')
        tag = tag.rstrip('\n').replace('\'', '').replace('[', '').replace(']', '')

        X_train = train_[predictors]
        y_train = train_[tag]
        X_test = test_[predictors]
        y_test = test_[tag]
        num = len(predictors)

        test_y_disorder = preprocessing.scale(y_test).reshape(-1, 1)
        train_y_disorder = preprocessing.scale(y_train).reshape(-1, 1)
        ss_x = preprocessing.StandardScaler()
        train_x_disorder = ss_x.fit_transform(X_train)
        test_x_disorder = ss_x.transform(X_test)
        return train_x_disorder, train_y_disorder, test_x_disorder, test_y_disorder, num

    def main(self):
        """Run :meth:`read_csv` then :meth:`preprocess_features`.

        Returns:
            tuple: ``(X, y, X_test, y_test, num)`` as produced by
            :meth:`preprocess_features`.
        """
        california_housing_dataframe, attribute, tag = self.read_csv()
        X, y, X_test, y_test, num = self.preprocess_features(
            california_housing_dataframe, attribute, tag)
        return X, y, X_test, y_test, num


t = ZcSummary()
train_x, train_y, X_test, y_test, num = t.main()
BATCH_START = 0           # row index at which the next batch starts
TIME_STEPS = 10           # time steps per sample (backpropagation-through-time window)
TIME_STEP = TIME_STEPS    # original spelling of the constant, kept as an alias
BATCH_SIZE = 30           # samples per batch
INPUT_SIZE = num          # features per time step
OUTPUT_SIZE = 1           # predicted values per time step
CELL_SIZE = 10            # hidden units of the LSTM cell
LR = 0.006                # learning rate


def get_batch_boston():
    """Return the next training batch and advance the global batch cursor.

    Takes ``TIME_STEPS * BATCH_SIZE`` consecutive rows of ``train_x`` and
    ``train_y`` starting at ``BATCH_START`` and reshapes them to
    ``(BATCH_SIZE, TIME_STEPS, INPUT_SIZE)`` and ``(BATCH_SIZE, TIME_STEPS, 1)``.
    ``BATCH_START`` is then increased by ``TIME_STEPS`` only, so consecutive
    batches overlap.

    Returns:
        list: ``[seq, res]`` -- the feature batch and the target batch.
    """
    global BATCH_START
    stop = BATCH_START + TIME_STEPS * BATCH_SIZE
    x_part1 = train_x[BATCH_START:stop]
    y_part1 = train_y[BATCH_START:stop]
    seq = x_part1.reshape((BATCH_SIZE, TIME_STEPS, INPUT_SIZE))
    res = y_part1.reshape((BATCH_SIZE, TIME_STEPS, 1))
    BATCH_START += TIME_STEPS
    return [seq, res]


def get_batch():
    """Return a synthetic sine/cosine batch and advance the global batch cursor.

    ``xs`` is ``arange(BATCH_START, BATCH_START + TIME_STEPS * BATCH_SIZE)``
    reshaped to ``(BATCH_SIZE, TIME_STEPS)`` and divided by ``10 * pi``; the
    inputs are ``sin(xs)`` and the targets ``cos(xs)``.

    Returns:
        list: ``[seq, res, xs]`` where ``seq`` and ``res`` carry an extra
        trailing axis of length 1 (``(BATCH_SIZE, TIME_STEPS, 1)``).
    """
    global BATCH_START
    xs = np.arange(BATCH_START, BATCH_START + TIME_STEPS * BATCH_SIZE)
    xs = xs.reshape((BATCH_SIZE, TIME_STEPS)) / (10 * np.pi)
    print('xs.shape=', xs.shape)
    seq = np.sin(xs)
    res = np.cos(xs)
    BATCH_START += TIME_STEPS
    # np.newaxis adds the third (feature) axis expected by the model inputs.
    return [seq[:, :, np.newaxis], res[:, :, np.newaxis], xs]


class LSTMRNN(object):
    """Linear input layer -> one LSTM layer -> linear output layer (TF1 graph)."""

    def __init__(self, n_steps, input_size, output_size, cell_size, batch_size):
        """Build the whole graph: placeholders, layers, cost and train op.

        :param n_steps: number of time steps in each sample
        :param input_size: dimensionality of the input at each time step
        :param output_size: dimensionality of the output at each time step
            (1 for a single price-like curve)
        :param cell_size: size of the LSTM cell
        :param batch_size: number of samples in each training batch
        """
        self.n_steps = n_steps
        self.input_size = input_size
        self.output_size = output_size
        self.cell_size = cell_size
        self.batch_size = batch_size
        with tf.name_scope('inputs'):
            # Both placeholders are 3-D: (batch, n_steps, size).
            self.xs = tf.placeholder(tf.float32, [None, n_steps, input_size], name='xs')
            self.ys = tf.placeholder(tf.float32, [None, n_steps, output_size], name='ys')
        with tf.variable_scope('in_hidden'):
            self.add_input_layer()
        with tf.variable_scope('LSTM_cell'):
            self.add_cell()
        with tf.variable_scope('out_hidden'):
            self.add_output_layer()
        with tf.name_scope('cost'):
            self.compute_cost()
        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(LR).minimize(self.cost)

    def add_input_layer(self):
        """Project every time step from ``input_size`` to ``cell_size``.

        Sets ``self.l_in_y`` with shape ``(batch, n_steps, cell_size)``.
        """
        # Flatten (batch, n_steps, in_size) -> (batch * n_steps, in_size) so a
        # single matmul handles all time steps.
        l_in_x = tf.reshape(self.xs, [-1, self.input_size], name='2_2D')
        # Ws (in_size, cell_size)
        Ws_in = self._weight_variable([self.input_size, self.cell_size])
        # bs (cell_size, )
        bs_in = self._bias_variable([self.cell_size, ])
        # l_in_y = (batch * n_steps, cell_size)
        with tf.name_scope('Wx_plus_b'):
            l_in_y = tf.matmul(l_in_x, Ws_in) + bs_in
        # reshape l_in_y ==> (batch, n_steps, cell_size)
        self.l_in_y = tf.reshape(l_in_y, [-1, self.n_steps, self.cell_size], name='2_3D')

    def add_cell(self):
        """Create the LSTM cell and unroll it over ``self.l_in_y``.

        Sets ``self.cell_init_state`` (zero state for ``batch_size`` samples),
        ``self.cell_outputs`` and ``self.cell_final_state``.
        """
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
            self.cell_size, forget_bias=1.0, state_is_tuple=True)
        with tf.name_scope('initial_state'):
            self.cell_init_state = lstm_cell.zero_state(self.batch_size, dtype=tf.float32)
        # time_major=False: the batch is the first axis of the input.
        self.cell_outputs, self.cell_final_state = tf.nn.dynamic_rnn(
            lstm_cell, self.l_in_y, initial_state=self.cell_init_state, time_major=False)

    def add_output_layer(self):
        """Project the LSTM outputs to ``output_size``.

        Sets ``self.pred`` with shape ``(batch * n_steps, output_size)``.
        """
        # shape = (batch * steps, cell_size)
        l_out_x = tf.reshape(self.cell_outputs, [-1, self.cell_size], name='2_2D')
        Ws_out = self._weight_variable([self.cell_size, self.output_size])
        bs_out = self._bias_variable([self.output_size, ])
        # shape = (batch * steps, output_size)
        with tf.name_scope('Wx_plus_b'):
            self.pred = tf.matmul(l_out_x, Ws_out) + bs_out

    def compute_cost(self):
        """Define ``self.cost`` and register it as the scalar summary 'cost'.

        The flattened predictions and targets are passed to
        ``sequence_loss_by_example`` with :meth:`ms_error` as the loss
        function and unit weights; the resulting losses are summed and
        divided by ``batch_size``.
        """
        losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [tf.reshape(self.pred, [-1], name='reshape_pred')],
            [tf.reshape(self.ys, [-1], name='reshape_target')],
            [tf.ones([self.batch_size * self.n_steps], dtype=tf.float32)],
            average_across_timesteps=True,
            softmax_loss_function=self.ms_error,
            name='losses'
        )
        with tf.name_scope('average_cost'):
            self.cost = tf.div(
                tf.reduce_sum(losses, name='losses_sum'),
                self.batch_size,
                name='average_cost')
            tf.summary.scalar('cost', self.cost)

    def ms_error(self, labels, logits):
        """Element-wise squared error; used as the loss function in compute_cost."""
        return tf.square(tf.subtract(labels, logits))

    def _weight_variable(self, shape, name='weights'):
        """Return a variable of ``shape`` initialised from N(0, 1)."""
        initializer = tf.random_normal_initializer(mean=0., stddev=1.,)
        return tf.get_variable(shape=shape, initializer=initializer, name=name)

    def _bias_variable(self, shape, name='biases'):
        """Return a variable of ``shape`` initialised to the constant 0.1."""
        initializer = tf.constant_initializer(0.1)
        return tf.get_variable(name=name, shape=shape, initializer=initializer)
if __name__ == '__main__':
    n_epochs = 1000          # passes over the training window
    batches_per_epoch = 20   # batches drawn per pass

    # NOTE(review): the result of this call is not used, but it advances
    # BATCH_START by TIME_STEPS, so the first epoch starts one window later
    # than the following ones (BATCH_START is reset to 0 after every epoch).
    seq, res = get_batch_boston()
    model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)
    sess = tf.Session()
    merged = tf.summary.merge_all()
    writer = tf.summary.FileWriter("houseprice", sess.graph)
    # tf.initialize_all_variables() is no longer valid from
    # 2017-03-02 if using tensorflow >= 0.12
    sess.run(tf.global_variables_initializer())
    # Relocate to the local dir and run this line to view it on Chrome
    # (http://0.0.0.0:6006/):
    # $ tensorboard --logdir='houseprice'

    for j in range(n_epochs):
        for i in range(batches_per_epoch):
            seq, res = get_batch_boston()
            feed_dict = {
                model.xs: seq,
                model.ys: res,
            }
            if i > 0:
                # Use the final state of the previous batch as the initial
                # state of this run; the first batch of an epoch starts from
                # the model's zero state.
                feed_dict[model.cell_init_state] = state
            _, cost, state, pred = sess.run(
                [model.train_op, model.cost, model.cell_final_state, model.pred],
                feed_dict=feed_dict)
            result = sess.run(merged, feed_dict)
            # Use a step that keeps growing across epochs; passing only ``i``
            # would write every epoch onto steps 0..batches_per_epoch-1.
            writer.add_summary(result, j * batches_per_epoch + i)
        print('{0} loss= '.format(j), round(cost, 4))
        BATCH_START = 0  # start the next epoch from the first row again

    writer.close()
    sess.close()

    # ``pred`` holds the predictions of the last batch of the last epoch,
    # which started at row (batches_per_epoch - 1) * TIME_STEPS.  Take the
    # matching targets so both curves cover the same rows.
    last_start = (batches_per_epoch - 1) * TIME_STEPS
    train_y = train_y[last_start:last_start + BATCH_SIZE * TIME_STEPS]

    # Plot the last 100 predictions against the last 100 targets.
    import matplotlib.pyplot as plt
    fig = plt.figure(figsize=(20, 3))
    axes = fig.add_subplot(1, 1, 1)
    # NOTE(review): the label/title strings below look truncated (possibly
    # non-ASCII text lost from the original) -- confirm the intended wording.
    line1, = axes.plot(range(100), pred.flatten()[-100:], 'b--', label='rnn ')
    line3, = axes.plot(range(100), train_y.flatten()[-100:], 'r', label=' ')
    axes.grid()
    fig.tight_layout()
    plt.legend(handles=[line1, line3])
    plt.title(u' ')
    plt.savefig('houseprice.png')
    plt.show()