The Effect of Dropout on Recurrent Neural Networks (practical summary; results carry some margin of error)


  • Practical code
  • import os
    import tensorflow as tf
    import numpy as np
    from tensorflow import keras
    from tensorflow.keras import layers
    
    
    # fix the TensorFlow random seed so the experiment is reproducible
    tf.random.set_seed(22)
    # fix the NumPy random seed as well
    np.random.seed(22)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    # startswith('2.') checks whether tf.__version__ begins with '2.', returning True or False
    # assert does nothing when the condition is True and raises 'AssertionError' when it is False
    assert tf.__version__.startswith('2.')
    assert np.__version__.startswith('1.16.2')
    
    
    batchsz = 512
    # the most frequent words
    total_words = 10000    # keep only the 10000 most frequent words in the vocabulary
    max_review_len = 80    # maximum review length; shorter reviews are padded up to this length
    # max_review_len = 100
    embedding_len = 100    # each word is encoded as a 100-dimensional embedding vector
    
    # load the IMDB dataset; num_words=total_words keeps only the total_words most frequent words
    # and maps every rarer word to the out-of-vocabulary index
    (x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]  pad (or truncate) every review to max_review_len; the padding value is 0
    x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    # x_test:  [b, 80]  pad x_test to the same fixed length of 80
    x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)
    
    # build the tf.data input pipelines
    db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    # batch(): with drop_remainder=True the last incomplete batch is dropped, so every batch holds exactly batchsz samples
    db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
    db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    db_test = db_test.batch(batchsz, drop_remainder=True)
    
    
    # define the MyRNN model
    class MyRNN(keras.Model):
        # units is the number of hidden units (h_dim) of each RNN Cell
        def __init__(self, units):
            super(MyRNN, self).__init__()
            # initial hidden states of the two RNN cells
            # shape [b, 64], i.e. [batchsz, units]
            self.state0 = [tf.zeros([batchsz, units])]
            self.state1 = [tf.zeros([batchsz, units])]
            
            # self.state2 = [tf.zeros([batchsz, units])]
            # BatchNormalization layer applied before the final sigmoid
            self.bn1 = layers.BatchNormalization()
            
    
            # embedding layer: encodes each word index as a dense vector,
            # learned jointly with the rest of the network;
            # input_length is the padded sequence length (80)
            # [b, 80] => [b, 80, 100]
            # transform text to embedding representation
            self.embedding = layers.Embedding(total_words, embedding_len, input_length=max_review_len)
            # build the RNN cells
            # [b, 80, 100] , h_dim: 64
            # units is the hidden dimension h_dim of each Cell
            # the dropout argument randomly drops cell inputs; it is only active when training=True
            # [b, 80, 100] => [b, 64]
            self.rnn_cell0 = layers.SimpleRNNCell(units, dropout=0.99)
            self.rnn_cell1 = layers.SimpleRNNCell(units, dropout=0.99)
            # self.rnn_cell2 = layers.SimpleRNNCell(units, dropout=0.5)
            
            # classification layer: fully connected, output dimension 1 (positive / negative)
            # [b, 64] => [b, 1]
            
            self.outlayer = layers.Dense(1)
            
        def call(self, inputs, training=None):    
            """
                training=None,    dropout  
            net(x) ; net(x, training=True) ; net(x, training=None)   -->   train mode
            net(x, training=False)   -->   test mode
            
            """
            x = inputs    # [b, 80]
            
            x = self.embedding(x)    # [b, 80]  =>  [b, 80, 100]
            
            state0 = self.state0    # [batchsz, units]
            state1 = self.state1    # [batchsz, units]
            # state2 = self.state2
            # [b, 80, 100]  =>  [b, 64]
            for word in tf.unstack(x, axis=1):    # word: [b, 100]  * 80
                # out0: [b, 64]; pass the training flag through so dropout only runs in train mode
                out0, state0 = self.rnn_cell0(word, state0, training)
                # out1: [b, 64]
                out1, state1 = self.rnn_cell1(out0, state1, training)
                # out2: [b, 64]
                # out2, state2 = self.rnn_cell2(out1, state2, training)
            # use the output of the last time step: out1 [b, 64]  =>  [b, 1]
            x = self.outlayer(out1)
            # p(y is pos | x )
            x_BN = self.bn1(x)
            prob = tf.sigmoid(x_BN)
            
            return prob
    
    
    
    units = 64
    # units = 150
    epochs = 100
    model = MyRNN(units)
    model.compile(optimizer = keras.optimizers.Adam(1e-3),
                 loss = tf.losses.BinaryCrossentropy(),
                 metrics = ['accuracy'])
    # train the RNN
    model.fit(db_train, epochs=epochs, validation_data=db_test)
    # evaluate on the test set
    model.evaluate(db_test)
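
  • Quick check after training (sketch)
  • A minimal sketch of using the trained network, assuming model, db_test, and tf from the listing above are in scope; it is not part of the author's listing. It only illustrates that calling the model with training=False disables dropout and returns p(y is pos | x) for one full batch of 512 padded reviews.
  • # sketch: class probabilities for one test batch; training=False disables dropout
    x_batch, y_batch = next(iter(db_test))          # x_batch: [512, 80], y_batch: [512]
    probs = model(x_batch, training=False)          # [512, 1], p(y is pos | x)
    preds = tf.cast(probs > 0.5, tf.int32)[:, 0]    # threshold the probabilities at 0.5
    acc = tf.reduce_mean(tf.cast(tf.equal(preds, tf.cast(y_batch, tf.int32)), tf.float32))
    print('accuracy on this batch:', float(acc))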

     
  • The results are as follows.
    Effect of dropout on Val_accuracy

    dropout | best Val_accuracy | corresponding epoch | comment
    --------+-------------------+---------------------+---------
    0.0     | 0.8175            | 3                   |
    0.1     | 0.8168            | 2                   |
    0.2     | 0.8187            | 4                   |
    0.3     | 0.8260            | 4                   |
    0.4     | 0.8258            | 4                   |
    0.5     | 0.8259            | 4                   |
    0.6     | 0.8310            | 5                   |
    0.7     | 0.8336            | 7                   |
    0.8     | 0.8310            | around 15           |
    0.9     | 0.8331            | 40-43               | unstable
    0.99    | 0.5000            | ~                   |
     
  • Analysis of results
  • Dropout can improve training efficiency and reduce overfitting.
  • As dropout increases, Val_accuracy tends to rise gradually; as dropout approaches 1, Val_accuracy falls to 0.5 (random classification).
  • With large dropout values, training becomes increasingly unstable.
  • As dropout increases, reaching the best Val_accuracy requires more epochs; in other words, the network converges more slowly and becomes less stable.
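
  • Reproducing the sweep (sketch)
  • The numbers in the table come from repeating the training run with different dropout rates. The snippet below is a minimal sketch of such a sweep, not the author's original script: it assumes the listing above is in scope and that MyRNN has been given a hypothetical dropout_rate constructor argument that is forwarded to both SimpleRNNCell layers (dropout=dropout_rate) instead of the hard-coded value.
  • # hypothetical sweep over the dropout rates from the table; assumes MyRNN(units, dropout_rate=...)
    results = {}
    for rate in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]:
        model = MyRNN(units, dropout_rate=rate)
        model.compile(optimizer=keras.optimizers.Adam(1e-3),
                      loss=tf.losses.BinaryCrossentropy(),
                      metrics=['accuracy'])
        history = model.fit(db_train, epochs=epochs, validation_data=db_test, verbose=0)
        val_acc = history.history['val_accuracy']
        best_epoch = int(np.argmax(val_acc)) + 1    # epoch numbers in the table are 1-based
        results[rate] = (max(val_acc), best_epoch)
        print(f'dropout={rate}: best val_accuracy={max(val_acc):.4f} at epoch {best_epoch}')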
