TensorFlow 2.0ノート(八)——循環神経ネットワーク


# --- Single SimpleRNNCell step (notebook-style snippet) ---
# Random input; presumably laid out as [batch=4, time steps=80, features=100].
x = tf.random.normal([4, 80, 100])
# Slice index 0 of the second axis for every sample -> [4, 100].
xt0 = x[:, 0, :]
cell = tf.keras.layers.SimpleRNNCell(64)
# Run one step; the previous state is passed as a one-element list of zeros [4, 64].
out, xt1 = cell(xt0, [tf.zeros([4, 64])])
# Inspect the shapes of the step output and of the returned state.
out.shape, xt1[0].shape
# Compare object identities of the output and the returned state.
# NOTE(review): for SimpleRNNCell both are expected to be the same tensor — confirm on your TF version.
id(out), id(xt1[0])
# List the cell's trainable weights.
cell.trainable_variables
# --- Two stacked SimpleRNNCells, one step ---
x = tf.random.normal([4, 80, 100])
xt0 = x[:, 0, :]
cell = tf.keras.layers.SimpleRNNCell(64)
cell2 = tf.keras.layers.SimpleRNNCell(64)
# Each cell gets its own zero state, again wrapped in a list.
state0 = [tf.zeros([4, 64])]
state1 = [tf.zeros([4, 64])]
# The first cell consumes the input slice; its output feeds the second cell.
out0, state0 = cell(xt0, state0)
out1, state1 = cell2(out0, state1)
# Inspect the shapes of the second cell's output and state.
out1.shape, state1[0].shape
  • RNN Cell
  • import os
    import tensorflow as tf
    import numpy as np
    
    # NOTE(review): this runs after `import tensorflow`, so it presumably does not
    # silence messages emitted during the import itself — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    # Enable on-demand memory allocation on every detected GPU. With no GPU the
    # loop is a no-op, so the script still runs on CPU instead of aborting.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)

    # Fixed seeds for TensorFlow and NumPy random number generation.
    tf.random.set_seed(22)
    np.random.seed(22)

    batchsz = 128
    # keep only the most frequent words
    total_words = 10000
    # every review is padded/truncated to this many tokens
    max_review_len = 80
    # size of each word embedding vector
    embedding_len = 100
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

    # Batched pipelines; incomplete trailing batches are dropped.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.batch(batchsz, drop_remainder=True)
    print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(tf.keras.Model):
        """Two stacked ``SimpleRNNCell``s unrolled by hand over the time axis.

        Token ids are embedded, fed step by step through both cells, and the
        second cell's final output is mapped to one sigmoid probability.
        """

        def __init__(self, units):
            """
            :param units: hidden size of each recurrent cell
            """
            super(MyRNN, self).__init__()

            # Remembered so call() can build zero states for whatever batch size
            # it receives, instead of hard-coding the global `batchsz`.
            self.units = units
            # transform text to embedding representation
            # [b, 80] => [b, 80, 100]
            self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                       input_length=max_review_len)
            # two SimpleRNN cells, each with dropout=0.5
            self.rnn_cell0 = tf.keras.layers.SimpleRNNCell(units, dropout=0.5)
            self.rnn_cell1 = tf.keras.layers.SimpleRNNCell(units, dropout=0.5)
            # fc, [b, units] => [b, 1]
            self.out_layer = tf.keras.layers.Dense(1)

        def call(self, inputs, training=None, mask=None):
            """
            net(x) net(x, training=True): train mode
            net(x, training=False): test
            :param inputs: [b, 80] token ids
            :param training: forwarded to both cells
            :param mask: accepted for signature compatibility; not used here
            :return: [b, 1] sigmoid output
            """
            # embedding: [b, 80] => [b, 80, 100]
            x = self.embedding(inputs)
            # Fresh zero states sized from the actual batch, so partial batches
            # and single-sample inference work too.
            batch = tf.shape(x)[0]
            state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
            state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]
            # run cell compute
            # [b, 80, 100] => [b, units]
            for word in tf.unstack(x, axis=1):  # word: [b, 100]
                # out0: [b, units]
                out0, state0 = self.rnn_cell0(word, state0, training=training)
                # out1: [b, units]
                out1, state1 = self.rnn_cell1(out0, state1, training=training)

            # out: [b, units] => [b, 1]
            x = self.out_layer(out1)
            # p(y is pos|x)
            prob = tf.sigmoid(x)

            return prob
    
    
    def main():
        """Build the cell-based MyRNN, train it for 4 epochs and evaluate it on the test set."""
        hidden_units, num_epochs = 64, 4
        net = MyRNN(hidden_units)
        net.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.losses.BinaryCrossentropy(),
            experimental_run_tf_function=False,
            metrics=['accuracy'],
        )
        # The test set doubles as validation data and is checked after every epoch.
        net.fit(
            train_data,
            epochs=num_epochs,
            validation_data=test_data,
            validation_freq=1,
        )
        net.evaluate(test_data)


    if __name__ == '__main__':
        main()
    
  • RNN layer
  • import os
    import time
    import tensorflow as tf
    import numpy as np
    
    # NOTE(review): this runs after `import tensorflow`, so it presumably does not
    # silence messages emitted during the import itself — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    # Enable on-demand memory allocation on every detected GPU. With no GPU the
    # loop is a no-op, so the script still runs on CPU instead of aborting.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)

    # Fixed seeds for TensorFlow and NumPy random number generation.
    tf.random.set_seed(22)
    np.random.seed(22)

    batchsz = 128
    # keep only the most frequent words
    total_words = 10000
    # every review is padded/truncated to this many tokens
    max_review_len = 80
    # size of each word embedding vector
    embedding_len = 100
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

    # Batched pipelines; incomplete trailing batches are dropped.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.batch(batchsz, drop_remainder=True)
    print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(tf.keras.Model):
        """Embedding -> two stacked ``SimpleRNN`` layers -> single-unit dense -> sigmoid."""

        def __init__(self, units):
            """
            :param units: hidden size of each SimpleRNN layer
            """
            super(MyRNN, self).__init__()

            # token ids -> embedding vectors: [b, 80] => [b, 80, 100]
            self.embedding = tf.keras.layers.Embedding(
                total_words, embedding_len, input_length=max_review_len)
            # The lower layer returns the whole sequence so the upper layer can
            # consume it; the upper layer uses the default return mode.
            lower_rnn = tf.keras.layers.SimpleRNN(units, dropout=0.5, return_sequences=True)
            upper_rnn = tf.keras.layers.SimpleRNN(units, dropout=0.5)
            self.rnn = tf.keras.Sequential([lower_rnn, upper_rnn])
            # classification head: [b, units] => [b, 1]
            self.out_layer = tf.keras.layers.Dense(1)

        def call(self, inputs, training=None, mask=None):
            """
            net(x) net(x, training=True): train mode
            net(x, training=False): test
            :param inputs: [b, 80] token ids
            :param training: not forwarded explicitly by this method
            :param mask: not used by this method
            :return: [b, 1] sigmoid output
            """
            # [b, 80] => [b, 80, 100]
            embedded = self.embedding(inputs)
            # [b, 80, 100] => [b, units]
            features = self.rnn(embedded)
            # [b, units] => [b, 1]
            logits = self.out_layer(features)
            # p(y is pos|x)
            return tf.sigmoid(logits)
    
    
    def main():
        """Train and evaluate the SimpleRNN-layer model, then print the elapsed wall time."""
        hidden_units, num_epochs = 64, 4
        started = time.time()
        net = MyRNN(hidden_units)
        net.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.losses.BinaryCrossentropy(),
            metrics=['accuracy'],
        )
        # The test set doubles as validation data and is checked after every epoch.
        net.fit(
            train_data,
            epochs=num_epochs,
            validation_data=test_data,
            validation_freq=1,
        )
        net.evaluate(test_data)

        # Result noted by the original author: accuracy 0.8107, total time cost 39.90
        elapsed = time.time() - started
        print('total time cost:', elapsed)


    if __name__ == '__main__':
        main()
    
  • LSTM Cell
  • import os
    import time
    import tensorflow as tf
    import numpy as np
    
    # NOTE(review): this runs after `import tensorflow`, so it presumably does not
    # silence messages emitted during the import itself — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    # Enable on-demand memory allocation on every detected GPU. With no GPU the
    # loop is a no-op, so the script still runs on CPU instead of aborting.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)

    # Fixed seeds for TensorFlow and NumPy random number generation.
    tf.random.set_seed(22)
    np.random.seed(22)

    batchsz = 128
    # keep only the most frequent words
    total_words = 10000
    # every review is padded/truncated to this many tokens
    max_review_len = 80
    # size of each word embedding vector
    embedding_len = 100
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

    # Batched pipelines; incomplete trailing batches are dropped.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.batch(batchsz, drop_remainder=True)
    print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(tf.keras.Model):
        """Two stacked ``LSTMCell``s unrolled by hand over the time axis.

        Token ids are embedded, fed step by step through both cells, and the
        second cell's final output is mapped to one sigmoid probability.
        """

        def __init__(self, units):
            """
            :param units: hidden size of each LSTM cell
            """
            super(MyRNN, self).__init__()

            # Remembered so call() can build zero states for whatever batch size
            # it receives, instead of hard-coding the global `batchsz`.
            self.units = units
            # transform text to embedding representation
            # [b, 80] => [b, 80, 100]
            self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                       input_length=max_review_len)
            # two LSTM cells, each with dropout=0.5
            self.rnn_cell0 = tf.keras.layers.LSTMCell(units, dropout=0.5)
            self.rnn_cell1 = tf.keras.layers.LSTMCell(units, dropout=0.5)
            # fc, [b, units] => [b, 1]
            self.out_layer = tf.keras.layers.Dense(1)

        def _zero_state(self, batch, dtype):
            """Return the two zero tensors ([batch, units] each) one LSTMCell takes as state."""
            return [tf.zeros([batch, self.units], dtype=dtype),
                    tf.zeros([batch, self.units], dtype=dtype)]

        def call(self, inputs, training=None, mask=None):
            """
            net(x) net(x, training=True): train mode
            net(x, training=False): test
            :param inputs: [b, 80] token ids
            :param training: forwarded to both cells
            :param mask: accepted for signature compatibility; not used here
            :return: [b, 1] sigmoid output
            """
            # embedding: [b, 80] => [b, 80, 100]
            x = self.embedding(inputs)
            # Fresh zero states sized from the actual batch, so partial batches
            # and single-sample inference work too.
            batch = tf.shape(x)[0]
            state0 = self._zero_state(batch, x.dtype)
            state1 = self._zero_state(batch, x.dtype)
            # run cell compute
            # [b, 80, 100] => [b, units]
            for word in tf.unstack(x, axis=1):  # word: [b, 100]
                # out0: [b, units]
                out0, state0 = self.rnn_cell0(word, state0, training=training)
                # out1: [b, units]
                out1, state1 = self.rnn_cell1(out0, state1, training=training)

            # out: [b, units] => [b, 1]
            x = self.out_layer(out1)
            # p(y is pos|x)
            prob = tf.sigmoid(x)

            return prob
    
    
    def main():
        """Train and evaluate the LSTMCell-based model, then print the elapsed wall time."""
        hidden_units, num_epochs = 64, 4

        started = time.time()
        net = MyRNN(hidden_units)
        net.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.losses.BinaryCrossentropy(),
            experimental_run_tf_function=False,
            metrics=['accuracy'],
        )
        # The test set doubles as validation data and is checked after every epoch.
        net.fit(
            train_data,
            epochs=num_epochs,
            validation_data=test_data,
            validation_freq=1,
        )
        net.evaluate(test_data)

        elapsed = time.time() - started
        print('total time cost:', elapsed)


    if __name__ == '__main__':
        main()
    
  • LSTM layer
  • import os
    import time
    import tensorflow as tf
    import numpy as np
    
    # NOTE(review): this runs after `import tensorflow`, so it presumably does not
    # silence messages emitted during the import itself — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    # Enable on-demand memory allocation on every detected GPU. With no GPU the
    # loop is a no-op, so the script still runs on CPU instead of aborting.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)

    # Fixed seeds for TensorFlow and NumPy random number generation.
    tf.random.set_seed(22)
    np.random.seed(22)

    batchsz = 128
    # keep only the most frequent words
    total_words = 10000
    # every review is padded/truncated to this many tokens
    max_review_len = 80
    # size of each word embedding vector
    embedding_len = 100
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

    # Batched pipelines; incomplete trailing batches are dropped.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.batch(batchsz, drop_remainder=True)
    print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(tf.keras.Model):
        """Embedding -> two stacked ``LSTM`` layers -> single-unit dense -> sigmoid."""

        def __init__(self, units):
            """
            :param units: hidden size of each LSTM layer
            """
            super(MyRNN, self).__init__()

            # token ids -> embedding vectors: [b, 80] => [b, 80, 100]
            self.embedding = tf.keras.layers.Embedding(
                total_words, embedding_len, input_length=max_review_len)
            # The lower layer returns the whole sequence so the upper layer can
            # consume it; the upper layer uses the default return mode.
            lower_lstm = tf.keras.layers.LSTM(units, dropout=0.5, return_sequences=True)
            upper_lstm = tf.keras.layers.LSTM(units, dropout=0.5)
            self.rnn = tf.keras.Sequential([lower_lstm, upper_lstm])
            # classification head: [b, units] => [b, 1]
            self.out_layer = tf.keras.layers.Dense(1)

        def call(self, inputs, training=None, mask=None):
            """
            net(x) net(x, training=True): train mode
            net(x, training=False): test
            :param inputs: [b, 80] token ids
            :param training: not forwarded explicitly by this method
            :param mask: not used by this method
            :return: [b, 1] sigmoid output
            """
            # [b, 80] => [b, 80, 100]
            embedded = self.embedding(inputs)
            # [b, 80, 100] => [b, units]
            features = self.rnn(embedded)
            # [b, units] => [b, 1]
            logits = self.out_layer(features)
            # p(y is pos|x)
            return tf.sigmoid(logits)
    
    
    def main():
        """Train and evaluate the LSTM-layer model, then print the elapsed wall time."""
        hidden_units, num_epochs = 64, 4
        started = time.time()
        net = MyRNN(hidden_units)
        net.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.losses.BinaryCrossentropy(),
            metrics=['accuracy'],
        )
        # The test set doubles as validation data and is checked after every epoch.
        net.fit(
            train_data,
            epochs=num_epochs,
            validation_data=test_data,
            validation_freq=1,
        )
        net.evaluate(test_data)

        # Result noted by the original author: accuracy 0.8124, total time cost 25.03
        elapsed = time.time() - started
        print('total time cost:', elapsed)


    if __name__ == '__main__':
        main()
    

    GRU Cell
    import os
    import time
    import tensorflow as tf
    import numpy as np
    
    # NOTE(review): this runs after `import tensorflow`, so it presumably does not
    # silence messages emitted during the import itself — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    # Enable on-demand memory allocation on every detected GPU. With no GPU the
    # loop is a no-op, so the script still runs on CPU instead of aborting.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)

    # Fixed seeds for TensorFlow and NumPy random number generation.
    tf.random.set_seed(22)
    np.random.seed(22)

    batchsz = 128
    # keep only the most frequent words
    total_words = 10000
    # every review is padded/truncated to this many tokens
    max_review_len = 80
    # size of each word embedding vector
    embedding_len = 100
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

    # Batched pipelines; incomplete trailing batches are dropped.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.batch(batchsz, drop_remainder=True)
    print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(tf.keras.Model):
        """Two stacked ``GRUCell``s unrolled by hand over the time axis.

        Token ids are embedded, fed step by step through both cells, and the
        second cell's final output is mapped to one sigmoid probability.
        """

        def __init__(self, units):
            """
            :param units: hidden size of each GRU cell
            """
            super(MyRNN, self).__init__()

            # Remembered so call() can build zero states for whatever batch size
            # it receives, instead of hard-coding the global `batchsz`.
            self.units = units
            # transform text to embedding representation
            # [b, 80] => [b, 80, 100]
            self.embedding = tf.keras.layers.Embedding(total_words, embedding_len,
                                                       input_length=max_review_len)
            # two GRU cells, each with dropout=0.5
            self.rnn_cell0 = tf.keras.layers.GRUCell(units, dropout=0.5)
            self.rnn_cell1 = tf.keras.layers.GRUCell(units, dropout=0.5)
            # fc, [b, units] => [b, 1]
            self.out_layer = tf.keras.layers.Dense(1)

        def call(self, inputs, training=None, mask=None):
            """
            net(x) net(x, training=True): train mode
            net(x, training=False): test
            :param inputs: [b, 80] token ids
            :param training: forwarded to both cells
            :param mask: accepted for signature compatibility; not used here
            :return: [b, 1] sigmoid output
            """
            # embedding: [b, 80] => [b, 80, 100]
            x = self.embedding(inputs)
            # Fresh zero states sized from the actual batch, so partial batches
            # and single-sample inference work too.
            batch = tf.shape(x)[0]
            state0 = [tf.zeros([batch, self.units], dtype=x.dtype)]
            state1 = [tf.zeros([batch, self.units], dtype=x.dtype)]
            # run cell compute
            # [b, 80, 100] => [b, units]
            for word in tf.unstack(x, axis=1):  # word: [b, 100]
                # out0: [b, units]
                out0, state0 = self.rnn_cell0(word, state0, training=training)
                # out1: [b, units]
                out1, state1 = self.rnn_cell1(out0, state1, training=training)

            # out: [b, units] => [b, 1]
            x = self.out_layer(out1)
            # p(y is pos|x)
            prob = tf.sigmoid(x)

            return prob
    
    
    def main():
        """Train and evaluate the GRUCell-based model, then print the elapsed wall time."""
        hidden_units, num_epochs = 64, 4

        started = time.time()
        net = MyRNN(hidden_units)
        net.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.losses.BinaryCrossentropy(),
            experimental_run_tf_function=False,
            metrics=['accuracy'],
        )
        # The test set doubles as validation data and is checked after every epoch.
        net.fit(
            train_data,
            epochs=num_epochs,
            validation_data=test_data,
            validation_freq=1,
        )
        net.evaluate(test_data)

        elapsed = time.time() - started
        print('total time cost:', elapsed)


    if __name__ == '__main__':
        main()
    

    GRU Layer
    import os
    import time
    import tensorflow as tf
    import numpy as np
    
    # NOTE(review): this runs after `import tensorflow`, so it presumably does not
    # silence messages emitted during the import itself — confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    assert tf.__version__.startswith('2.')
    # Enable on-demand memory allocation on every detected GPU. With no GPU the
    # loop is a no-op, so the script still runs on CPU instead of aborting.
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)

    # Fixed seeds for TensorFlow and NumPy random number generation.
    tf.random.set_seed(22)
    np.random.seed(22)

    batchsz = 128
    # keep only the most frequent words
    total_words = 10000
    # every review is padded/truncated to this many tokens
    max_review_len = 80
    # size of each word embedding vector
    embedding_len = 100
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
    x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

    # Batched pipelines; incomplete trailing batches are dropped.
    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    train_data = train_data.shuffle(1000).batch(batchsz, drop_remainder=True)
    test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    test_data = test_data.batch(batchsz, drop_remainder=True)
    print('x_train shape', x_train.shape, tf.reduce_max(y_train), tf.reduce_min(y_train))
    print('x_test shape:', x_test.shape)
    
    
    class MyRNN(tf.keras.Model):
        """Embedding -> two stacked ``GRU`` layers -> single-unit dense -> sigmoid."""

        def __init__(self, units):
            """
            :param units: hidden size of each GRU layer
            """
            super(MyRNN, self).__init__()

            # token ids -> embedding vectors: [b, 80] => [b, 80, 100]
            self.embedding = tf.keras.layers.Embedding(
                total_words, embedding_len, input_length=max_review_len)
            # The lower layer returns the whole sequence so the upper layer can
            # consume it; the upper layer uses the default return mode.
            lower_gru = tf.keras.layers.GRU(units, dropout=0.5, return_sequences=True)
            upper_gru = tf.keras.layers.GRU(units, dropout=0.5)
            self.rnn = tf.keras.Sequential([lower_gru, upper_gru])
            # classification head: [b, units] => [b, 1]
            self.out_layer = tf.keras.layers.Dense(1)

        def call(self, inputs, training=None, mask=None):
            """
            net(x) net(x, training=True): train mode
            net(x, training=False): test
            :param inputs: [b, 80] token ids
            :param training: not forwarded explicitly by this method
            :param mask: not used by this method
            :return: [b, 1] sigmoid output
            """
            # [b, 80] => [b, 80, 100]
            embedded = self.embedding(inputs)
            # [b, 80, 100] => [b, units]
            features = self.rnn(embedded)
            # [b, units] => [b, 1]
            logits = self.out_layer(features)
            # p(y is pos|x)
            return tf.sigmoid(logits)
    
    
    def main():
        """Train and evaluate the GRU-layer model, then print the elapsed wall time."""
        hidden_units, num_epochs = 64, 4
        started = time.time()
        net = MyRNN(hidden_units)
        net.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.losses.BinaryCrossentropy(),
            metrics=['accuracy'],
        )
        # The test set doubles as validation data and is checked after every epoch.
        net.fit(
            train_data,
            epochs=num_epochs,
            validation_data=test_data,
            validation_freq=1,
        )
        net.evaluate(test_data)

        # Result noted by the original author: accuracy 0.8311, total time cost 26.32
        elapsed = time.time() - started
        print('total time cost:', elapsed)


    if __name__ == '__main__':
        main()