TensorFlow 2.0 Notes (2): Advanced Operations


Merge and Split
  • tf.concat
  • tf.split
  • tf.stack
  • tf.unstack

  • tf.concat
  • Along distinct dim/axis
  • #tf.concat
    a = tf.ones([4, 35, 8])
    b = tf.ones([2, 35, 8])
    c = tf.concat([a, b], axis=0)  # -> [6, 35, 8]; all other axes must match
    
    a = tf.ones([4, 32, 8])
    b = tf.ones([4, 3, 8])
    d = tf.concat([a, b], axis=1)  # -> [4, 35, 8]
    
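    tf.concat requires every axis except the concat axis to agree; otherwise it raises an error. A minimal sketch of the failure case (shapes chosen for illustration):
    #concat with mismatched shapes fails
    a = tf.ones([4, 35, 8])
    b = tf.ones([2, 3, 8])   # axis 1 disagrees with a
    try:
        tf.concat([a, b], axis=0)
    except (tf.errors.InvalidArgumentError, ValueError) as e:
        print('concat failed:', e)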

    tf.split
    #tf.split
    a = tf.ones([4, 35, 8])
    b = tf.ones([4, 35, 8])
    c = tf.stack([a, b])                                   # -> [2, 4, 35, 8]
    res = tf.split(c, axis=3, num_or_size_splits=2)        # 2 tensors of [2, 4, 35, 4]
    
    res = tf.split(c, axis=3, num_or_size_splits=[2,2,4])  # pieces of size 2, 2, 4 along axis 3
    

    tf.stack
  • Create new dim
  • #tf.stack
    a = tf.ones([4, 35, 8])
    b = tf.ones([4, 35, 8])
    c = tf.concat([a, b], axis=-1)  # -> [4, 35, 16], no new dim
    d = tf.stack([a, b], axis=0)    # -> [2, 4, 35, 8], new dim in front
    e = tf.stack([a, b], axis=3)    # -> [4, 35, 8, 2], new dim at the end
    

    tf.unstack
    #tf.unstack
    a = tf.ones([4, 35, 8])
    b = tf.ones([4, 35, 8])
    c = tf.stack([a, b])            # -> [2, 4, 35, 8]
    aa, bb = tf.unstack(c, axis=0)  # two tensors of [4, 35, 8]
    res = tf.unstack(c, axis=3)     # list of 8 tensors of [2, 4, 35]
    
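    Unlike tf.split, tf.unstack removes the axis entirely, and the number of pieces is fixed by that axis's size. A quick comparison sketch:
    #unstack vs split on the same tensor
    c = tf.stack([tf.ones([4, 35, 8]), tf.ones([4, 35, 8])])  # [2, 4, 35, 8]
    aa, bb = tf.unstack(c, axis=0)                      # each [4, 35, 8], axis gone
    s1, s2 = tf.split(c, axis=0, num_or_size_splits=2)  # each [1, 4, 35, 8], axis kept
    print(aa.shape, s1.shape)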

    Data Statistics
  • tf.norm (vector norm)
  • tf.reduce_min/max/mean
  • tf.argmax/argmin
  • tf.equal
  • tf.unique

  • Vector norms:
    Euclidean norm: $\|x\|_2 = \left(\sum_k x_k^2\right)^{1/2}$
    Max norm: $\|x\|_\infty = \max_k |x_k|$
    $L_1$ norm: $\|x\|_1 = \sum_k |x_k|$
    tf.norm
    a = tf.ones([2, 2])
    tf.norm(a)                            # 2.0, Euclidean norm by default
    tf.sqrt(tf.reduce_sum(tf.square(a)))  # the same, written out
    
    b = tf.ones([4, 28, 28, 3])
    tf.norm(b)
    tf.sqrt(tf.reduce_sum(tf.square(b)))  # identical result
    

    L1 Norm
    a = tf.ones([2, 2])
    tf.norm(a)                 # 2.0, L2 over the whole tensor
    tf.norm(a, ord=2, axis=1)  # [1.414, 1.414], L2 per row
    tf.norm(a, ord=1)          # 4.0, L1 over the whole tensor
    tf.norm(a, ord=1, axis=0)  # [2., 2.], L1 per column
    tf.norm(a, ord=1, axis=1)  # [2., 2.], L1 per row
    

    reduce_min/max/mean
    a = tf.random.normal([4, 10])
    tf.reduce_mean(a), tf.reduce_min(a), tf.reduce_max(a)  # scalars over all entries
    tf.reduce_mean(a, axis=1), tf.reduce_min(a, axis=1), tf.reduce_max(a, axis=1)  # shape [4], one per row
    

    argmax/argmin
    a = tf.random.normal([4, 10])
    a.shape              # [4, 10]
    tf.argmax(a).shape   # [10]; reduces over axis=0 by default
    tf.argmax(a)         # row index of the max in each column
    tf.argmin(a)
    tf.argmin(a).shape   # [10]
    

    tf.equal
    a = tf.constant([1, 2, 3, 2, 5])
    b = tf.range(5)                              # [0, 1, 2, 3, 4]
    tf.equal(a, b)                               # element-wise; all False here
    res = tf.equal(a, b)
    tf.reduce_sum(tf.cast(res, dtype=tf.int32))  # number of matches: 0
    
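    The usual application of tf.equal is counting correct predictions; a minimal sketch with made-up logits:
    #accuracy from argmax + equal + cast
    logits = tf.constant([[0.1, 0.8, 0.1],
                          [0.6, 0.3, 0.1]])
    labels = tf.constant([1, 0], dtype=tf.int64)
    pred = tf.argmax(logits, axis=1)                       # [1, 0]
    correct = tf.cast(tf.equal(pred, labels), tf.float32)
    print(float(tf.reduce_mean(correct)))                  # 1.0 here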

    tf.unique
    a = tf.range(5)
    tf.unique(a)   # already unique: y=[0, 1, 2, 3, 4], idx=[0, 1, 2, 3, 4]
    a = tf.constant([4, 2, 2, 4, 3])
    tf.unique(a)   # y=[4, 2, 3], idx=[0, 1, 1, 0, 2]
    
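    Because tf.unique returns both the unique values and, for every original element, its index into them, the original tensor can be rebuilt with tf.gather:
    #round-trip: unique then gather
    a = tf.constant([4, 2, 2, 4, 3])
    unique, idx = tf.unique(a)
    restored = tf.gather(unique, idx)  # [4, 2, 2, 4, 3]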

    Tensor Sorting
  • Sort/argsort
  • Top-k
  • Top-5 Acc.

  • Sort/argsort
    a = tf.random.shuffle(tf.range(5))
    tf.sort(a, direction='DESCENDING')     # sorted values
    tf.argsort(a, direction='DESCENDING')  # indices that would sort a
    
    idx = tf.argsort(a, direction='DESCENDING')
    tf.gather(a, idx)                      # same result as tf.sort
    
    a = tf.random.uniform([3, 3], maxval=10, dtype=tf.int32)
    tf.sort(a)                             # sorts each row (last axis), ascending
    tf.sort(a, direction='DESCENDING')
    idx = tf.argsort(a)
    

    Top-k
    Returns only the top-k values and their indices, per row
    a = tf.random.uniform([3, 3], maxval=10, dtype=tf.int32)
    res = tf.math.top_k(a, 2)
    res.indices  # shape [3, 2]
    res.values   # shape [3, 2]
    

    Top-k accuracy
  • Prob: [0.1, 0.2, 0.3, 0.4]
  • Label: [2]
  • Only consider top-1 prediction: [3]
  • Only consider top-2 prediction: [3, 2]
  • Only consider top-3 prediction: [3, 2, 1]
  • prob = tf.constant([[0.1, 0.2, 0.7], [0.2, 0.7, 0.1]])
    target = tf.constant([2, 0])
    k_b = tf.math.top_k(prob, 3).indices      # [2, 3]
    k_b = tf.transpose(k_b, [1, 0])           # [3, 2]: row k holds the k-th guesses
    target = tf.broadcast_to(target, [3, 2])  # align labels with the prediction layout
    
    def accuracy(output, target, topk=(1,)):
        """Fraction of samples whose label is among the top-k predictions, for each k."""
        maxk = max(topk)
        batch_size = target.shape[0]
    
        pred = tf.math.top_k(output, maxk).indices     # [b, maxk]
        pred = tf.transpose(pred, perm=[1, 0])         # [maxk, b]
        target_ = tf.broadcast_to(target, pred.shape)  # labels repeated per rank
        correct = tf.equal(pred, target_)              # [maxk, b] hit matrix
    
        res = []
        for k in topk:
            # a sample counts as correct if any of its first k guesses hit
            correct_k = tf.cast(tf.reshape(correct[:k], [-1]), dtype=tf.float32)
            correct_k = tf.reduce_sum(correct_k)
            acc = float(correct_k / batch_size)
            res.append(acc)
    
        return res
    
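    A usage sketch with the toy prob/target from above:
    #expected: top-1 = 0.5, top-2 = 1.0, top-3 = 1.0
    prob = tf.constant([[0.1, 0.2, 0.7], [0.2, 0.7, 0.1]])
    target = tf.constant([2, 0])
    print(accuracy(prob, target, topk=(1, 2, 3)))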

    Padding and Copying
  • pad
  • tile
  • broadcast_to

  • pad
    a = tf.reshape(tf.range(9), [3, 3])
    tf.pad(a, [[0, 0], [0, 0]])  # no padding
    tf.pad(a, [[1, 0], [0, 0]])  # one row of zeros on top
    tf.pad(a, [[1, 1], [0, 0]])  # one row on top and one on the bottom
    tf.pad(a, [[1, 1], [1, 0]])  # plus one column on the left
    tf.pad(a, [[1, 1], [1, 1]])  # one row/column on every side
    

    Image padding
    a = tf.random.normal([4, 28, 28, 3])
    b = tf.pad(a, [[0, 0], [2, 2], [2, 2], [0, 0]])  # pad height and width only
    b.shape                                          # [4, 32, 32, 3]
    
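    Padding also comes up with variable-length sentences. A sketch assuming keras's pad_sequences convenience helper (a NumPy-based wrapper, not tf.pad):
    #pad/truncate token lists to a fixed length of 6
    from tensorflow.keras.preprocessing.sequence import pad_sequences
    sentences = [[7, 2, 4], [1, 9, 5, 8, 3, 6, 2]]
    padded = pad_sequences(sentences, maxlen=6)  # pads/truncates at the front by default
    print(padded.shape)  # (2, 6)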

    tile
  • repeats data along a dim n times
  • [a, b, c] tiled twice → [a, b, c, a, b, c]
  • broadcast_to

  • Inner dim first
    a = tf.reshape(tf.range(9), [3, 3])
    tf.tile(a, [1, 2])  # -> [3, 6], columns repeated twice
    tf.tile(a, [2, 1])  # -> [6, 3], rows repeated twice
    tf.tile(a, [2, 2])  # -> [6, 6]
    

    tile vs. broadcast_to
    a = tf.reshape(tf.range(9), [3, 3])
    
    aa = tf.expand_dims(a, axis=0)  # [1, 3, 3]
    tf.tile(aa, [2, 1, 1])          # [2, 3, 3], data physically copied
    tf.broadcast_to(aa, [2, 3, 3])  # [2, 3, 3], same values, no copy
    
    
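    tf.tile materializes the copies immediately, while tf.broadcast_to behaves like implicit broadcasting and defers the copy, so it is the cheaper choice whenever the consuming op supports broadcasting. The results are element-wise identical:
    #tile and broadcast_to agree on values
    a = tf.reshape(tf.range(9), [3, 3])
    aa = tf.expand_dims(a, axis=0)
    same = tf.reduce_all(tf.equal(tf.tile(aa, [2, 1, 1]),
                                  tf.broadcast_to(aa, [2, 3, 3])))
    print(bool(same))  # True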

    Tensor Clipping
  • clip_by_value
  • relu
  • clip_by_norm
  • gradient clipping

  • clip_by_value
    #clip_by_value
    a = tf.range(10)
    tf.maximum(a, 2)           # floor at 2
    tf.minimum(a, 8)           # cap at 8
    tf.clip_by_value(a, 2, 8)  # both bounds at once
    

    relu
    a = a - 5         # values now span -5..4
    tf.nn.relu(a)     # negatives clipped to 0
    tf.maximum(a, 0)  # equivalent
    

    clip_by_norm
    a = tf.random.normal([2, 2], mean=10)
    tf.norm(a)                   # roughly 20 for this mean
    aa = tf.clip_by_norm(a, 15)  # rescales so the norm is at most 15
    tf.norm(aa)                  # 15.0
    
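    clip_by_norm rescales the whole tensor, so only the magnitude changes; the direction (the unit vector) is preserved. A quick check:
    #clipping changes the norm, not the direction
    a = tf.random.normal([2, 2], mean=10)
    aa = tf.clip_by_norm(a, 15)
    print(float(tf.norm(a)), float(tf.norm(aa)))
    print(float(tf.reduce_max(tf.abs(a / tf.norm(a) - aa / tf.norm(aa)))))  # ~0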

    gradient clipping
  • Gradient exploding or vanishing
  • set lr=1
  • new_grads, total_norm = tf.clip_by_global_norm(grads, 25)
  • import os
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import datasets, layers, optimizers
    
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    print(tf.__version__)
    
    (x, y), _ = datasets.mnist.load_data()
    x = tf.convert_to_tensor(x, dtype=tf.float32) / 50.  # note: /50 (not /255) keeps inputs large, so gradients get big enough to need clipping
    y = tf.convert_to_tensor(y)
    y = tf.one_hot(y, depth=10)
    print('x:', x.shape, 'y:', y.shape)
    train_db = tf.data.Dataset.from_tensor_slices((x,y)).batch(128).repeat(30)
    x,y = next(iter(train_db))
    print('sample:', x.shape, y.shape)
    # print(x[0], y[0])
    
    
    def main():
    
        # 784 => 512
        w1, b1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1)), tf.Variable(tf.zeros([512]))
        # 512 => 256
        w2, b2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1)), tf.Variable(tf.zeros([256]))
        # 256 => 10
        w3, b3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1)), tf.Variable(tf.zeros([10]))
    
        optimizer = optimizers.SGD(learning_rate=0.01)
    
        for step, (x,y) in enumerate(train_db):
    
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 784))
    
            with tf.GradientTape() as tape:
    
                # layer1.
                h1 = x @ w1 + b1
                h1 = tf.nn.relu(h1)
                # layer2
                h2 = h1 @ w2 + b2
                h2 = tf.nn.relu(h2)
                # output
                out = h2 @ w3 + b3
                # out = tf.nn.relu(out)
    
                # compute loss
                # [b, 10] - [b, 10]
                loss = tf.square(y-out)
                # [b, 10] => [b]
                loss = tf.reduce_mean(loss, axis=1)
                # [b] => scalar
                loss = tf.reduce_mean(loss)
    
            # compute gradient
            grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
            # print('==before==')
            # for g in grads:
            #     print(tf.norm(g))
            
            grads, _ = tf.clip_by_global_norm(grads, 15)  # rescale all grads so their global norm is at most 15
    
            # print('==after==')
            # for g in grads:
            #     print(tf.norm(g))
            # update w' = w - lr*grad
            optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w3, b3]))
    
            if step % 100 == 0:
                print(step, 'loss:', float(loss))
    
    
    if __name__ == '__main__':
        main()
    

    Advanced Features
  • where
  • scatter_nd
  • meshgrid

  • where(tensor)
    a = tf.random.normal([3, 3])
    mask = a > 0
    tf.boolean_mask(a, mask)  # 1-D tensor of the positive entries
    indices = tf.where(mask)  # coordinates of the True entries
    tf.gather_nd(a, indices)  # the same values, fetched by coordinate
    
  • where(cond, A, B)
  • reusing the mask from above
    A = tf.ones([3, 3])
    B = tf.zeros([3, 3])
    tf.where(mask, A, B)  # pick from A where mask is True, else from B
    
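    A common one-liner built on this form keeps the positive entries and zeroes out the rest:
    #where as a select: same effect as tf.nn.relu here
    a = tf.random.normal([3, 3])
    tf.where(a > 0, a, tf.zeros_like(a))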

    scatter_nd
  • tf.scatter_nd(indices, updates, shape)
  • indices = tf.constant([[4], [3], [1], [7]])
    updates = tf.constant([9, 10, 11, 12])
    shape = tf.constant([8])
    tf.scatter_nd(indices, updates, shape)  # -> [0, 11, 0, 10, 9, 0, 0, 12]
    
    indices = tf.constant([[0], [2]])
    updates = tf.constant([[[5, 5, 5, 5], [6, 6, 6, 6],
                            [7, 7, 7, 7], [8, 8, 8, 8]],
    
                           [[5, 5, 5, 5], [6, 6, 6, 6],
                            [7, 7, 7, 7], [8, 8, 8, 8]]])
    updates.shape  # [2, 4, 4]
    shape = tf.constant([4, 4, 4])
    tf.scatter_nd(indices, updates, shape)  # slices 0 and 2 filled, slices 1 and 3 stay zero
    
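    tf.scatter_nd always scatters onto an all-zero base. To update an existing tensor instead, tf.tensor_scatter_nd_update applies the same indexing against a given base:
    #scatter onto a non-zero base tensor
    base = tf.ones([8], dtype=tf.int32)
    indices = tf.constant([[4], [3], [1], [7]])
    updates = tf.constant([9, 10, 11, 12])
    tf.tensor_scatter_nd_update(base, indices, updates)  # -> [1, 11, 1, 10, 9, 1, 1, 12]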

    meshgrid
    import tensorflow as tf
    import matplotlib.pyplot as plt
    
    
    def func(x):
        """Evaluate sin(x0) + sin(x1) on a grid.
    
        :param x: points of shape [..., 2]
        :return: z of shape [...]
        """
        z = tf.math.sin(x[..., 0]) + tf.math.sin(x[..., 1])
    
        return z
    
    
    x = tf.linspace(0., 2*3.14, 500)
    y = tf.linspace(0., 2*3.14, 500)
    # [500, 500]
    point_x, point_y = tf.meshgrid(x, y)
    # [500, 500, 2]
    points = tf.stack([point_x, point_y], axis=2)
    # points = tf.reshape(points, [-1, 2])
    print('points:', points.shape)
    z = func(points)
    print('z:', z.shape)
    
    plt.figure('plot 2d func value')
    plt.imshow(z, origin='lower', interpolation='none')
    plt.colorbar()
    
    plt.figure('plot 2d func contour')
    plt.contour(point_x, point_y, z)
    plt.colorbar()
    plt.show()