データ分析学習ノート(二)-numpy:配列オブジェクト関連操作


配列とスカラーの演算
  • 加算、減算、乗算、除算、二乗などの演算
  • arr1 = np.array([np.arange(5), np.arange(5,10)])
    arr2 = np.array(np.arange(5))
    arr1
    '''
    [[0 1 2 3 4]
    [5 6 7 8 9]]
    '''
    
    arr2
    '''[0 1 2 3 4]'''
    
    
    arr1+arr2
    '''
    [[ 0  2  4  6  8]
    [ 5  7  9 11 13]]
    '''
    
    arr1-arr2
    ''' 
    [[0 0 0 0 0]
    [5 5 5 5 5]]
    '''
    
    
     arr1*arr2
     '''
     [[ 0  1  4  9 16]
     [ 0  6 14 24 36]]
     '''
    
    
    arr1.dot(arr2)       #        
    np.dot(arr1,arr2)    #        
    '''
    [30 80]
    '''
    
    
     arr2**2
     ''' [ 0  1  4  9 16]'''
    
     arr2/2
     '''[0.  0.5 1.  1.5 2. ]'''
    
    
    #      python               ,                  
    arr3 = np.array(np.arange(-5,5),dtype=float)  # np.float は NumPy 1.24 で削除されたため、組み込みの float を使う
    # python         
    abs(arr3)    #      [5. 4. 3. 2. 1. 0. 1. 2. 3. 4.]
    max(arr3)    #     4.0
    pow(arr3,3)  #    [-125.  -64.  -27.   -8.   -1.    0.    1.    8.   27.   64.]
    #       :TypeError: type numpy.ndarray doesn't define __round__ method
    round(arr3)  #     
    #  :          
    [round(item) for item in arr3]
    
    # math    
    arr4 = np.array(np.arange(1,5))
    math.sqrt(arr4) #   :TypeError: only size-1 arrays can be converted to Python scalars
    #  :          
    [math.sqrt(item) for item in arr4]
    '''
    [1.0, 1.4142135623730951, 1.7320508075688772, 2.0]
    '''
    
  • numpyに内蔵された演算
  • arr1 = np.array([np.arange(5), np.arange(5,10)])
    arr2 = np.array(np.arange(5))
    #        
    np.argmin(arr1)  # 0
    np.argmax(arr1)  # 9
    #   
    arr1.mean()      #     np.mean(arr1)   np.average(arr1)
    '''4.5'''
    #    
    np.median(arr1)  #    
    '''4.5'''
    #   
    arr1.cumsum()    #   ,    np.cumsum(arr1)
    '''[ 0  1  3  6 10 15 21 28 36 45]'''
    #   
    np.diff(arr1)
    '''[[1 1 1 1]
     [1 1 1 1]]
     '''
    #       
    arr1.dot(arr2)   #     np.dot(arr1,arr2)
    '''[30 80]'''
    #                 ,               
    np.nonzero(arr1)
    '''
    (array([0, 0, 0, 0, 1, 1, 1, 1, 1]), array([1, 2, 3, 4, 0, 1, 2, 3, 4])) # (0,0) の要素は 0 なので、結果に含まれない
    '''
    #   ,       
    a = np.array([[2,3,1,5],[2,1,0,3]])
    np.sort(a,axis=0)     #     
    '''[[2 1 0 3]
     [2 3 1 5]]
     '''
    np.sort(a,axis=1)     #     
    '''[[1 2 3 5]
     [0 1 2 3]]
     '''
    #        
    np.clip(arr1,3,6)
    '''[[3 3 3 3 4]
     [5 6 6 6 6]]
     '''

    配列のインデックスとスライス
  • 一次元配列のインデックスとスライス
  • #       
    arr = np.arange(9)  # [0 1 2 3 4 5 6 7 8]
    arr
    #       
    arr[2]       # 2
    #     ,   list    
    arr[2:6]     # [2 3 4 5]
    arr[::2]     # [0 2 4 6 8]
    arr[::-1]    # [8 7 6 5 4 3 2 1 0]
    # slice
    s = slice(2,6)
    arr[s]       #     arr[2:6] -> [2 3 4 5]
  • 多次元配列のインデックスとスライス
  • #       
    arr = np.arange(24).reshape(2,3,4)
    '''[[[ 0  1  2  3]
      [ 4  5  6  7]
      [ 8  9 10 11]]
    
     [[12 13 14 15]
      [16 17 18 19]
      [20 21 22 23]]]'''
    # []    ','            ,:    ,               ,        
    #            ,          , start:end:step
    arr[:,0,0]   #          0  0    [ 0 12]
    arr[0]       #     arr[0,:,:]   arr[0,...]
    arr[0,1]     #          1   [4 5 6 7]
    arr[0,1,::2] #          1 ,step 2  [4 6]
    arr[...,1]   #          1 
    '''[[ 1  5  9]
      [13 17 21]]
    '''
    arr[::-1]   #          
    '''[[[12 13 14 15]
      [16 17 18 19]
      [20 21 22 23]]
    
     [[ 0  1  2  3]
      [ 4  5  6  7]
      [ 8  9 10 11]]]
     '''
    arr[::-1,::-1,::-1]  #          ,        
    '''[[[23 22 21 20]
      [19 18 17 16]
      [15 14 13 12]]
    
     [[11 10  9  8]
      [ 7  6  5  4]
      [ 3  2  1  0]]]
     '''
    s = slice(None,None,-1)
    arr[(s,s,s)] #        
    '''[[[23 22 21 20]
      [19 18 17 16]
      [15 14 13 12]]
    
     [[11 10  9  8]
      [ 7  6  5  4]
      [ 3  2  1  0]]]
     '''
  • ブール索引
  • arr = np.arange(24).reshape(2,3,4)
    #   arr   10   
    arr[arr>10]  # [11 12 13 14 15 16 17 18 19 20 21 22 23]
    # arr   10    0
    arr[arr>10] = 0     
    '''[[[ 0  1  2  3]
      [ 4  5  6  7]
      [ 8  9 10  0]]
    
     [[ 0  0  0  0]
      [ 0  0  0  0]
      [ 0  0  0  0]]]
    '''
  • ファンシーインデックス
  • arr = np.arange(32).reshape((8, 4))
    #   [1,5,2,6]    
    arr[[1,5,2,6]]
    '''
    [[ 4  5  6  7]
     [20 21 22 23]
     [ 8  9 10 11]
     [24 25 26 27]]
    '''
    #   [0,1]    
    arr[:,[0,1]]
    #         ,           ,   [(1,0),(5,2),(2,1),(6,3),(0,2)]  
    #  : 、      
    arr[[1,5,2,6,0],[0,2,1,3,2]]
    '''
    [ 4 22  9 27  2]
    '''
    #          
    #          , [1,5,2,6]
    #       ,      , [0,2]
    arr[[1,5,2,6]][:,[0,2]]
    '''[[ 4  6]
     [20 22]
     [ 8 10]
     [24 26]]
    '''
    #        ix_         
    arr[np.ix_([1,5,2,6],[0,2])]
    '''[[ 4  6]
     [20 22]
     [ 8 10]
     [24 26]]
    '''

    配列の転置
  • 配列転置
  • #       numpy            ,    
    arr = np.arange(15).reshape((3, 5))
    '''[[ 0  1  2  3  4]
     [ 5  6  7  8  9]
     [10 11 12 13 14]]'''
    arr.T
    '''[[ 0  5 10]
     [ 1  6 11]
     [ 2  7 12]
     [ 3  8 13]
     [ 4  9 14]]'''
    arr.transpose() #      ,(0,1) -> (1,0)
    '''[[ 0  5 10]
     [ 1  6 11]
     [ 2  7 12]
     [ 3  8 13]
     [ 4  9 14]]'''
  • 配列転置関数transpose()
  • transpose()の引数が空の場合、デフォルトでは次元(軸)番号を逆順にします。たとえば二次元配列では (0,1)->(1,0) となり、行と列が入れ替わります。次元が3以上の場合は、軸の順序を引数で指定することで、対応する軸の入れ替えができます。
    arr = np.arange(12).reshape(2,3,2)
    '''[[[ 0  1]
      [ 2  3]
      [ 4  5]]
    
     [[ 6  7]
      [ 8  9]
      [10 11]]]'''
    #      ,     
    arr.transpose()   # arr.transpose(2,1,0) と同じ(arr 自体は変更しない)
    '''[[[ 0  6]
      [ 2  8]
      [ 4 10]]
    
     [[ 1  7]
      [ 3  9]
      [ 5 11]]]'''
    # 軸0と軸1を入れ替える(元の arr に対して適用)
    arr.transpose(1,0,2)
    '''[[[ 0  1]
      [ 6  7]]
    
     [[ 2  3]
      [ 8  9]]
    
     [[ 4  5]
      [10 11]]]'''
    # 軸1と軸2を入れ替える(元の arr に対して適用)
    arr.transpose(0,2,1)
    '''[[[ 0  2  4]
      [ 1  3  5]]
    
     [[ 6  8 10]
      [ 7  9 11]]]'''

    配列の次元を変更
    #       
    arr = np.arange(24)
    #   reshape()    
    arr = arr.reshape(2,3,4)
    '''[[[ 0  1  2  3]
      [ 4  5  6  7]
      [ 8  9 10 11]]
    
     [[12 13 14 15]
      [16 17 18 19]
      [20 21 22 23]]]'''
    #        shape, :            
    arr.shape = (6,4)
    '''[[ 0  1  2  3]
     [ 4  5  6  7]
     [ 8  9 10 11]
     [12 13 14 15]
     [16 17 18 19]
     [20 21 22 23]]'''
    #   resize()      
    arr.resize(12,2)
    '''[[ 0  1]
     [ 2  3]
     [ 4  5]
     [ 6  7]
     [ 8  9]
     [10 11]
     [12 13]
     [14 15]
     [16 17]
     [18 19]
     [20 21]
     [22 23]]'''
    '''reshape  :        ,    
    resize  :         ,    
    shape  :         '''
    #     
    arr.ravel()
    arr.flatten()
    '''    :ravel、flatten              ,  flatten               '''

    配列の結合
    #      
    a = np.arange(12).reshape(3,4)
    '''[[ 0  1  2  3]
     [ 4  5  6  7]
     [ 8  9 10 11]]
     '''
    b = a * 2
    '''[[ 0  2  4  6]
     [ 8 10 12 14]
     [16 18 20 22]]
    '''
    #     
    np.hstack((a,b))    #     np.concatenate((a, b), axis=1)    axis:1   ,0   
    '''
    [[ 0  1  2  3  0  2  4  6]
     [ 4  5  6  7  8 10 12 14]
     [ 8  9 10 11 16 18 20 22]]
    '''
    #     
    np.vstack((a,b))    #     np.concatenate((a, b), axis=0)
    '''
    [[ 0  1  2  3]
     [ 4  5  6  7]
     [ 8  9 10 11]
     [ 0  2  4  6]
     [ 8 10 12 14]
     [16 18 20 22]]
    '''
    #     
    np.dstack((a,b))
    '''
    [[[ 0  0]
      [ 1  2]
      [ 2  4]
      [ 3  6]]
    
     [[ 4  8]
      [ 5 10]
      [ 6 12]
      [ 7 14]]
    
     [[ 8 16]
      [ 9 18]
      [10 20]
      [11 22]]]
    '''
    
    #     
    np.column_stack((a, b))     #     np.hstack((a,b))
    '''
    [[ 0  1  2  3  0  2  4  6]
     [ 4  5  6  7  8 10 12 14]
     [ 8  9 10 11 16 18 20 22]]
    '''
    #     
    np.row_stack((a, b))         #     np.vstack((a,b))
    '''
    [[ 0  1  2  3]
     [ 4  5  6  7]
     [ 8  9 10 11]
     [ 0  2  4  6]
     [ 8 10 12 14]
     [16 18 20 22]]
    '''

    配列の分割
  • 均等分割
  • a = np.arange(12).reshape(3, 4)
    '''[[ 0  1  2  3]
     [ 4  5  6  7]
     [ 8  9 10 11]]
     '''
    #     ,            1,2,4
    np.hsplit(a,4)   #     np.split(a, 4, axis=1)    axis:1   ,0   
    '''[array([[0],
           [4],
           [8]]), 
        array([[1],
           [5],
           [9]]), 
        array([[ 2],
           [ 6],
           [10]]),
        array([[ 3],
           [ 7],
           [11]])]
    '''
    #     ,            1,3
    np.vsplit(a,3)   #     np.split(a, 3, axis=0)
    '''[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]'''
  • 非均等分割
  • np.array_split(a,3,axis=1) #     4 , 3 
    '''[array([[0, 1],
           [4, 5],
           [8, 9]]), array([[ 2],
           [ 6],
           [10]]), array([[ 3],
           [ 7],
           [11]])]
    '''
    np.array_split(a,4,axis=0) #     3 , 4 ,           
    '''[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]]), array([], shape=(0, 4), dtype=int64)]'''

    配列の変換
    b = np.array([ 1.+1.j,  3.+2.j])
    #  ndarray     python list  
    list = b.tolist()
    type(list)      # <class 'list'>
    print(list)     # [(1+1j), (3+2j)]
    #  ndarray     python string  
    string = b.tobytes()     # tostring() は NumPy 1.19 以降非推奨のため、同じ結果を返す tobytes() を使う
    type(string)     # <class 'bytes'>
    print(string)
    '''
    b'\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x08@\x00\x00\x00\x00\x00\x00\x00@'
    '''           
    #          ndarray  
    c = np.frombuffer(string, dtype=complex)  # バイナリモードの np.fromstring は非推奨のため frombuffer を使う
    c        # [1.+1.j 3.+2.j]
    type(c)  # <class 'numpy.ndarray'>
    c.dtype  # complex128
    
    d = np.fromstring('20:42:52',sep=':', dtype=int)
    d        # [20 42 52]
    d.dtype  # int64