pandasデータピボットとクロステーブルクラスタリング分析

1328 ワード

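Given the DataFrame df below, complete the following tasks:
①  Group by column A and compute the mean of columns C and D
②  Group by columns A and B and compute both the mean and the sum of columns D and E
③  With B as the row index and A as the columns, count how often each (B, A) combination occurs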

# Sample data: two categorical key columns (A, B), one evenly spaced integer
# column (C) and two random float columns (D is negated, E is left as drawn).
df = pd.DataFrame({
    'A': ['one', 'two', 'three', 'one', 'two', 'three', 'one', 'two'],
    'B': ['h', 'h', 'h', 'h', 'f', 'f', 'f', 'f'],
    # The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` is the drop-in replacement.
    'C': np.linspace(10, 24, num=8, dtype=int),
    'D': np.random.rand(8) * -1,
    'E': np.random.rand(8),
})
print(df)
      A   B   C         D         E
0    one  h  10 -0.496740  0.775771
1    two  h  12 -0.512694  0.589485
2  three  h  14 -0.436854  0.584725
3    one  h  16 -0.620049  0.950842
4    two  f  18 -0.864078  0.137051
5  three  f  20 -0.064089  0.658747
6    one  f  22 -0.878971  0.499687
7    two  f  24 -0.175620  0.485176

Task 1
# Mean of columns C and D for each distinct value of A.
# The aggregation is named by string: recent pandas versions emit a
# FutureWarning when a NumPy callable such as `np.mean` is passed here.
pd.pivot_table(df, values=['C', 'D'], index=['A'], aggfunc='mean')

    C   D
A       
one 16  -0.665254
three   17  -0.250471
two 18  -0.517464

Task 2
# Mean and sum of columns D and E for each (A, B) pair.
# String names replace `np.mean` / `np.sum`, which trigger a FutureWarning in
# recent pandas versions; the resulting column labels ('mean', 'sum') are the same.
pd.pivot_table(
    df,
    values=['D', 'E'],
    index=['A', 'B'],
    aggfunc=['mean', 'sum'],
)

Task 3
# Frequency table: one row per value of B, one column per value of A,
# each cell holding the number of rows with that (B, A) combination.
pd.crosstab(index=df['B'], columns=df['A'])
A   one three   two
B           
f   1   1   2
h   2   1   1

1555491903(1).jpg