Python: NumPy と pandas によるデータの集約・グループ化処理と分析

2068 ワード

import numpy as np
import pandas as pd
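'''
Common groupby aggregation functions:
count        Number of non-NA values in the group
sum          Sum of non-NA values
mean         Mean of non-NA values
median       Arithmetic median of non-NA values
std, var     Unbiased (n-1 denominator) standard deviation and variance
min, max     Minimum and maximum of non-NA values
prod         Product of non-NA values
first, last  First and last non-NA values
'''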
# Sample frame: one label column ('level') and one numeric column ('num').
data = pd.DataFrame({
    'level': ['a', 'b', 'c', 'b', 'a'],
    'num': [3, 5, 6, 8, 9],
})

# Group the 'num' values by the label found in the same row of 'level'.
combine = data['num'].groupby(data['level'])
print(combine.sum())
# Mean of 'num' within each level.
print('      :', combine.mean())
# Number of rows that fall into each level.
print('         :', combine.size())


def _split_by_dtype(frame):
    """Return ``(dtype, sub-frame)`` pairs, one per distinct column dtype.

    Each sub-frame holds the columns of *frame* that share that dtype, in
    their original column order. Pairs are ordered by the dtype's string
    name so the result is deterministic.
    """
    columns_of = {}
    for column, dtype in frame.dtypes.items():
        columns_of.setdefault(dtype, []).append(column)
    return [(dtype, frame[columns_of[dtype]])
            for dtype in sorted(columns_of, key=str)]


# Split the columns of `data` by dtype.
# This used to be `data.groupby(data.dtypes, axis=1)`, but the `axis=1` form of
# DataFrame.groupby is deprecated since pandas 2.1 and rejected by newer
# releases. Iterating `combine_1` still yields (dtype, DataFrame) pairs, so the
# dict conversion below works exactly as it did with the groupby object.
combine_1 = _split_by_dtype(data)
print('            :', dict(list(combine_1)))

# Group whole rows by 'level' and aggregate by function name.
new_data = data.groupby('level')
print('        ', new_data.agg('mean'))
# Per-column aggregation: each column gets its own aggregation function.
records_2 = {
    'level': ['a', 'b', 'c', 'b', 'a'],
    'num': [3, 5, 6, 8, 9],
    'num1': [2, 5, 9, 6, 8],
}
data_2 = pd.DataFrame(records_2)
new_data_2 = data_2.groupby(by='level')
# Column name -> aggregation: mean of 'num' and sum of 'num1' per level.
per_column_agg = {'num': 'mean', 'num1': 'sum'}
print('         :', new_data_2.agg(per_column_agg))
# "transform" section: build a 5x5 array of random floats and display it.
# np.random.rand draws uniformly from the half-open interval [0, 1).
sample_shape = (5, 5)
np_data = np.random.rand(*sample_shape)
print("transform   :", np_data)
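'''
Notes on np.random.randn(d0, d1, d2, ..., dn), which samples the standard
normal distribution (note: the code above calls np.random.rand instead):
1) With no arguments it returns a single float;
2) With one argument it returns a rank-1 array, which cannot represent a row/column vector or a matrix;
3) With two or more arguments it returns an array of those dimensions, which can represent a vector or a matrix;
4) np.random.standard_normal() is similar to np.random.randn(), but np.random.standard_normal()
   takes its shape as a single tuple.
5) The arguments to np.random.randn() should be integers; old NumPy releases truncated a float argument to an integer, but current releases raise TypeError.
'''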
# Pivot table and cross-tabulation on a frame with two label columns.
data_3 = pd.DataFrame(
    {
        'level': ['a', 'b', 'c', 'b', 'a'],
        'key': ['one', 'two', 'one', 'two', 'one'],
        'num': [3, 5, 6, 8, 9],
        'num1': [2, 5, 9, 6, 8],
    }
)
print(data_3)
# Pivot: 'key' values become the row index and 'level' values the columns;
# the remaining columns are aggregated with the default aggfunc (mean).
print(pd.pivot_table(data_3, index='key', columns='level'))
# Cross-tabulation: count how often each (key, level) pair occurs;
# margins=True appends the 'All' row/column totals.
print(pd.crosstab(data_3['key'], data_3['level'], margins=True))