# Python: NumPy と pandas によるデータのグループ化・集約処理と分析
# 2068 ワード
import numpy as np
import pandas as pd
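'''
Common GroupBy aggregation methods:
count        number of non-NA values in the group
sum          sum of non-NA values
mean         mean of non-NA values
median       arithmetic median of non-NA values
std, var     unbiased (denominator n-1) standard deviation and variance
min, max     minimum and maximum of non-NA values
prod         product of non-NA values
first, last  first and last non-NA values
'''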
# Sample data: one categorical key column and one numeric value column.
data = pd.DataFrame({
    'level': ['a', 'b', 'c', 'b', 'a'],
    'num': [3, 5, 6, 8, 9],
})

# Group the 'num' values by the 'level' key; the aggregations below all
# reuse this one grouped object.
combine = data['num'].groupby(data['level'])
print(combine.sum())
# Per-group mean.
print(' :', combine.mean())
# Per-group number of rows.
print(' :', combine.size())

# Group the columns by dtype.  ``DataFrame.groupby(..., axis=1)`` was
# deprecated in pandas 2.1 and removed in pandas 3.0, so the transposed
# frame is grouped by its row labels (the original column names) instead.
# ``sort=False`` keeps the keys in column order and avoids ordering
# comparisons between unrelated dtype objects.
combine_1 = data.T.groupby(data.dtypes, sort=False)
# Each group's index holds column names of ``data``; selecting them from the
# original frame yields the per-dtype sub-frames with their dtypes intact.
print(' :', {dtype: data[block.index] for dtype, block in combine_1})

# Aggregate every non-key column of the whole frame, grouped by 'level'.
new_data = data.groupby('level')
print(' ', new_data.agg('mean'))
# Sample frame with one grouping key ('level') and two numeric columns.
data_2 = pd.DataFrame(dict(
    level=list('abcba'),
    num=[3, 5, 6, 8, 9],
    num1=[2, 5, 9, 6, 8],
))
new_data_2 = data_2.groupby(by='level')
# A column -> function mapping applies a different aggregation to each
# column: the mean of 'num' and the sum of 'num1'.
print(' :', new_data_2.aggregate({'num': 'mean', 'num1': 'sum'}))
# transform: draw a 5x5 array of uniform samples from [0, 1) and display it.
np_data = np.random.random_sample((5, 5))
print("transform :", np_data)
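'''
np.random.randn(d0,d1,d2……dn)
1) With no arguments, a single float is returned;
2) With one argument, a rank-1 array is returned, which cannot represent a row/column vector or a matrix;
3) With two or more arguments, an array of the corresponding dimensions is returned, which can represent a vector or a matrix;
4) np.random.standard_normal() is similar to np.random.randn(), but np.random.standard_normal() takes its shape as a
tuple.
5) The arguments of np.random.randn() should be integers; old NumPy releases truncated floats to integers, current releases raise TypeError.
'''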
# Sample frame with two categorical keys and two numeric columns.
data_3 = pd.DataFrame(dict(
    level=list('abcba'),
    key=['one', 'two', 'one', 'two', 'one'],
    num=[3, 5, 6, 8, 9],
    num1=[2, 5, 9, 6, 8],
))
print(data_3)
# Pivot table: 'key' values form the rows and 'level' values the columns;
# no aggfunc is given, so the default (mean) is applied to the other columns.
print(pd.pivot_table(data_3, index='key', columns='level'))
# Cross-tabulation counting each (key, level) combination; margins=True
# adds the "All" row and column totals.
print(pd.crosstab(index=data_3['key'], columns=data_3['level'], margins=True))