# python-pandas モジュール学習 (study notes on the Python pandas module)

# 11228 ワード (word count: 11228)

import pandas as pd
import numpy as np



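# Creating a DataFrame and selecting data
# population={'beijing':1242,'shanghai':423142,'guangzhou':244221}
# gdp={'beijing':122,'shanghai':4242,'guangzhou':4221}
# a=pd.DataFrame({'pop':population,'gdp':gdp})
#
# print(a['gdp'])  # select the 'gdp' column of a by key
# print(a.gdp)  # same column via attribute access; only works when the name is a valid identifier that does not clash with a DataFrame attribute (a.pop is the method, not the 'pop' column)
# print(a.loc['beijing'])  # select one row by label with loc
# print(a.loc[['beijing','shanghai']])  # a list of labels selects several rows
# a.iloc[0,1]=0  # set the cell at row position 0, column position 1 to 0
# s=pd.Series([1,1,2],index=['beijing','shanghai','guangzhou'])
# a['e']=s  # add the Series as a new column 'e' (aligned on the index)
# print(a)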


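# Inspecting and sorting data
# dates=pd.date_range(start='2020-1-1',periods=6)
# df=pd.DataFrame(np.random.randint(0,10,(6,4)),index=dates,columns=['A','B','C','D'])
# # df.describe()  # summary statistics: count, mean, std, min, quartiles, max
# df.info()  # index range, column dtypes, non-null counts, memory usage
# df.head()  # first rows (5 by default)
# df.tail()  # last rows (5 by default)
# df.T  # transpose
# # df.sort_index()  # sort by index labels
# # df.sort_values()  # sort by values; a DataFrame needs the `by` argument
# print(df.info())  # info() prints its report itself and returns None, so this also prints "None"
# print(df.head(2))  # first 2 rows
# print(df.tail(3))  # last 3 rows
# print(df.T)
# print(df.sort_index(axis=0))
# print(df.sort_index(axis=1))
# print(df.sort_values('B'))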


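# Arithmetic
# a=pd.DataFrame([1,2,3])
# # print(a-2)  # subtract 2 from every element
# b=pd.DataFrame([1,2,3])
# # print(a+b)  # element-wise addition of label-aligned frames
# c=pd.DataFrame(np.random.randint(1,10,size=(1,3)))
# print(a@c)  # matrix product
# d=pd.DataFrame(np.arange(9).reshape((3,3)),columns=['a','s','d'])
# e=pd.DataFrame(np.random.randint(10,size=(2,2)),columns=['a','s'])
# # print(d+e)  # labels present in only one frame produce NaN
# print(e.add(d,fill_value=0))  # treat a value missing from one frame as 0 before adding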



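# Missing values
# a=pd.DataFrame(np.arange(9).reshape((3,3)))
# print(a)
# a.iloc[:2,2]=np.nan  # set the first two rows of column position 2 to NaN
#
# print(a)
# a.dropna(axis=1,how='all')  # drop columns that are entirely NaN; returns a new frame, a itself is unchanged
# print(a)
# a.fillna(value=1)  # replace NaN with 1; returns a new frame, a itself is unchanged
# print(a)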


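# Combining data
# a=pd.DataFrame(np.zeros((3,4)),columns=['a','b','c','d'])
# b=pd.DataFrame(np.zeros((3,4)),columns=['a','b','c','d'])
# print(pd.concat([a,b]))  # stack the rows of b below a
# print(pd.concat([a,b],ignore_index=True))  # stack rows and renumber the index from 0
# print(pd.concat([a,b],axis=1))  # place the frames side by side (along columns)
# pd.merge(a,b)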


# Grouping and aggregation
# Build a six-row frame whose 'key' column repeats A/B/C twice, then
# aggregate the two numeric columns per key.
records = {
    'key': list('ABCCBA'),
    'data1': range(6),
    'data2': range(20, 26),
}
df = pd.DataFrame(records)
print(df)

# Reuse one groupby object for both aggregations.
by_key = df.groupby('key')
print(by_key.sum())   # per-key totals of data1 and data2
print(by_key.mean())  # per-key averages of data1 and data2