groupby と匿名関数(lambda)の特殊ケースへの対処:グループ化の結果が1グループしかなく、行ごとに値を取り出す必要がある場合

15185 ワード

問題が発生する前提条件:(1) groupby でグループ化した後に、(2) 匿名関数(lambda)で処理を行い、その中で行ごとに値を取り出す必要がある場合。
# Demonstration of the problem: for every 'start_time' group, subtract the
# 'A' value of the group's first row from each 'B' value in that group.
try:
    prf = df.groupby('start_time').apply(
        lambda grp: grp['B'] - grp.iloc[0]['A']
    )
except Exception as err:
    # Only report the failure so the error text can be inspected.
    print(err)

start_time に値が1種類しかなく、groupby の結果が1グループだけになる場合、エラーが発生する(エラーメッセージ:0)。
解決策:groupby の結果が1グループしかない場合は、groupby の後で関連する列だけを抽出してから匿名関数を適用する。
# Count the groups first: the single-group case is handled separately below.
group_num = len(df.groupby('start_time'))
# Alternative way to count: len(df.groupby('start_time').groups)
if group_num > 1:
    # Several groups: per group, subtract the first row's 'A' from every 'B'.
    prf = df.groupby('start_time').apply(lambda x: x['B'] - x.iloc[0]['A'])
    # Move index level 0 out of the index and into a regular column.
    prf = prf.reset_index(level=[0])
elif group_num == 1:
    # Exactly one group: select only the columns the lambda reads before
    # applying it (the workaround this snippet demonstrates).
    prf = df.groupby('start_time')[['B', 'A']].apply(lambda x: x['B'] - x.iloc[0]['A'])
elif group_num == 0:
    # No groups at all; prf is left unassigned on this path.
    # NOTE(review): the message text below is blank in the source -- the
    # original wording appears to have been lost; restore it if known.
    print('      ')


付録:groupby の関連用法
import pandas as pd
import numpy as np
# Build the sample DataFrame used by every example below.
data = {
    'Team': ['Riders', 'Riders', 'Devils', 'Devils', 'Kings',
             'kings', 'Kings', 'Kings', 'Riders', 'Royals', 'Royals', 'Riders'],
    'Rank': [1, 2, 2, 3, 3, 4, 1, 1, 2, 4, 1, 2],
    'Year': [2014, 2015, 2014, 2015, 2014, 2015, 2016, 2017, 2016, 2014, 2015, 2017],
    'Points': [876, 789, 863, 673, 741, 812, 756, 7988, 64, 701, 804, 690],
}
df = pd.DataFrame(data)
print(df)

# Grouping alone only creates a GroupBy object; printing it shows its repr.
print(df.groupby('Team'))
# .groups maps each group key to the row labels that belong to it.
print(df.groupby('Team').groups)
# Grouping by several columns yields tuple keys (Team, Year).
print(df.groupby(['Team', 'Year']).groups)

grouped = df.groupby('Year')
# Iterate over the groups: each item is (group key, sub-DataFrame).
for name, group in grouped:
    print(name)
    print(group)

# Select a single group by its key.
print('Get group 2014: ')
print(grouped.get_group(2014))

# Aggregate one column per group.
# String names are used instead of np.mean / np.sum / np.std: passing NumPy
# callables to agg() is deprecated in pandas (FutureWarning since 2.1), and
# the strings pin the pandas implementations that were being used anyway
# (so 'std' stays the sample standard deviation).
print('Group agg: test mean ')
print(grouped['Points'].agg('mean'))
# Several aggregations at once: one output column per function name.
agg = grouped['Points'].agg(['sum', 'mean', 'std'])
print(agg)

# Keep only the rows whose team has at least three rows.
# NOTE: this name shadows the builtin filter(); it is kept unchanged so any
# later code that reads the variable keeps working.
filter = df.groupby('Team').filter(lambda x: len(x) >= 3)
print(filter)