pandasのテクニック

1205 ワード

1、DataFrameの行を走査して新しい行を追加する例
import pandas as pd
# Sample frame: a cell in column A may hold several comma-separated values.
df1 = pd.DataFrame([['1,2,3', 'a'], ['4', 'b']], columns=list('AB'))

# Collect one extra row per individual value of every multi-valued A cell.
add_df = pd.DataFrame(columns=df1.columns)
for index, row in df1.iterrows():
    a_list = row['A'].split(',')
    if len(a_list) > 1:
        for i in a_list:
            # Work on a copy of the row with A replaced by the single value.
            row1 = row.copy(deep=True)
            row1['A'] = i
            # Append at the next positional label of add_df.
            add_df.loc[len(add_df.index)] = row1

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# The original index labels are kept (no ignore_index), as append did.
df1 = pd.concat([df1, add_df])

2、レコード件数の集計
import pandas as pd
# Sample frame, built column by column.
df1 = pd.DataFrame(
    {
        'A': ['1,2,3', '4', '4'],
        'B': ['a', 'b', 'b1'],
    }
)

# Count how many times each distinct value occurs in column B.
df1.loc[:, 'B'].value_counts()


3、重複レコードの削除
import pandas as pd
# Sample frame, built column by column; rows 1 and 2 are identical.
df1 = pd.DataFrame(
    {
        'A': ['1,2,3', '4', '4', '4'],
        'B': ['a', 'b', 'b', 'b1'],
    }
)

# Drop rows whose (A, B) pair repeats, keeping the last occurrence of each.
df1 = df1.drop_duplicates(keep='last', subset=['A', 'B'])


4、JSONファイルを読み込む
import pandas as pd
# Open the file explicitly as UTF-8 instead of relying on the platform default
# encoding, and hand the file object straight to pandas: passing a literal
# JSON string to read_json is deprecated since pandas 2.1.
with open('data/net.json', encoding='utf-8') as f:
    # orient='records' expects a list of {column: value} objects.
    df1 = pd.read_json(f, orient='records')