Pandas Index の更新と計算 (Modifying & computations)
15520 ワード
In [1]: import pandas as pd
...: df = pd.DataFrame({"学籍番号": [1001,1002,1003,1004,1005],
...: "name": ["A","B","C","D","E"],
...: "誕生日": ["1990/01/02","1992/02/05","1990/05/06","1991/05/30","1992/05/03"],
...: "性別": ["boy","girl","girl","boy","girl"],
...: "点数": [56,67,47,87,47]
...: })
...: df
Out[1]:
name 学籍番号 性別 点数 誕生日
0 A 1001 boy 56 1990/01/02
1 B 1002 girl 67 1992/02/05
2 C 1003 girl 47 1990/05/06
3 D 1004 boy 87 1991/05/30
4 E 1005 girl 47 1992/05/03
In [2]: df.index.all() # インデックスに 0 が含まれるため、すべてが True にはならない
Out[2]: False
In [3]: df.index.any()
Out[3]: True
In [4]: df.index.argmin()
Out[4]: 0
In [5]: df.index.argmax()
Out[5]: 4
In [6]: df.index.copy()
Out[6]: RangeIndex(start=0, stop=5, step=1)
In [7]: df.index.delete(1) # 位置 (location) を指定して削除する
Out[7]: Int64Index([0, 2, 3, 4], dtype='int64')
In [8]: df.index.drop(1) # ラベル (labels) を指定して削除する
Out[8]: Int64Index([0, 2, 3, 4], dtype='int64')
In [9]: df.index.drop_duplicates()
Out[9]: RangeIndex(start=0, stop=5, step=1)
In [10]: df.index.duplicated()
Out[10]: array([False, False, False, False, False], dtype=bool)
In [11]: df.index.equals([0,1,2,3,4]) # Index オブジェクト同士の比較なので、リストを渡すと False になる
Out[11]: False
In [12]: df.index.equals(df.index)
Out[12]: True
In [13]: df.index.factorize()
Out[13]:
(array([0, 1, 2, 3, 4], dtype=int64),
Int64Index([0, 1, 2, 3, 4], dtype='int64'))
In [14]: df.index.identical(df.index) # equals と似ているが、名前などの属性や型も一致するかを確認する
Out[14]: True
In [15]: df.index.min()
Out[15]: 0
In [16]: df.index.max()
Out[16]: 4
In [17]: df.index.reindex([1,2,3,4,5])
Out[17]:
(Int64Index([1, 2, 3, 4, 5], dtype='int64'),
array([ 1, 2, 3, 4, -1], dtype=int64))
In [18]: df.reindex([1,2,3,4,5])
Out[18]:
name 学籍番号 性別 点数 誕生日
1 B 1002.0 girl 67.0 1992/02/05
2 C 1003.0 girl 47.0 1990/05/06
3 D 1004.0 boy 87.0 1991/05/30
4 E 1005.0 girl 47.0 1992/05/03
5 NaN NaN NaN NaN NaN
In [19]: df.reset_index()
Out[19]:
index name 学籍番号 性別 点数 誕生日
0 0 A 1001 boy 56 1990/01/02
1 1 B 1002 girl 67 1992/02/05
2 2 C 1003 girl 47 1990/05/06
3 3 D 1004 boy 87 1991/05/30
4 4 E 1005 girl 47 1992/05/03
In [20]: df.index.repeat(2)
Out[20]: Int64Index([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype='int64')
In [21]: df.index.where(df.index.values>2,1) # 条件が False の位置を 1 に置き換える
Out[21]: Int64Index([1, 1, 1, 3, 4], dtype='int64')
In [22]: df.index.take([1,2,4])
Out[22]: Int64Index([1, 2, 4], dtype='int64')
In [23]: df.index.putmask(df.index>1,0)
Out[23]: Int64Index([0, 1, 0, 0, 0], dtype='int64')
In [24]: df.index.set_names("wang",inplace=True)
...: df
Out[24]:
name 学籍番号 性別 点数 誕生日
wang
0 A 1001 boy 56 1990/01/02
1 B 1002 girl 67 1992/02/05
2 C 1003 girl 47 1990/05/06
3 D 1004 boy 87 1991/05/30
4 E 1005 girl 47 1992/05/03
In [25]: df.index.unique()
Out[25]: Int64Index([0, 1, 2, 3, 4], dtype='int64', name='wang')
In [26]: df.index.nunique()
Out[26]: 5
In [27]: df.index.value_counts()
Out[27]:
4 1
3 1
2 1
1 1
0 1
Name: wang, dtype: int64
In [28]: df.index.fillna(2)
Out[28]: RangeIndex(start=0, stop=5, step=1, name='wang')
In [29]: df.index.dropna()
Out[29]: RangeIndex(start=0, stop=5, step=1, name='wang')
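注意: df.reindex と df.index.reindex は名前が似ていますが、戻り値が異なります。df.index.reindex は新しい Index とインデクサ配列のタプルを返し (Out[17])、df.reindex は指定したラベルに合わせて並べ直した DataFrame を返します (Out[18])。また、df.reset_index() は元のインデックスを列に移し、新しいインデックスオブジェクト (メモリアドレスも異なる) を持つ DataFrame を返します。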