Pandas Index の更新と計算(Modifying & computations)

15520 ワード

In [1]: import pandas as pd
   ...: df = pd.DataFrame({"学籍番号": [1001,1002,1003,1004,1005],
   ...:                    "name": ["A","B","C","D","E"],
   ...:                  "生年月日": ["1990/01/02","1992/02/05","1990/05/06","1991/05/30","1992/05/03"],
   ...:                   "性別": ["boy","girl","girl","boy","girl"],
   ...:                    "成績": [56,67,47,87,47]
   ...:                   })
   ...: df
Out[1]: 
  name  学籍番号    性別  成績        生年月日
0    A  1001   boy  56  1990/01/02
1    B  1002  girl  67  1992/02/05
2    C  1003  girl  47  1990/05/06
3    D  1004   boy  87  1991/05/30
4    E  1005  girl  47  1992/05/03

In [2]: df.index.all() # インデックスに 0 が含まれるため False(全要素が真のときのみ True)
Out[2]: False

In [3]: df.index.any()
Out[3]: True

In [4]: df.index.argmin()
Out[4]: 0

In [5]: df.index.argmax()
Out[5]: 4

In [6]: df.index.copy()
Out[6]: RangeIndex(start=0, stop=5, step=1)

In [7]: df.index.delete(1) # 指定した位置の要素を削除(引数は位置 location)
Out[7]: Int64Index([0, 2, 3, 4], dtype='int64')

In [8]: df.index.drop(1) # 指定したラベルの要素を削除(引数はラベル labels)
Out[8]: Int64Index([0, 2, 3, 4], dtype='int64')

In [9]: df.index.drop_duplicates()
Out[9]: RangeIndex(start=0, stop=5, step=1)

In [10]: df.index.duplicated()
Out[10]: array([False, False, False, False, False], dtype=bool)

In [11]: df.index.equals([0,1,2,3,4]) # 比較対象がリストで Index オブジェクトではないため False
Out[11]: False

In [12]: df.index.equals(df.index)
Out[12]: True

In [13]: df.index.factorize()
Out[13]: 
(array([0, 1, 2, 3, 4], dtype=int64),
 Int64Index([0, 1, 2, 3, 4], dtype='int64'))

In [14]: df.index.identical(df.index) # equals に似ているが、name などの属性や型も一致するかを確認する
Out[14]: True

In [15]: df.index.min()
Out[15]: 0

In [16]: df.index.max()
Out[16]: 4

In [17]: df.index.reindex([1,2,3,4,5])
Out[17]: 
(Int64Index([1, 2, 3, 4, 5], dtype='int64'),
 array([ 1,  2,  3,  4, -1], dtype=int64))

In [18]: df.reindex([1,2,3,4,5])
Out[18]: 
  name    学籍番号    性別    成績        生年月日
1    B  1002.0  girl  67.0  1992/02/05
2    C  1003.0  girl  47.0  1990/05/06
3    D  1004.0   boy  87.0  1991/05/30
4    E  1005.0  girl  47.0  1992/05/03
5  NaN     NaN   NaN   NaN         NaN

In [19]: df.reset_index()
Out[19]: 
   index name  学籍番号    性別  成績        生年月日
0      0    A  1001   boy  56  1990/01/02
1      1    B  1002  girl  67  1992/02/05
2      2    C  1003  girl  47  1990/05/06
3      3    D  1004   boy  87  1991/05/30
4      4    E  1005  girl  47  1992/05/03

In [20]: df.index.repeat(2)
Out[20]: Int64Index([0, 0, 1, 1, 2, 2, 3, 3, 4, 4], dtype='int64')

In [21]: df.index.where(df.index.values>2,1) # 条件を満たさない位置の値を 1 に置き換える
Out[21]: Int64Index([1, 1, 1, 3, 4], dtype='int64')

In [22]: df.index.take([1,2,4])
Out[22]: Int64Index([1, 2, 4], dtype='int64')

In [23]: df.index.putmask(df.index>1,0)
Out[23]: Int64Index([0, 1, 0, 0, 0], dtype='int64')

In [24]: df.index.set_names("wang",inplace=True)
    ...: df
Out[24]: 
     name  学籍番号    性別  成績        生年月日
wang                                 
0       A  1001   boy  56  1990/01/02
1       B  1002  girl  67  1992/02/05
2       C  1003  girl  47  1990/05/06
3       D  1004   boy  87  1991/05/30
4       E  1005  girl  47  1992/05/03

In [25]: df.index.unique()
Out[25]: Int64Index([0, 1, 2, 3, 4], dtype='int64', name='wang')

In [26]: df.index.nunique()
Out[26]: 5

In [27]: df.index.value_counts()
Out[27]: 
4    1
3    1
2    1
1    1
0    1
Name: wang, dtype: int64

In [28]: df.index.fillna(2)
Out[28]: RangeIndex(start=0, stop=5, step=1, name='wang')

In [29]: df.index.dropna()
Out[29]: RangeIndex(start=0, stop=5, step=1, name='wang')

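注意: df.reindex と df.index.reindex は名前は同じですが別物です。どちらも元のオブジェクトは変更せず、新しいオブジェクト(メモリアドレスが異なる)を返します。df.reindex は指定したラベルに合わせて並べ直した新しい DataFrame を返し(存在しないラベルの行は NaN になる)、df.index.reindex は新しい Index と、元の位置を示すインデクサ配列(存在しないラベルは -1)のタプルを返します。一方、df.reset_index() は元のインデックスを列に移し、0 から始まる新しいインデックスオブジェクトを設定します。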