【pandas学習ノート】Series

7213 ワード

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Seriesの作成と基本操作
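# Create a Series from a list; the index defaults to 0..n-1
# np.nan: missing value (its presence makes the dtype float64)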
>>>s1 = pd.Series([1,2,3,4,np.nan,5,6,7]) 
0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
5    5.0
6    6.0
7    7.0
dtype: float64

>>>s1.values
array([ 1.,  2.,  3.,  4., nan,  5.,  6.,  7.])

>>>s1.index
RangeIndex(start=0, stop=8, step=1)
# Create a Series with an explicit (label) index
>>>s2 = pd.Series([21,23,42,21,23],index=['Jack','Lucy','Helen','Milky','Jasper'])
Jack      21
Lucy      23
Helen     42
Milky     21
Jasper    23
dtype: int64

>>>s2['Jack']
21

>>>s2.loc['Jack'] # label-based access
21

>>>s2.iloc[0] # position-based access
21

>>>print(s2.shape,s2.size)
(5,) 5

>>>s2.head(2) # first n rows; defaults to 5 when n is omitted
Jack    21
Lucy    23
dtype: int64

>>>s2.describe()
count     5.0
mean     26.0
std       9.0
min      21.0
25%      21.0
50%      23.0
75%      23.0
max      42.0
dtype: float64

>>>s2.sort_values() # sort by values (ascending by default)
Jack      21
Milky     21
Lucy      23
Jasper    23
Helen     42
dtype: int64

>>>s2[s2>22] # select the people who are older than 22
Lucy      23
Helen     42
Jasper    23
dtype: int64

>>>'Lucy' in s2
True

>>>s2_dict = s2.to_dict() # convert the Series to a dict
{'Helen': 42, 'Jack': 21, 'Jasper': 23, 'Lucy': 23, 'Milky': 21}

>>>s2_series = pd.Series(s2_dict) # convert the dict back to a Series
Helen     42
Jack      21
Jasper    23
Lucy      23
Milky     21
dtype: int64

# When an index is passed along with a dict, labels missing from the dict get NaN
>>>name = ['Jack','Lucy','Helen','Milky','Tom','Jasper','Helen']
>>>s2_new = pd.Series(s2_dict,index = name)
Jack      21.0
Lucy      23.0
Helen     42.0
Milky     21.0
Tom        NaN
Jasper    23.0
Helen     42.0
dtype: float64

>>>s2_new.drop_duplicates() # drop duplicates by value (the index is ignored); the first occurrence is kept
Jack     21.0
Lucy     23.0
Helen    42.0
Tom       NaN
dtype: float64

>>>pd.isnull(s2_new) # flag missing values; equivalent to s2_new.isnull()
Jack      False
Lucy      False
Helen     False
Milky     False
Tom        True
Jasper    False
Helen     False
dtype: bool