2.8データ-paddlepaddleデータセットuci_housing
4877 ワード
UCI Housing データセットモジュールは、https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ からデータセットをダウンロードし、トレーニングセットとテストセットを paddle の reader creator として解析します。各サンプルは、正規化後の特徴量と価格の数値で構成されます。
paddle.dataset.uci_housing:https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/data/dataset_cn/uci_housing_cn.html
paddle.dataset.uci_housing:https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/data/dataset_cn/uci_housing_cn.html
import paddle
import paddle.fluid as fluid
import numpy as np
import paddle.dataset.uci_housing as uci_housing
# uci_housing.train() and uci_housing.test() each return a "reader creator":
# calling the returned object (e.g. train()) gives an iterator over samples.
train = uci_housing.train()
# Console output recorded by the author after the call above (the progress
# bar appears to have overwritten the start of the message):
#   [==================================================]i_housing/housing.data not found, downloading http://paddlemodels.bj.bcebos.com/uci_housing/housing.data
test = uci_housing.test()

# Take the first sample from a fresh training reader and inspect it.
# The recorded outputs show a 2-element sample: a 13-value feature vector at
# index 0 and a 1-value price array at index 1.
a_sample = next(train())
print(len(a_sample))  # 2
print(a_sample[1])  # [24.]
print(a_sample[0].shape)  # (13,)
print(a_sample[0])  # [-0.0405441 0.06636364 -0.32356227 -0.06916996 -0.03435197 0.05563625 -0.03475696 0.02682186 -0.37171335 -0.21419304 -0.33569506 0.10143217 -0.21172912]
# Names of the 13 features exposed by the uci_housing module.
# The values are wrapped in print() so they are actually shown when this file
# is run as a script; a bare expression is only echoed in an interactive
# session (REPL / notebook) and is otherwise silently discarded.
print(len(uci_housing.feature_names))  # 13
print(uci_housing.feature_names)
# Recorded value (print() emits the list on a single line):
# ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
#  'PTRATIO', 'B', 'LSTAT']
# Raw data arrays kept on the module: each row has 14 columns, i.e.
# 13 feature columns (x) and 1 target column (y).
# uci_housing.UCI_TEST_DATA.shape: (102, 14)
# uci_housing.UCI_TRAIN_DATA.shape: (404, 14)
# NOTE(review): presumably these module-level arrays are only populated once
# uci_housing.train() / uci_housing.test() has been called (as this script
# does earlier) -- confirm against the paddle.dataset.uci_housing source.
#
# print(repr(...)) is used so the array is actually shown when run as a
# script, in the same "array(...)" form as the output recorded below; a bare
# expression would only be echoed in an interactive session.
print(repr(uci_housing.UCI_TEST_DATA))
# Recorded output (whitespace as captured by the author):
# array([[ 0.42616306, -0.11363636, 0.25525005, ..., -0.0686218 ,
# 0.40637243, 8.5 ],
# [ 0.72279828, -0.11363636, 0.25525005, ..., 0.07134996,
# 0.28495962, 5. ],
# [ 0.19222996, -0.11363636, 0.25525005, ..., 0.03415696,
# 0.2948934 , 11.9 ],
# ...,
# [-0.03993221, -0.11363636, 0.02907703, ..., 0.10143217,
# -0.1935172 , 23.9 ],
# [-0.03938337, -0.11363636, 0.02907703, ..., 0.09273279,
# -0.17033839, 22. ],
# [-0.04008226, -0.11363636, 0.02907703, ..., 0.10143217,
# -0.13170704, 11.9 ]])