# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------------------------------------------------
__Author__ = 'assasin'
__DateTime__ = '2020/1/5 15:13'
#-----------------------------------------------------------------------------------------------------------------------
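'''
Missing-value imputation demo.
NumPy: replace NaN entries with the mean of their column.
Pandas: do the same with DataFrame.fillna and per-column means.
'''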
import numpy as np
import pandas as pd
from numpy import *
def loadDataSet(filepath, delim='\t'):
    """Load a delimited text file of numbers into a NumPy matrix.

    Each non-blank line is stripped, split on ``delim`` and every field is
    converted with ``float`` (so tokens such as ``NaN`` become ``nan``).

    Args:
        filepath: Path of the text file to read.
        delim: Field separator used within a line. Defaults to a tab.

    Returns:
        A ``numpy.matrix`` with one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    # The context manager guarantees the handle is closed even when a field
    # fails to parse.
    with open(filepath) as fr:
        for line in fr:
            stripped = line.strip()
            # Skip blank lines (e.g. a trailing newline at end of file);
            # float('') would otherwise raise ValueError.
            if not stripped:
                continue
            rows.append([float(field) for field in stripped.split(delim)])
    # np.asmatrix is what the ``mat`` alias pointed to; the alias itself was
    # removed in NumPy 2.0.
    return np.asmatrix(rows)
def replaceNanwithMean(dataArr):
    """Replace NaN entries with the mean of their column, in place.

    Every column except the last one is processed; the last column is left
    untouched (presumably a label column -- confirm against callers).

    Args:
        dataArr: 2-D ``numpy.matrix`` (a 2-D ``ndarray`` also works) of
            numbers. It is modified in place.

    Returns:
        The same ``dataArr`` object, after imputation.
    """
    n_cols = np.shape(dataArr)[1]
    for col in range(n_cols - 1):
        # Flatten to 1-D so the same code handles matrix and ndarray columns.
        column = np.asarray(dataArr[:, col]).ravel()
        missing = np.isnan(column)
        # Nothing to fill, or no valid value to average: leave the column
        # unchanged instead of taking the mean of an empty selection.
        if not missing.any() or missing.all():
            continue
        # Mean of the non-NaN values, written to the rows that hold NaN.
        dataArr[np.nonzero(missing)[0], col] = column[~missing].mean()
    return dataArr
if __name__ == '__main__':
    # NumPy approach: load the matrix and impute NaN entries in place with
    # per-column means.
    dataArr = loadDataSet(r'../xxx.txt', ' ')
    replaceNanwithMean(dataArr)

    # Pandas approach: reload the untouched data into a DataFrame.
    datamat = loadDataSet(r'../xxx.txt', ' ')
    df = pd.DataFrame(datamat)
    # Reindexing past the last existing row appends five all-NaN rows.
    df = df.reindex(range(datamat.shape[0] + 5))
    # Fill every NaN with the mean of its column; DataFrame.mean() skips NaN
    # by default, so this matches computing each column mean separately.
    filled = df.fillna(df.mean())
    # np.asmatrix replaces the ``mat`` alias, which was removed in NumPy 2.0.
    print(np.asmatrix(filled.to_numpy()))