cross validation:クロス検証
10244 ワード
devデータセットはクロス検証に用いられ,異なるアルゴリズムの性能を評価するためのものである.一方,testデータセットは,選択したアルゴリズムの正確度を計算するために用いられる.
def crossValidation(xArr, yArr, numVal=10):
    """Pick a ridge-regression weight vector via repeated random hold-out runs.

    For each of ``numVal`` runs the sample indices are shuffled, the first
    90% are used for training and the remainder for testing.  ``ridgeTest``
    is fitted on the training part and each of its first 30 weight rows is
    scored on the standardised test part with ``rssError``.  The row index
    with the lowest error averaged over all runs is selected, the weights
    are mapped back to the original feature scale, and the results are
    printed.

    Args:
        xArr: Sequence of feature rows (indexable by integer position).
        yArr: Sequence of target values, same length as ``xArr``.
        numVal: Number of random train/test runs to average over.

    Returns:
        None.  The selected weights, the un-standardised model and its
        constant term are written to stdout.

    Note:
        ``ridgeTest`` and ``rssError`` are defined elsewhere in this file.
        This function assumes ``ridgeTest`` returns an array with at least
        30 weight rows and standardises features by dividing by the
        variance -- TODO confirm against its implementation.
    """
    numLambdas = 30  # number of ridgeTest weight rows scored per run

    m = len(yArr)
    # random.shuffle mutates in place, so it needs a real list; a bare
    # range object cannot be shuffled on Python 3.
    indexList = list(range(m))
    errorMat = np.zeros((numVal, numLambdas))

    for i in range(numVal):
        trainX, trainY = [], []
        testX, testY = [], []
        random.shuffle(indexList)
        # First 90% of the shuffled indices train, the rest test.
        for j, idx in enumerate(indexList):
            if j < m * 0.9:
                trainX.append(xArr[idx])
                trainY.append(yArr[idx])
            else:
                testX.append(xArr[idx])
                testY.append(yArr[idx])

        wMat = ridgeTest(trainX, trainY)

        # None of the following depends on k, so compute it once per run.
        # np.asmatrix is used because the np.mat alias is gone in NumPy 2.0.
        matTrainX = np.asmatrix(trainX)
        meanTrain = np.mean(matTrainX, 0)
        varTrain = np.var(matTrainX, 0)
        # Standardise the test rows with the *training* statistics.
        matTestX = (np.asmatrix(testX) - meanTrain) / varTrain
        meanTrainY = np.mean(trainY)
        testYArr = np.array(testY)

        for k in range(numLambdas):
            yEst = matTestX * np.asmatrix(wMat[k, :]).T + meanTrainY
            errorMat[i, k] = rssError(yEst.T.A, testYArr)

    # Average each weight row's error over all runs and take the minimum.
    meanErrors = np.mean(errorMat, 0)
    minMean = float(min(meanErrors))
    # NOTE(review): wMat here is the fit from the final run only; the row
    # index is chosen from the averaged errors.  Confirm this is intended.
    bestWeights = wMat[np.nonzero(meanErrors == minMean)]
    print("the best regression params :", bestWeights)

    # Undo the standardisation Xreg = (x - meanX) / var(x) so the model is
    # expressed in terms of the raw features:
    #   y = x * (w / var(x)) - sum(meanX * w / var(x)) + mean(y)
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    meanX = np.mean(xMat, 0)
    varX = np.var(xMat, 0)
    unReg = bestWeights / varX
    print('the best model from Ridge Regression is:\n', unReg)
    # np.sum (not builtin sum) so the 1 x n product collapses to a scalar.
    print('with constant term', -1 * np.sum(np.multiply(meanX, unReg)) + np.mean(yMat))