cross validation:クロス検証

10244 ワード

devデータセットはクロス検証に用いられ、異なるアルゴリズムの性能を比較・評価するためのものである。一方、テストデータセットは、選択したアルゴリズムの正確度を計算するために用いられる。
def crossValidation(xArr,yArr,numVal = 10):
    """Pick ridge-regression weights by repeated random 90/10 hold-out.

    In each of ``numVal`` rounds the sample indices are shuffled, the first
    ~90% are passed to ``ridgeTest`` and the remaining ~10% are used to score
    each of the first 30 weight rows it returns with ``rssError``.  The row
    index with the lowest mean held-out error is then used to print the
    selected weights, the same weights rescaled to the original feature
    scale, and the matching constant term.

    Args:
        xArr: Sequence of feature rows, indexable by integer position.
        yArr: Sequence of numeric targets, same length as ``xArr``.
        numVal: Number of shuffle/split rounds to average over.

    Returns:
        None. All results are printed.

    NOTE(review): the weights reported at the end are taken from the
    ``ridgeTest`` result of the *last* round only; just the row index is
    chosen from the averaged errors. Kept as in the original.
    """
    # Rows of the ridgeTest result that are scored; assumes ridgeTest returns
    # at least this many rows -- TODO confirm against ridgeTest.
    numLambdas = 30
    m = len(yArr)
    # random.shuffle needs a mutable sequence; on Python 3 a bare range()
    # is immutable and would raise TypeError.
    indexList = list(range(m))
    # Number of positions j satisfying j < m*0.9 (the training share).
    nTrain = int(np.ceil(m * 0.9))
    errorMat = np.zeros((numVal, numLambdas))
    for i in range(numVal):
        random.shuffle(indexList)
        trainIdx = indexList[:nTrain]
        testIdx = indexList[nTrain:]
        trainX = [xArr[j] for j in trainIdx]
        trainY = [yArr[j] for j in trainIdx]
        testX = [xArr[j] for j in testIdx]
        testY = [yArr[j] for j in testIdx]
        wMat = ridgeTest(trainX, trainY)
        # Standardise the held-out rows with the *training* statistics.
        # This does not depend on k, so it is done once per round.
        matTrainX = np.asmatrix(trainX)
        meanTrain = np.mean(matTrainX, 0)
        varTrain = np.var(matTrainX, 0)
        matTestX = (np.asmatrix(testX) - meanTrain) / varTrain
        trainYMean = np.mean(trainY)
        testYArr = np.array(testY)
        for k in range(numLambdas):
            yEst = matTestX * np.asmatrix(wMat[k, :]).T + trainYMean
            errorMat[i, k] = rssError(yEst.T.A, testYArr)
    meanErrors = np.mean(errorMat, 0)
    minMean = float(min(meanErrors))
    bestWeights = wMat[np.nonzero(meanErrors == minMean)]
    print("the best regression params :", bestWeights)
    xMat = np.asmatrix(xArr)
    meanX = np.mean(xMat, 0)
    varX = np.var(xMat, 0)
    # Predictions were made on (x - mean) / var, so dividing the weights by
    # the variance expresses them on the original feature scale.
    unReg = bestWeights / varX
    print('the best model from Ridge Regression is:\n', unReg)
    # np.sum reduces to a scalar; the builtin sum() would iterate the rows of
    # the 1 x n matrix and hand back a vector instead of one constant.
    print('with constant term', -1 * np.sum(np.multiply(meanX, unReg)) + np.mean(yArr))