PyTorch: Implementing K-Fold Cross Validation
K-Fold Cross Validation
1. Overview of k-fold cross validation
k-fold cross validation is a way of splitting data into training and test sets. It mainly addresses the problem that different train/test splits can cause noticeable variation in the measured accuracy of a model, which happens most often during training when the amount of data is limited.
2. Implementation approach
2.1 Data preparation
import random
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Build a synthetic dataset: 100 samples per class, each sample is 28x28
x = torch.rand(100,28,28)
y = torch.randn(100,28,28)
x = torch.cat((x,y),dim=0)
label = [1]*100 + [0]*100                 # class 1 for the first 100 samples, class 0 for the rest
label = torch.tensor(label, dtype=torch.long)
index = [i for i in range(len(x))]        # shuffle the sample order so the two classes are mixed
random.shuffle(index)
x = x[index]
label = label[index]
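After shuffling, x has shape (200, 28, 28) and label has shape (200,). As a minor variation (not in the original), the index list plus random.shuffle can be replaced by torch.randperm:

perm = torch.randperm(len(x))   # random permutation of 0..199
x, label = x[perm], label[perm]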
2.2 Defining the network model and dataset class
# Model
class Net(nn.Module):
...
# DataSet
class TraindataSet(Dataset):
def __init__(self,train_features,train_labels):
...
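Both class bodies are elided in the original. A minimal sketch that is consistent with the rest of the code (28x28 inputs, two output classes, and a Dataset that simply indexes the feature and label tensors) could look like the following; the layer sizes are assumptions, not the author's actual model, and the imports added in 2.1 are assumed:

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Flatten(),              # (N, 28, 28) -> (N, 784)
            nn.Linear(28 * 28, 64),
            nn.ReLU(),
            nn.Linear(64, 2),          # two classes: 0 and 1
        )

    def forward(self, x):
        return self.fc(x)


class TraindataSet(Dataset):
    def __init__(self, train_features, train_labels):
        self.x = train_features
        self.y = train_labels

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]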
2.3 K-Fold training (the key step)
k_fold(10, x, label)  # k = 10: run 10-fold cross validation on the shuffled data

def k_fold(k, X_train, y_train, num_epochs=3, learning_rate=0.001, weight_decay=0.1, batch_size=5):
    train_loss_sum, test_loss_sum = 0, 0
    train_acc_sum, test_acc_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)  # fold i is the test set, the rest is the training set
        net = Net()  # re-initialize the model for each of the K runs
        # train on the k-1 training folds and evaluate on the held-out fold
        train_ls, test_ls = train(net, *data, num_epochs, learning_rate,
                                  weight_decay, batch_size)
        ...  # accumulate train_loss_sum / test_loss_sum for this fold
        ...
        ...  # accumulate train_acc_sum / test_acc_sum for this fold
        ...
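The elided bookkeeping can be filled in along these lines. This sketch assumes that train returns lists of (loss, accuracy) pairs, one per epoch (see the evaluation sketch in 2.5), which is an assumption rather than the original author's exact code:

def k_fold(k, X_train, y_train, num_epochs=3, learning_rate=0.001,
           weight_decay=0.1, batch_size=5):
    train_loss_sum, test_loss_sum = 0, 0
    train_acc_sum, test_acc_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = Net()                                    # fresh model for every fold
        train_ls, test_ls = train(net, *data, num_epochs,
                                  learning_rate, weight_decay, batch_size)
        # keep the metrics of the final epoch of this fold
        train_loss_sum += train_ls[-1][0]
        train_acc_sum += train_ls[-1][1]
        test_loss_sum += test_ls[-1][0]
        test_acc_sum += test_ls[-1][1]
    # report the average over the k held-out folds
    print('avg train loss %.4f, avg train acc %.3f' % (train_loss_sum / k, train_acc_sum / k))
    print('avg test loss %.4f, avg test acc %.3f' % (test_loss_sum / k, test_acc_sum / k))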
k_fold runs the network training and the corresponding test-set evaluation k times, and outputs the average of these k test results. The k_fold function therefore needs to accept the value of K, the full dataset, and the remaining training hyperparameters. k_fold is both the entry point and the final step of the whole procedure.
2.4 Splitting the data into K folds
def get_k_fold_data(k, i, X, y):
    # return the data for fold i: X_train/y_train are the k-1 training folds, X_test/y_test the held-out fold
    assert k > 1
    fold_size = X.shape[0] // k  # samples per fold: total sample count // k (integer division)
    X_train, y_train = None, None
    for j in range(k):
        idx = slice(j * fold_size, (j + 1) * fold_size)  # slice(start, end, step)
        X_part, y_part = X[idx, :], y[idx]  # the j-th fold of data and labels
        if j == i:  # fold i becomes the test set
            X_test, y_test = X_part, y_part
        elif X_train is None:
            X_train, y_train = X_part, y_part
        else:
            X_train = torch.cat((X_train, X_part), dim=0)  # append the remaining folds to the training set
            y_train = torch.cat((y_train, y_part), dim=0)
    return X_train, y_train, X_test, y_test
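With the 200-sample toy dataset and k = 10, each fold holds 20 samples; any remainder left by the integer division is simply dropped. A quick illustrative sanity check:

X_tr, y_tr, X_te, y_te = get_k_fold_data(10, 0, x, label)
print(X_tr.shape, X_te.shape)   # torch.Size([180, 28, 28]) torch.Size([20, 28, 28])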
2.5 Training one fold
loss_func = nn.CrossEntropyLoss()  # cross-entropy loss for the two classes

def train(net, X_train, y_train, X_test, y_test, num_epochs, learning_rate, weight_decay, batch_size):
    train_ls, test_ls = [], []  # per-epoch records of train_loss and test_loss
    # wrap the training fold in the Dataset / DataLoader defined above
    dataset = TraindataSet(X_train, y_train)
    train_iter = DataLoader(dataset, batch_size, shuffle=True)
    # Adam optimizer; weight_decay adds L2 regularization
    optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate, weight_decay=weight_decay)
    for epoch in range(num_epochs):
        for X, y in train_iter:  # mini-batch training
            output = net(X)
            loss = loss_func(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        ### after each epoch, record loss and accuracy on the train and test data
        # (evaluation code elided in the original)
    return train_ls, test_ls
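The per-epoch evaluation is elided in the original. One way to fill it in, consistent with the (loss, accuracy) pairs assumed in the k_fold sketch above, is a hypothetical helper like the following:

def evaluate(net, X, y):
    # average loss and accuracy over a full feature/label tensor (hypothetical helper)
    net.eval()
    with torch.no_grad():
        output = net(X)
        loss = loss_func(output, y).item()
        acc = (output.argmax(dim=1) == y).float().mean().item()
    net.train()
    return loss, acc

# inside the epoch loop of train():
#     train_ls.append(evaluate(net, X_train, y_train))
#     test_ls.append(evaluate(net, X_test, y_test))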