Week4 Day2
📋 Basics of Recurrent Neural Networks
📌 Recurrent Neural Network
An RNN computes the current hidden state at each time step from that step's input and the hidden state carried over from the previous step, reusing the same weights at every step.
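In its standard (vanilla) form, with $\tanh$ as the usual nonlinearity, the recurrence is:

$$h_t = \tanh(W_{xh}\,x_t + W_{hh}\,h_{t-1} + b_h), \qquad y_t = W_{hy}\,h_t + b_y$$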
import torch
import torch.nn as nn

# B: batch size, L: maximum sequence length
vocab_size = 100
# Toy input: a batch of padded token-id sequences (hypothetical data, for illustration only)
data = [[85, 14, 2, 67, 11, 3], [24, 41, 69, 33, 91, 7]]
batch = torch.LongTensor(data)  # (B, L)
embedding_size = 256
embedding = nn.Embedding(vocab_size, embedding_size)
# d_w: embedding size
batch_emb = embedding(batch) # (B, L, d_w)
hidden_size = 512  # hidden size of the RNN
num_layers = 1     # number of RNN layers to stack
num_dirs = 1       # 1: unidirectional RNN, 2: bidirectional RNN
rnn = nn.RNN(
input_size=embedding_size,
hidden_size=hidden_size,
num_layers=num_layers,
bidirectional=True if num_dirs > 1 else False
)
h_0 = torch.zeros((num_layers * num_dirs, batch.shape[0], hidden_size)) # (num_layers * num_dirs, B, d_h)
hidden_states, h_n = rnn(batch_emb.transpose(0, 1), h_0)
num_classes = 2
classification_layer = nn.Linear(hidden_size, num_classes)
# C: number of classes
output = classification_layer(h_n.squeeze(0)) # (1, B, d_h) => (B, C)
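Since this RNN is single-layer and unidirectional, `h_n.squeeze(0)` is exactly the last time step of `hidden_states`; a quick sanity check, plus the classifier output shape:

assert torch.equal(hidden_states[-1], h_n.squeeze(0))  # final hidden state == last time step
print(output.shape)  # (B, C)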
📌 Backpropagation through time
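BPTT unrolls the recurrence over all time steps and backpropagates the loss through every one of them, which becomes expensive and numerically fragile (vanishing/exploding gradients) for long sequences. In practice the sequence is therefore often processed in chunks, detaching the hidden state between chunks (truncated BPTT). A minimal sketch reusing `rnn` and `batch_emb` from above; the chunk size and the squared-output loss are assumptions for illustration only:

import torch.optim as optim

chunk_size = 2                                    # assumed truncation length
seq = batch_emb.transpose(0, 1).detach()          # (L, B, d_w); detached so only the RNN is updated here
hidden = torch.zeros(num_layers * num_dirs, batch.shape[0], hidden_size)
optimizer = optim.SGD(rnn.parameters(), lr=1e-2)

for start in range(0, seq.size(0), chunk_size):
    hidden = hidden.detach()                      # cut the graph: gradients stop at the chunk boundary
    out, hidden = rnn(seq[start:start + chunk_size], hidden)
    dummy_loss = out.pow(2).mean()                # placeholder loss, just to drive backward()
    optimizer.zero_grad()
    dummy_loss.backward()
    optimizer.step()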
📌 Long Short-Term Memory
The LSTM adds a cell state that carries information across time steps largely unchanged, with any modifications controlled by gates.
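Concretely, three gates (forget, input, output) control how the cell state is read and written; the cell state itself is updated only by elementwise operations, which is what lets information and gradients flow across long spans of time:

$$f_t = \sigma(W_f[h_{t-1}, x_t] + b_f), \quad i_t = \sigma(W_i[h_{t-1}, x_t] + b_i), \quad o_t = \sigma(W_o[h_{t-1}, x_t] + b_o)$$

$$\tilde{c}_t = \tanh(W_c[h_{t-1}, x_t] + b_c), \qquad c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t, \qquad h_t = o_t \odot \tanh(c_t)$$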
import numpy as np
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use GPU if available

class RecurrentNeuralNetworkClass(nn.Module):
    def __init__(self, name='rnn', xdim=28, hdim=256, ydim=10, n_layer=3):
        super(RecurrentNeuralNetworkClass, self).__init__()
        self.name = name
        self.xdim = xdim
        self.hdim = hdim
        self.ydim = ydim
        self.n_layer = n_layer  # K
        self.rnn = nn.LSTM(
            input_size=self.xdim, hidden_size=self.hdim, num_layers=self.n_layer, batch_first=True)
        self.lin = nn.Linear(self.hdim, self.ydim)

    def forward(self, x):
        # Set initial hidden and cell states
        h0 = torch.zeros(self.n_layer, x.size(0), self.hdim).to(device)
        c0 = torch.zeros(self.n_layer, x.size(0), self.hdim).to(device)
        # RNN
        rnn_out, (hn, cn) = self.rnn(x, (h0, c0))
        # x: [N x L x Q] => rnn_out: [N x L x D]
        # Linear: classify from the hidden state of the last time step
        out = self.lin(rnn_out[:, -1, :])  # [N x ydim]
        return out
R = RecurrentNeuralNetworkClass(
    name='rnn', xdim=28, hdim=256, ydim=10, n_layer=2).to(device)
loss = nn.CrossEntropyLoss()
optm = optim.Adam(R.parameters(),lr=1e-3)
print ("Done.")
x_numpy = np.random.rand(2,20,28) # [N x L x Q]
x_torch = torch.from_numpy(x_numpy).float().to(device)
rnn_out, (hn, cn) = R.rnn(x_torch)  # forward pass through the LSTM layers
out = R.lin(rnn_out[:, -1, :])      # last time step => [N x ydim]
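The `loss` and `optm` defined above are not exercised in this snippet; a single optimization step could look like the sketch below, where the integer targets are random dummy labels used only for illustration:

targets = torch.randint(0, R.ydim, (x_torch.size(0),)).to(device)  # dummy class labels (assumption)
optm.zero_grad()
step_loss = loss(out, targets)  # out: [N x ydim], targets: [N]
step_loss.backward()
optm.step()
print("loss:", step_loss.item())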
📌 GRU
gru = nn.GRU(
input_size=embedding_size,
hidden_size=hidden_size,
num_layers=num_layers,
bidirectional=True if num_dirs > 1 else False
)
output_layer = nn.Linear(hidden_size, vocab_size)
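As a quick shape check (illustrative, reusing `batch_emb` and `h_0` from the vanilla-RNN example above), the GRU returns per-step outputs and a final hidden state with the same shapes as the RNN, since it keeps no separate cell state:

gru_outputs, gru_h_n = gru(batch_emb.transpose(0, 1), h_0)
print(gru_outputs.shape)  # (L, B, d_h)
print(gru_h_n.shape)      # (num_layers * num_dirs, B, d_h)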
📌 Language Modeling
Using the information from the previous time steps, the model predicts the next word at each step and feeds its own prediction back in as the next input (greedy decoding).
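The generation loop below needs an initial input token, an initial hidden state, and a maximum number of steps; these are not shown here, so the following setup is an assumption consistent with the variable names used in the loop (seed with the first token of each sequence and a zero hidden state):

max_len = 20                                               # assumed maximum number of generation steps
input_id = batch.transpose(0, 1)[0]                        # (B): first token of each sequence as the seed
hidden = torch.zeros((num_layers * num_dirs, batch.shape[0], hidden_size))  # (1, B, d_h)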
for t in range(max_len):
    input_emb = embedding(input_id).unsqueeze(0)  # (1, B, d_w)
    output, hidden = gru(input_emb, hidden)  # output: (1, B, d_h), hidden: (1, B, d_h)

    # V: vocab size
    output = output_layer(output)  # (1, B, V)
    probs, top_id = torch.max(output, dim=-1)  # probs: (1, B), top_id: (1, B)

    print("*" * 50)
    print(f"Time step: {t}")
    print(output.shape)
    print(probs.shape)
    print(top_id.shape)

    input_id = top_id.squeeze(0)  # (B)
Reference
https://velog.io/@zero217/Week4-Day2