Week4 Day3
📋 Sequence to Sequence with Attention
📌 Seq2Seq Model
📌 Seq2Seq Model with Attention
An attention mechanism is needed so that the decoder, at each time step, can focus on the most relevant positions of the source sequence. The DotAttention module below implements dot-product attention: the current decoder hidden state acts as the query, and each encoder output serves as both key and value.
import torch
import torch.nn as nn
import torch.nn.functional as F

class DotAttention(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, decoder_hidden, encoder_outputs):  # (1, B, d_h), (S_L, B, d_h)
        query = decoder_hidden.squeeze(0)  # (B, d_h)
        key = encoder_outputs.transpose(0, 1)  # (B, S_L, d_h)
        # Dot product between the query and every encoder output gives one score per source position.
        energy = torch.sum(torch.mul(key, query.unsqueeze(1)), dim=-1)  # (B, S_L)
        attn_scores = F.softmax(energy, dim=-1)  # (B, S_L)
        # Weighted sum of the encoder outputs (values) with the attention scores.
        attn_values = torch.sum(torch.mul(encoder_outputs.transpose(0, 1), attn_scores.unsqueeze(2)), dim=1)  # (B, d_h)
        return attn_values, attn_scores
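As a quick sanity check, the snippet below runs DotAttention on random tensors; the batch size, source length, and hidden size are arbitrary illustrative values.

batch_size, src_len, d_h = 4, 10, 512  # illustrative sizes
attention = DotAttention()
decoder_hidden = torch.randn(1, batch_size, d_h)
encoder_outputs = torch.randn(src_len, batch_size, d_h)
attn_values, attn_scores = attention(decoder_hidden, encoder_outputs)
print(attn_values.shape, attn_scores.shape)  # torch.Size([4, 512]) torch.Size([4, 10])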
class Decoder(nn.Module):
    # The layers used below (embedding, attention, rnn, output_linear) are set up in __init__ as in the
    # ConcatAttention decoder further down, except that here self.rnn takes only the embedding as input and
    # self.output_linear maps 2*hidden_size (RNN output concatenated with the attention value) to vocab_size.
    def forward(self, batch, encoder_outputs, hidden):  # batch: (B), encoder_outputs: (S_L, B, d_h), hidden: (1, B, d_h)
        batch_emb = self.embedding(batch).unsqueeze(0)  # (1, B, d_w)
        outputs, hidden = self.rnn(batch_emb, hidden)  # (1, B, d_h), (1, B, d_h)
        # Attention uses the updated hidden state (computed after the RNN step) as the query.
        attn_values, attn_scores = self.attention(hidden, encoder_outputs)  # (B, d_h), (B, S_L)
        concat_outputs = torch.cat((outputs, attn_values.unsqueeze(0)), dim=-1)  # (1, B, 2d_h)
        return self.output_linear(concat_outputs).squeeze(0), hidden  # (B, V), (1, B, d_h)
📌 Different Attention Mechanisms
Instead of a plain dot product, concat (additive) attention scores each source position by passing the concatenation of the decoder hidden state and the encoder output through a small feed-forward network (a linear layer, tanh, then a projection to a scalar).
class ConcatAttention(nn.Module):
    def __init__(self):
        super().__init__()
        self.w = nn.Linear(2 * hidden_size, hidden_size, bias=False)
        self.v = nn.Linear(hidden_size, 1, bias=False)

    def forward(self, decoder_hidden, encoder_outputs):  # (1, B, d_h), (S_L, B, d_h)
        src_max_len = encoder_outputs.shape[0]
        decoder_hidden = decoder_hidden.transpose(0, 1).repeat(1, src_max_len, 1)  # (B, S_L, d_h)
        encoder_outputs = encoder_outputs.transpose(0, 1)  # (B, S_L, d_h)
        concat_hiddens = torch.cat((decoder_hidden, encoder_outputs), dim=2)  # (B, S_L, 2d_h)
        energy = torch.tanh(self.w(concat_hiddens))  # (B, S_L, d_h)
        attn_scores = F.softmax(self.v(energy), dim=1)  # (B, S_L, 1)
        attn_values = torch.sum(torch.mul(encoder_outputs, attn_scores), dim=1)  # (B, d_h)
        return attn_values, attn_scores
class Decoder(nn.Module):
    def __init__(self, attention):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.attention = attention
        self.rnn = nn.GRU(
            embedding_size + hidden_size,  # the attention value is concatenated to the input embedding
            hidden_size
        )
        self.output_linear = nn.Linear(hidden_size, vocab_size)

    def forward(self, batch, encoder_outputs, hidden):  # batch: (B), encoder_outputs: (S_L, B, d_h), hidden: (1, B, d_h)
        batch_emb = self.embedding(batch)  # (B, d_w)
        batch_emb = batch_emb.unsqueeze(0)  # (1, B, d_w)
        # Here attention is computed from the previous hidden state, and its output is fed into the RNN
        # together with the token embedding (Bahdanau-style).
        attn_values, attn_scores = self.attention(hidden, encoder_outputs)  # (B, d_h), (B, S_L, 1)
        concat_emb = torch.cat((batch_emb, attn_values.unsqueeze(0)), dim=-1)  # (1, B, d_w+d_h)
        outputs, hidden = self.rnn(concat_emb, hidden)  # (1, B, d_h), (1, B, d_h)
        return self.output_linear(outputs).squeeze(0), hidden  # (B, V), (1, B, d_h)
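A minimal usage sketch for greedy decoding with the ConcatAttention decoder: the hyperparameter values, the start-token id, and the random encoder outputs below are placeholder assumptions (in the real setup they would come from the encoder and the training configuration).

vocab_size, embedding_size, hidden_size = 10000, 256, 512  # illustrative hyperparameters
B, S_L, max_len = 4, 10, 20
sos_id = 1  # assumed start-of-sequence token id

decoder = Decoder(ConcatAttention())
encoder_outputs = torch.randn(S_L, B, hidden_size)  # placeholder for the encoder outputs
hidden = torch.randn(1, B, hidden_size)             # placeholder for the encoder's last hidden state

batch = torch.full((B,), sos_id, dtype=torch.long)  # start every sequence with <sos>
generated = []
for _ in range(max_len):
    logits, hidden = decoder(batch, encoder_outputs, hidden)  # (B, V), (1, B, d_h)
    batch = logits.argmax(dim=-1)                             # greedy choice becomes the next input
    generated.append(batch)
generated = torch.stack(generated, dim=0)  # (max_len, B)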
📋 Beam search
📌 Beam search
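A minimal sketch of beam search over a generic decode_step callable (an assumed interface, not tied to the decoder above): at every step each unfinished hypothesis is expanded with its top-k next tokens, and only the beam_size best partial hypotheses are kept.

def beam_search(decode_step, init_hidden, sos_id, eos_id, beam_size=3, max_len=20):
    # decode_step(token_id, hidden) -> (log-probabilities over the vocabulary, new hidden state)
    # Each hypothesis: (cumulative log-prob, token list, hidden state, finished flag)
    beams = [(0.0, [sos_id], init_hidden, False)]
    for _ in range(max_len):
        candidates = []
        for score, tokens, hidden, done in beams:
            if done:
                candidates.append((score, tokens, hidden, True))
                continue
            log_probs, new_hidden = decode_step(tokens[-1], hidden)
            top_lp, top_ids = log_probs.topk(beam_size)
            for lp, idx in zip(top_lp.tolist(), top_ids.tolist()):
                candidates.append((score + lp, tokens + [idx], new_hidden, idx == eos_id))
        # Keep only the beam_size best partial hypotheses.
        beams = sorted(candidates, key=lambda c: c[0], reverse=True)[:beam_size]
        if all(done for _, _, _, done in beams):
            break
    return beams[0][1]  # token ids of the best hypothesis

# Toy decode_step over a 5-token vocabulary, purely to exercise the function (hypothetical interface).
def toy_step(token_id, hidden):
    return F.log_softmax(torch.randn(5), dim=-1), hidden

print(beam_search(toy_step, init_hidden=None, sos_id=1, eos_id=2))

In practice the final hypothesis scores are usually length-normalized (cumulative log-probability divided by hypothesis length) so that shorter hypotheses are not unfairly favored.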
📋 BLEU score
📌 Precision and Recall
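As a toy illustration (the sentences are arbitrary), word-level precision measures how much of the prediction appears in the reference, recall measures how much of the reference is covered, and the F-measure is their harmonic mean.

from collections import Counter

def precision_recall(prediction, reference):
    pred, ref = prediction.split(), reference.split()
    # Count each predicted word at most as often as it appears in the reference (clipped matches).
    overlap = sum((Counter(pred) & Counter(ref)).values())
    precision = overlap / len(pred)  # fraction of the prediction that is correct
    recall = overlap / len(ref)      # fraction of the reference that is covered
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1

print(precision_recall("the cat sat on the mat", "the cat is on the mat"))  # ≈ (0.83, 0.83, 0.83)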
📌 BiLingual Evaluation Understudy (BLEU)
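A minimal sketch of the BLEU computation for a single prediction/reference pair: the geometric mean of the clipped n-gram precisions for n = 1..4, multiplied by a brevity penalty that punishes predictions shorter than the reference. The example sentences are arbitrary, and no smoothing is applied.

import math
from collections import Counter

def ngrams(tokens, n):
    return [tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1)]

def bleu(prediction, reference, max_n=4):
    pred, ref = prediction.split(), reference.split()
    precisions = []
    for n in range(1, max_n + 1):
        pred_counts, ref_counts = Counter(ngrams(pred, n)), Counter(ngrams(ref, n))
        total = sum(pred_counts.values())
        matched = sum((pred_counts & ref_counts).values())  # clipped n-gram matches
        if total == 0 or matched == 0:
            return 0.0  # no smoothing in this simple sketch
        precisions.append(matched / total)
    bp = min(1.0, math.exp(1 - len(ref) / len(pred)))  # brevity penalty
    return bp * math.exp(sum(math.log(p) for p in precisions) / max_n)

print(bleu("the quick brown fox jumps over the dog",
           "the quick brown fox jumps over the lazy dog"))  # ≈ 0.77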
Reference
Original post: https://velog.io/@zero217/Week4-Day3