Mar*_*ari 6 python tensorflow attention-model pytorch
在tensorflow 站点的本教程中,我们可以看到实现自动编码器的代码,其解码器如下所示:
class Decoder(tf.keras.Model):
    """GRU decoder that computes attention *before* the recurrent step.

    On every call the attention layer is evaluated on the previous hidden
    state and the encoder output; the resulting context vector is
    concatenated with the embedded input token and that concatenation is
    what the GRU consumes.
    """

    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        """Create the embedding, GRU, output projection and attention layers.

        Args:
            vocab_size: Size of the embedding table and of the output layer.
            embedding_dim: Width of the token embeddings.
            dec_units: Number of GRU units; also passed to the attention layer.
            batch_sz: Stored on the instance; not used by this class itself.
        """
        super().__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(
            self.dec_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.fc = tf.keras.layers.Dense(vocab_size)
        # Attention layer, evaluated at the start of every decoding step.
        self.attention = BahdanauAttention(self.dec_units)

    def call(self, x, hidden, enc_output):
        """Run one decoding step.

        Args:
            x: Input token ids for this step.
            hidden: Previous decoder hidden state (the attention query).
            enc_output: Encoder output, (batch_size, max_length, hidden_size).

        Returns:
            A tuple ``(logits, state, attention_weights)`` where ``logits``
            has shape (batch_size, vocab).
        """
        # Attend over the encoder output using the previous hidden state.
        context_vector, attention_weights = self.attention(hidden, enc_output)

        # (batch_size, 1, embedding_dim) after the embedding lookup.
        embedded = self.embedding(x)

        # Give the context vector a time axis so it can be joined with the
        # embedding: (batch_size, 1, embedding_dim + hidden_size).
        context_step = tf.expand_dims(context_vector, 1)
        gru_input = tf.concat([context_step, embedded], axis=-1)

        # NOTE(review): the GRU is called without an explicit initial_state,
        # so `hidden` influences this step only through the attention layer.
        output, state = self.gru(gru_input)

        # Collapse the length-1 time axis: (batch_size * 1, hidden_size).
        flat_output = tf.reshape(output, (-1, output.shape[2]))

        # Project onto the vocabulary: (batch_size, vocab).
        logits = self.fc(flat_output)
        return logits, state, attention_weights
Run Code Online (Sandbox Code Playgroud)
BahdanauAttention 被应用于编码器输出和前一个隐藏状态,得到的上下文向量与输入的嵌入查找(embedding lookup)结果拼接(concat),然后被馈送到 GRU。
而在这个 GitHub 存储库的另一段代码(使用 PyTorch 实现)中,注意力是在 GRU 之后应用的:
class DecoderAttn(nn.Module):
    """GRU decoder that computes attention *after* the recurrent step.

    The embedded input is first run through the GRU. The GRU output is then
    linearly projected and dotted with every encoder output to obtain
    attention scores; the attention-weighted sum of encoder outputs is
    concatenated with the GRU output and projected to log-probabilities.
    """

    def __init__(self, input_size, hidden_size, output_size, out_bias):
        """Build the decoder layers.

        Args:
            input_size: Number of rows in the embedding table.
            hidden_size: Embedding width and GRU hidden width.
            output_size: Number of output classes.
            out_bias: Optional initial value for the output layer's bias
                (sequence, array or tensor broadcastable to
                ``(output_size,)``). ``None`` keeps the default init.
        """
        super(DecoderAttn, self).__init__()
        self.hidden_size = hidden_size
        self.input_size = input_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.emb_drop = nn.Dropout(0.2)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.gru_drop = nn.Dropout(0.2)
        # Input is [GRU output ; context], hence hidden_size * 2.
        self.mlp = nn.Linear(hidden_size * 2, output_size)
        if out_bias is not None:
            # as_tensor avoids the copy-construct warning torch.tensor()
            # emits when out_bias is already a tensor; copying under
            # no_grad replaces the discouraged `.data` write while leaving
            # the bias a trainable parameter.
            bias_init = torch.as_tensor(out_bias, dtype=self.mlp.bias.dtype)
            with torch.no_grad():
                self.mlp.bias.copy_(bias_init)
        self.logsoftmax = nn.LogSoftmax(dim=2)

        self.att_mlp = nn.Linear(hidden_size, hidden_size, bias=False)
        self.attn_softmax = nn.Softmax(dim=2)

    def forward(self, input, hidden, encoder_outs):
        """Decode the given target tokens.

        Args:
            input: Token ids, shape (batch, tgt_len). The name shadows the
                builtin but is kept so keyword callers keep working.
            hidden: Initial GRU state, shape (1, batch, hidden_size).
            encoder_outs: Encoder outputs, (batch, src_len, hidden_size).

        Returns:
            A tuple ``(out, hidden, attn)``: log-probabilities of shape
            (batch, tgt_len, output_size), the final GRU state, and the
            attention weights of shape (batch, tgt_len, src_len).
        """
        emb = self.embedding(input)
        out, hidden = self.gru(self.emb_drop(emb), hidden)

        # Scores: projected GRU output dotted with each encoder output.
        out_proj = self.att_mlp(out)
        enc_out_perm = encoder_outs.permute(0, 2, 1)
        e_exp = torch.bmm(out_proj, enc_out_perm)
        attn = self.attn_softmax(e_exp)

        # Context: attention-weighted sum of the encoder outputs.
        ctx = torch.bmm(attn, encoder_outs)

        full_ctx = torch.cat([self.gru_drop(out), ctx], dim=2)

        out = self.mlp(full_ctx)
        out = self.logsoftmax(out)
        return out, hidden, attn
Run Code Online (Sandbox Code Playgroud)
我想知道第二种情况是否是错误的?如果不是错误,它和第一个解码器有什么区别?改变注意力位置如何影响输出?
| 归档时间: |
|
| 查看次数: |
303 次 |
| 最近记录: |