在 Keras 中使用自定义注意力层(Attention)时,add_weight() 报错:got multiple values for argument 'name'(参数 'name' 收到了多个值)

Des*_*wal 4 python keras tensorflow tensorflow2.0

(我认为这是因为原作者使用的 keras.engine.topology.Layer 与我安装的版本存在冲突。)

使用tensorflow==2.2.0和keras==2.4.3,我试图学习注意力机制并从某处导入代码:

from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints

from keras.layers import Dense, Input, LSTM, Bidirectional, Activation, Conv1D, GRU, TimeDistributed
from keras.layers import Dropout, Embedding, GlobalMaxPooling1D, MaxPooling1D, Add, Flatten, SpatialDropout1D
from keras.layers import GlobalAveragePooling1D, BatchNormalization, concatenate
from keras.layers import Reshape, merge, Concatenate, Lambda, Average
from keras.models import Sequential, Model
from keras.initializers import Constant
from keras.layers.merge import add


class Attention(Layer):
    """Attention pooling over the step axis of a rank-3 tensor.

    Each step is scored with a learned vector ``W`` (plus an optional
    per-step bias ``b``), the scores are squashed with ``tanh``,
    exponentiated and normalised over the step axis, and the input is
    summed over that axis using the resulting weights.

    Input:  ``(batch, step_dim, features)``
    Output: ``(batch, features)``

    Args:
        step_dim: Length of the step axis. ``call`` reshapes the scores to
            ``(-1, step_dim)``, so it must equal ``input_shape[1]``.
        W_regularizer: Regularizer identifier for ``W`` (or ``None``).
        b_regularizer: Regularizer identifier for ``b`` (or ``None``).
        W_constraint: Constraint identifier for ``W`` (or ``None``).
        b_constraint: Constraint identifier for ``b`` (or ``None``).
        bias: Whether to add the per-step bias to the scores.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Initialise the base layer first so that nothing assigned below can
        # be reset by the base constructor (notably ``supports_masking``).
        super(Attention, self).__init__(**kwargs)
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0  # real value is filled in by build()

    def build(self, input_shape):
        """Create ``W`` (and ``b`` when ``bias`` is set) for ``input_shape``."""
        assert len(input_shape) == 3
        # ``shape`` must be passed by keyword: the first positional parameter
        # of ``add_weight`` is ``name``, so a positional shape tuple clashes
        # with ``name=...`` and raises
        # "add_weight() got multiple values for argument 'name'".
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]
        if self.bias:
            # One bias value per step (axis 1 of the input).
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return ``None``: no mask is propagated past this layer."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over the step axis.

        Args:
            x: Tensor reshaped internally as ``(-1, features_dim)``, i.e.
                expected to be ``(batch, step_dim, features_dim)``.
            mask: Optional mask multiplied into the unnormalised weights
                after being cast to the backend float type.

        Returns:
            ``K.sum(x * a, axis=1)`` where ``a`` holds the normalised
            weights with a trailing axis added for broadcasting.
        """
        features_dim = self.features_dim
        step_dim = self.step_dim
        # Score every step: flatten to (batch * steps, features), project
        # onto W, then fold back to (batch, steps).
        flat_x = K.reshape(x, (-1, features_dim))
        w_column = K.reshape(self.W, (features_dim, 1))
        eij = K.reshape(K.dot(flat_x, w_column), (-1, step_dim))
        if self.bias:
            eij += self.b
        eij = K.tanh(eij)
        a = K.exp(eij)
        if mask is not None:
            # Zero the weight of masked steps before normalising.
            a *= K.cast(mask, K.floatx())
        # epsilon keeps the division finite when every weight in a row is 0.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Return ``(batch, features_dim)``; the step axis is summed out."""
        return input_shape[0],  self.features_dim
Run Code Online (Sandbox Code Playgroud)

问题是当我尝试使用时,

lstm_layer = LSTM(300, dropout=0.25, recurrent_dropout=0.25, return_sequences=True)  # Attention.build asserts a rank-3 input, hence return_sequences=True

inp = Input(shape=(maxlen,), dtype='int32')  # maxlen is defined outside this snippet
embedding= embedding_layer(inp)  # embedding_layer is defined outside this snippet
x = lstm_layer(embedding)
x = Dropout(0.25)(x)
merged = Attention(maxlen)(x)  # step_dim=maxlen; the traceback below points at this call
merged = Dense(256, activation='relu')(merged)
merged = Dropout(0.25)(merged)
merged = BatchNormalization()(merged)
outp = Dense(len(int_category), activation='softmax')(merged)  # one output unit per entry of int_category (defined outside this snippet)

AttentionLSTM = Model(inputs=inp, outputs=outp)
AttentionLSTM.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

AttentionLSTM.summary()
Run Code Online (Sandbox Code Playgroud)

它抛出一个错误:TypeError: add_weight() got multiple values for argument 'name'

错误的完整回溯是:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-25-1ab1f1ef1ed7> in <module>
      5 x = lstm_layer(embedding)
      6 x = Dropout(0.25)(x)
----> 7 merged = Attention(maxlen)(x)
      8 merged = Dense(256, activation='relu')(merged)
      9 merged = Dropout(0.25)(merged)

/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
    895           # Build layer if applicable (if the `build` method has been
    896           # overridden).
--> 897           self._maybe_build(inputs)
    898           cast_inputs = self._maybe_cast_inputs(inputs)
    899 

/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py in _maybe_build(self, inputs)
   2414         # operations.
   2415         with tf_utils.maybe_init_scope(self):
-> 2416           self.build(input_shapes)  # pylint:disable=not-callable
   2417       # We must set also ensure that the layer is marked as built, and the build
   2418       # shape is stored since user defined build functions may not be calling

<ipython-input-20-86a01469b2e5> in build(self, input_shape)
     23                                  name='{}_W'.format(self.name),
     24                                  regularizer=self.W_regularizer,
---> 25                                  constraint=self.W_constraint)
     26         self.features_dim = input_shape[-1]
     27         if self.bias:

TypeError: add_weight() got multiple values for argument 'name'

Run Code Online (Sandbox Code Playgroud)

Jay*_*oti 5

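问题的出现是因为 add_weight 把以位置参数传入的形状元组当成了它的第一个参数 name,于是与后面显式写出的 name=... 关键字参数冲突。请改用关键字显式传入形状,例如把 self.add_weight((input_shape[-1],), ...) 写成 self.add_weight(shape=(input_shape[-1],), ...),偏置同理写成 shape=(input_shape[1],)。这应该可以解决问题。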

  • 你好 Jayesh,这不是一个很有帮助的信息。您能否报告上面的代码,并导入适当的模块并将“shape=shape(input)”正确放置在您的版本中?这会对我们有更多帮助。欢迎来到社区。 (2认同)
  • 很抱歉让您感到困惑,但我认为您已经研究过该问题并完美回答了。 (2认同)