Issues running a Keras model with custom layers

I am currently working on my bachelor's thesis at FIIT STU, the primary goal of which is to replicate and verify the results of the following study. The authors of this study use a fake news detection model of their own design. I managed to find the code on GitHub: https://github.com/cuilimeng/dEFEND-web. They use the following dataset, also on GitHub: https://github.com/cuilimeng/CoAID. The data consists of news articles from the first couple of months of the COVID-19 pandemic and IDs of tweets that comment on those articles. I retrieved the tweets through the Twitter API, but per their user agreement (or whatever the correct term for that is), I cannot share their contents.

The linked repository features a web app in "application.py" and the model itself in "defend.py". The web app doesn't really matter for my purposes; "defend.py" is the important file. Here are the classes and methods I have been trying to use and am having issues with (note: I have played around with the code a bit, so prints and other non-essential lines may differ slightly from the original on GitHub):

    def train(self, train_x, train_y, train_c, val_c, val_x, val_y,
              batch_size=20, epochs=10,
              embeddings_path=False,
              saved_model_dir='saved_models', saved_model_filename=None, ):
        # Fit the vocabulary set on the content and comments
        self._fit_on_texts_and_comments(train_x, train_c, val_x, val_c)
        self.model = self._build_model(
            n_classes=train_y.shape[-1],
            embedding_dim=100,
            embeddings_path=embeddings_path)

        # Create encoded input for content and comments
        encoded_train_x = self._encode_texts(train_x)
        encoded_val_x = self._encode_texts(val_x)
        encoded_train_c = self._encode_comments(train_c)
        encoded_val_c = self._encode_comments(val_c)
        callbacks = [
            LambdaCallback(
                on_epoch_end=lambda epoch, logs: self._save_tokenizer_on_epoch_end(
                    os.path.join(saved_model_dir,
                                 self._get_tokenizer_filename(saved_model_filename)), epoch))
        ]

        if saved_model_filename:
            callbacks.append(
                ModelCheckpoint(
                    filepath=os.path.join(saved_model_dir, saved_model_filename),
                    monitor='val_loss',
                    save_best_only=True,
                    save_weights_only=False,
                )
            )
        callbacks.append(self.metrics)

        # """
        print("Input shapes for fit:")
        print(encoded_train_c.shape)
        print(encoded_train_x.shape)
        print(train_y.shape)
        print(encoded_val_c.shape)
        print(encoded_val_x.shape)
        print(val_y.shape)
        # """

        self.model.fit([encoded_train_c, encoded_train_x], y=train_y,
                       validation_data=([encoded_val_c, encoded_val_x], val_y),
                       batch_size=batch_size,
                       epochs=epochs,
                       verbose=1,
                       callbacks=callbacks)

Of particular importance is this bit of code here:

self.model = self._build_model(
    n_classes=train_y.shape[-1],
    embedding_dim=100,
    embeddings_path=embeddings_path)

This is the method it calls:

    def _build_model(self, n_classes=2, embedding_dim=100, embeddings_path=False, aff_dim=80):
        GLOVE_DIR = "."
        embeddings_index = {}
        f = open(os.path.join('./', 'glove.6B.100d.txt'))
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs
        f.close()
        word_index = self.tokenizer.word_index
        embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
        for word, i in word_index.items():
            embedding_vector = embeddings_index.get(word)
            if embedding_vector is not None:
                embedding_matrix[i] = embedding_vector

        # embeddings_initializer = initializers.Constant(embedding_matrix),
        embedding_layer = Embedding(len(word_index) + 1,
                                    embedding_dim,
                                    weights=[embedding_matrix],
                                    input_length=self.MAX_SENTENCE_LENGTH,
                                    trainable=True,
                                    mask_zero=True)

        com_embedding_layer = Embedding(len(word_index) + 1,
                                        embedding_dim,
                                        weights=[embedding_matrix],
                                        input_length=self.MAX_COMS_LENGTH,
                                        trainable=True,
                                        mask_zero=True)

        sentence_input = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int32')
        embedded_sequences = embedding_layer(sentence_input)
        l_lstm = Bidirectional(GRU(100, return_sequences=True), name='word_lstm')(embedded_sequences)
        l_att = AttLayer(name='word_attention')(l_lstm)
        sentEncoder = Model(sentence_input, l_att)
        plot_model(sentEncoder, to_file='model_images/SentenceEncoder.png', show_shapes=True)

        self.news_content_word_level_encoder = sentEncoder
        self.news_content_word_level_encoder.summary()

        content_input = Input(shape=(self.MAX_SENTENCE_COUNT, self.MAX_SENTENCE_LENGTH), dtype='int32')
        print(f"content_input shape: {content_input.shape}")
        content_encoder = TimeDistributed(sentEncoder)(content_input)
        print(f"content_encoder shape: {content_encoder.shape}")
        l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)
        content_encoder = Model(content_input, l_lstm_sent)
        plot_model(content_encoder, to_file='model_images/ContentEncoder.png', show_shapes=True)

        self.news_content_sentence_level_encoder = content_encoder
        self.news_content_sentence_level_encoder.summary()

        # learn comments representations
        comment_input = Input(shape=(self.MAX_COMS_LENGTH,), dtype='int32')
        com_embedded_sequences = com_embedding_layer(comment_input)
        c_lstm = Bidirectional(GRU(100, return_sequences=True), name='comment_lstm')(com_embedded_sequences)
        c_att = AttLayer(name='comment_word_attention')(c_lstm)
        comEncoder = Model(comment_input, c_att, name='comment_word_level_encoder')
        plot_model(comEncoder, to_file='model_images/CommentEncoder.png', show_shapes=True)

        self.comment_word_level_encoder = comEncoder
        self.comment_word_level_encoder.summary()

        all_comment_input = Input(shape=(self.MAX_COMS_COUNT, self.MAX_COMS_LENGTH), dtype='int32')
        all_comment_encoder = TimeDistributed(comEncoder, name='comment_sequence_encoder')(all_comment_input)
        allComEncoder = Model(all_comment_input, all_comment_encoder)
        plot_model(allComEncoder, to_file='model_images/AllCommentEncoder.png', show_shapes=True)

        self.comment_sequence_encoder = allComEncoder
        self.comment_sequence_encoder.summary()

        # Co-attention

        L = LLayer(name="co-attention")([all_comment_encoder, l_lstm_sent])
        L_Model = Model([all_comment_input, content_input], L)

        self.co_attention_model = L_Model
        self.co_attention_model.summary()

        plot_model(L_Model, to_file='model_images/l_representation.png', show_shapes=True)

        preds = Dense(2, activation='softmax')(L)
        model = Model(inputs=[all_comment_input, content_input], outputs=preds)
        model.summary()
        plot_model(model, to_file='model_images/CHATT.png', show_shapes=True)

        optimize = RMSprop(lr=0.001)
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimize)

        return model

The _build_model() method uses two custom layers:

class AttLayer(Layer):
    """
    Attention layer used for the calculating attention in word and sentence levels
    """

    def __init__(self, **kwargs):
        super(AttLayer, self).__init__(**kwargs)
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = 100

    def build(self, input_shape):
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim,)))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self._trainable_weights = [self.W, self.b, self.u]
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)
        ait = K.exp(ait)
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        print(ait)
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]

and

class LLayer(Layer):
    """
    Co-attention layer which accepts content and comment states and computes co-attention between them and returns the
     the weighted sum of the content and the comment states
    """

    def __init__(self, **kwargs):
        self.init = initializers.get('normal')
        self.latent_dim = 200
        self.k = 80
        super(LLayer, self).__init__(**kwargs)

    def build(self, input_shape, mask=None):
        self.Wl = K.variable(self.init((self.latent_dim, self.latent_dim)))

        self.Wc = K.variable(self.init((self.k, self.latent_dim)))
        self.Ws = K.variable(self.init((self.k, self.latent_dim)))

        self.whs = K.variable(self.init((1, self.k)))
        self.whc = K.variable(self.init((1, self.k)))
        self._trainable_weights = [self.Wl, self.Wc, self.Ws, self.whs, self.whc]

    def compute_mask(self, inputs, mask=None):
        return mask

    def call(self, x, mask=None):
        comment_rep = x[0]
        sentence_rep = x[1]
        sentence_rep_trans = K.permute_dimensions(sentence_rep, (0, 2, 1))
        comment_rep_trans = K.permute_dimensions(comment_rep, (0, 2, 1))
        L = K.tanh(tf.einsum('btd,dD,bDn->btn', comment_rep, self.Wl, sentence_rep_trans))
        L_trans = K.permute_dimensions(L, (0, 2, 1))

        Hs = K.tanh(tf.einsum('kd,bdn->bkn', self.Ws, sentence_rep_trans) + tf.einsum('kd,bdt,btn->bkn', self.Wc,
                                                                                      comment_rep_trans, L))
        Hc = K.tanh(tf.einsum('kd,bdt->bkt', self.Wc, comment_rep_trans) + tf.einsum('kd,bdn,bnt->bkt', self.Ws,
                                                                                     sentence_rep_trans, L_trans))
        As = K.softmax(tf.einsum('yk,bkn->bn', self.whs, Hs))
        Ac = K.softmax(tf.einsum('yk,bkt->bt', self.whc, Hc))
        co_s = tf.einsum('bdn,bn->bd', sentence_rep_trans, As)
        co_c = tf.einsum('bdt,bt->bd', comment_rep_trans, Ac)
        co_sc = K.concatenate([co_s, co_c], axis=1)

        return co_sc

    def compute_output_shape(self, input_shape):
        return input_shape[0][0], self.latent_dim + self.latent_dim
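
Just to convince myself that the co-attention layer behaves the way I expect on its own, I ran it on random tensors shaped like my data. This is only a sanity check of mine (TF 2.x, eager mode), not part of the authors' code, and the sizes are assumptions based on my inputs:

import tensorflow as tf

# Quick shape check of LLayer on eager tensors; 150 comments and 50 sentences
# per article, each encoded into 200-dimensional BiGRU states (my assumptions).
comment_states = tf.random.normal((2, 150, 200))    # (batch, comments, 2 * GRU units)
sentence_states = tf.random.normal((2, 50, 200))    # (batch, sentences, 2 * GRU units)

co_attention = LLayer()
co_sc = co_attention([comment_states, sentence_states])
print(co_sc.shape)  # expecting (2, 400): concatenated content and comment vectors

If this prints (2, 400), the co-attention output matches what compute_output_shape declares, so my suspicion stays with the attention/masking further up.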

The last bit of code that I think is important is a custom callback:

class Metrics(Callback):
    def __init__(self, platform):
        self.log_file = open('./Log_Defend_' + platform + '.txt', 'a')

    def on_train_begin(self, logs={}):
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []
        self.val_auc = []
        self.val_acc = []

    def on_epoch_end(self, epoch, logs={}):
        val_predict_onehot = (
            np.asarray(self.model.predict([self.validation_data[0], self.validation_data[1]]))).round()
        val_targ_onehot = self.validation_data[2]
        val_predict = np.argmax(val_predict_onehot, axis=1)
        val_targ = np.argmax(val_targ_onehot, axis=1)
        _val_f1 = f1_score(val_targ, val_predict)
        _val_recall = recall_score(val_targ, val_predict)
        _val_precision = precision_score(val_targ, val_predict)
        _val_auc = roc_auc_score(val_targ, val_predict)
        _val_acc = accuracy_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
        self.val_auc.append(_val_auc)
        self.val_acc.append(_val_acc)
        print("Epoch: %d - val_accuracy: % f - val_precision: % f - val_recall % f val_f1: %f auc: %f" % (
            epoch, _val_acc, _val_precision, _val_recall, _val_f1, _val_auc))
        self.log_file.write(
            "Epoch: %d - val_accuracy: % f - val_precision: % f - val_recall % f val_f1: %f auc: %f\n" % (epoch,
                                                                                                          _val_acc,
                                                                                                          _val_precision,
                                                                                                          _val_recall,
                                                                                                          _val_f1,
                                                                                                          _val_auc))
        return

As best I could figure out, the model uses GloVe word embeddings with dim=100, and the shapes passed to model.fit are the following:

(948, 150, 120) (948, 50, 120) (948, 2) (411, 150, 120) (411, 50, 120) (411, 2)

where 120 is the maximum number of words in a sentence (missing words are set to 0), 150 is the number of tweets per article, 50 is the number of sentences per article, and 2 is the one-hot target pair for the dense layer with 2 output nodes (I am not exactly sure why the authors use 2 output nodes, but I didn't want to mess with their code too much, so I tried to adjust the data to fit).
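
To make explicit how I map these arrays onto the two model inputs, here is a small sketch with dummy data; the constants are my own assumptions inferred from the shapes above, not values read out of defend.py:

import numpy as np

# Assumed hyperparameters, inferred from the printed shapes above
MAX_COMS_COUNT = 150       # tweets (comments) per article
MAX_COMS_LENGTH = 120      # words per comment
MAX_SENTENCE_COUNT = 50    # sentences per article
MAX_SENTENCE_LENGTH = 120  # words per sentence
N_TRAIN = 948

dummy_comments = np.zeros((N_TRAIN, MAX_COMS_COUNT, MAX_COMS_LENGTH), dtype='int32')
dummy_content = np.zeros((N_TRAIN, MAX_SENTENCE_COUNT, MAX_SENTENCE_LENGTH), dtype='int32')
dummy_labels = np.zeros((N_TRAIN, 2), dtype='float32')  # one-hot fake/real pair

# fit() gets the comments first and the content second, matching
# Model(inputs=[all_comment_input, content_input], outputs=preds) in _build_model():
# model.fit([dummy_comments, dummy_content], y=dummy_labels, batch_size=20, epochs=10)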

The model should look something like this: [image: full model diagram]

The TimeDistributed wrapper on the left contains this model: [image: encoder diagram]

And the one on the right contains this: [image: encoder diagram]

Now we can get to the issues:

When I tried running the code on Python 3.6 with the dependencies from the model's requirements.txt, i.e. on TensorFlow 1.13, the model seemed to build correctly, but I got the following error during fit:

WARNING:tensorflow:From C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Train on 948 samples, validate on 411 samples
Epoch 1/10
Traceback (most recent call last):
  File "<input>", line 1, in <module>
  File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master/defend_test.py", line 42, in <module>
    saved_model_filename='covid')
  File "C:\Users\vikio\PycharmProjects\Bakalarka\dEFEND-web-master\defend.py", line 420, in train
    callbacks=callbacks)
  File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\engine\training.py", line 1039, in fit
    validation_steps=validation_steps)
  File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\engine\training_arrays.py", line 199, in fit_loop
    outs = f(ins_batch)
  File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\tensorflow\python\client\session.py", line 1439, in __call__
    run_metadata_ptr)
  File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 528, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Inputs to operation sentence_lstm/while_1/Select_1 of type Select must have the same size and shape.  Input 0: [20,12000] != input 1: [20,100]
     [[{{node sentence_lstm/while_1/Select_1}}]]

As best I can figure out, the issue seems to be caused by the custom AttLayer, but I honestly have no idea.
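
One thing that makes me suspicious is the mask handling: AttLayer collapses the time axis in call(), but its compute_mask() passes the incoming mask through unchanged. A tiny check of mine (TF 2.x; the batch size of 20 and sentence length of 120 are just the values from my run):

import tensorflow as tf

# compute_mask() of the AttLayer defined above simply returns the incoming mask.
att_layer = AttLayer()
word_mask = tf.ones((20, 120), dtype=tf.bool)          # per-word mask from Embedding(mask_zero=True)
print(att_layer.compute_mask(None, word_mask).shape)   # (20, 120)
# The layer's output, however, is (batch, 200) with no time axis left, so the
# sentence_lstm downstream seems to receive a mask that no longer matches its input.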

I also tried running the code on Python 3.8 and TensorFlow 2 and ran into the following issue, which also seems to relate to the same custom layer:

Traceback (most recent call last):
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 1853, in _create_c_op
    c_op = pywrap_tf_session.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimensions must be equal, but are 120 and 100 for '{{node while/SelectV2}} = SelectV2[T=DT_FLOAT](while/Tile, while/add_3, while/SelectV2/zeros_like)' with input shapes: [?,120], [?,100], [?,100].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-7e73ed9a0d40>", line 1, in <module>
    runfile('C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master/defend_test.py', wdir='C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master')
  File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
    pydev_imports.execfile(filename, global_vars, local_vars)  # execute the script
  File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
    exec(compile(contents+"\n", file, 'exec'), glob, loc)
  File "C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master/defend_test.py", line 40, in <module>
    h.train(x_train, y_train, t_train,
  File "C:\Users\vikio\PycharmProjects\Bakalarka\dEFEND-web-master\defend.py", line 377, in train
    self.model = self._build_model(
  File "C:\Users\vikio\PycharmProjects\Bakalarka\dEFEND-web-master\defend.py", line 234, in _build_model
    l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 539, in __call__
    return super(Bidirectional, self).__call__(inputs, **kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 951, in __call__
    return self._functional_construction_call(inputs, args, kwargs,
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1090, in _functional_construction_call
    outputs = self._keras_tensor_symbolic_call(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 822, in _keras_tensor_symbolic_call
    return self._infer_output_signature(inputs, args, kwargs, input_masks)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 863, in _infer_output_signature
    outputs = call_fn(inputs, *args, **kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 652, in call
    y = self.forward_layer(forward_inputs,
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 660, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1012, in __call__
    outputs = call_fn(inputs, *args, **kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 470, in call
    last_output, outputs, runtime, states = self._defun_gru_call(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 546, in _defun_gru_call
    last_output, outputs, new_h, runtime = gru_with_backend_selection(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 841, in gru_with_backend_selection
    last_output, outputs, new_h, runtime = defun_standard_gru(**params)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 2941, in __call__
    filtered_flat_args) = self._maybe_define_function(args, kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 3361, in _maybe_define_function
    graph_function = self._create_graph_function(args, kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 3196, in _create_graph_function
    func_graph_module.func_graph_from_py_func(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\func_graph.py", line 990, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 621, in standard_gru
    last_output, outputs, new_states = K.rnn(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
    return target(*args, **kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4459, in rnn
    final_outputs = control_flow_ops.while_loop(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2687, in while_loop
    return while_v2.while_loop(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\while_v2.py", line 192, in while_loop
    body_graph = func_graph_module.func_graph_from_py_func(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\func_graph.py", line 990, in func_graph_from_py_func
    func_outputs = python_func(*func_args, **func_kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\while_v2.py", line 178, in wrapped_body
    outputs = body(*_pack_sequence_as(orig_loop_vars, args))
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4440, in _step
    flat_new_output = compute_masked_output(mask_t, flat_output,
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4391, in compute_masked_output
    return tuple(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4392, in <genexpr>
    array_ops.where_v2(m, o, fm)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
    return target(*args, **kwargs)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\array_ops.py", line 4598, in where_v2
    return gen_math_ops.select_v2(condition=condition, t=x, e=y, name=name)
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 8742, in select_v2
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 748, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\func_graph.py", line 590, in _create_op_internal
    return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 3528, in _create_op_internal
    ret = Operation(
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 2015, in __init__
    self._c_op = _create_c_op(self._graph, node_def, inputs,
  File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 1856, in _create_c_op
    raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 120 and 100 for '{{node while/SelectV2}} = SelectV2[T=DT_FLOAT](while/Tile, while/add_3, while/SelectV2/zeros_like)' with input shapes: [?,120], [?,100], [?,100].

It fails on this line of code:

l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)

so I believe it is also caused by the previous layer, which is a TimeDistributed wrapper around the model that ends with the custom AttLayer.
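
For what it's worth, one workaround I have been considering (but have not verified, and I don't know whether it keeps the behaviour the authors intended) is to stop AttLayer from forwarding the word-level mask, since its output has no time axis left, e.g. with a small subclass like this:

class AttLayerNoMask(AttLayer):
    """Same attention layer, but does not propagate the incoming mask."""

    def compute_mask(self, inputs, mask=None):
        # The attention output is (batch, features); there is no time axis
        # left for downstream layers to mask, so pass nothing on.
        return None

Swapping this in for AttLayer in _build_model() would be the experiment, but I would like to understand whether that is actually the right fix (or whether the problem lies elsewhere) before diverging from the original code.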

Any help is appreciated!


