Issues running a Keras model with custom layers
I am currently working on my bachelor's thesis at FIIT STU, the primary goal of which is to replicate and verify the results of the following study. The authors use a fake news detection model of their own design. I managed to find the code on GitHub: https://github.com/cuilimeng/dEFEND-web. They use the following dataset, also on GitHub: https://github.com/cuilimeng/CoAID. The data consists of news articles from the first couple of months of the COVID-19 pandemic and the IDs of tweets commenting on those articles. I retrieved the tweets via the Twitter API, but per its terms of service I cannot share their contents.
The linked code features a web app in "application.py" and the model itself in "defend.py". The web app doesn't really matter for me; the latter is the important file. Here are the classes and methods I have been trying to use and have issues with (note: I have played around with the code a bit, so prints and other non-essential lines may differ slightly from the original on GitHub):
def train(self, train_x, train_y, train_c, val_c, val_x, val_y,
batch_size=20, epochs=10,
embeddings_path=False,
saved_model_dir='saved_models', saved_model_filename=None, ):
# Fit the vocabulary set on the content and comments
self._fit_on_texts_and_comments(train_x, train_c, val_x, val_c)
self.model = self._build_model(
n_classes=train_y.shape[-1],
embedding_dim=100,
embeddings_path=embeddings_path)
# Create encoded input for content and comments
encoded_train_x = self._encode_texts(train_x)
encoded_val_x = self._encode_texts(val_x)
encoded_train_c = self._encode_comments(train_c)
encoded_val_c = self._encode_comments(val_c)
callbacks = [
LambdaCallback(
on_epoch_end=lambda epoch, logs: self._save_tokenizer_on_epoch_end(
os.path.join(saved_model_dir,
self._get_tokenizer_filename(saved_model_filename)), epoch))
]
if saved_model_filename:
callbacks.append(
ModelCheckpoint(
filepath=os.path.join(saved_model_dir, saved_model_filename),
monitor='val_loss',
save_best_only=True,
save_weights_only=False,
)
)
callbacks.append(self.metrics)
# """
print("Input shapes for fit:")
print(encoded_train_c.shape)
print(encoded_train_x.shape)
print(train_y.shape)
print(encoded_val_c.shape)
print(encoded_val_x.shape)
print(val_y.shape)
# """
self.model.fit([encoded_train_c, encoded_train_x], y=train_y,
validation_data=([encoded_val_c, encoded_val_x], val_y),
batch_size=batch_size,
epochs=epochs,
verbose=1,
callbacks=callbacks)
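For context, this is roughly how I call this method from my own test script, defend_test.py (h is my model object; the variable names are mine and the array shapes are described further down):
h.train(x_train, y_train, t_train,  # content, one-hot labels, comments
        t_val, x_val, y_val,        # validation comments, content, labels
        batch_size=20, epochs=10,
        saved_model_dir='saved_models',
        saved_model_filename='covid')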
Of particular importance is this bit of code:
self.model = self._build_model(
n_classes=train_y.shape[-1],
embedding_dim=100,
embeddings_path=embeddings_path)
This is the method it calls:
def _build_model(self, n_classes=2, embedding_dim=100, embeddings_path=False, aff_dim=80):
GLOVE_DIR = "."
embeddings_index = {}
f = open(os.path.join('./', 'glove.6B.100d.txt'))
for line in f:
values = line.split()
word = values[0]
coefs = np.asarray(values[1:], dtype='float32')
embeddings_index[word] = coefs
f.close()
word_index = self.tokenizer.word_index
embedding_matrix = np.random.random((len(word_index) + 1, embedding_dim))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None:
embedding_matrix[i] = embedding_vector
# embeddings_initializer = initializers.Constant(embedding_matrix),
embedding_layer = Embedding(len(word_index) + 1,
embedding_dim,
weights=[embedding_matrix],
input_length=self.MAX_SENTENCE_LENGTH,
trainable=True,
mask_zero=True)
com_embedding_layer = Embedding(len(word_index) + 1,
embedding_dim,
weights=[embedding_matrix],
input_length=self.MAX_COMS_LENGTH,
trainable=True,
mask_zero=True)
sentence_input = Input(shape=(self.MAX_SENTENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
l_lstm = Bidirectional(GRU(100, return_sequences=True), name='word_lstm')(embedded_sequences)
l_att = AttLayer(name='word_attention')(l_lstm)
sentEncoder = Model(sentence_input, l_att)
plot_model(sentEncoder, to_file='model_images/SentenceEncoder.png', show_shapes=True)
self.news_content_word_level_encoder = sentEncoder
self.news_content_word_level_encoder.summary()
content_input = Input(shape=(self.MAX_SENTENCE_COUNT, self.MAX_SENTENCE_LENGTH), dtype='int32')
print(f"content_input shape: {content_input.shape}")
content_encoder = TimeDistributed(sentEncoder)(content_input)
print(f"content_encoder shape: {content_encoder.shape}")
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)
content_encoder = Model(content_input, l_lstm_sent)
plot_model(content_encoder, to_file='model_images/ContentEncoder.png', show_shapes=True)
self.news_content_sentence_level_encoder = content_encoder
self.news_content_sentence_level_encoder.summary()
# learn comments representations
comment_input = Input(shape=(self.MAX_COMS_LENGTH,), dtype='int32')
com_embedded_sequences = com_embedding_layer(comment_input)
c_lstm = Bidirectional(GRU(100, return_sequences=True), name='comment_lstm')(com_embedded_sequences)
c_att = AttLayer(name='comment_word_attention')(c_lstm)
comEncoder = Model(comment_input, c_att, name='comment_word_level_encoder')
plot_model(comEncoder, to_file='model_images/CommentEncoder.png', show_shapes=True)
self.comment_word_level_encoder = comEncoder
self.comment_word_level_encoder.summary()
all_comment_input = Input(shape=(self.MAX_COMS_COUNT, self.MAX_COMS_LENGTH), dtype='int32')
all_comment_encoder = TimeDistributed(comEncoder, name='comment_sequence_encoder')(all_comment_input)
allComEncoder = Model(all_comment_input, all_comment_encoder)
plot_model(allComEncoder, to_file='model_images/AllCommentEncoder.png', show_shapes=True)
self.comment_sequence_encoder = allComEncoder
self.comment_sequence_encoder.summary()
# Co-attention
L = LLayer(name="co-attention")([all_comment_encoder, l_lstm_sent])
L_Model = Model([all_comment_input, content_input], L)
self.co_attention_model = L_Model
self.co_attention_model.summary()
plot_model(L_Model, to_file='model_images/l_representation.png', show_shapes=True)
preds = Dense(2, activation='softmax')(L)
model = Model(inputs=[all_comment_input, content_input], outputs=preds)
model.summary()
plot_model(model, to_file='model_images/CHATT.png', show_shapes=True)
optimize = RMSprop(lr=0.001)
model.compile(loss='categorical_crossentropy',
optimizer=optimize)
return model
The _build_model() method uses two custom layers:
class AttLayer(Layer):
"""
Attention layer used for the calculating attention in word and sentence levels
"""
def __init__(self, **kwargs):
super(AttLayer, self).__init__(**kwargs)
self.init = initializers.get('normal')
self.supports_masking = True
self.attention_dim = 100
def build(self, input_shape):
assert len(input_shape) == 3
self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
self.b = K.variable(self.init((self.attention_dim,)))
self.u = K.variable(self.init((self.attention_dim, 1)))
self._trainable_weights = [self.W, self.b, self.u]
super(AttLayer, self).build(input_shape)
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
# size of x :[batch_size, sel_len, attention_dim]
# size of u :[batch_size, attention_dim]
# uit = tanh(xW+b)
uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
ait = K.dot(uit, self.u)
ait = K.squeeze(ait, -1)
ait = K.exp(ait)
if mask is not None:
# Cast the mask to floatX to avoid float64 upcasting in theano
ait *= K.cast(mask, K.floatx())
print(ait)
ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
ait = K.expand_dims(ait)
weighted_input = x * ait
output = K.sum(weighted_input, axis=1)
return output
def compute_output_shape(self, input_shape):
return input_shape[0], input_shape[-1]
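To make sure I understand what this layer computes, here is a quick NumPy sketch of my own (not from the repo): attention scores over the time axis followed by a weighted sum, so a (batch, time, features) input collapses to (batch, features), which matches compute_output_shape():
import numpy as np

batch, time_steps, features, attention_dim = 4, 120, 200, 100
x = np.random.randn(batch, time_steps, features)
W = np.random.randn(features, attention_dim)
b = np.random.randn(attention_dim)
u = np.random.randn(attention_dim, 1)

uit = np.tanh(x @ W + b)                       # (batch, time, attention_dim)
ait = np.exp(np.squeeze(uit @ u, -1))          # (batch, time)
ait /= ait.sum(axis=1, keepdims=True) + 1e-7   # normalise over the time axis
output = (x * ait[..., None]).sum(axis=1)      # weighted sum over time

print(output.shape)  # (4, 200) -- the time axis is gone, features remain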
and
class LLayer(Layer):
"""
Co-attention layer which accepts content and comment states and computes co-attention between them and returns the
the weighted sum of the content and the comment states
"""
def __init__(self, **kwargs):
self.init = initializers.get('normal')
self.latent_dim = 200
self.k = 80
super(LLayer, self).__init__(**kwargs)
def build(self, input_shape, mask=None):
self.Wl = K.variable(self.init((self.latent_dim, self.latent_dim)))
self.Wc = K.variable(self.init((self.k, self.latent_dim)))
self.Ws = K.variable(self.init((self.k, self.latent_dim)))
self.whs = K.variable(self.init((1, self.k)))
self.whc = K.variable(self.init((1, self.k)))
self._trainable_weights = [self.Wl, self.Wc, self.Ws, self.whs, self.whc]
def compute_mask(self, inputs, mask=None):
return mask
def call(self, x, mask=None):
comment_rep = x[0]
sentence_rep = x[1]
sentence_rep_trans = K.permute_dimensions(sentence_rep, (0, 2, 1))
comment_rep_trans = K.permute_dimensions(comment_rep, (0, 2, 1))
L = K.tanh(tf.einsum('btd,dD,bDn->btn', comment_rep, self.Wl, sentence_rep_trans))
L_trans = K.permute_dimensions(L, (0, 2, 1))
Hs = K.tanh(tf.einsum('kd,bdn->bkn', self.Ws, sentence_rep_trans) + tf.einsum('kd,bdt,btn->bkn', self.Wc,
comment_rep_trans, L))
Hc = K.tanh(tf.einsum('kd,bdt->bkt', self.Wc, comment_rep_trans) + tf.einsum('kd,bdn,bnt->bkt', self.Ws,
sentence_rep_trans, L_trans))
As = K.softmax(tf.einsum('yk,bkn->bn', self.whs, Hs))
Ac = K.softmax(tf.einsum('yk,bkt->bt', self.whc, Hc))
co_s = tf.einsum('bdn,bn->bd', sentence_rep_trans, As)
co_c = tf.einsum('bdt,bt->bd', comment_rep_trans, Ac)
co_sc = K.concatenate([co_s, co_c], axis=1)
return co_sc
def compute_output_shape(self, input_shape):
return input_shape[0][0], self.latent_dim + self.latent_dim
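Again purely for my own understanding, here is a NumPy stand-in (mine, with the softmax omitted since only the shapes matter) tracing the einsums with latent_dim=200, k=80, 150 comments and 50 sentences per article:
import numpy as np

b, t, n, d, k = 4, 150, 50, 200, 80         # batch, comments, sentences, latent_dim, k
comment_rep = np.random.randn(b, t, d)      # output of the comment encoder
sentence_rep = np.random.randn(b, n, d)     # output of the sentence-level BiGRU
Wl, Wc, Ws = np.random.randn(d, d), np.random.randn(k, d), np.random.randn(k, d)
whs, whc = np.random.randn(1, k), np.random.randn(1, k)

s_T, c_T = sentence_rep.transpose(0, 2, 1), comment_rep.transpose(0, 2, 1)
L = np.tanh(np.einsum('btd,dD,bDn->btn', comment_rep, Wl, s_T))               # (b, t, n)
Hs = np.tanh(np.einsum('kd,bdn->bkn', Ws, s_T)
             + np.einsum('kd,bdt,btn->bkn', Wc, c_T, L))                      # (b, k, n)
Hc = np.tanh(np.einsum('kd,bdt->bkt', Wc, c_T)
             + np.einsum('kd,bdn,bnt->bkt', Ws, s_T, L.transpose(0, 2, 1)))   # (b, k, t)
As = np.einsum('yk,bkn->bn', whs, Hs)                                         # (b, n) attention over sentences
Ac = np.einsum('yk,bkt->bt', whc, Hc)                                         # (b, t) attention over comments
co_s = np.einsum('bdn,bn->bd', s_T, As)                                       # (b, d)
co_c = np.einsum('bdt,bt->bd', c_T, Ac)                                       # (b, d)
print(np.concatenate([co_s, co_c], axis=1).shape)  # (4, 400) = latent_dim + latent_dim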
The last bit of code that I think is important is a custom callback:
class Metrics(Callback):
def __init__(self, platform):
self.log_file = open('./Log_Defend_' + platform + '.txt', 'a')
def on_train_begin(self, logs={}):
self.val_f1s = []
self.val_recalls = []
self.val_precisions = []
self.val_auc = []
self.val_acc = []
def on_epoch_end(self, epoch, logs={}):
val_predict_onehot = (
np.asarray(self.model.predict([self.validation_data[0], self.validation_data[1]]))).round()
val_targ_onehot = self.validation_data[2]
val_predict = np.argmax(val_predict_onehot, axis=1)
val_targ = np.argmax(val_targ_onehot, axis=1)
_val_f1 = f1_score(val_targ, val_predict)
_val_recall = recall_score(val_targ, val_predict)
_val_precision = precision_score(val_targ, val_predict)
_val_auc = roc_auc_score(val_targ, val_predict)
_val_acc = accuracy_score(val_targ, val_predict)
self.val_f1s.append(_val_f1)
self.val_recalls.append(_val_recall)
self.val_precisions.append(_val_precision)
self.val_auc.append(_val_auc)
self.val_acc.append(_val_acc)
print("Epoch: %d - val_accuracy: % f - val_precision: % f - val_recall % f val_f1: %f auc: %f" % (
epoch, _val_acc, _val_precision, _val_recall, _val_f1, _val_auc))
self.log_file.write(
"Epoch: %d - val_accuracy: % f - val_precision: % f - val_recall % f val_f1: %f auc: %f\n" % (epoch,
_val_acc,
_val_precision,
_val_recall,
_val_f1,
_val_auc))
return
As best I could figure out, the model uses GloVe word embeddings with dim=100, and the shapes of the arrays passed to model.fit are the following:
(948, 150, 120) (948, 50, 120) (948, 2) (411, 150, 120) (411, 50, 120) (411, 2)
where 120 is the maximum number of words in a sentence (missing words are padded with 0), 150 is the number of tweets per article, 50 is the number of sentences per article, and 2 is the one-hot target pair for the dense layer with two output nodes (I am not exactly sure why the authors use two output nodes, but I didn't want to mess with their code too much, so I adjusted the data to fit).
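To document the shapes concretely, this is what the arrays passed to fit look like (dummy zero arrays here, just for the shapes; the real ones come from _encode_comments / _encode_texts and one-hot labels):
import numpy as np

encoded_train_c = np.zeros((948, 150, 120), dtype='int32')  # 150 tweets x 120 words per article
encoded_train_x = np.zeros((948, 50, 120), dtype='int32')   # 50 sentences x 120 words per article
train_y = np.zeros((948, 2), dtype='float32')               # one-hot labels, 2 classes
encoded_val_c = np.zeros((411, 150, 120), dtype='int32')
encoded_val_x = np.zeros((411, 50, 120), dtype='int32')
val_y = np.zeros((411, 2), dtype='float32')

# model.fit([encoded_train_c, encoded_train_x], y=train_y,
#           validation_data=([encoded_val_c, encoded_val_x], val_y),
#           batch_size=20, epochs=10)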
The model should look something like the plot_model diagrams generated above (the full model in CHATT.png; the images themselves are not included here). One TimeDistributed wrapper contains the sentence-level encoder (SentenceEncoder.png) and the other contains the comment word-level encoder (CommentEncoder.png).
Now we can get to the issues:
When I tried running the code on Python 3.6 with the dependencies listed in the model's requirements.txt, i.e. on TensorFlow 1.13, the model seemed to build correctly, and I got the following error during fit:
WARNING:tensorflow:From C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Train on 948 samples, validate on 411 samples
Epoch 1/10
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master/defend_test.py", line 42, in <module>
saved_model_filename='covid')
File "C:\Users\vikio\PycharmProjects\Bakalarka\dEFEND-web-master\defend.py", line 420, in train
callbacks=callbacks)
File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\engine\training.py", line 1039, in fit
validation_steps=validation_steps)
File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\engine\training_arrays.py", line 199, in fit_loop
outs = f(ins_batch)
File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
return self._call(inputs)
File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
fetched = self._callable_fn(*array_vals)
File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\tensorflow\python\client\session.py", line 1439, in __call__
run_metadata_ptr)
File "C:\Users\vikio\PycharmProjects\Bakalarka\venv36\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 528, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Inputs to operation sentence_lstm/while_1/Select_1 of type Select must have the same size and shape. Input 0: [20,12000] != input 1: [20,100]
[[{{node sentence_lstm/while_1/Select_1}}]]
As best I can figure out, the issue seems to be caused by the custom AttLayer, but I honestly have no idea.
I also tried running the code on Python 3.8 and TensorFlow 2, where I ran into further issues; the one below also seems to relate to the same custom layer:
Traceback (most recent call last):
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 1853, in _create_c_op
c_op = pywrap_tf_session.TF_FinishOperation(op_desc)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dimensions must be equal, but are 120 and 100 for '{{node while/SelectV2}} = SelectV2[T=DT_FLOAT](while/Tile, while/add_3, while/SelectV2/zeros_like)' with input shapes: [?,120], [?,100], [?,100].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\IPython\core\interactiveshell.py", line 3437, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-7e73ed9a0d40>", line 1, in <module>
runfile('C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master/defend_test.py', wdir='C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master')
File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Users\vikio\AppData\Local\JetBrains\Toolbox\apps\PyCharm-P\ch-0\211.7142.13\plugins\python\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/vikio/PycharmProjects/Bakalarka/dEFEND-web-master/defend_test.py", line 40, in <module>
h.train(x_train, y_train, t_train,
File "C:\Users\vikio\PycharmProjects\Bakalarka\dEFEND-web-master\defend.py", line 377, in train
self.model = self._build_model(
File "C:\Users\vikio\PycharmProjects\Bakalarka\dEFEND-web-master\defend.py", line 234, in _build_model
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 539, in __call__
return super(Bidirectional, self).__call__(inputs, **kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 951, in __call__
return self._functional_construction_call(inputs, args, kwargs,
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1090, in _functional_construction_call
outputs = self._keras_tensor_symbolic_call(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 822, in _keras_tensor_symbolic_call
return self._infer_output_signature(inputs, args, kwargs, input_masks)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 863, in _infer_output_signature
outputs = call_fn(inputs, *args, **kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\wrappers.py", line 652, in call
y = self.forward_layer(forward_inputs,
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 660, in __call__
return super(RNN, self).__call__(inputs, **kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 1012, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 470, in call
last_output, outputs, runtime, states = self._defun_gru_call(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 546, in _defun_gru_call
last_output, outputs, new_h, runtime = gru_with_backend_selection(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 841, in gru_with_backend_selection
last_output, outputs, new_h, runtime = defun_standard_gru(**params)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 2941, in __call__
filtered_flat_args) = self._maybe_define_function(args, kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\eager\function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\layers\recurrent_v2.py", line 621, in standard_gru
last_output, outputs, new_states = K.rnn(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4459, in rnn
final_outputs = control_flow_ops.while_loop(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2687, in while_loop
return while_v2.while_loop(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\while_v2.py", line 192, in while_loop
body_graph = func_graph_module.func_graph_from_py_func(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\while_v2.py", line 178, in wrapped_body
outputs = body(*_pack_sequence_as(orig_loop_vars, args))
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4440, in _step
flat_new_output = compute_masked_output(mask_t, flat_output,
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4391, in compute_masked_output
return tuple(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\backend.py", line 4392, in <genexpr>
array_ops.where_v2(m, o, fm)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\util\dispatch.py", line 201, in wrapper
return target(*args, **kwargs)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\array_ops.py", line 4598, in where_v2
return gen_math_ops.select_v2(condition=condition, t=x, e=y, name=name)
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 8742, in select_v2
_, _, _op, _outputs = _op_def_library._apply_op_helper(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 748, in _apply_op_helper
op = g._create_op_internal(op_type_name, inputs, dtypes=None,
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\func_graph.py", line 590, in _create_op_internal
return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 3528, in _create_op_internal
ret = Operation(
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 2015, in __init__
self._c_op = _create_c_op(self._graph, node_def, inputs,
File "C:\Users\vikio\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 1856, in _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 120 and 100 for '{{node while/SelectV2}} = SelectV2[T=DT_FLOAT](while/Tile, while/add_3, while/SelectV2/zeros_like)' with input shapes: [?,120], [?,100], [?,100].
It fails on this line of code:
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True), name='sentence_lstm')(content_encoder)
so I believe it is also caused by the previous layer, which is a TimeDistributed wrapper around the sub-model ending with the custom AttLayer; my rough guess at the shape arithmetic is below.
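Here is my rough, unverified guess at how the shapes in both errors could come about, assuming the word-level mask from the Embedding (mask_zero=True) is passed through AttLayer.compute_mask unchanged:
# Per sentence (word level):
#   embedded words:            (120, 100)  with mask (120,) from mask_zero=True
#   word_lstm (BiGRU, 100):    (120, 200)
#   word_attention (AttLayer): (200,)      but compute_mask still returns the (120,) mask
# After TimeDistributed over the 50 sentences of an article:
#   data: (batch, 50, 200)
#   mask: (batch, 50, 120)  instead of (batch, 50)
# sentence_lstm (GRU(100)) then gets a 120-long mask row per step while its
# per-step output is 100-dim: "[?,120] vs [?,100]" in the TF2 error; in TF1
# the mask is tiled across the 100 units, 120 * 100 = 12000, which would
# explain "[20,12000] != [20,100]" with batch_size=20.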
Any help is appreciated!
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow