Target Data Missing from TensorFlow fit()
I have a problem training a deep learning model with BERT in TensorFlow on a text dataset. I want to fit() the model, but I get an error during training. I think it happens because data_train doesn't have the labels. From my research, this looks like the same problem as the SO question here: Same problem. Since it didn't get an answer, is this a bug? The error looks like this:
ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.CategoricalCrossentropy object at 0x7fa707d96fd0>, and therefore expects target data to be provided in `fit()`.
My code looks like this:
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from transformers import TFBertModel

# df (with 'text' and 'label' columns) and tokenizer are defined earlier in my script
X_input_ids = np.zeros((len(df), 256))
X_attn_masks = np.zeros((len(df), 256))
def generate_training_data(df, ids, masks, tokenizer):
    for i, text in tqdm(enumerate(df['text'])):
        tokenized_text = tokenizer.encode_plus(
            text,
            max_length=256,
            truncation=True,
            padding='max_length',
            add_special_tokens=True,
            return_tensors='tf'
        )
        ids[i, :] = tokenized_text.input_ids
        masks[i, :] = tokenized_text.attention_mask
    return ids, masks
X_input_ids, X_attn_masks = generate_training_data(df, X_input_ids, X_attn_masks, tokenizer)
# one-hot encode the 3 sentiment classes
labels = np.zeros((len(df), 3))
labels[np.arange(len(df)), df['label'].values] = 1
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks, labels))
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    },
dataset = dataset.map(SentimentDatasetMapFunction)
dataset = dataset.shuffle(2000).batch(6, drop_remainder=True)
p = 0.8
train_size = int((len(df)//16)*p)
train_dataset = dataset.take(train_size)
val_dataset = dataset.skip(train_size)
model = TFBertModel.from_pretrained('cahya/bert-base-indonesian-522M')
input_ids = tf.keras.layers.Input(shape=(256,), name='input_ids', dtype='int32')
attn_masks = tf.keras.layers.Input(shape=(256,), name='attention_mask', dtype='int32')
bert_embds = model.bert(input_ids, attention_mask=attn_masks)[1]
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(bert_embds)
output_layer = tf.keras.layers.Dense(3, activation='softmax', name='output_layer')(intermediate_layer) # softmax -> calcs probs of classes
sentiment_model = tf.keras.Model(inputs=[input_ids, attn_masks], outputs=output_layer)
sentiment_model.summary()
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])
hist = sentiment_model.fit(
train_dataset,
validation_data=val_dataset,
epochs=2
)
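Note what the map function above returns: because of the trailing comma it yields a one-element tuple containing only the feature dictionary, so the mapped dataset carries no targets and fit() has nothing to use as y, which is exactly what the error message says. A minimal sketch of the usual fix, assuming the one-hot labels built above are the intended targets, is to return an (inputs, targets) pair instead:
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    # Keras unpacks (inputs, targets) pairs from a tf.data.Dataset,
    # so the labels must be passed through as the second element
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks
    }, labels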
Solution 1:[1]
I spent a bit of time finding something we can update, and I downloaded the model from the TensorFlow Hub website.
[ Sample ]:
import tensorflow as tf
import tensorflow_text as text # Registers the ops.
import tensorflow_hub as hub
import os
from os.path import exists
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def generate_training_data(train_labels):
    input_ids = []
    attn_masks = []
    labels = []
    for item in train_labels:
        input_ids.append(str(item))
        attn_masks.append(int(1))
        labels.append(item)
    attn_masks = tf.constant(attn_masks, shape=(1, len(attn_masks), 1), dtype=tf.float32)
    labels = tf.constant(labels, shape=(1, len(labels), 1), dtype=tf.int64)
    input_ids = tf.constant(input_ids, shape=(1, len(input_ids), 1), dtype=tf.string)
    return input_ids, attn_masks, labels
def SentimentDatasetMapFunction(input_ids, attn_masks, labels):
    return {
        'input_ids': input_ids,
        'attention_mask': attn_masks,
        'labels': labels
    },
def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.1)(net)
    net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)
    return tf.keras.Model(text_input, net)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()
X_input_ids, X_attn_masks, labels = generate_training_data(train_labels)
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, X_attn_masks))
options = tf.saved_model.LoadOptions(
    allow_partial_checkpoint=False,
    experimental_io_device="/physical_device:GPU:0",
    experimental_skip_checkpoint=True
)
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='sentences')
preprocessor = hub.KerasLayer(export_dir)  # export_dir: path to the downloaded BERT preprocessor SavedModel
encoder_inputs = preprocessor(text_input)
encoder = hub.KerasLayer(export_dir_2, trainable=False, load_options=options)  # export_dir_2: path to the downloaded encoder
outputs = encoder(encoder_inputs)
intermediate_layer = tf.keras.layers.Dense(512, activation='relu', name='intermediate_layer')(outputs['default'])
output_layer = tf.keras.layers.Dense(1, activation='softmax', name='output_layer')(intermediate_layer)
sentiment_model = tf.keras.Model(inputs=[text_input], outputs=output_layer)
sentiment_model.summary()
optim = tf.keras.optimizers.Adam(learning_rate=1e-5, decay=1e-6)
loss_func = tf.keras.losses.CategoricalCrossentropy()
acc = tf.keras.metrics.CategoricalAccuracy('accuracy')
sentiment_model.compile(optimizer=optim, loss=loss_func, metrics=[acc])
hist = sentiment_model.fit(
dataset,
validation_data=dataset,
epochs=2
)
[ Output ]:
outputs: KerasTensor(type_spec=TensorSpec(shape=(None, 512), dtype=tf.float32, name=None), name='keras_layer_1/StatefulPartitionedCall:0', description="created by layer 'keras_layer_1'")
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
sentences (InputLayer) [(None,)] 0 []
keras_layer (KerasLayer) {'input_mask': (Non 0 ['sentences[0][0]']
e, 128),
'input_word_ids':
(None, 128),
'input_type_ids':
(None, 128)}
keras_layer_1 (KerasLayer) {'default': (None, 28763649 ['keras_layer[0][0]',
512), 'keras_layer[0][1]',
'encoder_outputs': 'keras_layer[0][2]']
[(None, 128, 512),
(None, 128, 512),
(None, 128, 512),
(None, 128, 512)],
'sequence_output':
(None, 128, 512),
'pooled_output': (
None, 512)}
intermediate_layer (Dense) (None, 512) 262656 ['keras_layer_1[0][0]']
output_layer (Dense) (None, 1) 513 ['intermediate_layer[0][0]']
==================================================================================================
Total params: 29,026,818
Trainable params: 263,169
Non-trainable params: 28,763,649
__________________________________________________________________________________________________
Epoch 1/2
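Note that this sample builds the dataset from the two-tuple (X_input_ids, X_attn_masks), which tf.keras interprets as (inputs, targets), so fit() no longer complains about missing target data. Assuming you want the labels returned by generate_training_data() as the actual targets, a minimal sketch would pair the inputs with them instead:
# pair each input tensor with its label so fit() receives target data
dataset = tf.data.Dataset.from_tensor_slices((X_input_ids, labels))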
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Martijn Pieters |