Weighted categorical cross entropy
I'm trying to build an NLP classifier on top of BERT, but I'm struggling with data imbalance. I'm looking for an implementation of weighted categorical cross entropy. I've already seen a solution using the `class_weight` parameter of the `fit` function, but it doesn't work well with my data: I've one-hot encoded the labels, and it throws an error because the dict keys don't match. Can someone please give me a from-scratch implementation of a `WeightedCategoricalCrossEntropy` function that lets me add weights manually to TensorFlow's native `CategoricalCrossentropy`?
Solution 1
The `__call__` method of `tf.losses.CategoricalCrossentropy` accepts three arguments:

- `y_true`
- `y_pred`
- `sample_weight`

The `sample_weight` argument acts as a coefficient for the loss. If a scalar is provided, the loss is simply scaled by the given value. If `sample_weight` is a tensor of size `[batch_size]`, the loss for each sample of the batch is rescaled by the corresponding element of the `sample_weight` vector. You can use it as such:
```python
def compute_loss(model, x, y, training):
    out = model(x, training=training)
    # Random per-sample weights, for demonstration only; `loss_object`
    # is a tf.losses.CategoricalCrossentropy instance (see the full
    # example below).
    sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32)
    loss = loss_object(y_true=y, y_pred=out,
                       sample_weight=sample_weight)
    return loss
```
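For intuition, here is a quick, self-contained check of the two `sample_weight` forms described above (the labels and predictions are arbitrary placeholder values):

```python
import tensorflow as tf

cce = tf.losses.CategoricalCrossentropy()
y_true = tf.constant([[0., 1.], [1., 0.]])
y_pred = tf.constant([[0.2, 0.8], [0.6, 0.4]])

base = cce(y_true, y_pred)                       # unweighted mean loss
scaled = cce(y_true, y_pred, sample_weight=2.0)  # scalar: mean loss * 2
per_sample = cce(y_true, y_pred,
                 sample_weight=tf.constant([1.0, 3.0]))  # rescales each sample

print(base.numpy(), scaled.numpy(), per_sample.numpy())
```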
The weights in `compute_loss` above are random values, but you can change them depending on `y` so that they become class weights rather than sample weights; a sketch of that mapping is shown next.
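A minimal sketch of deriving per-sample weights from one-hot labels, assuming a hypothetical `class_weights` vector with one weight per class (the values below are placeholders; in practice you would compute them from your class frequencies, e.g. inverse frequency):

```python
import tensorflow as tf

# Hypothetical per-class weights (placeholder values), one entry per class.
class_weights = tf.constant([1.0, 2.0, 1.0, 1.0, 1.5,
                             1.0, 1.0, 1.0, 3.0, 1.0])

def class_to_sample_weight(y_true_one_hot):
    # For one-hot labels, a matrix-vector product picks out the weight of
    # each sample's true class, yielding a [batch_size] weight vector.
    return tf.linalg.matvec(tf.cast(y_true_one_hot, tf.float32),
                            class_weights)
```

Passing `class_to_sample_weight(y)` as `sample_weight` then weights every sample by the weight of its true class. Here's a full example of a running training loop with custom sample weights: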
```python
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial

(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

def prepare(inputs, outputs):
    # Scale pixels to [0, 1], add a channel axis, one-hot encode labels.
    inputs = tf.divide(x=inputs, y=255)
    inputs = tf.expand_dims(inputs, -1)
    targets = tf.one_hot(indices=outputs, depth=10)
    return inputs, targets

train = train.take(5_000).batch(4).map(prepare)
test = test.take(1_000).batch(4).map(prepare)

class MyCNN(K.Model):
    def __init__(self):
        super(MyCNN, self).__init__()
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))

        self.conv1 = Conv(filters=8)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=16)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=32)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, x, training=None, **kwargs):
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens2(x)
        return x

model = MyCNN()

loss_object = tf.losses.CategoricalCrossentropy()

def compute_loss(model, x, y, training):
    out = model(x, training=training)
    # Random per-sample weights for demonstration; swap in weights derived
    # from y (e.g. the class_to_sample_weight sketch above) to turn this
    # into class weighting.
    sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                      minval=0,
                                      maxval=1,
                                      dtype=tf.float32)
    loss = loss_object(y_true=y, y_pred=out, sample_weight=sample_weight)
    return loss

def get_grad(model, x, y, training):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=training)
    return loss, tape.gradient(loss, model.trainable_variables)

optimizer = tf.optimizers.Adam()

verbose = "Epoch {:2d}" \
          " Loss: {:.3f} TLoss: {:.3f} Acc: {:.3%} TAcc: {:.3%}"

for epoch in range(1, 10 + 1):
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        # Compute gradients with training=True so dropout is active.
        loss_value, grads = get_grad(model, x, y, training=True)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=False))

    for x, y in test:
        loss_value, _ = get_grad(model, x, y, training=False)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False))

    # Loss/Acc are train metrics, TLoss/TAcc are test metrics.
    print(verbose.format(epoch,
                         train_loss.result(),
                         test_loss.result(),
                         train_acc.result(),
                         test_acc.result()))
```
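Finally, to match what the question literally asked for, the same per-class weighting can be packaged as a standalone loss. This subclass is a minimal sketch and not part of the original answer; it assumes `y_true` is one-hot encoded and `class_weights` has one entry per class:

```python
import tensorflow as tf

class WeightedCategoricalCrossentropy(tf.keras.losses.Loss):
    """Categorical cross entropy scaled by a fixed per-class weight vector.

    Assumes y_true is one-hot encoded and class_weights is a
    [num_classes] array.
    """
    def __init__(self, class_weights, **kwargs):
        super().__init__(**kwargs)
        self.class_weights = tf.constant(class_weights, dtype=tf.float32)

    def call(self, y_true, y_pred):
        # Unweighted per-sample cross entropy, shape [batch_size].
        ce = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
        # Weight of each sample's true class, picked out via the
        # one-hot encoding.
        weights = tf.linalg.matvec(tf.cast(y_true, tf.float32),
                                   self.class_weights)
        return ce * weights
```

An instance such as `WeightedCategoricalCrossentropy(class_weights=[...])` can then replace `loss_object` in the loop above, or be passed as `loss=` to `model.compile`; the base `Loss` class handles the batch reduction.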
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | M.Innat |