'Weighted categorical cross entropy

please I'm trying to build an NLP classifier on top of BERT but I'm struggling with data imbalance. I'm looking for an implementation of weighted CategoricalCrossEntropy. I've already seen a solution using class_weight parameter on fit function but it doesn't "fit" well with my data (I've one hot encoded them and it actually throws an error cause dict element are not matching.

Can someone please give me an implementation from scratch of a WeightedCategoricalCrossEntropy function allowing me me to add weights manually to Tensorflow's native CategoricalCrossEntropy.



Solution 1:[1]

The __call__ method of tf.losses.CategoricalCrossentropy accepts three arguments:

y_pred
y_true
sample_weights

And the sample_weight acts as a coefficient for the loss. If a scalar is provided, then the loss is simply scaled by the given value. If sample_weight is a tensor of size [batch_size], then the total loss for each sample of the batch is rescaled by the corresponding element in the sample_weight vector. You can use it as such:

def compute_loss(model, x, y, training):
  out = model(inputs=x, training=training)
  sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                    minval=0,
                                    maxval=1,
                                    dtype=tf.float32)
  loss = loss_object(y_true=y, y_pred=out,
                    sample_weight=sample_weight)
  return loss

These are random values but you can change the values depending on y so it becomes a class weight rather than a sample weight. Here's a full example of a running training loop with custom sample weights:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout
from tensorflow import nn as nn
from functools import partial

(xtrain, ytrain), (xtest, ytest) = tf.keras.datasets.mnist.load_data()

train = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
test = tf.data.Dataset.from_tensor_slices((xtest, ytest))

def prepare(inputs, outputs):
    inputs = tf.divide(x=inputs, y=255)
    inputs = tf.expand_dims(inputs, -1)
    targets = tf.one_hot(indices=outputs, depth=10)
    return inputs, targets

train = train.take(5_000).batch(4).map(prepare)
test = test.take(1_000).batch(4).map(prepare)

class MyCNN(K.Model):
    def __init__(self):
        super(MyCNN, self).__init__()
        Conv = partial(Conv2D, kernel_size=(3, 3), activation=nn.relu)
        MaxPool = partial(MaxPooling2D, pool_size=(2, 2))

        self.conv1 = Conv(filters=8)
        self.maxp1 = MaxPool()
        self.conv2 = Conv(filters=16)
        self.maxp2 = MaxPool()
        self.conv3 = Conv(filters=32)
        self.maxp3 = MaxPool()
        self.flatt = Flatten()
        self.dens1 = Dense(64, activation=nn.relu)
        self.drop1 = Dropout(.5)
        self.dens2 = Dense(10, activation=nn.softmax)

    def call(self, x, training=None, **kwargs):
        x = self.conv1(x)
        x = self.maxp1(x)
        x = self.conv2(x)
        x = self.maxp2(x)
        x = self.conv3(x)
        x = self.maxp3(x)
        x = self.flatt(x)
        x = self.dens1(x)
        x = self.drop1(x)
        x = self.dens2(x)
        return x

model = MyCNN()

loss_object = tf.losses.CategoricalCrossentropy()

def compute_loss(model, x, y, training):
  out = model(inputs=x, training=training)
  sample_weight = tf.random.uniform((tf.shape(x)[0], 1),
                                    minval=0,
                                    maxval=1,
                                    dtype=tf.float32)
  loss = loss_object(y_true=y, y_pred=out, sample_weight=sample_weight)
  return loss

def get_grad(model, x, y):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x, y, training=False)
    return loss, tape.gradient(loss, model.trainable_variables)

optimizer = tf.optimizers.Adam()

verbose = "Epoch {:2d}" \
          " Loss: {:.3f} TLoss: {:.3f} Acc: {:.3%} TAcc: {:.3%}"

for epoch in range(1, 10 + 1):
    train_loss = tf.metrics.Mean()
    train_acc = tf.metrics.CategoricalAccuracy()
    test_loss = tf.metrics.Mean()
    test_acc = tf.metrics.CategoricalAccuracy()

    for x, y in train:
        loss_value, grads = get_grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(y, model(x, training=True))

    for x, y in test:
        loss_value, _ = get_grad(model, x, y)
        test_loss.update_state(loss_value)
        test_acc.update_state(y, model(x, training=False))

    print(verbose.format(epoch,
                         train_loss.result(),
                         train_acc.result(),
                         test_loss.result(),
                         test_acc.result()))

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 M.Innat