RoBERTa classifier: cannot generate single prediction

I have successfully trained a text emotion classifier by fine-tuning a RoBERTa language model, mostly following a helpful notebook I found online. Now I am trying to write a function that generates a prediction for a single sample (one sentence), but I can't get it to work.

My model class is like this:

import torch.nn as nn

class ClassificationModel(nn.Module):
    def __init__(self, base_model, n_classes, base_model_output_size=768, dropout=0.05):
        super().__init__()
        self.base_model = base_model
        
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(base_model_output_size, base_model_output_size),
            Mish(),
            nn.Dropout(dropout),
            nn.Linear(base_model_output_size, n_classes)
        )
        
        for layer in self.classifier:
            if isinstance(layer, nn.Linear):
                layer.weight.data.normal_(mean=0.0, std=0.02)
                if layer.bias is not None:
                    layer.bias.data.zero_()

    def forward(self, input_, *args):
        X, attention_mask = input_
        hidden_states = self.base_model(X, attention_mask=attention_mask)
        
        # classify from the hidden state of the first (<s>) token
        return self.classifier(hidden_states[0][:, 0, :])
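
The Mish activation is defined elsewhere in the notebook I followed; a minimal version of roughly what I use (the standard x * tanh(softplus(x)) formulation) is:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Mish(nn.Module):
    # standard Mish activation: x * tanh(softplus(x))
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))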

And once I create an instance of this class, I can successfully load the state dictionary of my fine-tuned weights, roughly like this (the base model name, number of classes, and checkpoint path below are placeholders):
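
import torch
from transformers import RobertaModel

base_model = RobertaModel.from_pretrained("roberta-base")
model = ClassificationModel(base_model, n_classes=6)  # placeholder class count
model.load_state_dict(torch.load("fine_tuned_weights.pt", map_location="cpu"))
model.eval()

But my function for classifying a single sample won't work: the line that computes the output raises an error about the wrong number of values to unpack.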

import re
import torch
from tokenizers import ByteLevelBPETokenizer
from tokenizers.processors import BertProcessing

def get_label(text):
    # strip punctuation and lowercase the input
    text = re.sub(r'[^\w\s]', '', text)
    text = text.lower()

    # load the ByteLevelBPE tokenizer from the saved vocab and merges files
    t = ByteLevelBPETokenizer(
        "tokenizer/vocab.json",
        "tokenizer/merges.txt"
    )
    t._tokenizer.post_processor = BertProcessing(
        ("</s>", t.token_to_id("</s>")),
        ("<s>", t.token_to_id("<s>")),
    )
    t.enable_truncation(512)
    t.enable_padding(pad_id=t.token_to_id("<pad>"))
    tokenizer = t

    encoded = tokenizer.encode(text)
    sequence_padded = torch.tensor(encoded.ids)
    attention_mask_padded = torch.tensor(encoded.attention_mask)

    output = model((sequence_padded, attention_mask_padded)) #this does not work

    dec = [tokenizer.decode(ids) for ids in output] #not sure if this will work
    label = dec[0]
    return label

My impression is that the forward function of the model takes the (tokenized) input and the attention mask, but when I try to call the function

get_label('this is text to classify')

I get the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-34-9a58782b060e> in <module>()
----> 1 get_label('this is text to classify')

4 frames
<ipython-input-33-bc7c6292a9bf> in get_label(text)
     23 
     24 
---> 25   output = model((sequence_padded, attention_mask_padded)) #this does not work
     26 
     27   dec = [tokenizer.decode(ids) for ids in output] #not sure if this will work

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

<ipython-input-8-275361c034f8> in forward(self, input_, *args)
     22     def forward(self, input_, *args):
     23         X, attention_mask = input_
---> 24         hidden_states = self.base_model(X, attention_mask=attention_mask)
     25 
     26         return self.classifier(hidden_states[0][:, 0, :])

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
   1049         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1050                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051             return forward_call(*input, **kwargs)
   1052         # Do not call functions when jit is used
   1053         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/transformers/models/roberta/modeling_roberta.py in forward(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
    790         elif input_ids is not None:
    791             input_shape = input_ids.size()
--> 792             batch_size, seq_length = input_shape
    793         elif inputs_embeds is not None:
    794             input_shape = inputs_embeds.size()[:-1]

ValueError: not enough values to unpack (expected 2, got 1)

I am at a loss as to what I should do. It seems that I am not feeding the input to the model's forward function in the form it expects. I am just starting out, so any help would be greatly appreciated. Thanks!


