'Runtime Error - element 0 of tensors does not require grad and does not have a grad_fn

I am using a Unet model for semantic segmentation - I have a custom dataset of images and their masks both in .png format. I have looked in the online forums and tried stuff, but not much works? Any suggestions in how to resolve the error or improve the code would be helpful.

with torch.no_grad():
    for xb, yb in val_dl:
        yb_pred = model(xb.to(device))
        # yb_pred = yb_pred["out"].cpu()
        yb_pred = torch.argmax(yb_pred,axis = 1)     

criteron = nn.CrossEntropyLoss(reduction = 'sum')
opt = optim.Adam(model.parameters(), lr = 3e-4)

def loss_batch(loss_func, output, target, opt = None):
    loss = loss_func(output, target)
    if opt is not None:
    return loss.item(), None

lr_scheduler = ReduceLROnPlateau(opt, mode = 'min', factor = 0.5, patience= 20, verbose = 1)

def get_lr(opt):
    for param_group in opt.param_groups:
        return param_group['lr']
current_lr = get_lr(opt)
print('current_lr = {}'.format(current_lr))

def loss_epoch(model, loss_func, dataset_dl, sanity_check = False, opt = None):
    running_loss = 0.0
    len_data = len(dataset_dl.dataset)
    for xb, yb in dataset_dl:
        xb = xb.to(device)
        yb = yb.to(device)
        # xb = torch.tensor(xbh, requires_grad=True)
        output = model(xb)
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
        running_loss += loss_b
        if sanity_check is True:
    loss = running_loss/float(len_data) 
    return loss, None

def train_val(model, params):
    num_epochs = params["num_epochs"]
    loss_func = params["loss_func"]
    opt = params["optimizer"]
    train_dl = params["train_dl"]
    val_dl = params["val_dl"]
    sanity_check = params["sanity_check"]
    lr_scheduler = params["lr_scheduler"]
    path2weights = params["path2weights"]
    loss_history = {"train": [],
                    "val": []}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')
    for epoch in range(num_epochs):
        current_lr = get_lr(opt)
        print('Epoch {}/{}, current_lr = {}'.format(epoch, num_epochs - 1, current_lr))
        with torch.enable_grad():
            train_loss, _ = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
        with torch.no_grad():
            val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print("copied best model weights!!")
        if current_lr != get_lr(opt):
            print("Loading best model weights!!")
            print("train Loss: %.6f" %(train_loss))
            print("val_loss: %.6f" %(val_loss))
        return model, loss_history, metric_history

path2models = "./models/"
if not os.path.exists(path2models):
param_train = {
    "num_epochs": 10,
    "loss_func": criteron,
    "optimizer": opt,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": False,
    "lr_scheduler": lr_scheduler,
    "path2weights": path2models + "weights.pt"
model, loss_hist, _ = train_val(model, param_train)

The error message looks like - File "", line 10, in model, loss_hist, _ = train_val(model, param_train)

File "", line 27, in train_val val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)

File "", line 13, in loss_epoch loss_b, metric_b = loss_batch(loss_func, output, yb, opt)

File "", line 6, in loss_batch loss.backward()

File "C:\Users\W540\anaconda3\lib\site-packages\torch\tensor.py", line 198, in backward torch.autograd.backward(self, gradient, retain_graph, create_graph)

File "C:\Users\W540\anaconda3\lib\site-packages\torch\autograd_init_.py", line 100, in backward allow_unreachable=True) # allow_unreachable flag

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

I am not sure which variable to set as require_grad = True or where I should enable grad...

Solution 1:[1]

You can try this before loss.backward():

loss = Variable(loss, requires_grad = True)

Or, because the Variable has been removed from PyTorch (still exists but deprecated), you can do the same thing simply by using following code:

loss.requires_grad = True

Solution 2:[2]

For me calling .retain_grad() before the .backward() solved the issue as stated here

Solution 3:[3]

I got this error from passing the input instead of the output to the loss function.

output = model(input)
loss = loss_fn(input, target)

The correct code is

loss = loss_fn(output, target)


