Runtime Error - element 0 of tensors does not require grad and does not have a grad_fn
I am using a UNet model for semantic segmentation. I have a custom dataset of images and their masks, both in .png format. I have looked through the online forums and tried several suggestions, but nothing has worked so far. Any suggestions on how to resolve the error, or on how to improve the code, would be helpful.
```python
model.eval()
with torch.no_grad():
    for xb, yb in val_dl:
        yb_pred = model(xb.to(device))
        # yb_pred = yb_pred["out"].cpu()
        print(yb_pred.shape)
        yb_pred = torch.argmax(yb_pred, axis=1)
        break
print(yb_pred.shape)

criteron = nn.CrossEntropyLoss(reduction='sum')
opt = optim.Adam(model.parameters(), lr=3e-4)

def loss_batch(loss_func, output, target, opt=None):
    loss = loss_func(output, target)
    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()
    return loss.item(), None

lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=20, verbose=1)

def get_lr(opt):
    for param_group in opt.param_groups:
        return param_group['lr']

current_lr = get_lr(opt)
print('current_lr = {}'.format(current_lr))

def loss_epoch(model, loss_func, dataset_dl, sanity_check=False, opt=None):
    running_loss = 0.0
    len_data = len(dataset_dl.dataset)
    for xb, yb in dataset_dl:
        xb = xb.to(device)
        yb = yb.to(device)
        # xb = torch.tensor(xb, requires_grad=True)
        output = model(xb)
        loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
        running_loss += loss_b
        if sanity_check is True:
            break
    loss = running_loss / float(len_data)
    return loss, None

def train_val(model, params):
    num_epochs = params["num_epochs"]
    loss_func = params["loss_func"]
    opt = params["optimizer"]
    train_dl = params["train_dl"]
    val_dl = params["val_dl"]
    sanity_check = params["sanity_check"]
    lr_scheduler = params["lr_scheduler"]
    path2weights = params["path2weights"]

    loss_history = {"train": [], "val": []}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    for epoch in range(num_epochs):
        current_lr = get_lr(opt)
        print('Epoch {}/{}, current_lr = {}'.format(epoch, num_epochs - 1, current_lr))

        with torch.enable_grad():
            model.train()
            train_loss, _ = loss_epoch(model, loss_func, train_dl, sanity_check, opt)
        loss_history["train"].append(train_loss)

        model.eval()
        with torch.no_grad():
            val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)
        loss_history["val"].append(val_loss)

        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), path2weights)
            print("copied best model weights!!")

        lr_scheduler.step(val_loss)
        if current_lr != get_lr(opt):
            print("Loading best model weights!!")
            model.load_state_dict(best_model_wts)

        print("train Loss: %.6f" % (train_loss))
        print("val_loss: %.6f" % (val_loss))
        print("-" * 20)

    model.load_state_dict(best_model_wts)
    return model, loss_history, None

path2models = "./models/"
if not os.path.exists(path2models):
    os.mkdir(path2models)

param_train = {
    "num_epochs": 10,
    "loss_func": criteron,
    "optimizer": opt,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": False,
    "lr_scheduler": lr_scheduler,
    "path2weights": path2models + "weights.pt",
}

model, loss_hist, _ = train_val(model, param_train)
```
The error message looks like this:

```text
File "", line 10, in <module>
    model, loss_hist, _ = train_val(model, param_train)
File "", line 27, in train_val
    val_loss, _ = loss_epoch(model, loss_func, val_dl, sanity_check, opt)
File "", line 13, in loss_epoch
    loss_b, metric_b = loss_batch(loss_func, output, yb, opt)
File "", line 6, in loss_batch
    loss.backward()
File "C:\Users\W540\anaconda3\lib\site-packages\torch\tensor.py", line 198, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
File "C:\Users\W540\anaconda3\lib\site-packages\torch\autograd\__init__.py", line 100, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
```
I am not sure which variable to set `requires_grad = True` on, or where I should enable grad...
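For context, the same message appears whenever `backward()` is called on a loss whose forward pass was not recorded by autograd, for example because it was computed inside a `torch.no_grad()` block. A minimal sketch with a toy model and random data (illustrative only, not the code above):

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
criterion = nn.CrossEntropyLoss()
x = torch.randn(8, 4)
y = torch.randint(0, 2, (8,))

with torch.no_grad():              # the graph is not recorded here
    loss = criterion(model(x), y)

loss.backward()                    # RuntimeError: element 0 of tensors does not
                                   # require grad and does not have a grad_fn
```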
Solution 1:[1]

You can try this before `loss.backward()`:

```python
loss = Variable(loss, requires_grad=True)
```

Or, because `Variable` has been deprecated in PyTorch (it still exists, but is no longer needed), you can do the same thing simply with:

```python
loss.requires_grad = True
```
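A minimal sketch of where Solution 1's line would sit in the asker's `loss_batch` (an illustration of the placement, not a verified fix for this model):

```python
def loss_batch(loss_func, output, target, opt=None):
    loss = loss_func(output, target)
    if opt is not None:
        # Solution 1's workaround: allowed here because a loss with no grad_fn is a leaf.
        # It lets backward() run without error, but since the graph was never recorded,
        # no gradients flow back to the model parameters.
        loss.requires_grad = True
        opt.zero_grad()
        loss.backward()
        opt.step()
    return loss.item(), None
```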
Solution 2:[2]

For me, calling `.retain_grad()` before the `.backward()` solved the issue, as stated here.
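For reference, `retain_grad()` asks autograd to keep the `.grad` of a non-leaf tensor during the backward pass. A minimal sketch of that behaviour with a toy tensor (unrelated to the asker's model):

```python
import torch

x = torch.randn(3, requires_grad=True)
y = x * 2              # non-leaf tensor: its .grad is normally discarded
y.retain_grad()        # ask autograd to populate y.grad as well
y.sum().backward()
print(y.grad)          # tensor([1., 1., 1.])
print(x.grad)          # tensor([2., 2., 2.])
```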
Solution 3:[3]

I got this error from passing the input instead of the output to the loss function:

```python
output = model(input)
loss = loss_fn(input, target)   # wrong: the input has no grad_fn
```

The correct code is:

```python
loss = loss_fn(output, target)
```
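A self-contained sketch of that pattern, with a toy layer and random data standing in for the asker's model (the names `inp`, `output`, and `loss_fn` are illustrative):

```python
import torch
import torch.nn as nn

model = nn.Linear(10, 3)
loss_fn = nn.CrossEntropyLoss()
inp = torch.randn(4, 10)            # inputs typically do not require grad
target = torch.randint(0, 3, (4,))

output = model(inp)                 # output carries a grad_fn
loss = loss_fn(output, target)      # loss is connected to the model's parameters
loss.backward()                     # works

# loss_fn(inp, target).backward() would raise the RuntimeError from the question,
# because inp is not connected to any parameters that require grad.
```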
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | ouflak |
| Solution 2 | OuttaSpaceTime |
| Solution 3 | Tom Huntington |