How to modify ResNet-50 to take a 4-channel input using pre-trained weights in PyTorch?
I would like to change ResNet-50 so that it takes a 4-channel input, reusing the pre-trained weights for the RGB channels and initializing the extra channel from a normal distribution with mean 0 and standard deviation 0.01.
Here is my code:
import torch.nn as nn
import torch
from torchvision import models
from misc.layer import Conv2d, FC
import torch.nn.functional as F
from misc.utils import *
import pdb

class Res50(nn.Module):
    def __init__(self, pretrained=True):
        super(Res50, self).__init__()

        self.de_pred = nn.Sequential(Conv2d(1024, 128, 1, same_padding=True, NL='relu'),
                                     Conv2d(128, 1, 1, same_padding=True, NL='relu'))

        self._initialize_weights()

        res = models.resnet50(pretrained=pretrained)
        pretrained_weights = res.conv1.weight

        res.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
        res.conv1.weight[:, :3, :, :] = pretrained_weights
        res.conv1.weight[:, 3, :, :].data.normal_(0.0, std=0.01)

        self.frontend = nn.Sequential(
            res.conv1, res.bn1, res.relu, res.maxpool, res.layer1, res.layer2
        )

        self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 6, stride=1)
        self.own_reslayer_3.load_state_dict(res.layer3.state_dict())

    def forward(self, x):
        x = self.frontend(x)
        x = self.own_reslayer_3(x)
        x = self.de_pred(x)
        x = F.upsample(x, scale_factor=8)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.weight.data.normal_(0.0, std=0.01)
                if m.bias is not None:
                    m.bias.data.fill_(0)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.fill_(1)
                m.bias.data.fill_(0)
But it produces the following error. Does anyone have any advice?
/usr/local/lib/python3.6/dist-packages/torch/tensor.py:746: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the gradient for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations.
warnings.warn("The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "
Traceback (most recent call last):
  File "train.py", line 62, in <module>
    cc_trainer = Trainer(loading_data,cfg_data,pwd)
  File "/content/drive/My Drive/Folder/Code/trainer.py", line 28, in __init__
    self.optimizer = optim.Adam(self.net.CCN.parameters(), lr=cfg.LR, weight_decay=1e-4) #remenber was 1e-4
  File "/usr/local/lib/python3.6/dist-packages/torch/optim/adam.py", line 44, in __init__
    super(Adam, self).__init__(params, defaults)
  File "/usr/local/lib/python3.6/dist-packages/torch/optim/optimizer.py", line 51, in __init__
    self.add_param_group(param_group)
  File "/usr/local/lib/python3.6/dist-packages/torch/optim/optimizer.py", line 206, in add_param_group
    raise ValueError("can't optimize a non-leaf Tensor")
ValueError: can't optimize a non-leaf Tensor
Solution 1:[1]
ResNet expects a 3-channel input. To make it work with a 4-channel input, replace the first 2D convolution (conv1) with one that takes 4 input channels, then copy the pre-trained weights into it so the rest of the ResNet architecture is untouched.
Steps:
1. Copy the first conv layer's weight:
weight = model.conv1.weight.clone()
2. Replace conv1 with a 2D conv that accepts the 4-channel input:
model.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)  # here 4 indicates the 4-channel input
You can add ReLU and BatchNorm on top of the new conv2d; this example does not use them.
3. Copy the saved weights (from step 1) into the new conv1, inside torch.no_grad() so that autograd does not record the copies:
with torch.no_grad():
    model.conv1.weight[:, :3] = weight
    model.conv1.weight[:, 3] = model.conv1.weight[:, 0]
Done.
Sorry, I didn't modify your code directly; you can adapt these changes to it.
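For reference, here is a minimal end-to-end sketch of these steps against torchvision's resnet50. The variable names and the dummy-input check at the end are illustrative only; the fourth channel is seeded from the red channel as in step 3, but you could equally use the asker's normal initialization.
import torch
import torch.nn as nn
from torchvision import models

model = models.resnet50(pretrained=True)

# Save the pre-trained 3-channel weights before replacing the layer.
weight = model.conv1.weight.clone()

# Swap in a conv that accepts 4 input channels.
model.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)

# Copy the weights back without recording the ops in the autograd graph,
# so model.conv1.weight stays a leaf tensor the optimizer can accept.
with torch.no_grad():
    model.conv1.weight[:, :3] = weight
    model.conv1.weight[:, 3] = weight[:, 0]  # seed channel 4 from the red channel

x = torch.randn(1, 4, 224, 224)  # dummy 4-channel input
out = model(x)
print(out.shape)  # torch.Size([1, 1000])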
Solution 2:[2]
Try setting the .data of the first channels as well:
res.conv1.weight[:,:3,:,:].data[...] = pretrained_weights
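Applied to the code from the question, the weight-copying block would look roughly like this (a sketch; only the .data lines differ from the original). Writing through .data copies values without the ops being recorded by autograd, so conv1.weight remains a leaf tensor:
res = models.resnet50(pretrained=pretrained)
pretrained_weights = res.conv1.weight

res.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
# Both copies go through .data, bypassing autograd tracking.
res.conv1.weight[:, :3, :, :].data[...] = pretrained_weights
res.conv1.weight[:, 3, :, :].data.normal_(0.0, std=0.01)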
Solution 3:[3]
I think I have solved it, but I don't understand why. Would anyone be able to explain what nn.Parameter does, and why this works?
class Res50(nn.Module):
    def __init__(self, pretrained=True):
        super(Res50, self).__init__()

        self.de_pred = nn.Sequential(Conv2d(1024, 128, 1, same_padding=True, NL='relu'),
                                     Conv2d(128, 1, 1, same_padding=True, NL='relu'))

        initialize_weights(self.modules())

        res = models.resnet50(pretrained=pretrained)
        pretrained_weights = res.conv1.weight.clone()

        res.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
        res.conv1.weight[:, :3, :, :] = torch.nn.Parameter(pretrained_weights)
        res.conv1.weight[:, 3, :, :] = torch.nn.Parameter(pretrained_weights[:, 1, :, :])

        self.frontend = nn.Sequential(
            res.conv1, res.bn1, res.relu, res.maxpool, res.layer1, res.layer2
        )

        self.own_reslayer_3 = make_res_layer(Bottleneck, 256, 6, stride=1)
        self.own_reslayer_3.load_state_dict(res.layer3.state_dict())
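A rough answer to the "why", hedged since the exact autograd bookkeeping varies between PyTorch versions: an optimizer only accepts leaf tensors (tensors with no grad_fn). pretrained_weights = res.conv1.weight keeps a reference that is still attached to the old model's graph, and copying it into the new conv1 without detaching leaves autograd history on the weight, so it stops being a leaf. nn.Parameter re-wraps the values as a brand-new leaf with requires_grad=True, which is why the version above gets past the optimizer check. A tiny demonstration of the leaf/non-leaf distinction:
import torch
import torch.nn as nn

w = torch.randn(64, 3, 7, 7, requires_grad=True)
c = w.clone()                # clone is recorded by autograd, so c is non-leaf
print(c.is_leaf)             # False -> optim.Adam([c]) raises "can't optimize a non-leaf Tensor"
p = nn.Parameter(c)          # re-wraps the same values as a fresh leaf
print(p.is_leaf, p.grad_fn)  # True None
Equivalently, performing the channel copies inside with torch.no_grad(): (as in Solution 1) keeps conv1.weight a leaf without wrapping anything.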
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source
---|---
Solution 1 | Jake
Solution 2 | Shai
Solution 3 |