PyTorch CNN issue with loss not changing
I am building a CNN in PyTorch for fluid prediction generation. My input is a batch x 100 x 200 x 100 array containing level-set data, and my training target is also a batch x 100 x 200 x 100 array containing laser-flux data, so this is a regression problem. I am confused about how to build a CNN for this data, because most examples are for image processing, where the input is a 3-channel 2D image, while my inputs are 3D arrays of raw values.
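As an aside on data layout: with a volumetric batch x 100 x 200 x 100 array there are two common options, sketched below with made-up dimensions. Option 1 treats the first spatial axis as 100 channels of a 2D image (which is what the model below does); option 2 adds an explicit channel axis and uses Conv3d. This is a minimal illustration, not code from the question.

import torch

x = torch.randn(4, 100, 200, 100)  # batch x 100 x 200 x 100 volume

# Option 1: treat the 100 slices as channels of a 2D image
conv2d = torch.nn.Conv2d(in_channels=100, out_channels=50, kernel_size=5, padding=2)
print(conv2d(x).shape)  # torch.Size([4, 50, 200, 100])

# Option 2: add an explicit channel axis and convolve in 3D
x3d = x.unsqueeze(1)  # batch x 1 x 100 x 200 x 100
conv3d = torch.nn.Conv3d(in_channels=1, out_channels=8, kernel_size=5, padding=2)
print(conv3d(x3d).shape)  # torch.Size([4, 8, 100, 200, 100])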
Here is my code.
Dataloader:
import numpy as np
import torch
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader

class Data_set(Dataset):

    def __init__(self, X, y):
        # X and y are batch x 100 x 200 x 100 arrays (inputs and targets)
        self.X = X
        self.y = y

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        # Return one (input, target) pair; the DataLoader handles shuffling and batching
        return self.X[idx], self.y[idx]
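As a side note, the default DataLoader collate turns NumPy arrays into float64 tensors, which is why the training loop below casts each batch to float32. A quick smoke test with hypothetical random stand-in data:

# Hypothetical smoke test of the dataset; X_demo/y_demo are made-up stand-ins
X_demo = np.random.rand(4, 100, 200, 100)
y_demo = np.random.rand(4, 100, 200, 100)
demo_loader = DataLoader(Data_set(X_demo, y_demo), batch_size=2, shuffle=True)
xb, yb = next(iter(demo_loader))
print(xb.shape, xb.dtype)  # torch.Size([2, 100, 200, 100]) torch.float64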
CNN class:
class CNN(torch.nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # L1 CNN block: (N, 100, 200, 100) -> (N, 50, 100, 50)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(100, 50, kernel_size=5, stride=(1, 1), padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # L2 CNN block: (N, 50, 100, 50) -> (N, 25, 50, 25)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(50, 25, kernel_size=5, stride=(1, 1), padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        # L3 upsampling block: (N, 25, 50, 25) -> (N, 50, 100, 50)
        self.layer3 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(25, 50, kernel_size=(3, 5), stride=(2, 2), padding=(1, 2), output_padding=(1, 1)))
        # L4 upsampling block: (N, 50, 100, 50) -> (N, 100, 200, 100)
        self.layer4 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(50, 100, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), output_padding=(1, 1)))

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        return out
model = CNN()
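Before training, it is worth confirming that the decoder really maps back to the input resolution, since a shape mismatch in the MSE would silently broadcast or raise. A minimal sanity check with a dummy tensor (hypothetical, not from the original post):

# Trace a dummy batch through the network to confirm the output shape
dummy = torch.randn(2, 100, 200, 100)
with torch.no_grad():
    out = model(dummy)
print(out.shape)  # expected: torch.Size([2, 100, 200, 100]), matching the target array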
Training process:
learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epoch = 20
n_sample = X.shape[0]
batchsize = 20
n_batch = int(np.ceil(n_sample / batchsize))
loss_print = np.zeros(n_batch)
net = model.cuda()
train_data = Data_set(X, y)
train_dataloader = DataLoader(train_data, batch_size=batchsize, shuffle=True)
scheduler = StepLR(optimizer, step_size=10, gamma=0.95)

print('Training the Deep Learning network ...')
print('Batch size is : {}'.format(batchsize))
print('Total number of batches is : {0:2.0f}'.format(n_batch))
print('Total number of epochs is : {0:2.0f}'.format(num_epoch))

for epochs in range(num_epoch):
    count = 0
    for X_batch, y_batch in train_dataloader:
        # Cast to float32 and move to the GPU
        X_batch = X_batch.to(torch.float32).cuda()
        y_batch = y_batch.to(torch.float32).cuda()

        optimizer.zero_grad()
        # Network prediction
        predict = net(X_batch)
        # Calculate loss and backpropagate
        loss = F.mse_loss(predict, y_batch)
        loss.backward()
        optimizer.step()
        loss_print[count] = loss.item()
        count += 1
    scheduler.step()
    print(f'epoch: {epochs+1}, loss = {np.mean(loss_print):.12f}')
print('Learning Finished!')
The loss does not change from one epoch to the next. I have tried changing the learning rate, but the problem remains. I have also tried both optimizer.zero_grad() and net.zero_grad(), but the loss still does not decrease. I suspect an issue in the CNN model setup. Here is the training output:
Training the Deep Learning network ...
Batch size is : 20
Total number of batches is :  2
Total number of epochs is : 20
epoch: 1, loss = 196293869581631488.000000000000
epoch: 2, loss = 196293869581631488.000000000000
epoch: 3, loss = 196293869581631488.000000000000
epoch: 4, loss = 196293869581631488.000000000000
epoch: 5, loss = 196293869581631488.000000000000
epoch: 6, loss = 196293869581631488.000000000000
epoch: 7, loss = 196293869581631488.000000000000
epoch: 8, loss = 196293869581631488.000000000000
epoch: 9, loss = 196293869581631488.000000000000
epoch: 10, loss = 196293869581631488.000000000000
epoch: 11, loss = 196293869581631488.000000000000
epoch: 12, loss = 196293869581631488.000000000000
epoch: 13, loss = 196293869581631488.000000000000
epoch: 14, loss = 196293869581631488.000000000000
epoch: 15, loss = 196293869581631488.000000000000
epoch: 16, loss = 196293869581631488.000000000000
epoch: 17, loss = 196293869581631488.000000000000
epoch: 18, loss = 196293869581631488.000000000000
epoch: 19, loss = 196293869581631488.000000000000
epoch: 20, loss = 196293869581631488.000000000000
Learning Finished!
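For scale: an MSE near 1.96e17 corresponds to an RMS error of roughly 4.4e8, which hints that the laser-flux targets may be unnormalized; at that scale, the updates produced by lr = 0.0001 can be far too small to visibly change the printed loss. A minimal diagnostic sketch (these checks are assumptions about the data, not part of the original post):

# Hypothetical diagnostics, assuming X and y are the arrays fed to Data_set
print('X range:', X.min(), X.max())
print('y range:', y.min(), y.max())  # very large targets inflate the MSE

# Verify that gradients flow (run after at least one backward pass)
for name, p in net.named_parameters():
    if p.grad is not None:
        print(name, 'grad norm:', p.grad.norm().item())

# One common remedy to try: standardize the targets before training, e.g.
# y_scaled = (y - y.mean()) / y.std()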
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow