'Pytorch CNN issue with loss not changing

I am making a CNN for fluid prediction generation on Pytorch. My input is a batchx100x200x100 array containing levelset data, and my training output is also a batchx100x200x100 array containing laser flux data. So this is a regression problem. I am very confused with building CNN model for my data as most of examples are for image processing, which input is 3 channel 2D images, while my inputs are 3D arrays containing just numbers.

Here is my code.

Dataloader:

class Data_set(Dataset):
    
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        X_shuffle = self.X[idx]
        y_shuffle = self.y[idx]
        return X_shuffle, y_shuffle

CNN class

class CNN(torch.nn.Module):

    def __init__(self):
        super(CNN, self).__init__()

        # L1 CNN block
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(100, 50, kernel_size=5, stride=(1, 1), padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # L2 CNN block
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(50, 25, kernel_size=5, stride=(1, 1), padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))

        # L3 CNN block
        self.layer3 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(25, 50, kernel_size=(3,5), stride=(2, 2), padding=(1,2), output_padding=(1,1)))

        # L4 CNN block
        self.layer4 = torch.nn.Sequential(
            torch.nn.ConvTranspose2d(50, 100, kernel_size=(3,3), stride=(2, 2), padding=(1,1), output_padding=(1,1)))


    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        return out

model = CNN()

Training Process

learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

num_epoch = 20
n_sample = X.shape[0]
batchsize = 20
n_batch = int(np.ceil(n_sample/batchsize))
loss_print = np.zeros(n_batch)
net = model.cuda()

train_data = Data_set(X, y)
train_dataloader = DataLoader(train_data, batch_size = batchsize, shuffle = True)
scheduler = StepLR(optimizer, step_size=10, gamma=0.95)

print('Training the Deep Learning network ...')
print('Batch size is : {}'.format(batchsize))
print('Total number of batches is : {0:2.0f}'.format(n_batch))
print('Total number of epochs is : {0:2.0f}'.format(num_epoch))
for epochs in range(num_epoch):
    count = 0
    for X_batch, y_batch in train_dataloader:

        X_batch = X_batch.to(torch.float32)
        y_batch = y_batch.to(torch.float32)
                
        X_batch = X_batch.cuda()
        y_batch = y_batch.cuda()

        optimizer.zero_grad()
        # Network prediction
        predict = net(X_batch)

        # Calculate loss
        loss = F.mse_loss(predict, y_batch)
        loss.backward()
        optimizer.step()
        loss_print[count] = np.array(loss.item()).copy()
        count += 1
    scheduler.step()

    if (epochs+1) % 1 == 0:
        print(f'epoch: {epochs+1}, loss = {np.mean(loss_print):.12f}')

print('Learning Finished!')

The loss does not update for every epoch. I have tried changing the learning rate but the problem still remains. I've also tried using optimizer.zero_grad() or net.zero_grad() but the loss is still not functioning correctly. I suspect issue in the CNN model setup.

Training the Deep Learning network ...
Batch size is : 20
Total number of batches is :  2
Total number of epochs is : 20
epoch: 1, loss = 196293869581631488.000000000000
epoch: 2, loss = 196293869581631488.000000000000
epoch: 3, loss = 196293869581631488.000000000000
epoch: 4, loss = 196293869581631488.000000000000
epoch: 5, loss = 196293869581631488.000000000000
epoch: 6, loss = 196293869581631488.000000000000
epoch: 7, loss = 196293869581631488.000000000000
epoch: 8, loss = 196293869581631488.000000000000
epoch: 9, loss = 196293869581631488.000000000000
epoch: 10, loss = 196293869581631488.000000000000
epoch: 11, loss = 196293869581631488.000000000000
epoch: 12, loss = 196293869581631488.000000000000
epoch: 13, loss = 196293869581631488.000000000000
epoch: 14, loss = 196293869581631488.000000000000
epoch: 15, loss = 196293869581631488.000000000000
epoch: 16, loss = 196293869581631488.000000000000
epoch: 17, loss = 196293869581631488.000000000000
epoch: 18, loss = 196293869581631488.000000000000
epoch: 19, loss = 196293869581631488.000000000000
epoch: 20, loss = 196293869581631488.000000000000
Learning Finished!


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source