'Pytorch CNN issue with loss not changing
I am making a CNN for fluid prediction generation on Pytorch. My input is a batchx100x200x100 array containing levelset data, and my training output is also a batchx100x200x100 array containing laser flux data. So this is a regression problem. I am very confused with building CNN model for my data as most of examples are for image processing, which input is 3 channel 2D images, while my inputs are 3D arrays containing just numbers.
Here is my code.
Dataloader:
class Data_set(Dataset):
def __init__(self, X, y):
self.X = X
self.y = y
def __len__(self):
return self.X.shape[0]
def __getitem__(self, idx):
X_shuffle = self.X[idx]
y_shuffle = self.y[idx]
return X_shuffle, y_shuffle
CNN class
class CNN(torch.nn.Module):
def __init__(self):
super(CNN, self).__init__()
# L1 CNN block
self.layer1 = torch.nn.Sequential(
torch.nn.Conv2d(100, 50, kernel_size=5, stride=(1, 1), padding=2),
torch.nn.ReLU(),
torch.nn.MaxPool2d(kernel_size=2, stride=2))
# L2 CNN block
self.layer2 = torch.nn.Sequential(
torch.nn.Conv2d(50, 25, kernel_size=5, stride=(1, 1), padding=2),
torch.nn.ReLU(),
torch.nn.MaxPool2d(kernel_size=2, stride=2))
# L3 CNN block
self.layer3 = torch.nn.Sequential(
torch.nn.ConvTranspose2d(25, 50, kernel_size=(3,5), stride=(2, 2), padding=(1,2), output_padding=(1,1)))
# L4 CNN block
self.layer4 = torch.nn.Sequential(
torch.nn.ConvTranspose2d(50, 100, kernel_size=(3,3), stride=(2, 2), padding=(1,1), output_padding=(1,1)))
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
return out
model = CNN()
Training Process
learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epoch = 20
n_sample = X.shape[0]
batchsize = 20
n_batch = int(np.ceil(n_sample/batchsize))
loss_print = np.zeros(n_batch)
net = model.cuda()
train_data = Data_set(X, y)
train_dataloader = DataLoader(train_data, batch_size = batchsize, shuffle = True)
scheduler = StepLR(optimizer, step_size=10, gamma=0.95)
print('Training the Deep Learning network ...')
print('Batch size is : {}'.format(batchsize))
print('Total number of batches is : {0:2.0f}'.format(n_batch))
print('Total number of epochs is : {0:2.0f}'.format(num_epoch))
for epochs in range(num_epoch):
count = 0
for X_batch, y_batch in train_dataloader:
X_batch = X_batch.to(torch.float32)
y_batch = y_batch.to(torch.float32)
X_batch = X_batch.cuda()
y_batch = y_batch.cuda()
optimizer.zero_grad()
# Network prediction
predict = net(X_batch)
# Calculate loss
loss = F.mse_loss(predict, y_batch)
loss.backward()
optimizer.step()
loss_print[count] = np.array(loss.item()).copy()
count += 1
scheduler.step()
if (epochs+1) % 1 == 0:
print(f'epoch: {epochs+1}, loss = {np.mean(loss_print):.12f}')
print('Learning Finished!')
The loss does not update for every epoch. I have tried changing the learning rate but the problem still remains. I've also tried using optimizer.zero_grad()
or net.zero_grad()
but the loss is still not functioning correctly.
I suspect issue in the CNN model setup.
Training the Deep Learning network ...
Batch size is : 20
Total number of batches is : 2
Total number of epochs is : 20
epoch: 1, loss = 196293869581631488.000000000000
epoch: 2, loss = 196293869581631488.000000000000
epoch: 3, loss = 196293869581631488.000000000000
epoch: 4, loss = 196293869581631488.000000000000
epoch: 5, loss = 196293869581631488.000000000000
epoch: 6, loss = 196293869581631488.000000000000
epoch: 7, loss = 196293869581631488.000000000000
epoch: 8, loss = 196293869581631488.000000000000
epoch: 9, loss = 196293869581631488.000000000000
epoch: 10, loss = 196293869581631488.000000000000
epoch: 11, loss = 196293869581631488.000000000000
epoch: 12, loss = 196293869581631488.000000000000
epoch: 13, loss = 196293869581631488.000000000000
epoch: 14, loss = 196293869581631488.000000000000
epoch: 15, loss = 196293869581631488.000000000000
epoch: 16, loss = 196293869581631488.000000000000
epoch: 17, loss = 196293869581631488.000000000000
epoch: 18, loss = 196293869581631488.000000000000
epoch: 19, loss = 196293869581631488.000000000000
epoch: 20, loss = 196293869581631488.000000000000
Learning Finished!
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|