ValueError: logits and labels must have the same shape ((None, 328, 328, 3) vs (None, 1)) with autoencoder

I am trying to build an autoencoder with the following code:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sys
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds

import keras
from keras.layers import Conv2D, Conv2DTranspose, UpSampling2D, MaxPool2D, Flatten, BatchNormalization
from keras.layers import Conv1D, MaxPool1D, Reshape
from keras.layers import Input, Dense, Dropout, Activation, Add, Concatenate
from keras import regularizers
from keras.models import Model, Sequential
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from keras.optimizers import SGD, Adam, RMSprop, Adadelta
import keras.backend as K
from keras.objectives import mean_squared_error
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils

def create_block(input, chs): ## Convolution block of 2 layers
    x = input
    for i in range(2):
        x = Conv2D(chs, 3, padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
    return x
input_img = Input(shape=(328, 328, 3))

block1 = create_block(input_img, 32)
x = MaxPool2D(2)(block1)
block2 = create_block(x, 64)

#Middle
x = MaxPool2D(2)(block2)
middle = create_block(x, 128)

# Decoder
block3 = create_block(middle, 64)
up1 = UpSampling2D((2,2))(block3)
block4 = create_block(up1, 32)
up2 = UpSampling2D((2,2))(block4)

# output
x = Conv2D(3, 1)(up2)
output = Activation("sigmoid")(x)


autoencoder = Model(input_img, output)
autoencoder.compile(SGD(1e-3, 0.9), loss='binary_crossentropy')
autoencoder.summary()

For my training data I use:

img_height = 328
img_width = 328

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

When I try to run it with autoencoder.fit(train_ds, validation_data=val_ds, epochs=50), I get the error: ValueError: logits and labels must have the same shape ((None, 328, 328, 3) vs (None, 1))

Does somebody know how to fix this?



Solution 1:

For an autoencoder, your target needs to be your input: the quantity you are minimizing is the reconstruction loss, so the model's output is compared against the original image. By default, Keras directory iterators return (image, label) pairs, where the label is the inferred class index (e.g. 0 or 1), not the image itself, which is why the labels in your error have shape (None, 1).
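To see the mismatch for yourself (a quick check, assuming the train_ds built with image_dataset_from_directory in your question), inspect one batch:

for images, labels in train_ds.take(1):
    print(images.shape)  # (batch_size, 328, 328, 3)
    print(labels.shape)  # (batch_size,) -- integer class labels, not images

One way to get image/image pairs instead is ImageDataGenerator: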

generator = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2
)

Specify class_mode='input' so the generator yields each image as both input and target. Note also that flow_from_directory takes target_size rather than image_size (the latter is an argument of image_dataset_from_directory):

train_ds = generator.flow_from_directory(
  data_dir,
  subset="training",
  seed=123,
  target_size=(img_height, img_width),
  batch_size=batch_size,
  class_mode='input'
)
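Alternatively, if you want to keep tf.keras.preprocessing.image_dataset_from_directory from your question, you can map the datasets so that each batch yields the image as both input and target (a minimal sketch; the helper name to_autoencoder_pair is just illustrative). Scaling the pixels to [0, 1] also keeps the targets in the range expected by the sigmoid output and binary cross-entropy loss:

def to_autoencoder_pair(images, labels):
    images = tf.cast(images, tf.float32) / 255.0  # raw pixels are in [0, 255]; scale to [0, 1]
    return images, images                         # target = input for reconstruction

train_ds = train_ds.map(to_autoencoder_pair)
val_ds = val_ds.map(to_autoencoder_pair)

autoencoder.fit(train_ds, validation_data=val_ds, epochs=50)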

Here's a complete working example with generated data, which uses the autoencoder architecture you gave:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Conv2D, UpSampling2D, MaxPool2D
from tensorflow.keras.layers import Activation, Dense, Input, BatchNormalization
from tensorflow.keras import Model, Sequential

def create_block(input, chs): ## Convolution block of 2 layers
    x = input
    for i in range(2):
        x = Conv2D(chs, 3, padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
    return x
input_img = Input(shape=(328, 328, 3))

block1 = create_block(input_img, 32)
x = MaxPool2D(2)(block1)
block2 = create_block(x, 64)

#Middle
x = MaxPool2D(2)(block2)
middle = create_block(x, 128)

# Decoder
block3 = create_block(middle, 64)
up1 = UpSampling2D((2,2))(block3)
block4 = create_block(up1, 32)
up2 = UpSampling2D((2,2))(block4)

# output
x = Conv2D(3, 1)(up2)
output = Activation("sigmoid")(x)

X = np.random.rand(8, 328, 328, 3).astype(np.float32)  # random "images" already in [0, 1]

autoencoder = Model(input_img, output)
autoencoder.compile('adam', loss='binary_crossentropy')
autoencoder.summary()

generator = tf.keras.preprocessing.image.ImageDataGenerator()

train_ds = generator.flow(
  x=X,
  y=X  # pass the images as their own target
)

history = autoencoder.fit(train_ds)
Train for 1 steps
1/1 [==============================] - 9s 9s/step - loss: 0.8827
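To sanity-check that the model now reconstructs its input, you can compare the prediction shape against the input batch (using the random X defined above):

reconstructions = autoencoder.predict(X)
print(reconstructions.shape)  # (8, 328, 328, 3) -- same shape as the input images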

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow
