ValueError: Please initialize `TimeDistributed` layer with a `Layer` instance

I'm trying to build a model that can be trained on both audio and video samples, but I get this error:
ValueError: Please initialize `TimeDistributed` layer with a `Layer` instance. You passed: Tensor("input_13:0", shape=(None, 5, 648, 384, 3), dtype=float32)

Here are my three model functions:

def build_convnet(shape=(648, 384, 3)):
    """Build the 2-D convolutional feature extractor applied to one image.

    The network has three stages of paired ``Conv2D`` layers (64, 128 and
    256 filters), each followed by batch normalization. The first two stages
    end in max pooling; the last ends in global max pooling.

    Args:
        shape: Input shape of a single image, without the batch dimension.

    Returns:
        The assembled ``tf.keras.models.Sequential`` model.
    """
    momentum = .9
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(64, (2,2), input_shape=shape,padding='same', activation='relu'))
    model.add(tf.keras.layers.Conv2D(64, (2,2), padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization(momentum=momentum))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization(momentum=momentum))
    model.add(tf.keras.layers.MaxPool2D())
    model.add(tf.keras.layers.Conv2D(256, (3,3), padding='same', activation='relu'))
    model.add(tf.keras.layers.Conv2D(256, (3,3), padding='same', activation='relu'))
    model.add(tf.keras.layers.BatchNormalization(momentum=momentum))
    model.add(tf.keras.layers.GlobalMaxPool2D())

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would only add a stray "None" line to the output.
    model.summary()
    return model
def action_model(shape=(5, 648, 384, 3)):
    """Build the frame-sequence branch: per-frame convnet, LSTM, dense head.

    NOTE: this is the question's code, kept as posted. The ``TimeDistributed``
    line below is the one that raises the ``ValueError`` quoted above.

    Args:
        shape: Input shape without the batch dimension; ``shape[1:]`` is
            handed to ``build_convnet`` as the single-image shape
            (presumably the layout is (frames, height, width, channels)).

    Returns:
        A ``tf.keras.models.Model`` from the sequence input to the 64-unit
        dense output.
    """
    # Create our convnet with the per-frame input shape, shape[1:]
    # ((648, 384, 3) with the default argument)
    convnet = build_convnet(shape[1:])
    # then create our final model
    # model = tf.keras.Sequential()
    # add the convnet over the full sequence shape
    # ((5, 648, 384, 3) with the default argument)
    # NOTE: despite its name, `input_shape` is the symbolic Input tensor,
    # not a shape tuple.
    input_shape = tf.keras.layers.Input(shape)
    # BUG (the subject of the question): TimeDistributed is constructed with
    # the Input tensor and the resulting wrapper is then called on the
    # convnet model, i.e. the two arguments are swapped.
    TD = tf.keras.layers.TimeDistributed(input_shape)(convnet)
    # here, you can also use tf.keras.layers.GRU or LSTM
    LSTM1 = tf.keras.layers.LSTM(1024)(TD)

    Dense1 = tf.keras.layers.Dense(512, activation='relu')(LSTM1)
    Drop1 = tf.keras.layers.Dropout(.2)(Dense1)
    Dense2 = tf.keras.layers.Dense(128, activation='relu')(Drop1)
    Drop2 = tf.keras.layers.Dropout(.2)(Dense2)
    Dense3 = tf.keras.layers.Dense(64, activation='relu')(Drop2)
    # No softmax head here; the 64-unit output is returned as-is.
    # Dense4 = tf.keras.layers.Dense(2, activation='softmax')(Dense3)

    model = tf.keras.models.Model(inputs=input_shape,outputs=Dense3)

    return model
def audio_and_final_model():
    """Build the 1-D convolutional audio branch.

    The input has shape (220941, 1). Three ``Conv1D`` stages (16, 32 and 16
    filters, kernel size 10), each followed by max pooling and dropout of
    0.2, feed a flatten layer and two dense layers of 128 and 64 units.

    Returns:
        A ``tf.keras.models.Model`` from the audio input to the 64-unit
        dense output.
    """
    layers = tf.keras.layers
    audio_in = layers.Input(shape=(220941, 1))

    # Convolutional stages: conv -> pool -> dropout, created in that order.
    features = audio_in
    for n_filters in (16, 32, 16):
        features = layers.Conv1D(n_filters, kernel_size=10, activation='relu')(features)
        features = layers.MaxPool1D()(features)
        features = layers.Dropout(0.2)(features)

    # Dense head on the flattened feature map.
    features = layers.Flatten()(features)
    for n_units in (128, 64):
        features = layers.Dense(n_units, activation='relu')(features)

    return tf.keras.models.Model(inputs=audio_in, outputs=features)
# Input shapes of the two branches, without the batch dimension.
INSHAPEAM = (5, 648, 384, 3)
INSHAPEAFM = (220941,1)
am = action_model(INSHAPEAM)
afm = audio_and_final_model()

# Concatenate is a layer: instantiate it first, then call it on the list of
# tensors. Passing the list to the constructor would hand it to the `axis`
# argument instead of merging the two branch outputs.
combined = tf.keras.layers.Concatenate()([am.output, afm.output])
z = tf.keras.layers.Dense(2,activation='softmax')(combined)

# A functional Model is defined by its Input tensors, not by shape tuples,
# so reuse the inputs of the two branch models.
model = tf.keras.models.Model(inputs=[am.input, afm.input],outputs=z)

I searched and could find only one answer here, but I didn't really understand it, so it would be a great help if someone could explain what is wrong. Thanks in advance!



Solution 1:[1]

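The problem is in the `TimeDistributed` line of `action_model`: the arguments are swapped. `TimeDistributed` must be initialized with the layer (here `convnet`) and then called on the input tensor, so change that line to the one below. Alternatively, try embedding the `build_convnet` part directly in the action model using the functional API rather than `Sequential`.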

 # Wrap the convnet in TimeDistributed first, then call the wrapper on the Input tensor.
 TD = tf.keras.layers.TimeDistributed(convnet)(input_shape)

Solution 2:[2]

In case somebody else encounters the same error: I had the same problem and solved it by making `build_convnet` return a `keras.Model` instead of a `Sequential`.

On your example, it would be:

def build_convnet(shape=(648, 384, 3)):
    """Build the per-image convnet with the functional API.

    The network has three stages of paired ``Conv2D`` layers (64, 128 and
    256 filters), each followed by batch normalization. The first two stages
    end in max pooling; the last ends in global max pooling.

    Args:
        shape: Input shape of a single image, without the batch dimension.

    Returns:
        A ``tf.keras.models.Model`` from the image input to the pooled
        features.
    """
    momentum = .9
    # Fully qualified like every other layer here; a bare `Input` would need
    # its own import.
    input_tensor = tf.keras.layers.Input(shape, name='input')
    # The keyword is `activation`; a misspelled keyword is rejected by Conv2D.
    x = tf.keras.layers.Conv2D(64, (2,2), padding='same', activation='relu')(input_tensor)
    x = tf.keras.layers.Conv2D(64, (2,2), padding='same', activation='relu')(x)

    x = tf.keras.layers.BatchNormalization(momentum=momentum)(x)
    x = tf.keras.layers.MaxPool2D()(x)
    x = tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='relu')(x)
    x = tf.keras.layers.Conv2D(128, (3,3), padding='same', activation='relu')(x)
    x = tf.keras.layers.BatchNormalization(momentum=momentum)(x)
    x = tf.keras.layers.MaxPool2D()(x)
    x = tf.keras.layers.Conv2D(256, (3,3), padding='same', activation='relu')(x)
    x = tf.keras.layers.Conv2D(256, (3,3), padding='same', activation='relu')(x)
    x = tf.keras.layers.BatchNormalization(momentum=momentum)(x)
    x = tf.keras.layers.GlobalMaxPool2D()(x)

    return tf.keras.models.Model(inputs=[input_tensor], outputs=[x])

And of course, changing the order of convnet and input in the TimeDistributed call as mentioned in the comments:

# Wrap the convnet in TimeDistributed first, then call the wrapper on the Input tensor.
TD = tf.keras.layers.TimeDistributed(convnet)(input_shape)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2 ZiGaelle