Extra trainable parameters in a Keras-based neural network

What I want to do is add three external trainable parameters to a VAE network using the following function:

def gmmpara_init():

    theta_init = tf.Variable(K.ones((n_centroid,1))/n_centroid,trainable=True)
    u_init=tf.Variable(K.zeros((n_centroid,latent_dim)),trainable=True)
    lambda_init=tf.Variable(K.ones((n_centroid,latent_dim)),trainable=True)
    return theta_init,u_init,lambda_init

Ideally, I expect the three parameters to be trained together with the neural network parameters, but the full code always fails with the following error:

Traceback (most recent call last):
  File "vade_modified.py", line 214, in <module>
    vade.fit(X, X,shuffle=True,epochs=epoch,batch_size=batch_size,callbacks=[epoch_begin])
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 66, in _method_wrapper
    return method(self, *args, **kwargs)
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 848, in fit
    tmp_logs = train_function(iterator)
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 580, in __call__
    result = self._call(*args, **kwds)
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 644, in _call
    return self._stateless_fn(*args, **kwds)
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2420, in __call__
    return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 1665, in _filtered_call
    self.captured_inputs)
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 1746, in _call_flat
    ctx, args, cancellation_manager=cancellation_manager))
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 598, in call
    ctx=ctx)
  File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/execute.py", line 74, in quick_execute
    "tensors, but found {}".format(keras_symbolic_tensors))
tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'lambda/Identity:0' shape=(100, 20) dtype=float32>, <tf.Tensor 'dense_3/Identity:0' shape=(100, 20) dtype=float32>, <tf.Tensor 'dense_4/Identity:0' shape=(100, 20) dtype=float32>]

Does anyone know how to handle this error? Much appreciated. The full code is shown below:

# -*- coding: utf-8 -*-
import numpy as np
from tensorflow import keras
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Dense, Lambda,Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import tensorflow as tf
# from tensorflow.keras import objectives
import scipy.io as scio
import gzip
from six.moves import cPickle
import sys
# import  theano
# import  theano.tensor as T
import math
from sklearn import mixture
from sklearn.cluster import KMeans
from keras.models import model_from_json

import warnings
warnings.filterwarnings("ignore")

def floatX(X):
    return np.asarray(X)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
    return z_mean + K.exp(z_log_var / 2) * epsilon
#=====================================
def cluster_acc(Y_pred, Y):
  from sklearn.utils.linear_assignment_ import linear_assignment
  assert Y_pred.size == Y.size
  D = max(Y_pred.max(), Y.max())+1
  w = np.zeros((D,D), dtype=np.int64)
  for i in range(Y_pred.size):
    w[Y_pred[i], Y[i]] += 1
  ind = linear_assignment(w.max() - w)
  return sum([w[i,j] for i,j in ind])*1.0/Y_pred.size, w

#==================================================
def load_data(dataset):
    path = 'dataset/'+dataset+'/'
    if dataset == 'mnist':
        (x_train,y_train),(x_test,y_test) = keras.datasets.mnist.load_data()
        x_train = x_train / 255
        x_test = x_test / 255
        x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
        x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
        X = np.concatenate((x_train,x_test))
        Y = np.concatenate((y_train,y_test))

    if dataset == 'reuters10k':
        data=scio.loadmat(path+'reuters10k.mat')
        X = data['X']
        Y = data['Y'].squeeze()

    if dataset == 'har':
        data=scio.loadmat(path+'HAR.mat')
        X=data['X']
        # X=X.astype('float32')
        Y=data['Y']-1
        X=X[:10200]
        Y=Y[:10200]

    return X,Y

def config_init(dataset):
    if dataset == 'mnist':
        return 784,3000,10,0.002,0.002,10,0.9,0.9,1,'sigmoid'
    if dataset == 'reuters10k':
        return 2000,15,4,0.002,0.002,5,0.5,0.5,1,'linear'
    if dataset == 'har':
        return 561,120,6,0.002,0.00002,10,0.9,0.9,5,'linear'

def gmmpara_init():

    theta_init = tf.Variable(K.ones((n_centroid,1))/n_centroid,trainable=True)
    u_init=tf.Variable(K.zeros((n_centroid,latent_dim)),trainable=True)
    lambda_init=tf.Variable(K.ones((n_centroid,latent_dim)),trainable=True)
    return theta_init,u_init,lambda_init

#================================
def get_gamma(tempz):
    temp_Z=K.repeat(tempz,n_centroid)
    temp_theta_tensor3 = K.repeat_elements(theta_p,latent_dim,axis=1);

    temp_p_c_z=K.exp(K.sum((K.log(temp_theta_tensor3)-0.5*K.log(2*math.pi*lambda_p)-\
                       K.square(temp_Z-u_p)/(2*lambda_p)),axis=-1))+1e-10
    return temp_p_c_z/K.sum(temp_p_c_z,axis=-1,keepdims=True)


#=====================================================
def vae_loss(x, x_decoded_mean):
    Z=K.repeat(z,n_centroid) #(3,4) --> (3,n_centroid,4), 3 is the batch size
    z_mean_t=K.repeat(z_mean,n_centroid)#(3,4) --> (3,n_centroid,4)
    z_log_var_t=K.repeat(z_log_var,n_centroid)#(3,4) --> (3,n_centroid,4)
    u_tensor3=u_p #(n_centroid,4)
    lambda_tensor3=lambda_p #(n_centroid,4)
    theta_tensor3=K.repeat_elements(theta_p,latent_dim,axis=1); #(n_centroid,1)-->(n_centroid,latent_dim), there is a potential problem here, as theta_p is related to the n_centroid, how to update it if we repeat it to a new dimension

    p_c_z=K.exp(K.sum((K.log(theta_tensor3)-0.5*K.log(2*math.pi*lambda_tensor3)-\
                       K.square(Z-u_tensor3)/(2*lambda_tensor3)),axis=-1))+1e-10 # p_c_z should be in shape(3,n_centroid)

    gamma=p_c_z/K.sum(p_c_z,axis=-1,keepdims=True) #(3,n_centroid)
    gamma_t=K.repeat(gamma,latent_dim) #(3,latent_dim,n_centroid)

    if datatype == 'sigmoid':
        loss=alpha*original_dim*keras.losses.binary_crossentropy(x, x_decoded_mean)\
        +K.sum(0.5*gamma*K.sum(K.log(lambda_tensor3)+K.exp(z_log_var_t)/lambda_tensor3+K.square(z_mean_t-u_tensor3)/lambda_tensor3,axis=2),axis=1)\
        -0.5*K.sum(z_log_var+1,axis=-1)\
        +K.sum((K.log(gamma/math.pi))*gamma,axis=-1) # corresponding to the second last item in Eq. 12


    else:
        loss=alpha*original_dim * keras.losses.mean_squared_error(x, x_decoded_mean)\
        +K.sum(0.5*gamma_t*(latent_dim*K.log(math.pi*2)+K.log(lambda_tensor3)+K.exp(z_log_var_t)/lambda_tensor3+K.square(z_mean_t-u_tensor3)/lambda_tensor3),axis=(1,2))\
        -0.5*K.sum(z_log_var+1,axis=-1)\
        -K.sum(K.log(K.repeat_elements(theta_p.dimshuffle('x',0),batch_size,0))*gamma,axis=-1)\
        +K.sum(K.log(gamma)*gamma,axis=-1)

    return loss
#================================

#===================================
def lr_decay():
    if dataset == 'mnist':
        # adam_nn.lr.set_value(max(adam_nn.lr.get_value()*decay_nn,0.0002))
        # adam_gmm.lr.set_value(max(adam_gmm.lr.get_value()*decay_gmm,0.0002))
        pass
    else:
        adam_nn.lr.set_value(adam_nn.lr.get_value()*decay_nn)
        adam_gmm.lr.set_value(adam_gmm.lr.get_value()*decay_gmm)
    print ('lr_nn:%f'%adam_nn.lr.get_value())
    print ('lr_gmm:%f'%adam_gmm.lr.get_value())

def epochBegin(epoch):

    if epoch % decay_n == 0 and epoch!=0:
        pass
        # lr_decay()
    '''
    sample = sample_output.predict(X,batch_size=batch_size)
    g = mixture.GMM(n_components=n_centroid,covariance_type='diag')
    g.fit(sample)
    p=g.predict(sample)
    acc_g=cluster_acc(p,Y)

    if epoch <1 and ispretrain == False:
        u_p.set_value(floatX(g.means_.T))
        print ('no pretrain,random init!')
    '''
    gamma = gamma_output.predict(X,batch_size=batch_size)
    acc=cluster_acc(np.argmax(gamma,axis=1),Y)
    global accuracy
    accuracy+=[acc[0]]
    if epoch>0 :
        #print ('acc_gmm_on_z:%0.8f'%acc_g[0])
        print ('acc_p_c_z:%0.8f'%acc[0])
    if epoch==1 and dataset == 'har' and acc[0]<0.77:
        print ('=========== HAR dataset:bad init!Please run again! ============')
        sys.exit(0)

class EpochBegin(Callback):#https://keras.io/guides/writing_your_own_callbacks/ inherit from the Callback class, then implement some functions
    def on_epoch_begin(self, epoch, logs={}):# the name is specified, see in the link
        epochBegin(epoch)
#==============================================

dataset = 'mnist'
db = sys.argv[1]
if db in ['mnist','reuters10k','har']:
    dataset = db
print ('training on: ' + dataset)
ispretrain = False
batch_size = 100
latent_dim = 20
intermediate_dim = [50,50,100]
# theano.config.floatX='float32'
accuracy=[]
X,Y = load_data(dataset)
original_dim,epoch,n_centroid,lr_nn,lr_gmm,decay_n,decay_nn,decay_gmm,alpha,datatype = config_init(dataset)
theta_p,u_p,lambda_p = gmmpara_init()
#===================

x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim[0], activation='relu')(x)
h = Dense(intermediate_dim[1], activation='relu')(h)
h = Dense(intermediate_dim[2], activation='relu')(h)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

h_decoded = Dense(intermediate_dim[-1], activation='relu')(z)
h_decoded = Dense(intermediate_dim[-2], activation='relu')(h_decoded)
h_decoded = Dense(intermediate_dim[-3], activation='relu')(h_decoded)
x_decoded_mean = Dense(original_dim, activation=datatype)(h_decoded)


#========================
#========================
Gamma = Lambda(get_gamma, output_shape=(n_centroid,))(z)
sample_output = Model(x, z_mean)
gamma_output = Model(x,Gamma)
#===========================================
vade = Model(x, x_decoded_mean)
vade.summary()

adam_nn= Adam(lr=lr_nn,epsilon=1e-4)
vade.compile(optimizer=adam_nn, loss =vae_loss)
epoch_begin=EpochBegin()
#-------------------------------------------------------

vade.fit(X, X,shuffle=True,epochs=epoch,batch_size=batch_size,callbacks=[epoch_begin])


Solution 1:[1]

I am working on something similar for deep clustering using TensorFlow 2.0 and Keras. The solution I have found that works is to define your own custom layer whose weights are the new trainable parameters you want. I tried modifying your code to define a new GMM layer:

class GMMLayer(keras.layers.Layer):

    def __init__(self, latent_dim, n_clusters):
        super(GMMLayer, self).__init__()
        self.latent_dim = latent_dim
        self.n_clusters = n_clusters

    def build(self, input_shape):

        self.pi = self.add_weight(name='pi', shape=(self.n_clusters, 1), trainable=True, initializer=tf.constant_initializer(value=1 / self.n_clusters))
        self.u = self.add_weight(name='u', shape=(self.latent_dim, self.n_clusters), trainable=True, initializer='zeros')
        self.lam = self.add_weight(name='lam', shape=(self.latent_dim, self.n_clusters), trainable=True, initializer='ones')
        super(GMMLayer, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        # the layer takes [z, z_mean, z_log_var] and returns [gamma, gmm_loss]
        return [(input_shape[0][0], self.n_clusters), (input_shape[0][0],)]

    def call(self, inputs):
        z_in, z_mean_in, z_log_var_in = inputs
        # (batch, latent_dim) --> (batch, latent_dim, n_clusters)
        temp_Z = tf.transpose(K.repeat(z_in, self.n_clusters), perm=[0, 2, 1])
        # (n_clusters, 1) --> (latent_dim, n_clusters), so it broadcasts against temp_Z
        theta_tensor3 = tf.transpose(K.repeat_elements(self.pi, self.latent_dim, axis=1))
        # sum over the latent dimension (axis=1); p_c_z has shape (batch, n_clusters)
        p_c_z = K.exp(K.sum((K.log(theta_tensor3) - 0.5 * K.log(2 * math.pi * self.lam) -
                             K.square(temp_Z - self.u) / (2 * self.lam)),
                            axis=1)) + 1e-10
        gamma = p_c_z / K.sum(p_c_z, axis=-1, keepdims=True)

        z_mean_t = tf.transpose(K.repeat(z_mean_in, self.n_clusters), perm=[0, 2, 1])
        z_log_var_t = tf.transpose(K.repeat(z_log_var_in, self.n_clusters), perm=[0, 2, 1])

        # inner sum over latent_dim (axis=1) gives (batch, n_clusters), matching gamma;
        # the outer sum over clusters gives a per-sample loss of shape (batch,)
        gmm_loss = K.sum(0.5 * gamma * K.sum(
                    K.log(self.lam) + K.exp(z_log_var_t) / self.lam + K.square(z_mean_t - self.u) / self.lam,
                    axis=1), axis=1) \
                - 0.5 * K.sum(z_log_var_in + 1, axis=-1) \
                + K.sum((K.log(gamma / math.pi)) * gamma, axis=-1)
        return [gamma, gmm_loss]
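
To actually train pi, u and lam together with the encoder/decoder weights, the layer's loss term still has to be attached to the model. Here is a minimal sketch of how that wiring might look (the use of add_loss, the name recon_loss, and compiling without an external loss function are my assumptions, not part of the original post):

# hypothetical wiring of GMMLayer into the VAE defined in the question;
# assumes x, z, z_mean, z_log_var, x_decoded_mean, latent_dim, n_centroid,
# alpha, original_dim, lr_nn, X, epoch, batch_size and epoch_begin exist as above
gamma, gmm_loss = GMMLayer(latent_dim, n_centroid)([z, z_mean, z_log_var])

vade = Model(x, x_decoded_mean)
gamma_output = Model(x, gamma)  # used by the callback to compute cluster accuracy

# reconstruction term (sigmoid/MNIST case) plus the per-sample GMM term
recon_loss = alpha * original_dim * keras.losses.binary_crossentropy(x, x_decoded_mean)
vade.add_loss(K.mean(recon_loss + gmm_loss))

# no external loss function is passed, so no Keras symbolic tensors leak into
# the eager training function
vade.compile(optimizer=Adam(lr=lr_nn, epsilon=1e-4))
vade.fit(X, shuffle=True, epochs=epoch, batch_size=batch_size, callbacks=[epoch_begin])

Because pi, u and lam are weights of GMMLayer, the optimizer updates them in the same backward pass as the rest of the network.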

Let me know if this helps!

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1: jumproper