Extra trainable parameters in a Keras-based neural network
What I want to do is add three external trainable parameters to the VAE network using the following function:
def gmmpara_init():
    theta_init = tf.Variable(K.ones((n_centroid, 1)) / n_centroid, trainable=True)
    u_init = tf.Variable(K.zeros((n_centroid, latent_dim)), trainable=True)
    lambda_init = tf.Variable(K.ones((n_centroid, latent_dim)), trainable=True)
    return theta_init, u_init, lambda_init
Then, ideally, I expect that the three parameters can be trained together with the neural network parameters. But the full code always fails with the following error:
Traceback (most recent call last):
File "vade_modified.py", line 214, in <module>
vade.fit(X, X,shuffle=True,epochs=epoch,batch_size=batch_size,callbacks=[epoch_begin])
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 66, in _method_wrapper
return method(self, *args, **kwargs)
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 848, in fit
tmp_logs = train_function(iterator)
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 580, in __call__
result = self._call(*args, **kwds)
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py", line 644, in _call
return self._stateless_fn(*args, **kwds)
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 2420, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 1665, in _filtered_call
self.captured_inputs)
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 1746, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/function.py", line 598, in call
ctx=ctx)
File "/home/shuiqiao/anaconda3/lib/python3.7/site-packages/tensorflow/python/eager/execute.py", line 74, in quick_execute
"tensors, but found {}".format(keras_symbolic_tensors))
tensorflow.python.eager.core._SymbolicException: Inputs to eager execution function cannot be Keras symbolic tensors, but found [<tf.Tensor 'lambda/Identity:0' shape=(100, 20) dtype=float32>, <tf.Tensor 'dense_3/Identity:0' shape=(100, 20) dtype=float32>, <tf.Tensor 'dense_4/Identity:0' shape=(100, 20) dtype=float32>]
Does anyone know how to handle this error? Any help is much appreciated. The full code is shown below:
# -*- coding: utf-8 -*-
import numpy as np
from tensorflow import keras
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Dense, Lambda,Layer
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import tensorflow as tf
# from tensorflow.keras import objectives
import scipy.io as scio
import gzip
from six.moves import cPickle
import sys
# import theano
# import theano.tensor as T
import math
from sklearn import mixture
from sklearn.cluster import KMeans
from keras.models import model_from_json
import warnings
warnings.filterwarnings("ignore")
def floatX(X):
    return np.asarray(X)
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
    return z_mean + K.exp(z_log_var / 2) * epsilon
#=====================================
def cluster_acc(Y_pred, Y):
    from sklearn.utils.linear_assignment_ import linear_assignment
    assert Y_pred.size == Y.size
    D = max(Y_pred.max(), Y.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(Y_pred.size):
        w[Y_pred[i], Y[i]] += 1
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / Y_pred.size, w
#==================================================
def load_data(dataset):
    path = 'dataset/' + dataset + '/'
    if dataset == 'mnist':
        (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
        x_train = x_train / 255
        x_test = x_test / 255
        x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
        x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
        X = np.concatenate((x_train, x_test))
        Y = np.concatenate((y_train, y_test))
    if dataset == 'reuters10k':
        data = scio.loadmat(path + 'reuters10k.mat')
        X = data['X']
        Y = data['Y'].squeeze()
    if dataset == 'har':
        data = scio.loadmat(path + 'HAR.mat')
        X = data['X']
        # X = X.astype('float32')
        Y = data['Y'] - 1
        X = X[:10200]
        Y = Y[:10200]
    return X, Y
def config_init(dataset):
    if dataset == 'mnist':
        return 784, 3000, 10, 0.002, 0.002, 10, 0.9, 0.9, 1, 'sigmoid'
    if dataset == 'reuters10k':
        return 2000, 15, 4, 0.002, 0.002, 5, 0.5, 0.5, 1, 'linear'
    if dataset == 'har':
        return 561, 120, 6, 0.002, 0.00002, 10, 0.9, 0.9, 5, 'linear'
def gmmpara_init():
    theta_init = tf.Variable(K.ones((n_centroid, 1)) / n_centroid, trainable=True)
    u_init = tf.Variable(K.zeros((n_centroid, latent_dim)), trainable=True)
    lambda_init = tf.Variable(K.ones((n_centroid, latent_dim)), trainable=True)
    return theta_init, u_init, lambda_init
#================================
def get_gamma(tempz):
    temp_Z = K.repeat(tempz, n_centroid)
    temp_theta_tensor3 = K.repeat_elements(theta_p, latent_dim, axis=1)
    temp_p_c_z = K.exp(K.sum((K.log(temp_theta_tensor3) - 0.5 * K.log(2 * math.pi * lambda_p)
                              - K.square(temp_Z - u_p) / (2 * lambda_p)), axis=-1)) + 1e-10
    return temp_p_c_z / K.sum(temp_p_c_z, axis=-1, keepdims=True)
#=====================================================
def vae_loss(x, x_decoded_mean):
    Z = K.repeat(z, n_centroid)                    # (3,4) --> (3, n_centroid, 4), 3 is the batch size
    z_mean_t = K.repeat(z_mean, n_centroid)        # (3,4) --> (3, n_centroid, 4)
    z_log_var_t = K.repeat(z_log_var, n_centroid)  # (3,4) --> (3, n_centroid, 4)
    u_tensor3 = u_p            # (n_centroid, 4)
    lambda_tensor3 = lambda_p  # (n_centroid, 4)
    # (n_centroid, 1) --> (n_centroid, latent_dim); potential problem here: theta_p is tied to n_centroid,
    # so how do we update it once it has been repeated to a new dimension?
    theta_tensor3 = K.repeat_elements(theta_p, latent_dim, axis=1)
    p_c_z = K.exp(K.sum((K.log(theta_tensor3) - 0.5 * K.log(2 * math.pi * lambda_tensor3)
                         - K.square(Z - u_tensor3) / (2 * lambda_tensor3)), axis=-1)) + 1e-10  # shape (3, n_centroid)
    gamma = p_c_z / K.sum(p_c_z, axis=-1, keepdims=True)  # (3, n_centroid)
    gamma_t = K.repeat(gamma, latent_dim)                 # (3, latent_dim, n_centroid)
    if datatype == 'sigmoid':
        loss = alpha * original_dim * keras.losses.binary_crossentropy(x, x_decoded_mean) \
               + K.sum(0.5 * gamma * K.sum(K.log(lambda_tensor3) + K.exp(z_log_var_t) / lambda_tensor3
                                           + K.square(z_mean_t - u_tensor3) / lambda_tensor3, axis=2), axis=1) \
               - 0.5 * K.sum(z_log_var + 1, axis=-1) \
               + K.sum((K.log(gamma / math.pi)) * gamma, axis=-1)  # corresponds to the second-to-last term in Eq. 12
    else:
        loss = alpha * original_dim * keras.losses.mean_squared_error(x, x_decoded_mean) \
               + K.sum(0.5 * gamma_t * (latent_dim * K.log(math.pi * 2) + K.log(lambda_tensor3)
                                        + K.exp(z_log_var_t) / lambda_tensor3
                                        + K.square(z_mean_t - u_tensor3) / lambda_tensor3), axis=(1, 2)) \
               - 0.5 * K.sum(z_log_var + 1, axis=-1) \
               - K.sum(K.log(K.repeat_elements(theta_p.dimshuffle('x', 0), batch_size, 0)) * gamma, axis=-1) \
               + K.sum(K.log(gamma) * gamma, axis=-1)
    return loss
#================================
#===================================
def lr_decay():
    if dataset == 'mnist':
        # adam_nn.lr.set_value(max(adam_nn.lr.get_value()*decay_nn, 0.0002))
        # adam_gmm.lr.set_value(max(adam_gmm.lr.get_value()*decay_gmm, 0.0002))
        pass
    else:
        adam_nn.lr.set_value(adam_nn.lr.get_value() * decay_nn)
        adam_gmm.lr.set_value(adam_gmm.lr.get_value() * decay_gmm)
    print('lr_nn:%f' % adam_nn.lr.get_value())
    print('lr_gmm:%f' % adam_gmm.lr.get_value())
def epochBegin(epoch):
    if epoch % decay_n == 0 and epoch != 0:
        pass
        # lr_decay()
    '''
    sample = sample_output.predict(X, batch_size=batch_size)
    g = mixture.GMM(n_components=n_centroid, covariance_type='diag')
    g.fit(sample)
    p = g.predict(sample)
    acc_g = cluster_acc(p, Y)
    if epoch < 1 and ispretrain == False:
        u_p.set_value(floatX(g.means_.T))
        print('no pretrain, random init!')
    '''
    gamma = gamma_output.predict(X, batch_size=batch_size)
    acc = cluster_acc(np.argmax(gamma, axis=1), Y)
    global accuracy
    accuracy += [acc[0]]
    if epoch > 0:
        # print('acc_gmm_on_z:%0.8f' % acc_g[0])
        print('acc_p_c_z:%0.8f' % acc[0])
    if epoch == 1 and dataset == 'har' and acc[0] < 0.77:
        print('=========== HAR dataset: bad init! Please run again! ============')
        sys.exit(0)
class EpochBegin(Callback):  # https://keras.io/guides/writing_your_own_callbacks/ : inherit from Callback and implement the hook methods
    def on_epoch_begin(self, epoch, logs={}):  # the method name is fixed by the Callback API; see the link above
        epochBegin(epoch)
#==============================================
dataset = 'mnist'
db = sys.argv[1]
if db in ['mnist', 'reuters10k', 'har']:
    dataset = db
print ('training on: ' + dataset)
ispretrain = False
batch_size = 100
latent_dim = 20
intermediate_dim = [50,50,100]
# theano.config.floatX='float32'
accuracy=[]
X,Y = load_data(dataset)
original_dim,epoch,n_centroid,lr_nn,lr_gmm,decay_n,decay_nn,decay_gmm,alpha,datatype = config_init(dataset)
theta_p,u_p,lambda_p = gmmpara_init()
#===================
x = Input(batch_shape=(batch_size, original_dim))
h = Dense(intermediate_dim[0], activation='relu')(x)
h = Dense(intermediate_dim[1], activation='relu')(h)
h = Dense(intermediate_dim[2], activation='relu')(h)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
h_decoded = Dense(intermediate_dim[-1], activation='relu')(z)
h_decoded = Dense(intermediate_dim[-2], activation='relu')(h_decoded)
h_decoded = Dense(intermediate_dim[-3], activation='relu')(h_decoded)
x_decoded_mean = Dense(original_dim, activation=datatype)(h_decoded)
#========================
#========================
Gamma = Lambda(get_gamma, output_shape=(n_centroid,))(z)
sample_output = Model(x, z_mean)
gamma_output = Model(x,Gamma)
#===========================================
vade = Model(x, x_decoded_mean)
vade.summary()
adam_nn= Adam(lr=lr_nn,epsilon=1e-4)
vade.compile(optimizer=adam_nn, loss =vae_loss)
epoch_begin=EpochBegin()
#-------------------------------------------------------
vade.fit(X, X,shuffle=True,epochs=epoch,batch_size=batch_size,callbacks=[epoch_begin])
Solution 1:[1]
I am working on something similar for deep clustering with TensorFlow 2.0 and Keras. The solution I have found that works is to define your own custom layer whose weights (created with add_weight) are the new trainable parameters you want. I tried modifying your code to define a new GMM layer:
class GMMLayer(keras.layers.Layer):
    def __init__(self, latent_dim, n_clusters):
        super(GMMLayer, self).__init__()
        self.latent_dim = latent_dim
        self.n_clusters = n_clusters

    def build(self, input_shape):
        self.pi = self.add_weight(name='pi', shape=(self.n_clusters, 1), trainable=True,
                                  initializer=tf.constant_initializer(value=1 / self.n_clusters))
        self.u = self.add_weight(name='u', shape=(self.latent_dim, self.n_clusters), trainable=True,
                                 initializer='zeros')
        self.lam = self.add_weight(name='lam', shape=(self.latent_dim, self.n_clusters), trainable=True,
                                   initializer='ones')

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.n_clusters)

    def call(self, inputs):
        z_in, z_mean_in, z_log_var_in = inputs
        # (batch, latent_dim) --> (batch, latent_dim, n_clusters)
        temp_Z = tf.transpose(K.repeat(z_in, self.n_clusters), perm=[0, 2, 1])
        # (n_clusters, 1) --> (latent_dim, n_clusters); same caveat as in your code about repeating theta
        theta_tensor3 = tf.transpose(K.repeat_elements(self.pi, self.latent_dim, axis=1))
        p_c_z = K.exp(K.sum((K.log(theta_tensor3) - 0.5 * K.log(2 * math.pi * self.lam)
                             - K.square(temp_Z - self.u) / (2 * self.lam)),
                            axis=1)) + 1e-10  # sum over latent dims --> shape (batch_size, n_clusters)
        gamma = p_c_z / K.sum(p_c_z, axis=-1, keepdims=True)
        z_mean_t = tf.transpose(K.repeat(z_mean_in, self.n_clusters), perm=[0, 2, 1])
        z_log_var_t = tf.transpose(K.repeat(z_log_var_in, self.n_clusters), perm=[0, 2, 1])
        gmm_loss = K.sum(0.5 * gamma * K.sum(
            K.log(self.lam) + K.exp(z_log_var_t) / self.lam + K.square(z_mean_t - self.u) / self.lam,
            axis=1), axis=1) \
            - 0.5 * K.sum(z_log_var_in + 1, axis=-1) \
            + K.sum((K.log(gamma / math.pi)) * gamma, axis=-1)
        return [gamma, gmm_loss]
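Below is a rough sketch of how the layer could be wired into your existing graph. It is untested against your full script; attaching the losses with model.add_loss (and building the reconstruction term from tensors inside the graph) is my assumption about how to integrate it so that no Keras symbolic tensors leak into an external loss function:
gamma, gmm_loss = GMMLayer(latent_dim, n_centroid)([z, z_mean, z_log_var])
gamma_output = Model(x, gamma)
vade = Model(x, x_decoded_mean)
# reconstruction term, built from tensors that live inside the model's graph
recon_loss = alpha * original_dim * keras.losses.binary_crossentropy(x, x_decoded_mean)
# attach the total loss to the model instead of passing a loss function to compile()
vade.add_loss(K.mean(recon_loss + gmm_loss))
vade.compile(optimizer=Adam(lr=lr_nn, epsilon=1e-4))
vade.fit(X, shuffle=True, epochs=epoch, batch_size=batch_size, callbacks=[epoch_begin])
With this pattern the GMM weights (pi, u, lam) are part of the layer, so they are updated together with the rest of the network's parameters during fit.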
Let me know if this helps!
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | jumproper |