How to specify the correlation coefficient as the loss function in keras
I am using keras+tensorflow for the first time. I would like to specify the correlation coefficient as the loss function. It makes sense to square it so that it is a number between 0 and 1 where 0 is bad and 1 is good.
My basic code currently looks like:
def baseline_model():
    """Build and compile the baseline regression network.

    The network has one hidden ReLU layer of 4000 units fed by ``n**2``
    inputs, followed by a single output unit with no activation. It is
    compiled with the mean-squared-error loss and the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
# Standardize the inputs, then fit the Keras regressor, as a single pipeline.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)),
]
pipeline = Pipeline(estimators)
# random_state only has an effect when shuffle=True; recent scikit-learn
# releases raise ValueError if it is set while shuffle is left at False.
kfold = KFold(n_splits=10, shuffle=True, random_state=0)
results = cross_val_score(pipeline, X, Y, cv=kfold)
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))
How can I change this so that it optimizes the squared correlation coefficient (i.e. minimizes 1 - r**2) instead?
I tried the following:
def correlation_coefficient(y_true, y_pred):
    """First attempt at a loss: ``1 - r**2`` from TensorFlow's streaming metric.

    NOTE: compiling a model with this function as the loss is what produces
    the ``ValueError: None values not supported.`` traceback shown below.
    The metric's update op is discarded here.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Tensor holding one minus the squared Pearson correlation.
    """
    pearson_r, _ = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true)
    return 1-pearson_r**2
def baseline_model():
    """Build the baseline network compiled with ``correlation_coefficient``.

    Same architecture as the mean-squared-error baseline (4000 ReLU units,
    then one output unit), but the loss is the custom
    ``correlation_coefficient`` function.

    Returns:
        The compiled ``Sequential`` model.
    """
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=n**2, kernel_initializer='normal', activation='relu'))
    # model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient, optimizer='adam')
    return model
but this crashes with:
Traceback (most recent call last):
File "deeplearning-det.py", line 67, in <module>
results = cross_val_score(pipeline, X, Y, cv=kfold)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 321, in cross_val_score
File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 195, in cross_validate
for train, test in cv.split(X, y, groups))
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 779, in __call__
while self.dispatch_one_batch(iterator):
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 625, in dispatch_one_batch
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 588, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 111, in apply_async
result = ImmediateResult(func)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/_parallel_backends.py", line 332, in __init__
self.results = batch()
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/user/.local/lib/python3.5/site-packages/sklearn/externals/joblib/parallel.py", line 131, in <listcomp>
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/user/.local/lib/python3.5/site-packages/sklearn/model_selection/_validation.py", line 437, in _fit_and_score
estimator.fit(X_train, y_train, **fit_params)
File "/home/user/.local/lib/python3.5/site-packages/sklearn/pipeline.py", line 259, in fit
self._final_estimator.fit(Xt, y, **fit_params)
File "/home/user/.local/lib/python3.5/site-packages/keras/wrappers/scikit_learn.py", line 147, in fit
history = self.model.fit(x, y, **fit_args)
File "/home/user/.local/lib/python3.5/site-packages/keras/models.py", line 867, in fit
File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 1575, in fit
File "/home/user/.local/lib/python3.5/site-packages/keras/engine/training.py", line 960, in _make_train_function
File "/home/user/.local/lib/python3.5/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
return func(*args, **kwargs)
File "/home/user/.local/lib/python3.5/site-packages/keras/optimizers.py", line 432, in get_updates
m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 856, in binary_op_wrapper
y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 611, in convert_to_tensor
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 676, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 121, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/home/user/.local/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 364, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
Update 1
Following the answer below, the code now runs. Unfortunately, the correlation_coefficient and correlation_coefficient_loss functions give different values from each other, and I am not sure either of them is the same as you would get from 1 - scipy.stats.pearsonr()[0]**2.
Why are the loss functions giving the wrong outputs, and how can they be corrected to give the same values as 1 - scipy.stats.pearsonr()[0]**2 would give?
Here is the completely self contained code that should just run:
import numpy as np
import sys
import math
from scipy.stats import ortho_group
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import tensorflow as tf
from keras import backend as K
def permanent(M):
    """Return the permanent of the square matrix ``M``.

    Uses Glynn's formula: the permanent is the signed sum, over sign vectors
    ``d`` with the last entry fixed at +1, of the product of the
    sign-weighted column sums, divided by ``2**(n-1)``. The sign vectors are
    visited in Gray-code order, so each step flips one sign and updates the
    column sums ``v`` with a single row of ``M``.

    Args:
        M: Square 2-D array-like (integer, float or complex entries).

    Returns:
        The permanent as a NumPy scalar (``1.0`` for a 0x0 matrix).

    Raises:
        ValueError: If ``M`` is not a square 2-D matrix.
    """
    M = np.asarray(M)
    if M.ndim != 2 or M.shape[0] != M.shape[1]:
        raise ValueError("permanent() requires a square 2-D matrix")
    n = M.shape[0]
    if n == 0:
        # The permanent of the empty matrix is the empty product.
        return 1.0
    d = np.ones(n)       # current sign (+1/-1) attached to each row
    j = 0                # index of the row whose sign flips next
    s = 1                # product of the signs in d
    f = np.arange(n)     # focus pointers driving the Gray-code walk
    v = M.sum(axis=0)    # sign-weighted column sums for the current d
    p = np.prod(v)
    while j < n - 1:
        # Rebind instead of `v -= ...` so integer input is promoted to
        # float rather than raising a casting error.
        v = v - 2 * d[j] * M[j]
        d[j] = -d[j]
        s = -s
        p += s * np.prod(v)
        # Advance the focus pointers to find the next row to flip.
        f[0] = 0
        f[j] = f[j + 1]
        f[j + 1] = j + 1
        j = f[0]
    return p / 2 ** (n - 1)
def correlation_coefficient_loss(y_true, y_pred):
    """Return ``1 - r**2``, where ``r`` is the Pearson correlation.

    ``r`` is computed over all elements of the two tensors as
    ``sum(xm * ym) / sqrt(sum(xm**2) * sum(ym**2))`` with ``xm`` and ``ym``
    the mean-centred inputs.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Scalar tensor: 0 for perfectly (anti-)correlated inputs, 1 for
        uncorrelated inputs.
    """
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    r_num = K.sum(xm * ym)
    # The denominator is the square root of the product of the two sums of
    # squares; without the sqrt the ratio is not a correlation coefficient.
    r_den = K.sqrt(K.sum(K.square(xm)) * K.sum(K.square(ym)))
    r = r_num / r_den
    # Rounding can push |r| marginally above 1; clamp so the loss stays >= 0.
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
def correlation_coefficient(y_true, y_pred):
    """Metric: ``1 - r**2`` from TensorFlow's streaming Pearson correlation.

    The streaming metric keeps its state in local variables. Those variables
    are added to the GLOBAL_VARIABLES collection so they get initialized
    with the rest of the model, and the update op is forced to run before
    the value is read.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Tensor holding one minus the squared Pearson correlation.
    """
    # Name the metric explicitly so its variables can be found by scope name
    # regardless of which outer name scope the caller happens to be in.
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]
    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)
    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1-pearson_r**2
def baseline_model():
    """Build the network trained on the correlation loss.

    One hidden ReLU layer of 4000 units fed by ``no_rows**2`` inputs, then a
    single output unit. The model is compiled with
    ``correlation_coefficient_loss`` as the loss and
    ``correlation_coefficient`` as an additional metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # create model
    model = Sequential()
    model.add(Dense(4000, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
    # model.add(Dense(2000, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    # Compile model
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient])
    return model
no_rows = 8
print("Making the input data using seed 7", file=sys.stderr)
# Seed NumPy's global generator so the data really is built from seed 7,
# as the message above states.
np.random.seed(7)
U = ortho_group.rvs(no_rows**2)
U = U[:, :no_rows]
# U now holds the first no_rows columns of a random orthogonal matrix
X = []
Y = []
for _ in range(40000):
    # Pick no_rows rows of U (with replacement) and sort them lexicographically.
    I = np.random.choice(no_rows**2, size = no_rows)
    A = U[I][np.lexsort(np.rot90(U[I]))]
    # Features: the flattened no_rows x no_rows matrix (matches input_dim).
    X.append(A.ravel())
    # Target: minus log2 of the squared permanent of that matrix.
    Y.append(-math.log(permanent(A)**2, 2))
X = np.array(X)
Y = np.array(Y)
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=32, verbose=2)))
pipeline = Pipeline(estimators)
X_train, X_test, y_train, y_test = train_test_split(X, Y,
                                                    train_size=0.75, test_size=0.25)
pipeline.fit(X_train, y_train)
Update 2
I have given up on the correlation_coefficient
function and am now just using the correlation_coefficient_loss
one as given by JulioDanielReyes below. However, either this is still wrong or keras is dramatically overfitting. Even when I have:
def baseline_model():
    """Build the small (40 hidden units) network used in Update 2.

    ``correlation_coefficient_loss`` is used both as the training loss and
    as the reported metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(40, input_dim=no_rows**2, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss=correlation_coefficient_loss, optimizer='adam', metrics=[correlation_coefficient_loss])
    return model
I get a loss of, for example, 0.6653 after 100 epochs but 0.857 when I test the trained model.
How can it be overfitting with such a tiny number of nodes in the hidden layer?
Solution 1:[1]
According to keras documentation, you should pass the squared correlation coefficient as a function instead of the string 'mean_squared_error'
The function needs to receive 2 tensors (y_true, y_pred). You can look at keras source code for inspiration.
There is also a function tf.contrib.metrics.streaming_pearson_correlation implemented in tensorflow. Just be careful with the order of the parameters; it should be something like this:
Update 1: initialize local variables according to this issue
import tensorflow as tf


def correlation_coefficient(y_true, y_pred):
    """Return ``1 - r**2`` using TensorFlow's streaming Pearson correlation.

    The streaming metric keeps its state in local variables. Those variables
    are added to the GLOBAL_VARIABLES collection so they are initialized
    with the rest of the model, and the update op is forced to run before
    the value is read.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Tensor holding one minus the squared Pearson correlation.
    """
    # Note the argument order: predictions first, then labels.
    pearson_r, update_op = tf.contrib.metrics.streaming_pearson_correlation(y_pred, y_true, name='pearson_r')
    # find all variables created for this metric
    metric_vars = [i for i in tf.local_variables() if 'pearson_r' in i.name.split('/')]
    # Add metric variables to GLOBAL_VARIABLES collection.
    # They will be initialized for new session.
    for v in metric_vars:
        tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)
    # force to update metric values
    with tf.control_dependencies([update_op]):
        pearson_r = tf.identity(pearson_r)
        return 1-pearson_r**2


model.compile(loss=correlation_coefficient, optimizer='adam')
Update 2: even though you cannot use the scipy function directly, you can look at the implementation and port it to your code using keras backend.
Update 3: The tensorflow function as it is may not be differentiable, so your loss function needs to be something like this (please check the math):
from keras import backend as K
def correlation_coefficient_loss(y_true, y_pred):
    """Return ``1 - r**2``, where ``r`` is the Pearson correlation.

    ``r`` is computed over all elements of the two tensors from their
    mean-centred values, using Keras backend ops only.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of model predictions.

    Returns:
        Scalar tensor: 0 for perfectly (anti-)correlated inputs, 1 for
        uncorrelated inputs.
    """
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    # Element-wise products via `*`, so this snippet needs only the Keras
    # backend import that accompanies it.
    r_num = K.sum(xm * ym)
    r_den = K.sqrt(K.sum(K.square(xm)) * K.sum(K.square(ym)))
    r = r_num / r_den
    # Rounding can push |r| marginally above 1; clamp so the loss stays >= 0.
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
Update 4: The two functions give different results, but correlation_coefficient_loss gives the same results as scipy.stats.pearsonr.
Here is the code to test it:
import tensorflow as tf
from keras import backend as K
import numpy as np
import scipy.stats
# Three test pairs: identical, partially correlated, perfectly anti-correlated.
inputa = np.array([[3,1,2,3,4,5],
                   [1,2,3,4,5,6],
                   [1,2,3,4,5,6]])
inputb = np.array([[3,1,2,3,4,5],
                   [3,1,2,3,4,5],
                   [6,5,4,3,2,1]])

with tf.Session() as sess:
    a = tf.placeholder(tf.float32, shape=[None])
    b = tf.placeholder(tf.float32, shape=[None])
    f1 = correlation_coefficient(a, b)
    f2 = correlation_coefficient_loss(a, b)
    # correlation_coefficient registers the streaming metric's variables in
    # the GLOBAL_VARIABLES collection; they must be initialized before use.
    sess.run(tf.global_variables_initializer())
    for row_a, row_b in zip(inputa, inputb):
        f1_result, f2_result = sess.run([f1, f2], feed_dict={a: row_a, b: row_b})
        scipy_result = 1 - scipy.stats.pearsonr(row_a, row_b)[0]**2
        print("a: "+ str(row_a) + " b: " + str(row_b))
        print("correlation_coefficient: " + str(f1_result))
        print("correlation_coefficient_loss: " + str(f2_result))
        print("scipy.stats.pearsonr:" + str(scipy_result))
a: [3 1 2 3 4 5] b: [3 1 2 3 4 5]
correlation_coefficient: -2.38419e-07
correlation_coefficient_loss: 0.0
a: [1 2 3 4 5 6] b: [3 1 2 3 4 5]
correlation_coefficient: 0.292036
correlation_coefficient_loss: 0.428571
a: [1 2 3 4 5 6] b: [6 5 4 3 2 1]
correlation_coefficient: 0.994918
correlation_coefficient_loss: 0.0
Solution 2:[2]
The following code is an implementation of correlation coefficient in tensorflow version 2.0
import tensorflow as tf
def correlation(x, y):
    """Return the Pearson correlation coefficient of tensors ``x`` and ``y``.

    Computed over all elements as the mean of the product of the
    mean-centred values divided by the product of their standard deviations.

    Args:
        x: Tensor of values.
        y: Tensor of values (presumably the same shape as ``x``).

    Returns:
        Scalar tensor holding the correlation coefficient.
    """
    mx = tf.math.reduce_mean(x)
    my = tf.math.reduce_mean(y)
    xm, ym = x - mx, y - my
    r_num = tf.math.reduce_mean(tf.multiply(xm, ym))
    r_den = tf.math.reduce_std(xm) * tf.math.reduce_std(ym)
    return r_num / r_den
It returns the same result as numpy's corrcoef
Solution 3:[3]
@Trifon's answer is correct if you have all your data available at the same time. The below code implements Pearson Correlation as a Keras metric which allows you to get the metric using batch inputs as is typically done during DNN training/eval:
class PearsonCorrelation(tf.keras.metrics.Metric):
    """Pearson correlation accumulated across batches.

    Running sums of ``y_true * y_pred``, ``y_true**2`` and ``y_pred**2``, the
    running means of both inputs and the element count are kept in
    sub-metrics, so ``result()`` reflects every batch seen since the last
    reset rather than only the latest one.
    """

    def __init__(self, **kwargs):
        """Create the accumulators; ``kwargs`` go to the Keras ``Metric`` base."""
        # The base initializer must run before sub-metrics are attached.
        super().__init__(**kwargs)
        self.cov = tf.metrics.Sum()       # sum of y_true * y_pred
        self.sq_yt = tf.metrics.Sum()     # sum of y_true ** 2
        self.sq_yp = tf.metrics.Sum()     # sum of y_pred ** 2
        self.mean_yp = tf.metrics.Mean()  # running mean of y_pred
        self.mean_yt = tf.metrics.Mean()  # running mean of y_true
        self.count = tf.metrics.Sum()     # number of elements seen

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate one batch.

        Note y_pred are one-hot predictions, not probs/scores.

        Args:
            y_true: Tensor of target values.
            y_pred: Tensor of predictions (presumably the same shape as
                ``y_true``).
            sample_weight: Accepted so Keras can pass it when the metric is
                used via ``model.compile``; it is NOT applied.
        """
        y_true = tf.cast(y_true, self.dtype)
        y_pred = tf.cast(y_pred, self.dtype)
        self.cov.update_state(y_true * y_pred)
        self.sq_yp.update_state(y_pred ** 2)
        self.sq_yt.update_state(y_true ** 2)
        self.mean_yp.update_state(y_pred)
        self.mean_yt.update_state(y_true)
        self.count.update_state(tf.cast(tf.size(y_true), self.dtype))

    def result(self):
        """Return the correlation over everything accumulated so far."""
        count = self.count.result()
        mean_yp = self.mean_yp.result()
        mean_yt = self.mean_yt.result()
        # sum(xy) - n*mean(x)*mean(y) is the sum of centred products.
        numerator = self.cov.result() - count * mean_yp * mean_yt
        denominator = tf.sqrt(self.sq_yp.result() - count * mean_yp**2) * \
            tf.sqrt(self.sq_yt.result() - count * mean_yt**2)
        return numerator / denominator

    def reset_states(self):
        """Clear every accumulator."""
        for metric in (self.cov, self.sq_yt, self.sq_yp,
                       self.mean_yp, self.mean_yt, self.count):
            # Newer Keras names this method reset_state; older releases only
            # have reset_states.
            reset = getattr(metric, "reset_state", None) or metric.reset_states
            reset()

    # Alias so either spelling of the reset hook works.
    reset_state = reset_states
Solution 4:[4]
r = scipy.stats.pearsonr(inputa[i], inputb[i])[0]
is the correlation, so why did you square r in
scipy_result = 1 - scipy.stats.pearsonr(inputa[i], inputb[i])[0]**2
What is the relation between r and scipy_result?
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
Solution 1 | |
Solution 2 | |
Solution 3 | DankMasterDan |
Solution 4 | Kyrol |