How can I use TensorFlow to create a recommendation system that returns a ratings matrix?
I am trying to use TensorFlow to create a recommendation system. I want to read data from two CSV files: one containing an 'item_id' column, and another containing the ratings with three columns: 'user_id', 'item_id', and 'rating'. From these I want to build a ratings matrix whose rows represent the user ids, whose columns represent the item ids, and whose elements are the ratings (as floats) that each user assigned to each item.
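For context, this is roughly the matrix layout I am after; a minimal pandas sketch (the file names 'items.csv' and 'ratings.csv' are just placeholders for my two files):
import pandas as pd

items = pd.read_csv('items.csv')        # column: item_id
ratings = pd.read_csv('ratings.csv')    # columns: user_id, item_id, rating

# Rows = user ids, columns = item ids, cells = float ratings
# (NaN wherever a user has not rated an item).
ratings_matrix = (
    ratings.pivot_table(index='user_id', columns='item_id', values='rating')
           .reindex(columns=items['item_id'].unique())
           .astype(float)
)
print(ratings_matrix.shape)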
I came across the example code below, which does something similar but only prints the top 3 recommendations at the end. How can I change it so that it produces the full ratings matrix?
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
import numpy as np
import tensorflow as tf
from typing import Dict, Text
import pprint
# ratings data
rating = tfds.load('movielens/100k-ratings', split='train')
# features of all the movies
movies = tfds.load('movielens/100k-movies', split='train')
# limiting the features
rating = rating.map(lambda x:{'movie_title':x['movie_title'],'user_id':x['user_id']})
movies = movies.map(lambda x: x['movie_title'])
user_id_vocabulary = tf.keras.layers.experimental.preprocessing.StringLookup(mask_token=None)
user_id_vocabulary.adapt(rating.map(lambda x: x['user_id']))
movies_title_vocabulary = tf.keras.layers.experimental.preprocessing.StringLookup(mask_token=None)
movies_title_vocabulary.adapt(movies)
class MovieLensModel(tfrs.Model):
    def __init__(
        self,
        user_model: tf.keras.Model,
        movie_model: tf.keras.Model,
        task: tfrs.tasks.Retrieval):
        super().__init__()
        # Set up user and movie representations.
        self.user_model = user_model
        self.movie_model = movie_model
        # Set up a retrieval task.
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        # Define how the loss is computed.
        user_embeddings = self.user_model(features["user_id"])
        movie_embeddings = self.movie_model(features["movie_title"])
        return self.task(user_embeddings, movie_embeddings)

users_model = tf.keras.Sequential([
    user_id_vocabulary,
    tf.keras.layers.Embedding(user_id_vocabulary.vocab_size(), 64)])
movie_model = tf.keras.Sequential([
    movies_title_vocabulary,
    tf.keras.layers.Embedding(movies_title_vocabulary.vocab_size(), 64)])
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)))
model = MovieLensModel(users_model,movie_model,task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))
model.fit(rating.batch(4096), epochs=3)
recommends = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
recommends.index_from_dataset(movies.batch(100).map(lambda title: (title, model.movie_model(title))))
id_ = input('Enter the user_id: ')
_, titles = recommends(np.array([str(id_)]))
print('Top recommendation for user',id_,titles[0, :3])
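My guess is that the full matrix could be built by scoring every user against every movie with the trained towers. A rough sketch of what I have in mind (reusing the variables above); note these would be retrieval scores rather than the original 1-5 ratings, and I am not sure this is the intended approach:
# Hypothetical: build a full (num_users x num_movies) score matrix by taking
# the dot product of every user embedding with every movie embedding.
user_ids = user_id_vocabulary.get_vocabulary()          # index 0 is the OOV token
movie_titles = movies_title_vocabulary.get_vocabulary()

user_embeddings = model.user_model(tf.constant(user_ids))        # (num_users, 64)
movie_embeddings = model.movie_model(tf.constant(movie_titles))  # (num_movies, 64)

score_matrix = tf.matmul(user_embeddings, movie_embeddings, transpose_b=True)
print(score_matrix.shape)  # (num_users, num_movies)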
Solution 1:[1]
I put together something similar; it is a very simple example I created in a short time, but it should give you the idea.
[ Sample ]:
import os
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import math
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
os.environ['TF_GPU_THREAD_MODE']='gpu_private'
os.environ['TF_GPU_THREAD_COUNT']='1'
os.environ['TFDS_DATA_DIR'] = 'F:\\datasets'
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def predict_action(list_movie_genres, list_movie_id, list_movie_title):
    # Build a (1, 4, 1) feature tensor; since no gender value is passed in,
    # the genre value is reused to fill the fourth slot.
    predict_DATA = tf.constant([list_movie_genres, list_movie_id, list_movie_title, list_movie_genres], shape=(1, 4, 1), dtype=tf.float32)
    predictions = model.predict(predict_DATA)
    # Threshold the 8 outputs into two groups of 4 binary flags.
    result_1 = tf.where(tf.math.greater(np.asarray(predictions[0][:4], dtype=np.float32), np.zeros((1, 4))), [1], [0]).numpy()[0]
    result_2 = tf.where(tf.math.greater(np.asarray(predictions[0][4:], dtype=np.float32), np.zeros((1, 4))), [1], [0]).numpy()[0]
    return result_1, result_2
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
split0, split1 = tfds.even_splits('train', n=2)
ds = tfds.load('movielens/100k-ratings', split=split0)
ds_2 = tfds.load('movielens/100k-ratings', split=split1)
list_movie_genres = [ ]
list_movie_id = [ ]
list_movie_title = [ ]
list_user_gender = [ ]
list_user_rating = [ ]
# Collect 200 training examples; [0] takes the first element of each field
# (the first genre id, the first byte of the id/title strings).
for example in ds.take(200):
    list_movie_genres.append(example['movie_genres'].numpy()[0])
    list_movie_id.append(example['movie_id'].numpy()[0])
    list_movie_title.append(example['movie_title'].numpy()[0])
    # Encode gender as 1/0.
    if example['user_gender'].numpy() == True:
        list_user_gender.append(1)
    else:
        list_user_gender.append(0)
    list_user_rating.append(round(example['user_rating'].numpy(), 1))
list_movie_genres = np.asarray(list_movie_genres, dtype=np.float32)
list_movie_id = np.asarray(list_movie_id, dtype=np.float32)
list_movie_title = np.asarray(list_movie_title, dtype=np.float32)
list_user_gender = np.asarray(list_user_gender, dtype=np.float32)
list_user_rating = np.asarray(list_user_rating, dtype=np.float32)
list_val_movie_genres = [ ]
list_val_movie_id = [ ]
list_val_movie_title = [ ]
list_val_user_gender = [ ]
list_val_user_rating = [ ]
# Collect 20 validation examples in the same way.
for example in ds_2.take(20):
    list_val_movie_genres.append(example['movie_genres'].numpy()[0])
    list_val_movie_id.append(example['movie_id'].numpy()[0])
    list_val_movie_title.append(example['movie_title'].numpy()[0])
    if example['user_gender'].numpy() == True:
        list_val_user_gender.append(1)
    else:
        list_val_user_gender.append(0)
    list_val_user_rating.append(round(example['user_rating'].numpy(), 1))
list_val_movie_genres = np.asarray(list_val_movie_genres, dtype=np.float32)
list_val_movie_id = np.asarray(list_val_movie_id, dtype=np.float32)
list_val_movie_title = np.asarray(list_val_movie_title, dtype=np.float32)
list_val_user_gender = np.asarray(list_val_user_gender, dtype=np.float32)
list_val_user_rating = np.asarray(list_val_user_rating, dtype=np.float32)
# Pack the collected feature lists into a (1, N, 4, 1) tensor and the ratings
# into a (1, N, 1) target tensor, for training and validation respectively.
dataset = tf.data.Dataset.from_tensor_slices((
    tf.constant([list_movie_genres, list_movie_id, list_movie_title, list_user_gender], shape=(1, 200, 4, 1), dtype=tf.float32),
    tf.constant([list_user_rating], shape=(1, 200, 1), dtype=tf.float32)))
valset = tf.data.Dataset.from_tensor_slices((
    tf.constant([list_val_movie_genres, list_val_movie_id, list_val_movie_title, list_val_user_gender], shape=(1, 20, 4, 1), dtype=tf.float32),
    tf.constant([list_val_user_rating], shape=(1, 20, 1), dtype=tf.float32)))
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(4, 1)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True, return_state=False)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(256),
    tf.keras.layers.Dropout(.2),
    tf.keras.layers.Dense(256),
])
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(8))  # 16.16 # [ 1, 1, 1, 1, 0, 0, 0, 0 ] -- two groups of 4 binary flags
model.summary()
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
name='Nadam'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
lossfn = tf.keras.losses.BinaryCrossentropy(
from_logits=False,
label_smoothing=0.0,
axis=-1,
reduction=tf.keras.losses.Reduction.AUTO,
name='binary_crossentropy'
)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Summary
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=[tf.keras.metrics.BinaryCrossentropy( name='binary_crossentropy', dtype=tf.float32, from_logits=False, label_smoothing=0 )])
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, epochs=150, validation_data=valset)  # 15000
result_1, result_2 = predict_action( 7, 51, 79 )
print("")
print("")
print("")
print( "Rates: " + str( result_1[0] * pow( 2 + 0, result_1[0] ) - result_1[0] + result_1[1] * pow( 2 + 1, result_1[1] ) - result_1[1] + result_1[2] * pow( 2 + 2, result_1[2] ) - result_1[2] + result_1[3] * pow( 2 + 3, result_1[3] ) - result_1[3] - 1 ) + "." +
str( result_2[0] * pow( 2 + 0, result_2[0] ) - result_2[0] + result_2[1] * pow( 2 + 1, result_2[1] ) - result_2[1] + result_2[2] * pow( 2 + 2, result_2[2] ) - result_2[2] + result_2[3] * pow( 2 + 3, result_2[3] ) - result_2[3] - 1) )
input("Press Any Key!")
...
[ Output ]:
1/1 [==============================] - 0s 43ms/step - loss: 28.4547 - binary_crossentropy: 28.4547 - val_loss: 29.3992 - val_binary_crossentropy: 29.3992
Epoch 149/150
1/1 [==============================] - 0s 39ms/step - loss: 28.4590 - binary_crossentropy: 28.4590 - val_loss: 29.3691 - val_binary_crossentropy: 29.3691
Epoch 150/150
1/1 [==============================] - 0s 33ms/step - loss: 28.4475 - binary_crossentropy: 28.4475 - val_loss: 29.3345 - val_binary_crossentropy: 29.3345
Rates: 4.4
Press Any Key!
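A note on the "Rates" formula above: since each flag r_i is either 0 or 1, the term r_i * pow(2 + i, r_i) - r_i simplifies to r_i * (i + 1), so each group of four flags decodes to sum(r_i * (i + 1)) - 1. An equivalent, more readable sketch of the same decoding:
def decode_flags(flags):
    # flags is a length-4 sequence of 0/1 values, e.g. result_1 or result_2.
    return sum(int(r) * (i + 1) for i, r in enumerate(flags)) - 1

# Example: decode_flags([1, 0, 1, 0]) == 3 and decode_flags([0, 1, 0, 1]) == 5,
# which would print as "Rates: 3.5".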
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Martijn Pieters |