How can I use TensorFlow to create a recommendation system that returns a ratings matrix?

I am trying to use TensorFlow to create a recommendation system. I want to read data from two CSV files: one containing the 'item_id's, and the other containing the ratings in three columns: 'user_id', 'item_id', 'rating'. From these I want to obtain a ratings matrix in which the rows represent the user ids, the columns represent the item ids, and each element is the rating (as a float) that the user gave to the item.
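To make it concrete, the matrix I am after is what a simple pandas pivot of the ratings file would produce (rough sketch only; 'ratings.csv' and the column names are placeholders for my actual files):

import pandas as pd

# placeholder file with the three columns 'user_id', 'item_id', 'rating'
ratings = pd.read_csv('ratings.csv')

# rows = user_id, columns = item_id, values = rating (NaN where no rating exists)
ratings_matrix = ratings.pivot_table(index='user_id', columns='item_id',
                                     values='rating').astype('float32')
print(ratings_matrix.shape)   # (num_users, num_items)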

I came across the example code below, which does something similar but only prints the top 3 recommendations for a single user at the end. How can I change it so that it gives the full ratings matrix? (I have also sketched one idea directly after the code.)

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
 
import numpy as np
import tensorflow as tf
 
from typing import Dict, Text
import pprint

# ratings data
rating = tfds.load('movielens/100k-ratings', split='train')
# features of all the movies
movies = tfds.load('movielens/100k-movies', split='train')
 
# limiting the features
rating = rating.map(lambda x:{'movie_title':x['movie_title'],'user_id':x['user_id']})
movies = movies.map(lambda x: x['movie_title'])

user_id_vocabulary = tf.keras.layers.experimental.preprocessing.StringLookup(mask_token=None)
user_id_vocabulary.adapt(rating.map(lambda x: x['user_id']))
 
movies_title_vocabulary = tf.keras.layers.experimental.preprocessing.StringLookup(mask_token=None)
movies_title_vocabulary.adapt(movies)

class MovieLensModel(tfrs.Model):
 
  def __init__(
      self,
      user_model: tf.keras.Model,
      movie_model: tf.keras.Model,
      task: tfrs.tasks.Retrieval):
    super().__init__()
 
    # Set up user and movie representations.
    self.user_model = user_model
    self.movie_model = movie_model
 
    # Set up a retrieval task.
    self.task = task
 
  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    # Define how the loss is computed.
 
    user_embeddings = self.user_model(features["user_id"])
    movie_embeddings = self.movie_model(features["movie_title"])
 
    return self.task(user_embeddings, movie_embeddings)

users_model = tf.keras.Sequential([user_id_vocabulary,
                                   tf.keras.layers.Embedding(user_id_vocabulary.vocab_size(),64)])
movie_model = tf.keras.Sequential([movies_title_vocabulary,
                                   tf.keras.layers.Embedding(movies_title_vocabulary.vocab_size(),64)])
 
task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(
    movies.batch(128).map(movie_model)))

model = MovieLensModel(users_model,movie_model,task)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.5))
model.fit(rating.batch(4096), epochs=3)

recommends = tfrs.layers.factorized_top_k.BruteForce(model.user_model)
recommends.index_from_dataset(movies.batch(100).map(lambda title: (title, model.movie_model(title))))
 
id_ = input('Enter the user_id: ')
_, titles = recommends(np.array([str(id_)]))
print('Top recommendation for user',id_,titles[0, :3])
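Is the right direction to score every user embedding against every movie embedding once the model above is trained? A rough, untested sketch of what I have in mind (the variable names are mine, and I realise these would be retrieval scores rather than the original 1-5 ratings):

# collect every unique user id and movie title from the datasets
user_ids = np.unique(np.concatenate(list(rating.map(lambda x: x['user_id']).batch(1000))))
movie_titles = np.unique(np.concatenate(list(movies.batch(1000))))

# embed all users and all movies with the trained towers
user_embeddings = model.user_model(tf.constant(user_ids))        # (num_users, 64)
movie_embeddings = model.movie_model(tf.constant(movie_titles))  # (num_movies, 64)

# full score matrix: rows = users, columns = movies
score_matrix = tf.matmul(user_embeddings, movie_embeddings, transpose_b=True).numpy()

Is this the idea, or is there a more idiomatic tensorflow_recommenders way to get the full matrix?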


Solution 1:[1]

I also played around with the same problem. This is a very simple version that I put together in a short time, but it shows the idea.

[ Sample ]:

import os
import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np
import math

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
os.environ['TF_GPU_THREAD_MODE']='gpu_private'
os.environ['TF_GPU_THREAD_COUNT']='1'
os.environ['TFDS_DATA_DIR'] = 'F:\\datasets'

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def predict_action ( list_movie_genres, list_movie_id, list_movie_title ) :
    # Pack the three inputs (plus a repeat of the genre value) into the (1, 4, 1) shape the model expects.
    predict_DATA = tf.constant([list_movie_genres, list_movie_id, list_movie_title, list_movie_genres], shape=(1, 4, 1), dtype=tf.float32)
    predictions = model.predict(predict_DATA)

    # Threshold the 8 outputs into two 4-element 0/1 vectors (first and second half).
    result_1 = tf.where( tf.math.greater( np.asarray(predictions[0][:4], dtype=np.float32), np.zeros((1, 4))), [1], [0], name=None ).numpy()[0]
    result_2 = tf.where( tf.math.greater( np.asarray(predictions[0][4:], dtype=np.float32), np.zeros((1, 4))), [1], [0], name=None ).numpy()[0]

    return result_1, result_2

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Dataset
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
split0, split1 = tfds.even_splits('train', n=2)
ds = tfds.load('movielens/100k-ratings', split=split0)
ds_2 = tfds.load('movielens/100k-ratings', split=split1)

list_movie_genres = [ ]
list_movie_id = [ ]
list_movie_title = [ ]
list_user_gender = [ ]
list_user_rating = [ ]
for example in ds.take(200):
    list_movie_genres.append(example['movie_genres'].numpy()[0])
    list_movie_id.append(example['movie_id'].numpy()[0])
    list_movie_title.append(example['movie_title'].numpy()[0])
    
    if example['user_gender'].numpy() == True :
        list_user_gender.append( 1 )
    else :
        list_user_gender.append( 0 )
    
    list_user_rating.append( round(example['user_rating'].numpy() , 1 ) )

list_movie_genres = np.asarray( list_movie_genres, dtype=np.float32 )
list_movie_id = np.asarray( list_movie_id, dtype=np.float32 )
list_movie_title = np.asarray( list_movie_title, dtype=np.float32 )
list_user_gender = np.asarray( list_user_gender, dtype=np.float32 )
list_user_rating = np.asarray( list_user_rating, dtype=np.float32 )

list_val_movie_genres = [ ]
list_val_movie_id = [ ]
list_val_movie_title = [ ]
list_val_user_gender = [ ]
list_val_user_rating = [ ]
for example in ds_2.take(20):
    list_val_movie_genres.append(example['movie_genres'].numpy()[0])
    list_val_movie_id.append(example['movie_id'].numpy()[0])
    list_val_movie_title.append(example['movie_title'].numpy()[0])
    if example['user_gender'].numpy() == True :
        list_val_user_gender.append( 1 )
    else :
        list_val_user_gender.append( 0 )
    list_val_user_rating.append( round(example['user_rating'].numpy() , 1 ) )
    
list_val_movie_genres = np.asarray( list_val_movie_genres, dtype=np.float32 )
list_val_movie_id = np.asarray( list_val_movie_id, dtype=np.float32 )
list_val_movie_title = np.asarray( list_val_movie_title, dtype=np.float32 )
list_val_user_gender = np.asarray( list_val_user_gender, dtype=np.float32 )
list_val_user_rating = np.asarray( list_val_user_rating, dtype=np.float32 )
    
dataset = tf.data.Dataset.from_tensor_slices((
    tf.constant([list_movie_genres, list_movie_id, list_movie_title, list_movie_genres], shape=(1, 200, 4, 1), dtype=tf.float32),
    tf.constant([list_user_rating], shape=(1, 200, 1), dtype=tf.float32)))
valset = tf.data.Dataset.from_tensor_slices((
    tf.constant([list_val_movie_genres, list_val_movie_id, list_val_movie_title, list_val_user_gender], shape=(1, 20, 4, 1), dtype=tf.float32),
    tf.constant([list_val_user_rating], shape=(1, 20, 1), dtype=tf.float32)))

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=( 4, 1 )),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM( 32, return_sequences=True, return_state=False )),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM( 32 )),
    tf.keras.layers.Dense( 256 ),
    tf.keras.layers.Dropout(.2),
    tf.keras.layers.Dense( 256 ),
])
        
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(8))                             # 8 outputs, read as two 4-element 0/1 groups, e.g. [ 1, 1, 1, 1, 0, 0, 0, 0 ]
model.summary()

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-07,
    name='Nadam'
)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Loss Fn
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""                               
lossfn = tf.keras.losses.BinaryCrossentropy(
    from_logits=False,
    label_smoothing=0.0,
    axis=-1,
    reduction=tf.keras.losses.Reduction.AUTO,
    name='binary_crossentropy'
)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Compile
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
model.compile(optimizer=optimizer, loss=lossfn, metrics=[tf.keras.metrics.BinaryCrossentropy( name='binary_crossentropy', dtype=tf.float32, from_logits=False, label_smoothing=0 )])

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Training
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
history = model.fit(dataset, epochs=150, validation_data=valset) # 15000

result_1, result_2 = predict_action( 7, 51, 79 )
print("")
print("")
print("")
print( "Rates: " + str( result_1[0] * pow( 2 + 0, result_1[0] ) - result_1[0] + result_1[1] * pow( 2 + 1, result_1[1] ) - result_1[1] + result_1[2] * pow( 2 + 2, result_1[2] ) - result_1[2] + result_1[3] * pow( 2 + 3, result_1[3] )  - result_1[3] - 1 ) + "." + 
str( result_2[0] * pow( 2 + 0, result_2[0] ) - result_2[0] + result_2[1] * pow( 2 + 1, result_2[1] ) - result_2[1] + result_2[2] * pow( 2 + 2, result_2[2] ) - result_2[2] + result_2[3] * pow( 2 + 3, result_2[3] ) - result_2[3] - 1) ) 
    
input("Press Any Key!")

...

[ Output ]:

1/1 [==============================] - 0s 43ms/step - loss: 28.4547 - binary_crossentropy: 28.4547 - val_loss: 29.3992 - val_binary_crossentropy: 29.3992
Epoch 149/150
1/1 [==============================] - 0s 39ms/step - loss: 28.4590 - binary_crossentropy: 28.4590 - val_loss: 29.3691 - val_binary_crossentropy: 29.3691
Epoch 150/150
1/1 [==============================] - 0s 33ms/step - loss: 28.4475 - binary_crossentropy: 28.4475 - val_loss: 29.3345 - val_binary_crossentropy: 29.3345



Rates: 4.4
Press Any Key!
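The Rates value comes from the long print statement: each set bit of result_1 / result_2 at index i contributes i + 1, and 1 is subtracted at the end, so the two thresholded vectors are read as the integer and decimal parts of the rating. Here is the same arithmetic as a small helper (not part of the run above, just to show how the number is read):

def decode_bits(bits):
    # b * pow(2 + i, b) - b equals (i + 1) when the bit is set and 0 otherwise,
    # so the print statement sums (i + 1) over the set bits and subtracts 1
    return sum(i + 1 for i, b in enumerate(bits) if b == 1) - 1

# e.g. decode_bits([1, 0, 0, 1]) == 4, matching the "Rates: 4.4" output above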


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Martijn Pieters