Source code for sasrec.model

from collections import UserDict
from itertools import count
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle
import os
import tensorflow as tf


[docs]class MultiHeadAttention(tf.keras.layers.Layer): """ - Q (query), K (key) and V (value) are split into multiple heads (num_heads) - each tuple (q, k, v) are fed to scaled_dot_product_attention - all attention outputs are concatenated Args: attention_dim (int): Dimension of the attention embeddings. num_heads (int): Number of heads in the multi-head self-attention module. dropout_rate (float): Dropout probability. """ def __init__(self, attention_dim, num_heads, dropout_rate): super(MultiHeadAttention, self).__init__() self.num_heads = num_heads self.attention_dim = attention_dim assert attention_dim % self.num_heads == 0 self.dropout_rate = dropout_rate self.depth = attention_dim // self.num_heads self.Q = tf.keras.layers.Dense(self.attention_dim, activation=None) self.K = tf.keras.layers.Dense(self.attention_dim, activation=None) self.V = tf.keras.layers.Dense(self.attention_dim, activation=None) self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
[docs] def call(self, queries, keys): """Model forward pass. Args: queries (tf.Tensor): Tensor of queries. keys (tf.Tensor): Tensor of keys Returns: tf.Tensor: Output tensor """ # Linear projections Q = self.Q(queries) # (N, T_q, C) K = self.K(keys) # (N, T_k, C) V = self.V(keys) # (N, T_k, C) # --- MULTI HEAD --- # Split and concat, Q_, K_ and V_ are all (h*N, T_q, C/h) Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0) K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0) V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0) # --- SCALED DOT PRODUCT --- # Multiplication outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k) # Scale outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5) # Key Masking key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1))) # (N, T_k) key_masks = tf.tile(key_masks, [self.num_heads, 1]) # (h*N, T_k) key_masks = tf.tile( tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1] ) # (h*N, T_q, T_k) paddings = tf.ones_like(outputs) * (-(2 ** 32) + 1) # outputs, (h*N, T_q, T_k) outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs) # Future blinding (Causality) diag_vals = tf.ones_like(outputs[0, :, :]) # (T_q, T_k) tril = tf.linalg.LinearOperatorLowerTriangular( diag_vals ).to_dense() # (T_q, T_k) masks = tf.tile( tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1] ) # (h*N, T_q, T_k) paddings = tf.ones_like(masks) * (-(2 ** 32) + 1) # outputs, (h*N, T_q, T_k) outputs = tf.where(tf.equal(masks, 0), paddings, outputs) # Activation outputs = tf.nn.softmax(outputs) # (h*N, T_q, T_k) # Query Masking, query_masks (N, T_q) query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1))) query_masks = tf.tile(query_masks, [self.num_heads, 1]) # (h*N, T_q) query_masks = tf.tile( tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]] ) # (h*N, T_q, T_k) outputs *= query_masks # broadcasting. (N, T_q, C) # Dropouts outputs = self.dropout(outputs) # Weighted sum outputs = tf.matmul(outputs, V_) # ( h*N, T_q, C/h) # --- MULTI HEAD --- # concat heads outputs = tf.concat( tf.split(outputs, self.num_heads, axis=0), axis=2 ) # (N, T_q, C) # Residual connection outputs += queries return outputs
[docs]class PointWiseFeedForward(tf.keras.layers.Layer): """ Convolution layers with residual connection Args: conv_dims (list): List of the dimensions of the Feedforward layer. dropout_rate (float): Dropout probability. """ def __init__(self, conv_dims, dropout_rate): super(PointWiseFeedForward, self).__init__() self.conv_dims = conv_dims self.dropout_rate = dropout_rate self.conv_layer1 = tf.keras.layers.Conv1D( filters=self.conv_dims[0], kernel_size=1, activation="relu", use_bias=True ) self.conv_layer2 = tf.keras.layers.Conv1D( filters=self.conv_dims[1], kernel_size=1, activation=None, use_bias=True ) self.dropout_layer = tf.keras.layers.Dropout(self.dropout_rate)
[docs] def call(self, x): """Model forward pass. Args: x (tf.Tensor): Input tensor. Returns: tf.Tensor: Output tensor """ output = self.conv_layer1(x) output = self.dropout_layer(output) output = self.conv_layer2(output) output = self.dropout_layer(output) # Residual connection output += x return output
[docs]class EncoderLayer(tf.keras.layers.Layer): """ Transformer based encoder layer Args: seq_max_len (int): Maximum sequence length. embedding_dim (int): Embedding dimension. attention_dim (int): Dimension of the attention embeddings. num_heads (int): Number of heads in the multi-head self-attention module. conv_dims (list): List of the dimensions of the Feedforward layer. dropout_rate (float): Dropout probability. """ def __init__( self, seq_max_len, embedding_dim, attention_dim, num_heads, conv_dims, dropout_rate, ): """Initialize parameters. """ super(EncoderLayer, self).__init__() self.seq_max_len = seq_max_len self.embedding_dim = embedding_dim self.mha = MultiHeadAttention(attention_dim, num_heads, dropout_rate) self.ffn = PointWiseFeedForward(conv_dims, dropout_rate) self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6) self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6) self.dropout1 = tf.keras.layers.Dropout(dropout_rate) self.dropout2 = tf.keras.layers.Dropout(dropout_rate) self.layer_normalization = LayerNormalization( self.seq_max_len, self.embedding_dim, 1e-08 )
[docs] def call_(self, x, training, mask): """Model forward pass. Args: x (tf.Tensor): Input tensor. training (tf.Tensor): Training tensor. mask (tf.Tensor): Mask tensor. Returns: tf.Tensor: Output tensor """ attn_output = self.mha(queries=self.layer_normalization(x), keys=x) attn_output = self.dropout1(attn_output, training=training) out1 = self.layernorm1(x + attn_output) # feed forward network ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model) ffn_output = self.dropout2(ffn_output, training=training) out2 = self.layernorm2( out1 + ffn_output ) # (batch_size, input_seq_len, d_model) # masking out2 *= mask return out2
[docs] def call(self, x, training, mask): """Model forward pass. Args: x (tf.Tensor): Input tensor. training (tf.Tensor): Training tensor. mask (tf.Tensor): Mask tensor. Returns: tf.Tensor: Output tensor """ x_norm = self.layer_normalization(x) attn_output = self.mha(queries=x_norm, keys=x) attn_output = self.ffn(attn_output) out = attn_output * mask return out
[docs]class Encoder(tf.keras.layers.Layer): """ Invokes Transformer based encoder with user defined number of layers Args: num_layers (int): Number of layers. seq_max_len (int): Maximum sequence length. embedding_dim (int): Embedding dimension. attention_dim (int): Dimension of the attention embeddings. num_heads (int): Number of heads in the multi-head self-attention module. conv_dims (list): List of the dimensions of the Feedforward layer. dropout_rate (float): Dropout probability. """ def __init__( self, num_layers, seq_max_len, embedding_dim, attention_dim, num_heads, conv_dims, dropout_rate, ): """Initialize parameters. """ super(Encoder, self).__init__() self.num_layers = num_layers self.enc_layers = [ EncoderLayer( seq_max_len, embedding_dim, attention_dim, num_heads, conv_dims, dropout_rate, ) for _ in range(num_layers) ] self.dropout = tf.keras.layers.Dropout(dropout_rate)
[docs] def call(self, x, training, mask): """Model forward pass. Args: x (tf.Tensor): Input tensor. training (tf.Tensor): Training tensor. mask (tf.Tensor): Mask tensor. Returns: tf.Tensor: Output tensor """ for i in range(self.num_layers): x = self.enc_layers[i](x, training, mask) return x # (batch_size, input_seq_len, d_model)
[docs]class LayerNormalization(tf.keras.layers.Layer): """ Layer normalization using mean and variance gamma and beta are the learnable parameters Args: seq_max_len (int): Maximum sequence length. embedding_dim (int): Embedding dimension. epsilon (float): Epsilon value. """ def __init__(self, seq_max_len, embedding_dim, epsilon): """Initialize parameters. Args: seq_max_len (int): Maximum sequence length. embedding_dim (int): Embedding dimension. epsilon (float): Epsilon value. """ super(LayerNormalization, self).__init__() self.seq_max_len = seq_max_len self.embedding_dim = embedding_dim self.epsilon = epsilon self.params_shape = (self.seq_max_len, self.embedding_dim) g_init = tf.ones_initializer() self.gamma = tf.Variable( initial_value=g_init(shape=self.params_shape, dtype="float32"), trainable=True, ) b_init = tf.zeros_initializer() self.beta = tf.Variable( initial_value=b_init(shape=self.params_shape, dtype="float32"), trainable=True, )
[docs] def call(self, x): """Model forward pass. Args: x (tf.Tensor): Input tensor. Returns: tf.Tensor: Output tensor """ mean, variance = tf.nn.moments(x, [-1], keepdims=True) normalized = (x - mean) / ((variance + self.epsilon) ** 0.5) output = self.gamma * normalized + self.beta return output
[docs]class SASREC(tf.keras.Model): """Self-Attentive Sequential Recommendation Using Transformer Keyword Args: item_num (int): Number of items in the dataset. seq_max_len (int): Maximum number of items in user history. num_blocks (int): Number of Transformer blocks to be used. embedding_dim (int): Item embedding dimension. attention_dim (int): Transformer attention dimension. attention_num_heads (int): Transformer attention head. conv_dims (list): List of the dimensions of the Feedforward layer. dropout_rate (float): Dropout rate. l2_reg (float): Coefficient of the L2 regularization. Attributes: epoch (int): Epoch of trained model. best_score (float): Best validation HR@10 score while training. val_users (list): User list for validation. history (pd.DataFrame): Train history containing epoch, NDCG@10, and HR@10. """ def __init__(self, **kwargs): super(SASREC, self).__init__() self.epoch = 0 self.best_score=0 self.val_users = [] self.history = pd.DataFrame(columns=['epoch','NDCG@10','HR@10']) self.item_num = kwargs.get("item_num", None) self.seq_max_len = kwargs.get("seq_max_len", 100) self.num_blocks = kwargs.get("num_blocks", 2) self.embedding_dim = kwargs.get("embedding_dim", 100) self.attention_dim = kwargs.get("attention_dim", 100) self.attention_num_heads = kwargs.get("attention_num_heads", 1) self.conv_dims = kwargs.get("conv_dims", [100, 100]) self.dropout_rate = kwargs.get("dropout_rate", 0.5) self.l2_reg = kwargs.get("l2_reg", 0.0) self.item_embedding_layer = tf.keras.layers.Embedding( self.item_num + 1, self.embedding_dim, name="item_embeddings", mask_zero=True, embeddings_regularizer=tf.keras.regularizers.L2(self.l2_reg), ) self.positional_embedding_layer = tf.keras.layers.Embedding( self.seq_max_len, self.embedding_dim, name="positional_embeddings", mask_zero=False, embeddings_regularizer=tf.keras.regularizers.L2(self.l2_reg), ) self.dropout_layer = tf.keras.layers.Dropout(self.dropout_rate) self.encoder = Encoder( self.num_blocks, self.seq_max_len, self.embedding_dim, self.attention_dim, self.attention_num_heads, self.conv_dims, self.dropout_rate, ) self.mask_layer = tf.keras.layers.Masking(mask_value=0) self.layer_normalization = LayerNormalization( self.seq_max_len, self.embedding_dim, 1e-08 )
[docs] def embedding(self, input_seq): """Compute the sequence and positional embeddings. Args: input_seq (tf.Tensor): Input sequence Returns: tf.Tensor: Sequence embeddings tf.Tensor: Positional embeddings """ seq_embeddings = self.item_embedding_layer(input_seq) seq_embeddings = seq_embeddings * (self.embedding_dim ** 0.5) # FIXME positional_seq = tf.expand_dims(tf.range(tf.shape(input_seq)[1]), 0) positional_seq = tf.tile(positional_seq, [tf.shape(input_seq)[0], 1]) positional_embeddings = self.positional_embedding_layer(positional_seq) return seq_embeddings, positional_embeddings
[docs] def call(self, x, training): """Model forward pass. Args: x (tf.Tensor): Input tensor. training (tf.Tensor): Training tensor. Returns: tf.Tensor: Logits of the positive examples tf.Tensor: Logits of the negative examples tf.Tensor: Mask for nonzero targets """ input_seq = x["input_seq"] pos = x["positive"] neg = x["negative"] mask = tf.expand_dims(tf.cast(tf.not_equal(input_seq, 0), tf.float32), -1) seq_embeddings, positional_embeddings = self.embedding(input_seq) # add positional embeddings seq_embeddings += positional_embeddings # dropout seq_embeddings = self.dropout_layer(seq_embeddings) # masking seq_embeddings *= mask # --- ATTENTION BLOCKS --- seq_attention = seq_embeddings seq_attention = self.encoder(seq_attention, training, mask) seq_attention = self.layer_normalization(seq_attention) # (b, s, d) # --- PREDICTION LAYER --- # user's sequence embedding pos = self.mask_layer(pos) neg = self.mask_layer(neg) pos = tf.reshape(pos, [tf.shape(input_seq)[0] * self.seq_max_len]) neg = tf.reshape(neg, [tf.shape(input_seq)[0] * self.seq_max_len]) pos_emb = self.item_embedding_layer(pos) neg_emb = self.item_embedding_layer(neg) seq_emb = tf.reshape( seq_attention, [tf.shape(input_seq)[0] * self.seq_max_len, self.embedding_dim], ) # (b*s, d) pos_logits = tf.reduce_sum(pos_emb * seq_emb, -1) neg_logits = tf.reduce_sum(neg_emb * seq_emb, -1) pos_logits = tf.expand_dims(pos_logits, axis=-1) # (bs, 1) # pos_prob = tf.keras.layers.Dense(1, activation='sigmoid')(pos_logits) # (bs, 1) neg_logits = tf.expand_dims(neg_logits, axis=-1) # (bs, 1) # neg_prob = tf.keras.layers.Dense(1, activation='sigmoid')(neg_logits) # (bs, 1) # output = tf.concat([pos_logits, neg_logits], axis=0) # masking for loss calculation istarget = tf.reshape( tf.cast(tf.not_equal(pos, 0), dtype=tf.float32), [tf.shape(input_seq)[0] * self.seq_max_len], ) return pos_logits, neg_logits, istarget
[docs] def predict(self, inputs,neg_cand_n): """Returns the logits for the test items. Args: inputs (tf.Tensor): Input tensor. neg_cand_n: num of negative candidates Returns: tf.Tensor:Output tensor """ training = False input_seq = inputs["input_seq"] candidate = inputs["candidate"] mask = tf.expand_dims(tf.cast(tf.not_equal(input_seq, 0), tf.float32), -1) seq_embeddings, positional_embeddings = self.embedding(input_seq) seq_embeddings += positional_embeddings # seq_embeddings = self.dropout_layer(seq_embeddings) seq_embeddings *= mask seq_attention = seq_embeddings seq_attention = self.encoder(seq_attention, training, mask) seq_attention = self.layer_normalization(seq_attention) # (b, s, d) seq_emb = tf.reshape( seq_attention, [tf.shape(input_seq)[0] * self.seq_max_len, self.embedding_dim], ) # (b*s, d) # print(candidate) candidate_emb = self.item_embedding_layer(candidate) # (b, s, d) candidate_emb = tf.transpose(candidate_emb, perm=[0, 2, 1]) # (b, d, s) test_logits = tf.matmul(seq_emb, candidate_emb) # (200, 100) * (1, 101, 100)' test_logits = tf.reshape( test_logits, [tf.shape(input_seq)[0], self.seq_max_len, 1+neg_cand_n], ) # (1, 50, 1+neg_can) test_logits = test_logits[:, -1, :] # (1, 101) return test_logits
[docs] def loss_function(self, pos_logits, neg_logits, istarget): """Losses are calculated separately for the positive and negative items based on the corresponding logits. A mask is included to take care of the zero items (added for padding). Args: pos_logits (tf.Tensor): Logits of the positive examples. neg_logits (tf.Tensor): Logits of the negative examples. istarget (tf.Tensor): Mask for nonzero targets. Returns: float: loss """ pos_logits = pos_logits[:, 0] neg_logits = neg_logits[:, 0] # ignore padding items (0) # istarget = tf.reshape( # tf.cast(tf.not_equal(self.pos, 0), dtype=tf.float32), # [tf.shape(self.input_seq)[0] * self.seq_max_len], # ) # for logits loss = tf.reduce_sum( -tf.math.log(tf.math.sigmoid(pos_logits) + 1e-24) * istarget - tf.math.log(1 - tf.math.sigmoid(neg_logits) + 1e-24) * istarget ) / tf.reduce_sum(istarget) # for probabilities # loss = tf.reduce_sum( # - tf.math.log(pos_logits + 1e-24) * istarget - # tf.math.log(1 - neg_logits + 1e-24) * istarget # ) / tf.reduce_sum(istarget) reg_loss = tf.compat.v1.losses.get_regularization_loss() # reg_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) # loss += sum(reg_losses) loss += reg_loss return loss
[docs] def create_combined_dataset(self, u, seq, pos, neg): """ function to create model inputs from sampled batch data. This function is used during training. """ inputs = {} seq = tf.keras.preprocessing.sequence.pad_sequences( seq, padding="pre", truncating="pre", maxlen=self.seq_max_len ) pos = tf.keras.preprocessing.sequence.pad_sequences( pos, padding="pre", truncating="pre", maxlen=self.seq_max_len ) neg = tf.keras.preprocessing.sequence.pad_sequences( neg, padding="pre", truncating="pre", maxlen=self.seq_max_len ) inputs["users"] = np.expand_dims(np.array(u), axis=-1) inputs["input_seq"] = seq inputs["positive"] = pos inputs["negative"] = neg target = np.concatenate( [ np.repeat(1, seq.shape[0] * seq.shape[1]), np.repeat(0, seq.shape[0] * seq.shape[1]), ], axis=0, ) target = np.expand_dims(target, axis=-1) return inputs, target
[docs] def train(self, dataset, sampler, num_epochs=10,batch_size=128,lr=0.001,\ val_epoch=5,val_target_user_n=1000,target_item_n=-1,auto_save=False,\ path='./',exp_name='SASRec_exp'): """High level function for model training as well as evaluation on the validation and test dataset Args: dataset (:obj:`util.SASRecDataSet`): SASRecDataSet containing users-item interaction history. sampler (:obj:`util.WarpSampler`): WarpSampler. num_epochs (int, optional): Epoch. Defaults to 10. batch_size (int, optional): Batch size. Defaults to 128. lr (float, optional): Learning rate. Defaults to 0.001. val_epoch (int, optional): Validation term. Defaults to 5. val_target_user_n (int, optional): Number of randomly sampled users to conduct validation. Defaults to 1000. target_item_n (int, optional): Size of candidate. Defaults to -1, which means all. auto_save (bool, optional): If true, save model with best validation score. Defaults to False. path (str, optional): Path to save model. exp_name (str, optional): Experiment name. """ num_steps = int(len(dataset.user_train) / batch_size) optimizer = tf.keras.optimizers.Adam( learning_rate=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-7 ) loss_function = self.loss_function train_loss = tf.keras.metrics.Mean(name="train_loss") train_step_signature = [ { "users": tf.TensorSpec(shape=(None, 1), dtype=tf.int64), "input_seq": tf.TensorSpec( shape=(None, self.seq_max_len), dtype=tf.int64 ), "positive": tf.TensorSpec( shape=(None, self.seq_max_len), dtype=tf.int64 ), "negative": tf.TensorSpec( shape=(None, self.seq_max_len), dtype=tf.int64 ), }, tf.TensorSpec(shape=(None, 1), dtype=tf.int64), ] @tf.function(input_signature=train_step_signature) def train_step(inp, tar): with tf.GradientTape() as tape: pos_logits, neg_logits, loss_mask = self(inp, training=True) loss = loss_function(pos_logits, neg_logits, loss_mask) gradients = tape.gradient(loss, self.trainable_variables) optimizer.apply_gradients(zip(gradients, self.trainable_variables)) train_loss(loss) return loss for epoch in range(1, num_epochs + 1): print(f'epoch {epoch} / {num_epochs} -----------------------------') self.epoch = epoch step_loss = [] train_loss.reset_states() for step in tqdm( range(num_steps), total=num_steps, ncols=70, leave=False, unit="b", # disable= ~progress_bar ): u, seq, pos, neg = sampler.next_batch() inputs, target = self.create_combined_dataset(u, seq, pos, neg) loss = train_step(inputs, target) step_loss.append(loss) if epoch % val_epoch == 0: print("Evaluating...") t_test = self.evaluate(dataset,target_user_n=val_target_user_n,target_item_n=target_item_n,is_val=True) print( f"epoch: {epoch}, test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})" ) self.history.loc[len(self.history)] = [epoch,t_test[0],t_test[1]] if t_test[1] > self.best_score: self.best_score = t_test[1] if auto_save: self.save(path,exp_name) print('best score model updated and saved') if auto_save: self.history.to_csv(f'{path}/{exp_name}/{exp_name}_train_log.csv',index=False) return
[docs] def evaluate(self, dataset,target_user_n=1000,target_item_n=-1,rank_threshold=10,is_val=False): """Evaluate model on validation set or test set Args: dataset (:obj:`SASRecDataSet`): SASRecDataSet containing users-item interaction history. target_user_n (int, optional): Number of randomly sampled users to evaluate. Defaults to 1000. target_item_n (int, optional): Size of candidate. Defaults to -1, which means all. rank_threshold (int, optional): k value in NDCG@k and HR@k. Defaults to 10. is_val (bool, optional): If true, evaluate on validation set. If False, evaluate on test set. Defaults to False. Returns: float: NDCG@k float: HR@k """ usernum = dataset.usernum itemnum = dataset.itemnum all = dataset.User train = dataset.user_train # removing deepcopy valid = dataset.user_valid test = dataset.user_test NDCG = 0.0 HT = 0.0 valid_user = 0.0 if len(self.val_users) == 0: self.sample_val_users(dataset,target_user_n) for u in tqdm(self.val_users, ncols=70, leave=False, unit="b"): if len(train[u]) < 1 or len(test[u]) < 1: continue seq = np.zeros([self.seq_max_len], dtype=np.int32) idx = self.seq_max_len - 1 if is_val: item_idx = [valid[u][0]] else: seq[idx] = valid[u][0] idx -= 1 item_idx = [test[u][0]] for i in reversed(train[u]): seq[idx] = i idx -= 1 if idx == -1: break rated = set(all[u]) if (target_item_n == -1): item_idx=item_idx+list(set(range(1,itemnum+1)).difference(rated)) elif type(target_item_n)==int: for _ in range(target_item_n): t = np.random.randint(1, itemnum + 1) while t in rated: t = np.random.randint(1, itemnum + 1) item_idx.append(t) elif type(target_item_n)==float: for _ in range(round(itemnum*target_item_n)): t = np.random.randint(1, itemnum + 1) while t in rated: t = np.random.randint(1, itemnum + 1) item_idx.append(t) else: raise inputs = {} inputs["user"] = np.expand_dims(np.array([u]), axis=-1) inputs["input_seq"] = np.array([seq]) inputs["candidate"] = np.array([item_idx]) # print(inputs) # inverse to get descending sort predictions = -1.0 * self.predict(inputs, len(item_idx)-1) predictions = np.array(predictions) predictions = predictions[0] # print('predictions:', predictions) rank = predictions.argsort().argsort()[0] # print('rank:', rank) valid_user += 1 if rank < rank_threshold: NDCG += 1 / np.log2(rank + 2) HT += 1 return NDCG / valid_user, HT / valid_user
[docs] def recommend_item(self, dataset, user_map_dict,user_id_list, target_item_n=-1,top_n=10,exclude_purchased=True,is_test=False): """Recommend items to user Args: dataset (:obj:`util.SASRecDataSet`): SASRecDataSet containing users-item interaction history. user_map_dict (dict): Dict { user_id : encoded_user_label , ... } user_id_list (list): User list to predict. target_item_n (int, optional): Size of candidate. Defaults to -1, which means all. top_n (int, optional): Number of items to recommend. Defaults to 10. exclude_purchased (bool, optional): If true, exclude already purchased item from candidate. Defaults to True. is_test (bool, optional): If true, exclude the last item from each user's sequence. Defaults to False. Returns: pd.DataFrame: recommended items for users """ all = dataset.User itemnum = dataset.itemnum users = [user_map_dict[u] for u in user_id_list] inv_user_map = {v: k for k, v in user_map_dict.items()} return_dict={} for u in tqdm(users): seq = np.zeros([self.seq_max_len], dtype=np.int32) idx = self.seq_max_len - 1 list_to_seq = all[u] if not is_test else all[u][:-1] for i in reversed(list_to_seq): seq[idx] = i idx -= 1 if idx == -1: break if exclude_purchased: rated = set(all[u]) else: rated = set() # make empty candidate list item_idx=[] if (target_item_n == -1): item_idx=item_idx+list(set(range(1,itemnum+1)).difference(rated)) elif type(target_item_n)==int: for _ in range(target_item_n): t = np.random.randint(1, itemnum + 1) while t in rated: t = np.random.randint(1, itemnum + 1) item_idx.append(t) elif type(target_item_n)==float: for _ in range(round(itemnum*target_item_n)): t = np.random.randint(1, itemnum + 1) while t in rated: t = np.random.randint(1, itemnum + 1) item_idx.append(t) else: raise inputs = {} inputs["user"] = np.expand_dims(np.array([u]), axis=-1) inputs["input_seq"] = np.array([seq]) inputs["candidate"] = np.array([item_idx]) predictions = self.predict(inputs, len(item_idx)-1) predictions = np.array(predictions) predictions = predictions[0] pred_dict = {v : predictions[i] for i,v in enumerate(item_idx)} pred_dict = sorted(pred_dict.items(), key = lambda item: item[1], reverse = True) top_list = pred_dict[:top_n] return_dict[inv_user_map[u]] = top_list return return_dict
[docs] def old_get_user_item_score(self, dataset, user_map_dict,item_map_dict,user_id_list, item_list,is_test=False): """ Deprecated """ all = dataset.User users = [user_map_dict[u] for u in user_id_list] items = [item_map_dict[i] for i in item_list] # inv_user_map = {v: k for k, v in user_map_dict.items()} # inv_item_map = {v: k for k, v in item_map_dict.items()} score_dict = {i:[] for i in item_list} for u in tqdm(users,unit=' User',desc='Getting Scores for each user ...'): seq = np.zeros([self.seq_max_len], dtype=np.int32) idx = self.seq_max_len - 1 list_to_seq = all[u] if not is_test else all[u][:-1] for i in reversed(list_to_seq): seq[idx] = i idx -= 1 if idx == -1: break inputs = {} inputs["user"] = np.expand_dims(np.array([u]), axis=-1) inputs["input_seq"] = np.array([seq]) inputs["candidate"] = np.array([items]) predictions = self.predict(inputs, len(items)-1) predictions = np.array(predictions) predictions = predictions[0] # pred_dict = {inv_item_map[v] : predictions[i] for i,v in enumerate(items)} for i,v in enumerate(item_list): score_dict[v].append(predictions[i]) return_df = pd.DataFrame({ 'user_id':user_id_list, }) for k in score_dict: return_df[k] = score_dict[k] return_df = return_df.sort_values(by='user_id').reset_index(drop=True) return return_df
[docs] def get_user_item_score(self,dataset,user_id_list, item_list,user_map_dict,item_map_dict,batch_size=128): """Get item score for each user on batch Args: dataset (:obj:`SASRecDataSet`): SASRecDataSet containing users-item interaction history. user_id_list (list): User list to predict. item_list (list): Item list to predict user_map_dict (dict): Dict { user_id : encoded_user_label , ... } item_map_dict (dict): Dict { item : encoded_item_label , ... } batch_size (int, optional): Batch size. Defaults to 128. Raises: Exception: Batch_size must be smaller than user id list size. Returns: pd.DataFrame: user-item score """ if batch_size >= len(user_id_list): raise Exception('batch_size must be smaller than user_id_list size') user_history = dataset.User inv_user_map = {v: k for k, v in user_map_dict.items()} cand = [item_map_dict[i] for i in item_list] if len(user_id_list)%batch_size ==0: num_steps = int(len(user_id_list)/batch_size) else: num_steps = int(len(user_id_list)/batch_size) + 1 # num_steps = len(user_id_list)/batch_size # num_steps = num_steps if num_steps%1 == 0 else int(num_steps)+1 print(num_steps) score_dict = dict() start_index=0 for step in tqdm( range(num_steps), leave=True, unit="batch", ): # get user batch u = user_id_list[start_index:start_index+batch_size] if step != num_steps-1 \ else user_id_list[start_index:] start_index+=batch_size u = [user_map_dict[user] for user in u] # get sequence batch seq = [user_history[user] for user in u] inputs = self.create_combined_dataset_pred(tuple(u),tuple(seq),cand) # print(inputs) predictions = self.batch_predict(inputs, len(cand)-1) predictions = np.array(predictions) for i in range(len(u)): score_dict[inv_user_map[u[i]]]=predictions[i] return_df = pd.DataFrame(list(score_dict.items()),columns = ['user_id','score_array']).set_index('user_id',drop=True) # print(return_df) return_df[item_list] = pd.DataFrame(return_df['score_array'].tolist(), index= return_df.index) return_df = return_df.drop('score_array',axis=1).reset_index().sort_values(by='user_id').reset_index(drop=True) return return_df
[docs] def create_combined_dataset_pred(self,u,seq,cand): """ function to create model inputs from sampled batch data. This function is used during predicting on batch. """ inputs = {} seq = tf.keras.preprocessing.sequence.pad_sequences( seq, padding="pre", truncating="pre", maxlen=self.seq_max_len ) inputs["users"] = np.expand_dims(np.array(u), axis=-1) inputs["input_seq"] = seq inputs['candidate'] = np.array([cand]) return inputs
[docs] def batch_predict(self, inputs,cand_n): """Returns the logits for the item candidates. Args: inputs (tf.Tensor): Input tensor. cand_n (int): Num of candidates. Returns: tf.Tensor: Output tensor """ training = False input_seq = inputs["input_seq"] candidate = inputs["candidate"] mask = tf.expand_dims(tf.cast(tf.not_equal(input_seq, 0), tf.float32), -1) seq_embeddings, positional_embeddings = self.embedding(input_seq) seq_embeddings += positional_embeddings # seq_embeddings = self.dropout_layer(seq_embeddings) seq_embeddings *= mask seq_attention = seq_embeddings seq_attention = self.encoder(seq_attention, training, mask) seq_attention = self.layer_normalization(seq_attention) # (b, s, d) seq_emb = tf.reshape( seq_attention, [tf.shape(input_seq)[0] * self.seq_max_len, self.embedding_dim], ) # (b*s, d) # print(candidate) candidate_emb = self.item_embedding_layer(candidate) # (b, s, d) candidate_emb = tf.transpose(candidate_emb, perm=[0, 2, 1]) # (b, d, s) test_logits = tf.matmul(seq_emb, candidate_emb) # (200, 100) * (1, 101, 100)' test_logits = tf.reshape( test_logits, [tf.shape(input_seq)[0], self.seq_max_len, 1+cand_n], ) # (1, 50, 1+can) test_logits = test_logits[:, -1, :] # (1, 101) return test_logits
[docs] def save(self,path, exp_name='sas_experiment'): """Save trained SASRec Model Args: path (str): Path to save model. exp_name (str): Experiment name. Examples: >>> model.save(path, exp_name) """ # make dir if not os.path.exists(f'{path}/{exp_name}'): os.mkdir(f'{path}/{exp_name}') self.save_weights(f'{path}/{exp_name}/{exp_name}_weights') # save trained weights arg_list = ['item_num','seq_max_len','num_blocks','embedding_dim','attention_dim','attention_num_heads','dropout_rate','conv_dims','l2_reg','history'] dict_to_save = {a: self.__dict__[a] for a in arg_list} with open(f'{path}/{exp_name}/{exp_name}_model_args','wb') as f: pickle.dump(dict_to_save, f) if not os.path.isfile(f'{path}/{exp_name}/{exp_name}_save_log.txt'): with open(f'{path}/{exp_name}/{exp_name}_save_log.txt','w') as f: f.writelines(f'Model args: {dict_to_save}\n') f.writelines(f'[epoch {self.epoch}] Best HR@10 score: {self.best_score}\n') else: with open(f'{path}/{exp_name}/{exp_name}_save_log.txt','a') as f: f.writelines(f'[epoch {self.epoch}] Best HR@10 score: {self.best_score}\n') return
[docs] def sample_val_users(self,dataset,target_user_n): """Sample users for validation Args: dataset (:obj:`SASRecDataSet`): SASRec dataset used for training target_user_n (int): Number of users to sample """ usernum = dataset.usernum if usernum > target_user_n: self.val_users = random.sample(range(1, usernum + 1), target_user_n) else: self.val_users = range(1, usernum + 1)