import random
import numpy as np
import pandas as pd
from tqdm import tqdm
import pickle
import os
import tensorflow as tf
class MultiHeadAttention(tf.keras.layers.Layer):
"""
- Q (query), K (key) and V (value) are split into multiple heads (num_heads)
- each tuple (q, k, v) are fed to scaled_dot_product_attention
- all attention outputs are concatenated
Args:
attention_dim (int): Dimension of the attention embeddings.
num_heads (int): Number of heads in the multi-head self-attention module.
dropout_rate (float): Dropout probability.
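    Examples:
        A minimal shape check (illustrative only; assumes eager TF2 execution,
        toy dimensions, and attention_dim divisible by num_heads):

        >>> mha = MultiHeadAttention(attention_dim=8, num_heads=2, dropout_rate=0.2)
        >>> x = tf.ones((2, 5, 8))  # (batch, seq_len, attention_dim)
        >>> mha(queries=x, keys=x).shape
        TensorShape([2, 5, 8])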
"""
def __init__(self, attention_dim, num_heads, dropout_rate):
super(MultiHeadAttention, self).__init__()
self.num_heads = num_heads
self.attention_dim = attention_dim
assert attention_dim % self.num_heads == 0
self.dropout_rate = dropout_rate
self.depth = attention_dim // self.num_heads
self.Q = tf.keras.layers.Dense(self.attention_dim, activation=None)
self.K = tf.keras.layers.Dense(self.attention_dim, activation=None)
self.V = tf.keras.layers.Dense(self.attention_dim, activation=None)
self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
    def call(self, queries, keys):
"""Model forward pass.
Args:
queries (tf.Tensor): Tensor of queries.
            keys (tf.Tensor): Tensor of keys.
Returns:
tf.Tensor: Output tensor
"""
# Linear projections
Q = self.Q(queries) # (N, T_q, C)
K = self.K(keys) # (N, T_k, C)
V = self.V(keys) # (N, T_k, C)
# --- MULTI HEAD ---
        # Split and concat: Q_ is (h*N, T_q, C/h); K_ and V_ are (h*N, T_k, C/h)
Q_ = tf.concat(tf.split(Q, self.num_heads, axis=2), axis=0)
K_ = tf.concat(tf.split(K, self.num_heads, axis=2), axis=0)
V_ = tf.concat(tf.split(V, self.num_heads, axis=2), axis=0)
# --- SCALED DOT PRODUCT ---
# Multiplication
outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1])) # (h*N, T_q, T_k)
# Scale
outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)
# Key Masking
key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1))) # (N, T_k)
key_masks = tf.tile(key_masks, [self.num_heads, 1]) # (h*N, T_k)
key_masks = tf.tile(
tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1]
) # (h*N, T_q, T_k)
paddings = tf.ones_like(outputs) * (-(2 ** 32) + 1)
# outputs, (h*N, T_q, T_k)
outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)
# Future blinding (Causality)
diag_vals = tf.ones_like(outputs[0, :, :]) # (T_q, T_k)
tril = tf.linalg.LinearOperatorLowerTriangular(
diag_vals
).to_dense() # (T_q, T_k)
masks = tf.tile(
tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1]
) # (h*N, T_q, T_k)
paddings = tf.ones_like(masks) * (-(2 ** 32) + 1)
# outputs, (h*N, T_q, T_k)
outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
# Activation
outputs = tf.nn.softmax(outputs) # (h*N, T_q, T_k)
# Query Masking, query_masks (N, T_q)
query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
query_masks = tf.tile(query_masks, [self.num_heads, 1]) # (h*N, T_q)
query_masks = tf.tile(
tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]]
) # (h*N, T_q, T_k)
        outputs *= query_masks  # (h*N, T_q, T_k)
# Dropouts
outputs = self.dropout(outputs)
# Weighted sum
        outputs = tf.matmul(outputs, V_)  # (h*N, T_q, C/h)
# --- MULTI HEAD ---
# concat heads
outputs = tf.concat(
tf.split(outputs, self.num_heads, axis=0), axis=2
) # (N, T_q, C)
# Residual connection
outputs += queries
return outputs
class PointWiseFeedForward(tf.keras.layers.Layer):
"""
Convolution layers with residual connection
Args:
conv_dims (list): List of the dimensions of the Feedforward layer.
dropout_rate (float): Dropout probability.
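    Examples:
        A toy shape check (illustrative only; the residual connection requires
        conv_dims[1] to match the input's last dimension):

        >>> ffn = PointWiseFeedForward(conv_dims=[8, 8], dropout_rate=0.2)
        >>> ffn(tf.ones((2, 5, 8))).shape
        TensorShape([2, 5, 8])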
"""
def __init__(self, conv_dims, dropout_rate):
super(PointWiseFeedForward, self).__init__()
self.conv_dims = conv_dims
self.dropout_rate = dropout_rate
self.conv_layer1 = tf.keras.layers.Conv1D(
filters=self.conv_dims[0], kernel_size=1, activation="relu", use_bias=True
)
self.conv_layer2 = tf.keras.layers.Conv1D(
filters=self.conv_dims[1], kernel_size=1, activation=None, use_bias=True
)
self.dropout_layer = tf.keras.layers.Dropout(self.dropout_rate)
    def call(self, x):
"""Model forward pass.
Args:
x (tf.Tensor): Input tensor.
Returns:
tf.Tensor: Output tensor
"""
output = self.conv_layer1(x)
output = self.dropout_layer(output)
output = self.conv_layer2(output)
output = self.dropout_layer(output)
# Residual connection
output += x
return output
class EncoderLayer(tf.keras.layers.Layer):
"""
    Transformer-based encoder layer.
Args:
seq_max_len (int): Maximum sequence length.
embedding_dim (int): Embedding dimension.
attention_dim (int): Dimension of the attention embeddings.
num_heads (int): Number of heads in the multi-head self-attention module.
conv_dims (list): List of the dimensions of the Feedforward layer.
dropout_rate (float): Dropout probability.
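    Examples:
        A toy forward pass (illustrative only; embedding_dim, attention_dim and
        conv_dims[1] must match for the residual connections to line up):

        >>> layer = EncoderLayer(seq_max_len=5, embedding_dim=8, attention_dim=8,
        ...                      num_heads=2, conv_dims=[8, 8], dropout_rate=0.2)
        >>> x, mask = tf.ones((2, 5, 8)), tf.ones((2, 5, 1))
        >>> layer(x, training=False, mask=mask).shape
        TensorShape([2, 5, 8])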
"""
def __init__(
self,
seq_max_len,
embedding_dim,
attention_dim,
num_heads,
conv_dims,
dropout_rate,
):
"""Initialize parameters.
"""
super(EncoderLayer, self).__init__()
self.seq_max_len = seq_max_len
self.embedding_dim = embedding_dim
self.mha = MultiHeadAttention(attention_dim, num_heads, dropout_rate)
self.ffn = PointWiseFeedForward(conv_dims, dropout_rate)
self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
self.layer_normalization = LayerNormalization(
self.seq_max_len, self.embedding_dim, 1e-08
)
    def call_(self, x, training, mask):
        """Alternative forward pass (not invoked by Keras; ``call`` below is the active one).
Args:
x (tf.Tensor): Input tensor.
            training (bool): Training flag (enables dropout when True).
            mask (tf.Tensor): Padding mask tensor.
Returns:
tf.Tensor: Output tensor
"""
attn_output = self.mha(queries=self.layer_normalization(x), keys=x)
attn_output = self.dropout1(attn_output, training=training)
out1 = self.layernorm1(x + attn_output)
# feed forward network
ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model)
ffn_output = self.dropout2(ffn_output, training=training)
out2 = self.layernorm2(
out1 + ffn_output
) # (batch_size, input_seq_len, d_model)
# masking
out2 *= mask
return out2
    def call(self, x, training, mask):
"""Model forward pass.
Args:
x (tf.Tensor): Input tensor.
            training (bool): Training flag (enables dropout when True).
            mask (tf.Tensor): Padding mask tensor.
Returns:
tf.Tensor: Output tensor
"""
x_norm = self.layer_normalization(x)
attn_output = self.mha(queries=x_norm, keys=x)
attn_output = self.ffn(attn_output)
out = attn_output * mask
return out
class Encoder(tf.keras.layers.Layer):
"""
    Stack of Transformer-based encoder layers; the number of layers is user-defined.
Args:
num_layers (int): Number of layers.
seq_max_len (int): Maximum sequence length.
embedding_dim (int): Embedding dimension.
attention_dim (int): Dimension of the attention embeddings.
num_heads (int): Number of heads in the multi-head self-attention module.
conv_dims (list): List of the dimensions of the Feedforward layer.
dropout_rate (float): Dropout probability.
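    Examples:
        A two-layer stack on the same toy shapes as EncoderLayer (illustrative only):

        >>> enc = Encoder(num_layers=2, seq_max_len=5, embedding_dim=8,
        ...               attention_dim=8, num_heads=2, conv_dims=[8, 8],
        ...               dropout_rate=0.2)
        >>> enc(tf.ones((2, 5, 8)), training=False, mask=tf.ones((2, 5, 1))).shape
        TensorShape([2, 5, 8])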
"""
def __init__(
self,
num_layers,
seq_max_len,
embedding_dim,
attention_dim,
num_heads,
conv_dims,
dropout_rate,
):
"""Initialize parameters.
"""
super(Encoder, self).__init__()
self.num_layers = num_layers
self.enc_layers = [
EncoderLayer(
seq_max_len,
embedding_dim,
attention_dim,
num_heads,
conv_dims,
dropout_rate,
)
for _ in range(num_layers)
]
self.dropout = tf.keras.layers.Dropout(dropout_rate)
    def call(self, x, training, mask):
"""Model forward pass.
Args:
x (tf.Tensor): Input tensor.
            training (bool): Training flag (enables dropout when True).
            mask (tf.Tensor): Padding mask tensor.
Returns:
tf.Tensor: Output tensor
"""
for i in range(self.num_layers):
x = self.enc_layers[i](x, training, mask)
return x # (batch_size, input_seq_len, d_model)
class LayerNormalization(tf.keras.layers.Layer):
"""
    Layer normalization over the last dimension using mean and variance;
    gamma and beta are the learnable scale and shift parameters.
Args:
seq_max_len (int): Maximum sequence length.
embedding_dim (int): Embedding dimension.
epsilon (float): Epsilon value.
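    Examples:
        A quick sanity check (illustrative only; gamma starts as ones and beta
        as zeros, so the output is just the normalized input, with mean ~0
        along the last axis):

        >>> ln = LayerNormalization(seq_max_len=5, embedding_dim=8, epsilon=1e-8)
        >>> ln(tf.random.uniform((2, 5, 8))).shape
        TensorShape([2, 5, 8])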
"""
def __init__(self, seq_max_len, embedding_dim, epsilon):
"""Initialize parameters.
Args:
seq_max_len (int): Maximum sequence length.
embedding_dim (int): Embedding dimension.
epsilon (float): Epsilon value.
"""
super(LayerNormalization, self).__init__()
self.seq_max_len = seq_max_len
self.embedding_dim = embedding_dim
self.epsilon = epsilon
self.params_shape = (self.seq_max_len, self.embedding_dim)
g_init = tf.ones_initializer()
self.gamma = tf.Variable(
initial_value=g_init(shape=self.params_shape, dtype="float32"),
trainable=True,
)
b_init = tf.zeros_initializer()
self.beta = tf.Variable(
initial_value=b_init(shape=self.params_shape, dtype="float32"),
trainable=True,
)
    def call(self, x):
"""Model forward pass.
Args:
x (tf.Tensor): Input tensor.
Returns:
tf.Tensor: Output tensor
"""
mean, variance = tf.nn.moments(x, [-1], keepdims=True)
normalized = (x - mean) / ((variance + self.epsilon) ** 0.5)
output = self.gamma * normalized + self.beta
return output
class SASREC(tf.keras.Model):
    """Self-Attentive Sequential Recommendation (SASRec) using Transformer blocks
    (Kang and McAuley, ICDM 2018).
Keyword Args:
item_num (int): Number of items in the dataset.
seq_max_len (int): Maximum number of items in user history.
num_blocks (int): Number of Transformer blocks to be used.
embedding_dim (int): Item embedding dimension.
attention_dim (int): Transformer attention dimension.
        attention_num_heads (int): Number of Transformer attention heads.
conv_dims (list): List of the dimensions of the Feedforward layer.
dropout_rate (float): Dropout rate.
l2_reg (float): Coefficient of the L2 regularization.
Attributes:
epoch (int): Epoch of trained model.
best_score (float): Best validation HR@10 score while training.
val_users (list): User list for validation.
history (pd.DataFrame): Train history containing epoch, NDCG@10, and HR@10.
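    Examples:
        A minimal construction sketch (illustrative hyperparameter values):

        >>> model = SASREC(item_num=1000, seq_max_len=50, num_blocks=2,
        ...                embedding_dim=100, attention_dim=100,
        ...                attention_num_heads=1, conv_dims=[100, 100],
        ...                dropout_rate=0.5, l2_reg=0.0)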
"""
def __init__(self, **kwargs):
super(SASREC, self).__init__()
self.epoch = 0
        self.best_score = 0
        self.val_users = []
        self.history = pd.DataFrame(columns=["epoch", "NDCG@10", "HR@10"])
self.item_num = kwargs.get("item_num", None)
self.seq_max_len = kwargs.get("seq_max_len", 100)
self.num_blocks = kwargs.get("num_blocks", 2)
self.embedding_dim = kwargs.get("embedding_dim", 100)
self.attention_dim = kwargs.get("attention_dim", 100)
self.attention_num_heads = kwargs.get("attention_num_heads", 1)
self.conv_dims = kwargs.get("conv_dims", [100, 100])
self.dropout_rate = kwargs.get("dropout_rate", 0.5)
self.l2_reg = kwargs.get("l2_reg", 0.0)
self.item_embedding_layer = tf.keras.layers.Embedding(
self.item_num + 1,
self.embedding_dim,
name="item_embeddings",
mask_zero=True,
embeddings_regularizer=tf.keras.regularizers.L2(self.l2_reg),
)
self.positional_embedding_layer = tf.keras.layers.Embedding(
self.seq_max_len,
self.embedding_dim,
name="positional_embeddings",
mask_zero=False,
embeddings_regularizer=tf.keras.regularizers.L2(self.l2_reg),
)
self.dropout_layer = tf.keras.layers.Dropout(self.dropout_rate)
self.encoder = Encoder(
self.num_blocks,
self.seq_max_len,
self.embedding_dim,
self.attention_dim,
self.attention_num_heads,
self.conv_dims,
self.dropout_rate,
)
self.mask_layer = tf.keras.layers.Masking(mask_value=0)
self.layer_normalization = LayerNormalization(
self.seq_max_len, self.embedding_dim, 1e-08
)
    def embedding(self, input_seq):
"""Compute the sequence and positional embeddings.
Args:
input_seq (tf.Tensor): Input sequence
Returns:
tf.Tensor: Sequence embeddings
tf.Tensor: Positional embeddings
"""
seq_embeddings = self.item_embedding_layer(input_seq)
seq_embeddings = seq_embeddings * (self.embedding_dim ** 0.5)
# FIXME
positional_seq = tf.expand_dims(tf.range(tf.shape(input_seq)[1]), 0)
positional_seq = tf.tile(positional_seq, [tf.shape(input_seq)[0], 1])
positional_embeddings = self.positional_embedding_layer(positional_seq)
return seq_embeddings, positional_embeddings
    def call(self, x, training):
"""Model forward pass.
Args:
x (tf.Tensor): Input tensor.
            training (bool): Training flag (enables dropout when True).
Returns:
tf.Tensor: Logits of the positive examples
tf.Tensor: Logits of the negative examples
tf.Tensor: Mask for nonzero targets
"""
input_seq = x["input_seq"]
pos = x["positive"]
neg = x["negative"]
mask = tf.expand_dims(tf.cast(tf.not_equal(input_seq, 0), tf.float32), -1)
seq_embeddings, positional_embeddings = self.embedding(input_seq)
# add positional embeddings
seq_embeddings += positional_embeddings
# dropout
seq_embeddings = self.dropout_layer(seq_embeddings)
# masking
seq_embeddings *= mask
# --- ATTENTION BLOCKS ---
seq_attention = seq_embeddings
seq_attention = self.encoder(seq_attention, training, mask)
seq_attention = self.layer_normalization(seq_attention) # (b, s, d)
# --- PREDICTION LAYER ---
# user's sequence embedding
pos = self.mask_layer(pos)
neg = self.mask_layer(neg)
pos = tf.reshape(pos, [tf.shape(input_seq)[0] * self.seq_max_len])
neg = tf.reshape(neg, [tf.shape(input_seq)[0] * self.seq_max_len])
pos_emb = self.item_embedding_layer(pos)
neg_emb = self.item_embedding_layer(neg)
seq_emb = tf.reshape(
seq_attention,
[tf.shape(input_seq)[0] * self.seq_max_len, self.embedding_dim],
) # (b*s, d)
pos_logits = tf.reduce_sum(pos_emb * seq_emb, -1)
neg_logits = tf.reduce_sum(neg_emb * seq_emb, -1)
pos_logits = tf.expand_dims(pos_logits, axis=-1) # (bs, 1)
# pos_prob = tf.keras.layers.Dense(1, activation='sigmoid')(pos_logits) # (bs, 1)
neg_logits = tf.expand_dims(neg_logits, axis=-1) # (bs, 1)
# neg_prob = tf.keras.layers.Dense(1, activation='sigmoid')(neg_logits) # (bs, 1)
# output = tf.concat([pos_logits, neg_logits], axis=0)
# masking for loss calculation
istarget = tf.reshape(
tf.cast(tf.not_equal(pos, 0), dtype=tf.float32),
[tf.shape(input_seq)[0] * self.seq_max_len],
)
return pos_logits, neg_logits, istarget
    def predict(self, inputs, neg_cand_n):
"""Returns the logits for the test items.
Args:
            inputs (dict): Dict of input tensors with keys "input_seq" and "candidate".
            neg_cand_n (int): Number of negative candidates.
        Returns:
            tf.Tensor: Output tensor of logits.
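        Examples:
            A hypothetical single-user call (assumes a model built with
            seq_max_len=5; the item ids below are made up):

            >>> inputs = {"input_seq": np.array([[0, 0, 1, 2, 3]]),
            ...           "candidate": np.array([[4, 5, 6]])}
            >>> model.predict(inputs, neg_cand_n=2).shape  # 1 positive + 2 negatives
            TensorShape([1, 3])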
"""
training = False
input_seq = inputs["input_seq"]
candidate = inputs["candidate"]
mask = tf.expand_dims(tf.cast(tf.not_equal(input_seq, 0), tf.float32), -1)
seq_embeddings, positional_embeddings = self.embedding(input_seq)
seq_embeddings += positional_embeddings
# seq_embeddings = self.dropout_layer(seq_embeddings)
seq_embeddings *= mask
seq_attention = seq_embeddings
seq_attention = self.encoder(seq_attention, training, mask)
seq_attention = self.layer_normalization(seq_attention) # (b, s, d)
seq_emb = tf.reshape(
seq_attention,
[tf.shape(input_seq)[0] * self.seq_max_len, self.embedding_dim],
) # (b*s, d)
        candidate_emb = self.item_embedding_layer(candidate)  # (1, 1+neg_cand_n, d)
        candidate_emb = tf.transpose(candidate_emb, perm=[0, 2, 1])  # (1, d, 1+neg_cand_n)
        test_logits = tf.matmul(seq_emb, candidate_emb)  # (1, b*s, 1+neg_cand_n)
        test_logits = tf.reshape(
            test_logits,
            [tf.shape(input_seq)[0], self.seq_max_len, 1 + neg_cand_n],
        )  # (b, s, 1+neg_cand_n)
        test_logits = test_logits[:, -1, :]  # (b, 1+neg_cand_n): scores at the last position
        return test_logits
    def loss_function(self, pos_logits, neg_logits, istarget):
"""Losses are calculated separately for the positive and negative
items based on the corresponding logits. A mask is included to
take care of the zero items (added for padding).
Args:
pos_logits (tf.Tensor): Logits of the positive examples.
neg_logits (tf.Tensor): Logits of the negative examples.
istarget (tf.Tensor): Mask for nonzero targets.
Returns:
float: loss
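        Examples:
            A toy numeric sketch (illustrative; with one unmasked target the loss
            is -log(sigmoid(2)) - log(1 - sigmoid(-2)) ~= 0.254, plus any L2
            regularization):

            >>> pos = tf.constant([[2.0]])
            >>> neg = tf.constant([[-2.0]])
            >>> istarget = tf.constant([1.0])
            >>> loss = model.loss_function(pos, neg, istarget)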
"""
pos_logits = pos_logits[:, 0]
neg_logits = neg_logits[:, 0]
# ignore padding items (0)
# istarget = tf.reshape(
# tf.cast(tf.not_equal(self.pos, 0), dtype=tf.float32),
# [tf.shape(self.input_seq)[0] * self.seq_max_len],
# )
# for logits
loss = tf.reduce_sum(
-tf.math.log(tf.math.sigmoid(pos_logits) + 1e-24) * istarget
- tf.math.log(1 - tf.math.sigmoid(neg_logits) + 1e-24) * istarget
) / tf.reduce_sum(istarget)
# for probabilities
# loss = tf.reduce_sum(
# - tf.math.log(pos_logits + 1e-24) * istarget -
# tf.math.log(1 - neg_logits + 1e-24) * istarget
# ) / tf.reduce_sum(istarget)
reg_loss = tf.compat.v1.losses.get_regularization_loss()
# reg_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
# loss += sum(reg_losses)
loss += reg_loss
return loss
    def create_combined_dataset(self, u, seq, pos, neg):
        """
        Create padded model inputs and binary targets from a sampled batch.
        This function is used during training.
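        Examples:
            A toy batch (illustrative only; sequences are padded/truncated on the
            left, so inputs["input_seq"] has shape (batch, seq_max_len)):

            >>> u = [1, 2]
            >>> seq, pos, neg = [[1, 2, 3], [4]], [[2, 3, 5], [6]], [[7, 8, 9], [10]]
            >>> inputs, target = model.create_combined_dataset(u, seq, pos, neg)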
"""
inputs = {}
seq = tf.keras.preprocessing.sequence.pad_sequences(
seq, padding="pre", truncating="pre", maxlen=self.seq_max_len
)
pos = tf.keras.preprocessing.sequence.pad_sequences(
pos, padding="pre", truncating="pre", maxlen=self.seq_max_len
)
neg = tf.keras.preprocessing.sequence.pad_sequences(
neg, padding="pre", truncating="pre", maxlen=self.seq_max_len
)
inputs["users"] = np.expand_dims(np.array(u), axis=-1)
inputs["input_seq"] = seq
inputs["positive"] = pos
inputs["negative"] = neg
target = np.concatenate(
[
np.repeat(1, seq.shape[0] * seq.shape[1]),
np.repeat(0, seq.shape[0] * seq.shape[1]),
],
axis=0,
)
target = np.expand_dims(target, axis=-1)
return inputs, target
    def train(self, dataset, sampler, num_epochs=10, batch_size=128, lr=0.001,
              val_epoch=5, val_target_user_n=1000, target_item_n=-1, auto_save=False,
              path="./", exp_name="SASRec_exp"):
"""High level function for model training as well as evaluation on the validation and test dataset
Args:
            dataset (:obj:`util.SASRecDataSet`): SASRecDataSet containing user-item interaction history.
            sampler (:obj:`util.WarpSampler`): WarpSampler.
            num_epochs (int, optional): Number of epochs. Defaults to 10.
            batch_size (int, optional): Batch size. Defaults to 128.
            lr (float, optional): Learning rate. Defaults to 0.001.
            val_epoch (int, optional): Interval (in epochs) between validations. Defaults to 5.
            val_target_user_n (int, optional): Number of randomly sampled users for validation. Defaults to 1000.
            target_item_n (int or float, optional): Number (int) or fraction (float) of negative candidate items. Defaults to -1, which means all items.
            auto_save (bool, optional): If True, save the model with the best validation score. Defaults to False.
path (str, optional): Path to save model.
exp_name (str, optional): Experiment name.
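        Examples:
            A hypothetical training run (assumes dataset and sampler were built
            with this module's SASRecDataSet and WarpSampler helpers):

            >>> model.train(dataset, sampler, num_epochs=10, batch_size=128,
            ...             lr=0.001, val_epoch=5, auto_save=True,
            ...             path="./", exp_name="SASRec_exp")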
"""
num_steps = int(len(dataset.user_train) / batch_size)
optimizer = tf.keras.optimizers.Adam(
learning_rate=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-7
)
loss_function = self.loss_function
train_loss = tf.keras.metrics.Mean(name="train_loss")
train_step_signature = [
{
"users": tf.TensorSpec(shape=(None, 1), dtype=tf.int64),
"input_seq": tf.TensorSpec(
shape=(None, self.seq_max_len), dtype=tf.int64
),
"positive": tf.TensorSpec(
shape=(None, self.seq_max_len), dtype=tf.int64
),
"negative": tf.TensorSpec(
shape=(None, self.seq_max_len), dtype=tf.int64
),
},
tf.TensorSpec(shape=(None, 1), dtype=tf.int64),
]
@tf.function(input_signature=train_step_signature)
def train_step(inp, tar):
with tf.GradientTape() as tape:
pos_logits, neg_logits, loss_mask = self(inp, training=True)
loss = loss_function(pos_logits, neg_logits, loss_mask)
gradients = tape.gradient(loss, self.trainable_variables)
optimizer.apply_gradients(zip(gradients, self.trainable_variables))
train_loss(loss)
return loss
for epoch in range(1, num_epochs + 1):
print(f'epoch {epoch} / {num_epochs} -----------------------------')
self.epoch = epoch
step_loss = []
train_loss.reset_states()
for step in tqdm(
range(num_steps), total=num_steps, ncols=70, leave=False, unit="b",
# disable= ~progress_bar
):
u, seq, pos, neg = sampler.next_batch()
inputs, target = self.create_combined_dataset(u, seq, pos, neg)
loss = train_step(inputs, target)
step_loss.append(loss)
            if epoch % val_epoch == 0:
                print("Evaluating...")
                t_test = self.evaluate(
                    dataset,
                    target_user_n=val_target_user_n,
                    target_item_n=target_item_n,
                    is_val=True,
                )
                print(
                    f"epoch: {epoch}, test (NDCG@10: {t_test[0]}, HR@10: {t_test[1]})"
                )
                self.history.loc[len(self.history)] = [epoch, t_test[0], t_test[1]]
                if t_test[1] > self.best_score:
                    self.best_score = t_test[1]
                    if auto_save:
                        self.save(path, exp_name)
                        print("best score model updated and saved")
        if auto_save:
            self.history.to_csv(
                f"{path}/{exp_name}/{exp_name}_train_log.csv", index=False
            )
        return
    def evaluate(self, dataset, target_user_n=1000, target_item_n=-1, rank_threshold=10, is_val=False):
"""Evaluate model on validation set or test set
Args:
            dataset (:obj:`SASRecDataSet`): SASRecDataSet containing user-item interaction history.
            target_user_n (int, optional): Number of randomly sampled users to evaluate. Defaults to 1000.
            target_item_n (int or float, optional): Number (int) or fraction (float) of negative candidate items. Defaults to -1, which means all items.
            rank_threshold (int, optional): k value in NDCG@k and HR@k. Defaults to 10.
            is_val (bool, optional): If True, evaluate on the validation set; otherwise on the test set. Defaults to False.
Returns:
float: NDCG@k
float: HR@k
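        Examples:
            A hypothetical call computing NDCG@10 and HR@10 over 1000 sampled users:

            >>> ndcg, hr = model.evaluate(dataset, target_user_n=1000, rank_threshold=10)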
"""
        usernum = dataset.usernum
        itemnum = dataset.itemnum
        user_history = dataset.User
        train = dataset.user_train  # removing deepcopy
        valid = dataset.user_valid
        test = dataset.user_test
NDCG = 0.0
HT = 0.0
valid_user = 0.0
if len(self.val_users) == 0:
            self.sample_val_users(dataset, target_user_n)
for u in tqdm(self.val_users, ncols=70, leave=False, unit="b"):
if len(train[u]) < 1 or len(test[u]) < 1:
continue
seq = np.zeros([self.seq_max_len], dtype=np.int32)
idx = self.seq_max_len - 1
if is_val:
item_idx = [valid[u][0]]
else:
seq[idx] = valid[u][0]
idx -= 1
item_idx = [test[u][0]]
for i in reversed(train[u]):
seq[idx] = i
idx -= 1
if idx == -1:
break
            rated = set(user_history[u])
            if target_item_n == -1:
                item_idx = item_idx + list(set(range(1, itemnum + 1)).difference(rated))
            elif isinstance(target_item_n, int):
                for _ in range(target_item_n):
                    t = np.random.randint(1, itemnum + 1)
                    while t in rated:
                        t = np.random.randint(1, itemnum + 1)
                    item_idx.append(t)
            elif isinstance(target_item_n, float):
                for _ in range(round(itemnum * target_item_n)):
                    t = np.random.randint(1, itemnum + 1)
                    while t in rated:
                        t = np.random.randint(1, itemnum + 1)
                    item_idx.append(t)
            else:
                raise ValueError("target_item_n must be an int (count) or a float (fraction)")
            inputs = {}
            inputs["user"] = np.expand_dims(np.array([u]), axis=-1)
            inputs["input_seq"] = np.array([seq])
            inputs["candidate"] = np.array([item_idx])
            # negate so that argsort ranks scores in descending order
            predictions = -1.0 * self.predict(inputs, len(item_idx) - 1)
            predictions = np.array(predictions)
            predictions = predictions[0]
            rank = predictions.argsort().argsort()[0]
valid_user += 1
if rank < rank_threshold:
NDCG += 1 / np.log2(rank + 2)
HT += 1
return NDCG / valid_user, HT / valid_user
    def recommend_item(self, dataset, user_map_dict, user_id_list, target_item_n=-1, top_n=10, exclude_purchased=True, is_test=False):
"""Recommend items to user
Args:
            dataset (:obj:`util.SASRecDataSet`): SASRecDataSet containing user-item interaction history.
            user_map_dict (dict): Dict { user_id : encoded_user_label , ... }
            user_id_list (list): Users to predict for.
            target_item_n (int or float, optional): Number (int) or fraction (float) of candidate items. Defaults to -1, which means all items.
            top_n (int, optional): Number of items to recommend. Defaults to 10.
            exclude_purchased (bool, optional): If True, exclude already purchased items from the candidates. Defaults to True.
            is_test (bool, optional): If True, exclude the last item from each user's sequence. Defaults to False.
        Returns:
            dict: Dict { user_id : [(item, score), ...] } with the top_n recommended items per user.
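        Examples:
            A hypothetical call (the user ids and mapping dict are made up):

            >>> recs = model.recommend_item(dataset, user_map_dict,
            ...                             user_id_list=["u1", "u2"], top_n=10)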
"""
        user_history = dataset.User
        itemnum = dataset.itemnum
        users = [user_map_dict[u] for u in user_id_list]
        inv_user_map = {v: k for k, v in user_map_dict.items()}
        return_dict = {}
for u in tqdm(users):
seq = np.zeros([self.seq_max_len], dtype=np.int32)
idx = self.seq_max_len - 1
            list_to_seq = user_history[u] if not is_test else user_history[u][:-1]
for i in reversed(list_to_seq):
seq[idx] = i
idx -= 1
if idx == -1:
break
            if exclude_purchased:
                rated = set(user_history[u])
            else:
                rated = set()
            # make empty candidate list
            item_idx = []
            if target_item_n == -1:
                item_idx = item_idx + list(set(range(1, itemnum + 1)).difference(rated))
            elif isinstance(target_item_n, int):
                for _ in range(target_item_n):
                    t = np.random.randint(1, itemnum + 1)
                    while t in rated:
                        t = np.random.randint(1, itemnum + 1)
                    item_idx.append(t)
            elif isinstance(target_item_n, float):
                for _ in range(round(itemnum * target_item_n)):
                    t = np.random.randint(1, itemnum + 1)
                    while t in rated:
                        t = np.random.randint(1, itemnum + 1)
                    item_idx.append(t)
            else:
                raise ValueError("target_item_n must be an int (count) or a float (fraction)")
            inputs = {}
            inputs["user"] = np.expand_dims(np.array([u]), axis=-1)
            inputs["input_seq"] = np.array([seq])
            inputs["candidate"] = np.array([item_idx])
            predictions = self.predict(inputs, len(item_idx) - 1)
            predictions = np.array(predictions)
            predictions = predictions[0]
            pred_dict = {v: predictions[i] for i, v in enumerate(item_idx)}
            pred_dict = sorted(pred_dict.items(), key=lambda item: item[1], reverse=True)
            top_list = pred_dict[:top_n]
            return_dict[inv_user_map[u]] = top_list
        return return_dict
    def old_get_user_item_score(self, dataset, user_map_dict, item_map_dict, user_id_list, item_list, is_test=False):
        """
        Deprecated; use ``get_user_item_score`` instead.
        """
        user_history = dataset.User
        users = [user_map_dict[u] for u in user_id_list]
        items = [item_map_dict[i] for i in item_list]
        # inv_user_map = {v: k for k, v in user_map_dict.items()}
        # inv_item_map = {v: k for k, v in item_map_dict.items()}
        score_dict = {i: [] for i in item_list}
        for u in tqdm(users, unit=" User", desc="Getting scores for each user..."):
seq = np.zeros([self.seq_max_len], dtype=np.int32)
idx = self.seq_max_len - 1
            list_to_seq = user_history[u] if not is_test else user_history[u][:-1]
for i in reversed(list_to_seq):
seq[idx] = i
idx -= 1
if idx == -1:
break
            inputs = {}
            inputs["user"] = np.expand_dims(np.array([u]), axis=-1)
            inputs["input_seq"] = np.array([seq])
            inputs["candidate"] = np.array([items])
            predictions = self.predict(inputs, len(items) - 1)
            predictions = np.array(predictions)
            predictions = predictions[0]
            # pred_dict = {inv_item_map[v]: predictions[i] for i, v in enumerate(items)}
            for i, v in enumerate(item_list):
                score_dict[v].append(predictions[i])
        return_df = pd.DataFrame({"user_id": user_id_list})
        for k in score_dict:
            return_df[k] = score_dict[k]
        return_df = return_df.sort_values(by="user_id").reset_index(drop=True)
        return return_df
    def get_user_item_score(self, dataset, user_id_list, item_list, user_map_dict, item_map_dict, batch_size=128):
"""Get item score for each user on batch
Args:
            dataset (:obj:`SASRecDataSet`): SASRecDataSet containing user-item interaction history.
            user_id_list (list): Users to predict for.
            item_list (list): Items to score.
            user_map_dict (dict): Dict { user_id : encoded_user_label , ... }
            item_map_dict (dict): Dict { item : encoded_item_label , ... }
batch_size (int, optional): Batch size. Defaults to 128.
Raises:
Exception: Batch_size must be smaller than user id list size.
Returns:
pd.DataFrame: user-item score
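        Examples:
            A hypothetical batched scoring call (ids and mapping dicts are made up;
            note batch_size must be smaller than len(user_id_list)):

            >>> df = model.get_user_item_score(dataset, user_id_list, item_list,
            ...                                user_map_dict, item_map_dict,
            ...                                batch_size=128)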
"""
if batch_size >= len(user_id_list):
raise Exception('batch_size must be smaller than user_id_list size')
user_history = dataset.User
inv_user_map = {v: k for k, v in user_map_dict.items()}
cand = [item_map_dict[i] for i in item_list]
        if len(user_id_list) % batch_size == 0:
            num_steps = int(len(user_id_list) / batch_size)
        else:
            num_steps = int(len(user_id_list) / batch_size) + 1
score_dict = dict()
        start_index = 0
        for step in tqdm(range(num_steps), leave=True, unit="batch"):
            # get user batch
            u = (
                user_id_list[start_index : start_index + batch_size]
                if step != num_steps - 1
                else user_id_list[start_index:]
            )
            start_index += batch_size
            u = [user_map_dict[user] for user in u]
            # get sequence batch
            seq = [user_history[user] for user in u]
            inputs = self.create_combined_dataset_pred(tuple(u), tuple(seq), cand)
            predictions = self.batch_predict(inputs, len(cand) - 1)
            predictions = np.array(predictions)
            for i in range(len(u)):
                score_dict[inv_user_map[u[i]]] = predictions[i]
        return_df = pd.DataFrame(
            list(score_dict.items()), columns=["user_id", "score_array"]
        ).set_index("user_id", drop=True)
        return_df[item_list] = pd.DataFrame(
            return_df["score_array"].tolist(), index=return_df.index
        )
        return_df = (
            return_df.drop("score_array", axis=1)
            .reset_index()
            .sort_values(by="user_id")
            .reset_index(drop=True)
        )
        return return_df
    def create_combined_dataset_pred(self, u, seq, cand):
        """
        Create padded model inputs from sampled batch data.
        This function is used when predicting on a batch.
"""
inputs = {}
seq = tf.keras.preprocessing.sequence.pad_sequences(
seq, padding="pre", truncating="pre", maxlen=self.seq_max_len
)
inputs["users"] = np.expand_dims(np.array(u), axis=-1)
inputs["input_seq"] = seq
inputs['candidate'] = np.array([cand])
return inputs
    def batch_predict(self, inputs, cand_n):
"""Returns the logits for the item candidates.
Args:
            inputs (dict): Dict of input tensors with keys "input_seq" and "candidate".
            cand_n (int): Number of candidates.
Returns:
tf.Tensor: Output tensor
"""
training = False
input_seq = inputs["input_seq"]
candidate = inputs["candidate"]
mask = tf.expand_dims(tf.cast(tf.not_equal(input_seq, 0), tf.float32), -1)
seq_embeddings, positional_embeddings = self.embedding(input_seq)
seq_embeddings += positional_embeddings
# seq_embeddings = self.dropout_layer(seq_embeddings)
seq_embeddings *= mask
seq_attention = seq_embeddings
seq_attention = self.encoder(seq_attention, training, mask)
seq_attention = self.layer_normalization(seq_attention) # (b, s, d)
seq_emb = tf.reshape(
seq_attention,
[tf.shape(input_seq)[0] * self.seq_max_len, self.embedding_dim],
) # (b*s, d)
        candidate_emb = self.item_embedding_layer(candidate)  # (1, 1+cand_n, d)
        candidate_emb = tf.transpose(candidate_emb, perm=[0, 2, 1])  # (1, d, 1+cand_n)
        test_logits = tf.matmul(seq_emb, candidate_emb)  # (1, b*s, 1+cand_n)
        test_logits = tf.reshape(
            test_logits,
            [tf.shape(input_seq)[0], self.seq_max_len, 1 + cand_n],
        )  # (b, s, 1+cand_n)
        test_logits = test_logits[:, -1, :]  # (b, 1+cand_n): scores at the last position
        return test_logits
    def save(self, path, exp_name="sas_experiment"):
"""Save trained SASRec Model
Args:
path (str): Path to save model.
exp_name (str): Experiment name.
Examples:
>>> model.save(path, exp_name)
"""
# make dir
if not os.path.exists(f'{path}/{exp_name}'):
os.mkdir(f'{path}/{exp_name}')
self.save_weights(f'{path}/{exp_name}/{exp_name}_weights') # save trained weights
        arg_list = [
            "item_num", "seq_max_len", "num_blocks", "embedding_dim",
            "attention_dim", "attention_num_heads", "dropout_rate",
            "conv_dims", "l2_reg", "history",
        ]
        dict_to_save = {a: self.__dict__[a] for a in arg_list}
        with open(f"{path}/{exp_name}/{exp_name}_model_args", "wb") as f:
            pickle.dump(dict_to_save, f)
        if not os.path.isfile(f"{path}/{exp_name}/{exp_name}_save_log.txt"):
            with open(f"{path}/{exp_name}/{exp_name}_save_log.txt", "w") as f:
                f.writelines(f"Model args: {dict_to_save}\n")
                f.writelines(f"[epoch {self.epoch}] Best HR@10 score: {self.best_score}\n")
        else:
            with open(f"{path}/{exp_name}/{exp_name}_save_log.txt", "a") as f:
                f.writelines(f"[epoch {self.epoch}] Best HR@10 score: {self.best_score}\n")
return
    def sample_val_users(self, dataset, target_user_n):
"""Sample users for validation
Args:
dataset (:obj:`SASRecDataSet`): SASRec dataset used for training
target_user_n (int): Number of users to sample
"""
usernum = dataset.usernum
if usernum > target_user_n:
self.val_users = random.sample(range(1, usernum + 1), target_user_n)
else:
            self.val_users = list(range(1, usernum + 1))