Self Attention Model

Implementation of an attention-based model for item recommendation.

SelfAttentionModel

Bases: BaseBasketModel

Class for the self attention model for basket recommendation.

Basket choice modeling inspired by the paper "Next Item Recommendation with Self-Attention" by Shuai Zhang, Lina Yao, Yi Tay, and Aixin Sun. The algorithm was modified and adapted to the basket recommendation task.

Source code in choice_learn/basket_models/self_attention_model.py
class SelfAttentionModel(BaseBasketModel):
    """Class for the self attention model for basket recommendation.

    Basket Choice Modeling
    Inspired by the paper "Next Item Recommendation with Self-Attention" by Shuai Zhang,
    Lina Yao, Yi Tay, and Aixin Sun.
    The algorithm was modified and adapted to the basket recommendation task.
    """

    def __init__(
        self,
        latent_sizes: dict[str, int] = {"short_term": 10, "long_term": 10},
        hinge_margin: float = 0.5,
        short_term_ratio: float = 0.5,
        n_negative_samples: int = 2,
        optimizer: str = "adam",
        callbacks: Union[tf.keras.callbacks.CallbackList, None] = None,
        lr: float = 1e-3,
        epochs: int = 10,
        batch_size: int = 32,
        grad_clip_value: Union[float, None] = None,
        weight_decay: Union[float, None] = None,
        momentum: float = 0.0,
        l2_regularization: float = 0.0,
        dropout_rate: float = 0.0,
        **kwargs,
    ) -> None:
        """Initialize the model with hyperparameters.

        Parameters
        ----------
        latent_sizes : dict[str, int]
            Sizes of the "short_term" and "long_term" latent spaces (item embeddings).
            Default is {"short_term": 10, "long_term": 10}.
        hinge_margin : float
            Margin parameter for the hinge loss.
        short_term_ratio : float
            Weighting factor between long-term and short-term preferences.
        n_negative_samples : int
            Number of negative samples to use in training.
        optimizer : str
            Optimizer to use for training. Default is "adam".
        callbacks : tf.keras.callbacks.CallbackList or None
            List of callbacks to use during training. Default is None.
        lr : float
            Learning rate for the optimizer.
        epochs : int
            Number of training epochs.
        batch_size : int
            Size of the batches for training. Default is 32.
        grad_clip_value : float or None
            Value for gradient clipping. Default is None (no clipping).
        weight_decay : float or None
            Weight decay (L2 regularization) factor. Default is None (no weight decay).
        momentum : float
            Momentum factor for optimizers that support it. Default is 0.0.
        l2_regularization : float
            L2 regularization factor applied to the embeddings in the loss. Default is 0.0.
        dropout_rate : float
            Dropout rate applied to the attention queries and keys during training.
            Default is 0.0.
        """
        self.instantiated = False

        for val in latent_sizes.keys():
            if val not in ["short_term", "long_term"]:
                raise ValueError(f"Unknown value for latent_sizes dict: {val}.")
        if "short_term" not in latent_sizes:
            latent_sizes["short_term"] = 10
        if "long_term" not in latent_sizes:
            latent_sizes["long_term"] = 10

        self.hinge_margin = hinge_margin
        self.short_term_ratio = short_term_ratio
        self.n_negative_samples = n_negative_samples

        self.latent_sizes = latent_sizes
        self.d = self.latent_sizes["short_term"]
        self.d_long = self.latent_sizes["long_term"]
        self.l2_regularization = l2_regularization
        self.dropout_rate = dropout_rate

        super().__init__(
            optimizer=optimizer,
            callbacks=callbacks,
            lr=lr,
            epochs=epochs,
            batch_size=batch_size,
            grad_clip_value=grad_clip_value,
            weight_decay=weight_decay,
            momentum=momentum,
            **kwargs,
        )

    def instantiate(
        self,
        n_items: int,
        n_users: int,
    ) -> None:
        """Initialize the model parameters.

        Parameters
        ----------
        n_items : int
            Number of unique items in the dataset.
        n_users : int
            Number of unique users in the dataset.

        Variables
        ----------
        X : tf.Variable
            Item embedding matrix for short-term preferences, size (n_items + 1, d)
            (the last row is the padding embedding).
        V : tf.Variable
            Item embedding matrix for long-term preferences, size (n_items, d_long).
        U : tf.Variable
            User embedding matrix for long-term preferences, size (n_users, d_long).
        Wq : tf.Variable
            Weight matrix for query transformation in attention mechanism, size (d, d).
        Wk : tf.Variable
            Weight matrix for key transformation in attention mechanism, size (d, d).
        """
        self.n_items = n_items
        self.n_users = n_users

        self.X = tf.Variable(
            tf.random_normal_initializer(mean=0, stddev=0.01, seed=42)(shape=(n_items + 1, self.d)),
            trainable=True,
            name="X",
        )

        self.V = tf.Variable(
            tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=42)(
                shape=(n_items, self.d_long)
            ),
            trainable=True,
            name="V",
        )

        self.U = tf.Variable(
            tf.random_normal_initializer(mean=0, stddev=0.01, seed=42)(
                shape=(self.n_users, self.d_long)
            ),
            trainable=True,
            name="U",
        )

        self.Wq = tf.Variable(
            tf.random_normal_initializer(mean=0, stddev=0.01, seed=42)(shape=(self.d, self.d)),
            trainable=True,
            name="Wq",
        )

        self.Wk = tf.Variable(
            tf.random_normal_initializer(mean=0, stddev=0.01, seed=42)(shape=(self.d, self.d)),
            trainable=True,
            name="Wk",
        )

        self.instantiated = True

    @property
    def trainable_weights(self):
        """Return the trainable weights of the model.

        Returns
        -------
            list
                List of trainable weights (X, V, U, Wq, Wk).
        """
        return [self.X, self.V, self.U, self.Wq, self.Wk]

    @property
    def train_iter_method(self) -> str:
        """Method used to generate sub-baskets from a purchased one.

        Available methods are:
        - 'shopper': randomly orders the purchases and creates the ordered sub-baskets:
                        (1|0); (2|1); (3|1,2); (4|1,2,3); etc...
        - 'aleacarta': creates all the sub-baskets with N-1 items:
                        (4|1,2,3); (3|1,2,4); (2|1,3,4); (1|2,3,4)

        Returns
        -------
        str
            Data generation method.
        """
        return "aleacarta"

    def masked_attention(self, basket_batch, scaled_scores):
        """Compute the masked attention weights.

        Applying a mask to ignore padding items. Also applied a mask on
        the diagonal to avoid attending to the same item, if activated
        """
        batch_size = tf.shape(basket_batch)[0]
        mask = tf.not_equal(
            basket_batch, self.n_items
        )  # shape: (batch_size, L), True if not padding

        if tf.shape(basket_batch)[1] == 1:
            attention_weights = tf.ones_like(scaled_scores)  # Shape: (batch_size, L, 1)

        else:
            # Mask the diagonal to prevent items from attending to themselves
            diag_mask = tf.eye(tf.shape(basket_batch)[1], batch_shape=[batch_size], dtype=tf.bool)
            scaled_scores = tf.where(
                diag_mask,
                tf.constant(-np.inf, dtype=scaled_scores.dtype),
                scaled_scores,
            )

            # Mask the padding items
            mask_col = tf.expand_dims(mask, axis=1)  # (batch_size, 1, L)
            scaled_scores = tf.where(
                mask_col, scaled_scores, tf.constant(-np.inf, dtype=scaled_scores.dtype)
            )

            all_inf_row = tf.reduce_all(tf.math.is_inf(scaled_scores), axis=-1)  # (batch_size, L)
            # We set to zero the first value of the rows where all values are -inf to avoid NaNs in
            # softmax
            indices = tf.where(all_inf_row)
            indices_full = tf.concat([indices, tf.zeros_like(indices[:, :1])], axis=1)
            updates = tf.zeros([tf.shape(indices_full)[0]], dtype=scaled_scores.dtype)
            scaled_scores = tf.tensor_scatter_nd_update(scaled_scores, indices_full, updates)

            attention_weights = tf.nn.softmax(scaled_scores, axis=-1)  # Shape: (batch_size, L, L)

        return attention_weights

    def embed_basket(self, basket_batch: tf.Tensor, is_training: bool = False) -> tf.Tensor:
        """Return the context embedding matrix.

        Parameters
        ----------
            basket_batch : tf.Tensor
                [batch_size, L]
                Tensor containing the list of the context items.
            is_training : bool
                Whether the model is in training mode or not, to activate dropout if needed.

        Returns
        -------
            basket_embedding : tf.Tensor
                [batch_size, latent_size] tf.Tensor
                Tensor containing the vector of contexts embeddings.
            attention_weights : tf.Tensor
                [batch_size, L, L] tf.Tensor
                Tensor containing the attention matrix.
        """
        padding_vector = tf.zeros(shape=[1, self.d])  # Shape (1, d)
        padded_items = tf.concat([self.X, padding_vector], axis=0)
        x_basket = tf.gather(padded_items, indices=basket_batch)  # Shape: (batch_size, L, d)

        q_prime = tf.nn.relu(tf.matmul(x_basket, self.Wq))  # Shape: (batch_size, L, d)
        k_prime = tf.nn.relu(tf.matmul(x_basket, self.Wk))

        if is_training:
            q_prime = tf.nn.dropout(q_prime, rate=self.dropout_rate)
            k_prime = tf.nn.dropout(k_prime, rate=self.dropout_rate)

        scores = tf.matmul(q_prime, k_prime, transpose_b=True)
        scaled_scores = scores / tf.sqrt(float(self.d))
        attention_weights = self.masked_attention(
            basket_batch, scaled_scores
        )  # Shape: (batch_size, L, L)

        attention_output = tf.matmul(attention_weights, x_basket)  # Shape: (batch_size, L, d)

        mask = tf.not_equal(basket_batch, self.n_items)
        mask_float = tf.cast(mask, dtype=tf.float32)
        mask_float = tf.expand_dims(mask_float, axis=-1)
        masked_attention_output = attention_output * mask_float  # (batch_size, L, d)

        # Number of items in each basket (excluding padding)
        num_items_by_basket = tf.reduce_sum(mask_float, axis=1)  # (batch_size, 1)

        basket_embedding = tf.math.divide_no_nan(
            tf.reduce_sum(masked_attention_output, axis=1, keepdims=True),
            num_items_by_basket[:, tf.newaxis, :],
        )
        basket_embedding = tf.squeeze(basket_embedding, axis=1)  # Shape: (batch_size, d)
        # ----- Handle empty baskets by assigning a padding embedding -----
        padding_embedding = self.X[-1]  # shape: (d,)
        empty_basket_mask = tf.squeeze(tf.equal(num_items_by_basket, 0), axis=1)  # (batch_size,)
        basket_embedding = tf.where(
            tf.expand_dims(empty_basket_mask, axis=1),
            tf.broadcast_to(padding_embedding, tf.shape(basket_embedding)),
            basket_embedding,
        )
        # -----------------------------------------------------------------
        return basket_embedding, attention_weights

    def compute_batch_short_distance(
        self,
        item_batch: Union[np.ndarray, tf.Tensor],
        basket_embedding: tf.Tensor,
    ) -> tf.Tensor:
        """Compute the short distance of the items in item_batch given the items in basket_batch.

        Parameters
        ----------
        item_batch: np.ndarray or tf.Tensor
            Batch of the purchased items ID (integers) for which to compute the distance from their
            basket.
            Shape must be (batch_size,)
            (positive and negative samples concatenated together)
        basket_embedding: tf.Tensor
            Batch of context embeddings for each purchased item
            Shape must be (batch_size, latent_size)

        Returns
        -------
        short_term_distance: tf.Tensor
            Distance of all the items in item_batch from their ground truth embedding (X)
            Shape must be (batch_size,)
        """
        x_item_target = tf.gather(self.X, indices=item_batch)  # Shape: (batch_size, d)

        return tf.reduce_sum(
            tf.square(tf.expand_dims(basket_embedding, axis=1) - x_item_target), axis=-1
        )

    def compute_batch_long_distance(
        self,
        item_batch: Union[np.ndarray, tf.Tensor],
        user_batch: np.ndarray,
    ) -> tf.Tensor:
        """Compute the long distance of all the items in item_batch given the user.

        Parameters
        ----------
        item_batch: np.ndarray or tf.Tensor
            Batch of the purchased items ID (integers) for which to compute the distance from their
            user.
            Shape must be (batch_size,)
            (positive and negative samples concatenated together)

        user_batch: np.ndarray
            Batch of user IDs (integers) for each purchased item
            Shape must be (batch_size,)

        Returns
        -------
        long_term_distance: tf.Tensor
            Distance of all the items in item_batch from their ground truth embedding (V)
            Shape must be (batch_size,)
        """
        v_future_batch = tf.gather(self.V, indices=item_batch)  # Shape: (batch_size, d_long)

        u_user_batch = tf.gather(self.U, indices=user_batch)  # Shape: (batch_size, d_long)
        return tf.reduce_sum(
            tf.square(tf.expand_dims(u_user_batch, axis=1) - v_future_batch), axis=-1
        )  # One squared distance per item in item_batch

    def compute_batch_distance(
        self,
        item_batch: np.ndarray,
        basket_batch: np.ndarray,
        user_batch: np.ndarray,
        is_training: bool,
    ) -> tf.Tensor:
        """Compute the total distance (long + short term) of all the items in item_batch.

        Parameters
        ----------
        item_batch: np.ndarray
            Batch of the purchased items ID (integers) for which to compute the distance from their
            basket.
            Shape must be (batch_size, None)
            (positive and negative samples concatenated together)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
        user_batch: np.ndarray
            Batch of user IDs (integers) for each purchased item
            Shape must be (batch_size,)
        is_training : bool
            Whether the model is in training mode or not, to activate dropout if needed.

        Returns
        -------
        total_distance: tf.Tensor
            Total distance of all the items in item_batch from their ground truth embeddings
            Shape must be (batch_size, None)
        """
        basket_batch_ragged = tf.cast(
            tf.ragged.boolean_mask(basket_batch, basket_batch != -1),
            dtype=tf.int32,
        )
        basket_batch = basket_batch_ragged.to_tensor(self.n_items)
        basket_embedding, _ = self.embed_basket(basket_batch, is_training)  # Shape: (batch_size, d)

        long_distance = self.compute_batch_long_distance(item_batch, user_batch)

        short_distance = self.compute_batch_short_distance(item_batch, basket_embedding)
        return self.short_term_ratio * long_distance + (1 - self.short_term_ratio) * short_distance

    def compute_batch_utility(
        self,
        item_batch,
        basket_batch,
        store_batch,
        week_batch,
        price_batch,
        available_item_batch,
        user_batch,
        is_training: bool = False,
    ) -> tf.Tensor:
        """Compute the utility of all the items in item_batch.

        Parameters
        ----------
        item_batch: np.ndarray
            Batch of the purchased items ID (integers) for which to compute the utility from their
            basket.
            Shape must be (batch_size,)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
        store_batch: np.ndarray
            Batch of store IDs (integers) for each purchased item
            Shape must be (batch_size,)
        week_batch: np.ndarray
            Batch of week numbers (integers) for each purchased item
            Shape must be (batch_size,)
        price_batch: np.ndarray
            Batch of prices (floats) for each purchased item
            Shape must be (batch_size,)
        available_item_batch: np.ndarray
            List of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items)
        user_batch: np.ndarray
            Batch of user IDs (integers) for each purchased item
            Shape must be (batch_size,)
        is_training : bool
            Whether the model is in training mode or not, to activate dropout if needed.

        Returns
        -------
        tf.Tensor
            Utility of all the items in item_batch
            Shape must be (batch_size,)
        """
        _ = store_batch  # Unused for this model
        _ = week_batch  # Unused for this model
        _ = price_batch  # Unused for this model
        _ = available_item_batch  # Unused for this model
        return -self.compute_batch_distance(
            item_batch=item_batch,
            basket_batch=basket_batch,
            user_batch=user_batch,
            is_training=is_training,
        )

    def get_negative_samples(
        self,
        available_items: np.ndarray,
        purchased_items: np.ndarray,
        next_item: int,
        n_samples: int,
    ) -> list[int]:
        """Sample randomly a set of items.

        (set of items not already purchased and *not necessarily* from the basket)

        Parameters
        ----------
        available_items: np.ndarray
            Matrix indicating the availability (1) or not (0) of the products
            Shape must be (n_items,)
        purchased_items: np.ndarray
            List of items already purchased (already in the basket)
        next_item: int
            Next item (to be added in the basket)
        n_samples: int
            Number of samples to draw

        Returns
        -------
        list[int]
            Random sample of items, each of them distinct from
            the next item and from the items already in the basket
        """
        # Convert inputs to tensors
        available_items = tf.cast(tf.convert_to_tensor(available_items), dtype=tf.int32)
        purchased_items = tf.cast(tf.convert_to_tensor(purchased_items), dtype=tf.int32)
        next_item = tf.cast(tf.convert_to_tensor(next_item), dtype=tf.int32)

        # Get the list of available items based on the availability matrix
        item_ids = tf.range(self.n_items)
        available_mask = tf.equal(available_items, 1)
        assortment = tf.boolean_mask(item_ids, available_mask)

        not_to_be_chosen = tf.concat([purchased_items, tf.expand_dims(next_item, axis=0)], axis=0)

        # Sample negative items from the assortment excluding not_to_be_chosen
        negative_samples = tf.boolean_mask(
            tensor=assortment,
            # Reduce the 2nd dimension of the boolean mask to get a 1D mask
            mask=~tf.reduce_any(
                tf.equal(tf.expand_dims(assortment, axis=1), not_to_be_chosen), axis=1
            ),
        )

        error_message = (
            "The number of negative samples to draw must be less than "
            "the number of available items not already purchased and "
            "distinct from the next item."
        )
        # Raise an error if n_samples > tf.size(negative_samples)
        tf.debugging.assert_greater_equal(
            tf.size(negative_samples), n_samples, message=error_message
        )

        # Randomize the sampling
        negative_samples = tf.random.shuffle(negative_samples)

        # Keep only n_samples
        return negative_samples[:n_samples]

    def compute_batch_loss(
        self,
        item_batch: np.ndarray,
        basket_batch: np.ndarray,
        future_batch: np.ndarray,
        store_batch: np.ndarray,
        week_batch: np.ndarray,
        price_batch: np.ndarray,
        available_item_batch: np.ndarray,
        user_batch: np.ndarray,
        is_training: bool = True,
    ) -> tuple[tf.Variable]:
        """Compute total loss.

        Parameters
        ----------
        item_batch: np.ndarray
            Batch of purchased items ID (integers)
            Shape must be (batch_size,)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
        future_batch: np.ndarray
            Batch of items to be purchased in the future (ID of items not yet in the
            basket) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
            Here for signature reasons, unused for this model
        store_batch: np.ndarray
            Batch of store IDs (integers) for each purchased item
            Shape must be (batch_size,)
        week_batch: np.ndarray
            Batch of week numbers (integers) for each purchased item
            Shape must be (batch_size,)
        price_batch: np.ndarray
            Batch of prices (floats) for each purchased item
            Shape must be (batch_size,)
        available_item_batch: np.ndarray
            List of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items)
        user_batch: np.ndarray
            Batch of user IDs (integers) for each purchased item
            Shape must be (batch_size,)
        is_training: bool
            Whether the model is in training mode or not, to activate dropout if needed.
            True by default, since compute_batch_loss is only used during training.

        Returns
        -------
        tf.Variable
            Value of the loss for the batch (Hinge loss),
            Shape must be (1,)
        _: None
            Placeholder to match the signature of the parent class method
        """
        _ = future_batch  # Unused for this model
        _ = store_batch  # Unused for this model
        _ = week_batch  # Unused for this model
        _ = price_batch  # Unused for this model

        batch_size = len(item_batch)

        negative_samples = tf.stack(
            [
                self.get_negative_samples(
                    available_items=available_item_batch[idx],
                    purchased_items=basket_batch[idx],
                    next_item=item_batch[idx],
                    n_samples=self.n_negative_samples,
                )
                for idx in range(batch_size)
            ],
            axis=0,
        )  # Shape: (batch_size, n_negative_samples)

        item_batch = tf.cast(item_batch, tf.int32)
        negative_samples = tf.cast(negative_samples, tf.int32)

        augmented_item_batch = tf.cast(
            tf.concat([tf.expand_dims(item_batch, axis=-1), negative_samples], axis=1),
            dtype=tf.int32,
        )  # Shape: (batch_size, 1 + n_negative_samples)

        basket_batch_ragged = tf.cast(
            tf.ragged.boolean_mask(basket_batch, basket_batch != -1),
            dtype=tf.int32,
        )
        basket_batch = basket_batch_ragged.to_tensor(self.n_items)
        all_distances = self.compute_batch_distance(
            item_batch=augmented_item_batch,
            basket_batch=basket_batch,
            user_batch=user_batch,
            is_training=is_training,
        )  # Shape: (batch_size, 1 + n_negative_samples)

        positive_samples_distance = tf.gather(
            params=all_distances, indices=[0], axis=1
        )  # (batch_size, 1)
        neg = tf.gather(
            params=all_distances, indices=tf.range(1, self.n_negative_samples + 1), axis=1
        )  # (batch_size, n_negative_samples)
        pos = tf.tile(
            positive_samples_distance, [1, self.n_negative_samples]
        )  # (batch_size, n_negative_samples)

        ridge_regularization = self.l2_regularization * (
            tf.nn.l2_loss(self.U)
            + tf.nn.l2_loss(self.V)
            + tf.nn.l2_loss(self.X)
            + tf.nn.l2_loss(self.Wq)
            + tf.nn.l2_loss(self.Wk)
        )

        hinge_loss = (
            tf.maximum(float(0), self.hinge_margin + pos - neg) + ridge_regularization
        )  # (batch_size, n_negative_samples)
        total_loss = tf.reduce_sum(hinge_loss)

        # Normalize by the batch size and the number of negative samples
        if tf.reduce_any(self.hinge_margin > 0):
            return (
                total_loss / (batch_size * self.n_negative_samples * self.hinge_margin),
                _,
            )
        return total_loss / (batch_size * self.n_negative_samples), _

train_iter_method: str property

Method used to generate sub-baskets from a purchased one.

Available methods are:

- 'shopper': randomly orders the purchases and creates the ordered sub-baskets: (1|0); (2|1); (3|1,2); (4|1,2,3); etc.
- 'aleacarta': creates all the sub-baskets with N-1 items: (4|1,2,3); (3|1,2,4); (2|1,3,4); (1|2,3,4)

Returns:

| Type | Description |
| --- | --- |
| str | Data generation method. |
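
For intuition, here is a minimal standalone sketch (not part of choice_learn) of how the two methods expand a basket of item IDs into (target, context) pairs:

import random

def shopper_subbaskets(basket):
    # Randomly order the purchases, then predict each item given the previous ones.
    items = random.sample(basket, len(basket))
    return [(items[i], items[:i]) for i in range(len(items))]

def aleacarta_subbaskets(basket):
    # Predict each item given the N-1 other items of the basket.
    return [(target, [i for i in basket if i != target]) for target in basket]

print(aleacarta_subbaskets([1, 2, 3, 4]))
# [(1, [2, 3, 4]), (2, [1, 3, 4]), (3, [1, 2, 4]), (4, [1, 2, 3])]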

trainable_weights property

Return the trainable weights of the model.

Returns:

| Type | Description |
| --- | --- |
| list | List of trainable weights (X, V, U, Wq, Wk). |

__init__(latent_sizes={'short_term': 10, 'long_term': 10}, hinge_margin=0.5, short_term_ratio=0.5, n_negative_samples=2, optimizer='adam', callbacks=None, lr=0.001, epochs=10, batch_size=32, grad_clip_value=None, weight_decay=None, momentum=0.0, l2_regularization=0.0, dropout_rate=0.0, **kwargs)

Initialize the model with hyperparameters.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| latent_sizes | dict[str, int] | Sizes of the "short_term" and "long_term" latent spaces (item embeddings). | {'short_term': 10, 'long_term': 10} |
| hinge_margin | float | Margin parameter for the hinge loss. | 0.5 |
| short_term_ratio | float | Weighting factor between long-term and short-term preferences. | 0.5 |
| n_negative_samples | int | Number of negative samples to use in training. | 2 |
| optimizer | str | Optimizer to use for training. | 'adam' |
| callbacks | CallbackList or None | List of callbacks to use during training. | None |
| lr | float | Learning rate for the optimizer. | 0.001 |
| epochs | int | Number of training epochs. | 10 |
| batch_size | int | Size of the batches for training. | 32 |
| grad_clip_value | float or None | Value for gradient clipping. None means no clipping. | None |
| weight_decay | float or None | Weight decay (L2 regularization) factor. None means no weight decay. | None |
| momentum | float | Momentum factor for optimizers that support it. | 0.0 |
| l2_regularization | float | L2 regularization factor applied to the embeddings in the loss. | 0.0 |
| dropout_rate | float | Dropout rate applied to the attention queries and keys during training. | 0.0 |
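A minimal construction sketch, assuming the import path below matches your installation (adjust as needed):

from choice_learn.basket_models.self_attention_model import SelfAttentionModel

# Illustrative hyperparameters only; defaults are shown in the table above.
model = SelfAttentionModel(
    latent_sizes={"short_term": 16, "long_term": 16},
    hinge_margin=0.5,
    short_term_ratio=0.5,
    n_negative_samples=5,
    lr=1e-3,
    epochs=20,
    batch_size=64,
)
# The trainable parameters (X, V, U, Wq, Wk) are only created later,
# by instantiate(n_items, n_users).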

compute_batch_distance(item_batch, basket_batch, user_batch, is_training)

Compute the total distance (long + short term) of all the items in item_batch.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| item_batch | ndarray | Batch of the purchased items ID (integers) for which to compute the distance from their basket (positive and negative samples concatenated together). Shape must be (batch_size, None). | required |
| basket_batch | ndarray | Batch of baskets (ID of items already in the baskets) for each purchased item. Shape must be (batch_size, max_basket_size). | required |
| user_batch | ndarray | Batch of user IDs (integers) for each purchased item. Shape must be (batch_size,). | required |
| is_training | bool | Whether the model is in training mode or not, to activate dropout if needed. | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| total_distance | Tensor | Total distance of all the items in item_batch from their ground truth embeddings. Shape must be (batch_size, None). |
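
Concretely, writing w for short_term_ratio, e_b for the attention-based basket embedding (see embed_basket), and U, V, X for the embedding matrices, the implementation blends the two squared Euclidean distances as

d_{\text{total}}(i, b, u) = w \,\lVert U_u - V_i \rVert_2^2 + (1 - w)\,\lVert e_b - X_i \rVert_2^2

(note that, as implemented, w weights the long-term term).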


compute_batch_long_distance(item_batch, user_batch)

Compute the long distance of all the items in item_batch given the user.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| item_batch | ndarray or Tensor | Batch of the purchased items ID (integers) for which to compute the distance from their user (positive and negative samples concatenated together). Shape must be (batch_size,). | required |
| user_batch | ndarray | Batch of user IDs (integers) for each purchased item. Shape must be (batch_size,). | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| long_term_distance | Tensor | Distance of all the items in item_batch from their ground truth embedding (V). Shape must be (batch_size,). |


compute_batch_loss(item_batch, basket_batch, future_batch, store_batch, week_batch, price_batch, available_item_batch, user_batch, is_training=True)

Compute total loss.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| item_batch | ndarray | Batch of purchased items ID (integers). Shape must be (batch_size,). | required |
| basket_batch | ndarray | Batch of baskets (ID of items already in the baskets) for each purchased item. Shape must be (batch_size, max_basket_size). | required |
| future_batch | ndarray | Batch of items to be purchased in the future (ID of items not yet in the basket) for each purchased item. Shape must be (batch_size, max_basket_size). Here for signature reasons, unused for this model. | required |
| store_batch | ndarray | Batch of store IDs (integers) for each purchased item. Shape must be (batch_size,). | required |
| week_batch | ndarray | Batch of week numbers (integers) for each purchased item. Shape must be (batch_size,). | required |
| price_batch | ndarray | Batch of prices (floats) for each purchased item. Shape must be (batch_size,). | required |
| available_item_batch | ndarray | List of availability matrices (indicating the availability (1) or not (0) of the products) for each purchased item. Shape must be (batch_size, n_items). | required |
| user_batch | ndarray | Batch of user IDs (integers) for each purchased item. Shape must be (batch_size,). | required |
| is_training | bool | Whether the model is in training mode or not, to activate dropout if needed. True by default, since compute_batch_loss is only used during training. | True |

Returns:

| Type | Description |
| --- | --- |
| Variable | Value of the loss for the batch (hinge loss). Shape must be (1,). |
| None | Placeholder to match the signature of the parent class method. |
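
Written out, with B the batch size, K = n_negative_samples, m = hinge_margin, \lambda = l2_regularization, d^{+}_{b} the distance of the positive item of sample b, and d^{-}_{b,k} that of its k-th negative sample, the returned loss is

\mathcal{L} = \frac{1}{BKm} \sum_{b=1}^{B} \sum_{k=1}^{K} \left[ \max\left(0,\; m + d^{+}_{b} - d^{-}_{b,k}\right) + \lambda R \right],
\qquad R = \tfrac{1}{2}\left(\lVert X \rVert_F^2 + \lVert V \rVert_F^2 + \lVert U \rVert_F^2 + \lVert W_q \rVert_F^2 + \lVert W_k \rVert_F^2\right)

where the 1/m factor is dropped when hinge_margin is 0 (tf.nn.l2_loss computes half the sum of squares, hence the 1/2 in R).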


compute_batch_short_distance(item_batch, basket_embedding)

Compute the short distance of the items in item_batch given the items in basket_batch.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| item_batch | ndarray or Tensor | Batch of the purchased items ID (integers) for which to compute the distance from their basket (positive and negative samples concatenated together). Shape must be (batch_size,). | required |
| basket_embedding | Tensor | Batch of context embeddings for each purchased item. Shape must be (batch_size, latent_size). | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| short_term_distance | Tensor | Distance of all the items in item_batch from their ground truth embedding (X). Shape must be (batch_size,). |


compute_batch_utility(item_batch, basket_batch, store_batch, week_batch, price_batch, available_item_batch, user_batch, is_training=False)

Compute the utility of all the items in item_batch.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| item_batch | ndarray | Batch of the purchased items ID (integers) for which to compute the utility from their basket. Shape must be (batch_size,). | required |
| basket_batch | ndarray | Batch of baskets (ID of items already in the baskets) for each purchased item. Shape must be (batch_size, max_basket_size). | required |
| store_batch | ndarray | Batch of store IDs (integers) for each purchased item. Shape must be (batch_size,). | required |
| week_batch | ndarray | Batch of week numbers (integers) for each purchased item. Shape must be (batch_size,). | required |
| price_batch | ndarray | Batch of prices (floats) for each purchased item. Shape must be (batch_size,). | required |
| available_item_batch | ndarray | List of availability matrices (indicating the availability (1) or not (0) of the products) for each purchased item. Shape must be (batch_size, n_items). | required |
| user_batch | ndarray | Batch of user IDs (integers) for each purchased item. Shape must be (batch_size,). | required |
| is_training | bool | Whether the model is in training mode or not, to activate dropout if needed. | False |

Returns:

| Type | Description |
| --- | --- |
| Tensor | Utility of all the items in item_batch. Shape must be (batch_size,). |
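
Equivalently, the utility is the negated blended distance of compute_batch_distance:

U(i \mid b, u) = -\, d_{\text{total}}(i, b, u)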


embed_basket(basket_batch, is_training=False)

Return the context embedding matrix.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| basket_batch | Tensor | [batch_size, L] tensor containing the list of the context items. | required |
| is_training | bool | Whether the model is in training mode or not, to activate dropout if needed. | False |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| basket_embedding | Tensor | [batch_size, latent_size] tensor containing the vector of context embeddings. |
| attention_weights | Tensor | [batch_size, L, L] tensor containing the attention matrix. |
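
The embedding follows scaled dot-product attention with ReLU-mapped queries and keys, mean-pooled over the non-padding items (a sketch of the computation; the masks are detailed in masked_attention):

Q' = \mathrm{ReLU}(X_b W_q), \qquad K' = \mathrm{ReLU}(X_b W_k)

A = \mathrm{softmax}\!\left(\frac{Q' K'^{\top}}{\sqrt{d}} + M\right), \qquad
e_b = \frac{1}{|b|} \sum_{\ell \in b} (A X_b)_{\ell}

where X_b gathers the item embeddings of the basket, M sets the diagonal and the padding columns to -\infty, and |b| is the number of non-padding items.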


get_negative_samples(available_items, purchased_items, next_item, n_samples)

Sample randomly a set of items.

(set of items not already purchased and not necessarily from the basket)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| available_items | ndarray | Matrix indicating the availability (1) or not (0) of the products. Shape must be (n_items,). | required |
| purchased_items | ndarray | List of items already purchased (already in the basket). | required |
| next_item | int | Next item (to be added in the basket). | required |
| n_samples | int | Number of samples to draw. | required |

Returns:

| Type | Description |
| --- | --- |
| list[int] | Random sample of items, each of them distinct from the next item and from the items already in the basket. |
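
A small usage sketch on an instantiated model (values are illustrative):

import numpy as np

# Assume `model` has been instantiated with n_items = 6.
available_items = np.array([1, 1, 1, 0, 1, 1])  # item 3 is out of the assortment
negatives = model.get_negative_samples(
    available_items=available_items,
    purchased_items=np.array([0, 4]),  # items already in the basket
    next_item=1,                       # the positive sample
    n_samples=2,
)
# `negatives` holds 2 items drawn from {2, 5}: available, not in the basket,
# and distinct from the next item.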


instantiate(n_items, n_users)

Initialize the model parameters.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| n_items | int | Number of unique items in the dataset. | required |
| n_users | int | Number of unique users in the dataset. | required |
Variables:

- X (tf.Variable): Item embedding matrix for short-term preferences, size (n_items + 1, d); the last row is the padding embedding.
- V (tf.Variable): Item embedding matrix for long-term preferences, size (n_items, d_long).
- U (tf.Variable): User embedding matrix for long-term preferences, size (n_users, d_long).
- Wq (tf.Variable): Weight matrix for query transformation in the attention mechanism, size (d, d).
- Wk (tf.Variable): Weight matrix for key transformation in the attention mechanism, size (d, d).
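
A minimal sketch of the created shapes (d and d_long follow latent_sizes; in practice the training loop calls instantiate for you):

model = SelfAttentionModel(latent_sizes={"short_term": 8, "long_term": 4})
model.instantiate(n_items=100, n_users=50)
assert model.X.shape == (101, 8)  # n_items + 1 rows: the last one is the padding embedding
assert model.V.shape == (100, 4)
assert model.U.shape == (50, 4)
assert model.Wq.shape == (8, 8) and model.Wk.shape == (8, 8)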


masked_attention(basket_batch, scaled_scores)

Compute the masked attention weights.

A mask is applied to ignore padding items. A mask is also applied on the diagonal to prevent items from attending to themselves.
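
A NumPy sketch (illustrative, not the library code) of the padding mask applied before the softmax, including the guard against rows that are entirely -inf:

import numpy as np

def masked_softmax(scores, padding_mask):
    # padding_mask: True for real items, False for padding columns.
    scores = np.where(padding_mask[None, :], scores, -np.inf)
    # Rows that are entirely -inf would yield NaNs; zero out one entry instead.
    all_inf = np.isinf(scores).all(axis=-1)
    scores[all_inf, 0] = 0.0
    exp = np.exp(scores - scores.max(axis=-1, keepdims=True))
    return exp / exp.sum(axis=-1, keepdims=True)

scores = np.zeros((3, 3))
print(masked_softmax(scores, np.array([True, True, False])))
# Each row gives weight 0.5 to the two real columns and 0 to the padding column.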
