Skip to content

Basic Attention Model

Implementation of an attention-based model for item recommendation.

AttentionBasedContextEmbedding

Bases: BaseBasketModel

Class for the attention-based model.

Wang, Shoujin, Liang Hu, Longbing Cao, Xiaoshui Huang, Defu Lian, and Wei Liu. "Attention-based transactional context embedding for next-item recommendation." In Proceedings of the AAAI conference on artificial intelligence, vol. 32, no. 1. 2018.

Source code in choice_learn/basket_models/basic_attention_model.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
class AttentionBasedContextEmbedding(BaseBasketModel):
    """
    Attention-based context embedding model for next-item recommendation.

    The items already in a basket are embedded, pooled with softmax attention
    weights into a single context vector, and scored against an output
    embedding of the candidate item.

    Wang, Shoujin, Liang Hu, Longbing Cao, Xiaoshui Huang, Defu Lian, and Wei Liu.
    "Attention-based transactional context embedding for next-item recommendation."
    In Proceedings of the AAAI conference on artificial intelligence, vol. 32, no. 1. 2018.
    """

    def __init__(
        self,
        latent_size: int = 4,
        n_negative_samples: int = 2,
        nce_distribution: str = "natural",
        optimizer: str = "adam",
        callbacks: Union[tf.keras.callbacks.CallbackList, None] = None,
        lr: float = 1e-3,
        epochs: int = 10,
        batch_size: int = 32,
        grad_clip_value: Union[float, None] = None,
        weight_decay: Union[float, None] = None,
        momentum: float = 0.0,
        **kwargs,
    ) -> None:
        """Initialize the model with hyperparameters.

        Parameters
        ----------
        latent_size : int
            Size of the item embeddings. Default is 4.
        n_negative_samples : int
            Number of negative samples to use in training. Default is 2.
        nce_distribution : str
            Items distribution to be used to compute the NCE loss.
            'natural' estimates the distribution from the train dataset;
            any other value (e.g. 'uniform') gives every item the same
            probability, 1/n_items. Default is 'natural'.
        optimizer : str
            Optimizer to use for training. Default is "adam".
        callbacks : tf.keras.callbacks.CallbackList or None
            Forwarded to the base class. Default is None.
        lr : float
            Learning rate for the optimizer. Default is 1e-3.
        epochs : int
            Number of training epochs. Default is 10.
        batch_size : int
            Size of the batches for training. Default is 32.
        grad_clip_value : float or None
            Forwarded to the base class. Default is None.
        weight_decay : float or None
            Forwarded to the base class. Default is None.
        momentum : float
            Forwarded to the base class. Default is 0.0.
        **kwargs
            Extra keyword arguments forwarded to the base class.
        """
        # Weights are only created once the number of items is known (see instantiate)
        self.instantiated = False

        self.latent_size = latent_size
        self.n_negative_samples = n_negative_samples
        self.nce_distribution = nce_distribution

        super().__init__(
            optimizer=optimizer,
            callbacks=callbacks,
            lr=lr,
            epochs=epochs,
            batch_size=batch_size,
            grad_clip_value=grad_clip_value,
            weight_decay=weight_decay,
            momentum=momentum,
            **kwargs,
        )

    def instantiate(
        self,
        n_items: int,
    ) -> None:
        """Create the model parameters and the loss.

        Parameters
        ----------
        n_items : int
            Number of unique items in the dataset.
        """
        self.n_items = n_items

        # NOTE: the order of the seeded random draws below determines the initial
        # values; keep it stable to preserve reproducibility.
        # Input (context) item embeddings
        self.Wi = tf.Variable(
            tf.random.normal((self.n_items, self.latent_size), stddev=0.1, seed=42), name="Wi"
        )
        # Output (target) item embeddings
        self.Wo = tf.Variable(
            tf.random.normal((self.n_items, self.latent_size), stddev=0.1, seed=42), name="Wo"
        )
        # Attention vector used to score each context item
        self.wa = tf.Variable(tf.random.normal((self.latent_size,), stddev=0.1, seed=42), name="wa")

        # Learned embedding returned for baskets that contain no item
        self.empty_context_embedding = tf.Variable(
            tf.random.normal((self.latent_size,), stddev=0.1, seed=42),
            name="empty_context_embedding",
        )

        self.loss = NoiseConstrastiveEstimation()
        self.is_trained = False
        self.instantiated = True

    @property
    def trainable_weights(self):
        """Return the trainable weights of the model.

        Returns
        -------
            list
                List of trainable weights (Wi, wa, Wo, empty_context_embedding).
        """
        return [self.Wi, self.wa, self.Wo, self.empty_context_embedding]

    @property
    def train_iter_method(self) -> str:
        """Method used to generate sub-baskets from a purchased one.

        Available methods are:
        - 'shopper': randomly orders the purchases and creates the ordered sub-baskets:
                        (1|0); (2|1); (3|1,2); (4|1,2,3); etc...
        - 'aleacarta': creates all the sub-baskets with N-1 items:
                        (4|1,2,3); (3|1,2,4); (2|1,3,4); (1|2,3,4)

        Returns
        -------
        str
            Data generation method; this model always returns 'aleacarta'.
        """
        return "aleacarta"

    def embed_context(self, context_items: tf.Tensor) -> tf.Tensor:
        """Return the context embedding matrix.

        Each context is the attention-weighted sum of the embeddings (rows of
        Wi) of its items, with weights softmax(x . wa). Empty contexts map to
        the learned `empty_context_embedding`.

        Parameters
        ----------
            context_items : tf.Tensor
                [batch_size, variable_length] tf.RaggedTensor
                Tensor containing the list of the context items.

        Returns
        -------
            tf.Tensor
                [batch_size, latent_size] tf.Tensor
                Tensor containing the matrix of contexts embeddings.
        """
        context_emb = tf.gather(self.Wi, tf.cast(context_items, tf.int32), axis=0)
        return tf.map_fn(
            lambda x: tf.cond(
                # A context with zero items has no embedding to pool
                tf.equal(tf.shape(x)[0], 0),
                lambda: self.empty_context_embedding,
                # x is (n_context_items, latent_size): weight each item, sum over items
                lambda: tf.reduce_sum(
                    tf.transpose(x) * tf.nn.softmax(tf.tensordot(x, self.wa, axes=1)),
                    axis=1,
                ),
            ),
            context_emb,
            fn_output_signature=tf.float32,
        )

    def compute_batch_utility(
        self,
        item_batch: Union[np.ndarray, tf.Tensor],
        basket_batch: np.ndarray,
        store_batch: np.ndarray,
        week_batch: np.ndarray,
        price_batch: np.ndarray,
        available_item_batch: np.ndarray,
    ) -> tf.Tensor:
        """Compute the utility of all the items in item_batch given the items in basket_batch.

        The utility is the dot product between the output embedding (row of Wo)
        of the item and the context embedding of its basket.

        Parameters
        ----------
        item_batch: np.ndarray or tf.Tensor
            Batch of item IDs (integers) for which to compute the utility
            Shape must be (batch_size,)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Entries equal to -1 are treated as padding and ignored
            Shape must be (batch_size, max_basket_size)
        store_batch: np.ndarray
            Batch of store IDs (integers) for each purchased item
            Shape must be (batch_size,). Unused by this model
        week_batch: np.ndarray
            Batch of week numbers (integers) for each purchased item
            Shape must be (batch_size,). Unused by this model
        price_batch: np.ndarray
            Batch of prices (floats) for each purchased item
            Shape must be (batch_size,). Unused by this model
        available_item_batch: np.ndarray
            Batch of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items). Unused by this model

        Returns
        -------
        item_utilities: tf.Tensor
            Utility of all the items in item_batch
            Shape must be (batch_size,)
        """
        # Arguments kept for signature compatibility only
        _ = store_batch
        _ = price_batch
        _ = week_batch
        _ = available_item_batch
        # Drop the -1 padding so that each basket keeps only its real items
        basket_batch_ragged = tf.cast(
            tf.ragged.boolean_mask(basket_batch, basket_batch != -1),
            dtype=tf.int32,
        )
        context_embedding = self.embed_context(basket_batch_ragged)
        return tf.reduce_sum(
            tf.multiply(tf.gather(self.Wo, tf.cast(item_batch, tf.int32)), context_embedding),
            axis=1,
        )

    def get_negative_samples(
        self,
        available_items: np.ndarray,
        purchased_items: np.ndarray,
        next_item: int,
        n_samples: int,
    ) -> tf.Tensor:
        """Sample randomly a set of items.

        (set of items not already purchased and *not necessarily* from the basket)

        Parameters
        ----------
        available_items: np.ndarray
            Matrix indicating the availability (1) or not (0) of the products
            Shape must be (n_items,)
        purchased_items: np.ndarray
            List of items already purchased (already in the basket)
        next_item: int
            Next item (to be added in the basket)
        n_samples: int
            Number of samples to draw

        Returns
        -------
        tf.Tensor
            1D int32 tensor with n_samples randomly drawn available items, each
            of them distinct from the next item and from the items already in
            the basket

        Raises
        ------
        tf.errors.InvalidArgumentError
            If fewer than n_samples eligible items are available.
        """
        # Convert inputs to tensors
        available_items = tf.cast(tf.convert_to_tensor(available_items), dtype=tf.int32)
        purchased_items = tf.cast(tf.convert_to_tensor(purchased_items), dtype=tf.int32)
        next_item = tf.cast(tf.convert_to_tensor(next_item), dtype=tf.int32)

        # Get the list of available items based on the availability matrix
        item_ids = tf.range(self.n_items)
        available_mask = tf.equal(available_items, 1)
        assortment = tf.boolean_mask(item_ids, available_mask)

        not_to_be_chosen = tf.concat([purchased_items, tf.expand_dims(next_item, axis=0)], axis=0)

        # Sample negative items from the assortment excluding not_to_be_chosen
        negative_samples = tf.boolean_mask(
            tensor=assortment,
            # Reduce the 2nd dimension of the boolean mask to get a 1D mask
            mask=~tf.reduce_any(
                tf.equal(tf.expand_dims(assortment, axis=1), not_to_be_chosen), axis=1
            ),
        )

        error_message = (
            "The number of negative samples to draw must be less than "
            "the number of available items not already purchased and "
            "distinct from the next item."
        )
        # Raise an error if n_samples > tf.size(negative_samples)
        tf.debugging.assert_greater_equal(
            tf.size(negative_samples), n_samples, message=error_message
        )

        # Randomize the sampling
        negative_samples = tf.random.shuffle(negative_samples)

        # Keep only n_samples
        return negative_samples[:n_samples]

    def _get_items_frequencies(self, dataset: TripDataset) -> tf.Tensor:
        """Estimate the empirical purchase distribution of the items.

        Parameters
        ----------
            dataset : TripDataset
                Dataset containing the baskets.

        Returns
        -------
            tf.Tensor
                float32 tensor of shape (n_items,) with the share of purchases
                of each item. Falls back to the uniform distribution when the
                dataset contains no purchase at all.
        """
        item_counts = np.zeros(self.n_items, dtype=np.int32)
        for trip in dataset.trips:
            for item in trip.purchases:
                item_counts[item] += 1
        total_count = item_counts.sum()
        if total_count == 0:
            # Avoid a 0/0 division that would fill the distribution with NaN
            items_distribution = np.ones(self.n_items) / max(self.n_items, 1)
        else:
            items_distribution = item_counts / total_count
        return tf.constant(items_distribution, dtype=tf.float32)

    def compute_batch_loss(
        self,
        item_batch: np.ndarray,
        basket_batch: np.ndarray,
        future_batch: np.ndarray,
        store_batch: np.ndarray,
        week_batch: np.ndarray,
        price_batch: np.ndarray,
        available_item_batch: np.ndarray,
    ) -> tuple[tf.Tensor, float]:
        """Compute the NCE loss for one batch of items.

        Parameters
        ----------
        item_batch: np.ndarray
            Batch of purchased items ID (integers)
            Shape must be (batch_size,)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
        future_batch: np.ndarray
            Batch of items to be purchased in the future (ID of items not yet in the
            basket) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
            Here for signature reasons, unused for this model
        store_batch: np.ndarray
            Batch of store IDs (integers) for each purchased item
            Shape must be (batch_size,)
        week_batch: np.ndarray
            Batch of week numbers (integers) for each purchased item
            Shape must be (batch_size,)
        price_batch: np.ndarray
            Batch of prices (floats) for each purchased item
            Shape must be (batch_size,)
        available_item_batch: np.ndarray
            List of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items)

        Returns
        -------
        loss
            Value returned by the noise contrastive estimation loss for the batch
        float
            Placeholder returned in the log-likelihood slot: this model does not
            compute a log-likelihood and always returns the constant 1e-10
        """
        _ = future_batch
        # One row of negatives per sample, transposed to (n_negative_samples, batch_size)
        # so that each row can be scored against the whole batch of baskets
        negative_samples = tf.transpose(
            tf.stack(
                [
                    self.get_negative_samples(
                        available_items=available_item_batch[idx],
                        purchased_items=basket_batch[idx],
                        next_item=next_item,
                        n_samples=self.n_negative_samples,
                    )
                    for idx, next_item in enumerate(item_batch)
                ],
                axis=0,
            ),
        )
        pos_score = self.compute_batch_utility(
            item_batch, basket_batch, store_batch, week_batch, price_batch, available_item_batch
        )
        neg_scores = tf.map_fn(
            lambda neg_items: self.compute_batch_utility(
                item_batch=neg_items,
                basket_batch=basket_batch,
                store_batch=store_batch,
                week_batch=week_batch,
                price_batch=price_batch,
                available_item_batch=available_item_batch,
            ),
            negative_samples,
            fn_output_signature=tf.float32,
        )
        # Back to (batch_size, n_negative_samples) for the loss
        loss_value = self.loss(
            logit_true=pos_score,
            logit_negative=tf.transpose(neg_scores),
            freq_true=tf.gather(self.negative_samples_distribution, tf.cast(item_batch, tf.int32)),
            freq_negative=tf.gather(
                self.negative_samples_distribution,
                tf.cast(tf.transpose(negative_samples), tf.int32),
            ),
        )
        return loss_value, 1e-10

    def fit(
        self,
        trip_dataset: TripDataset,
        val_dataset: Union[TripDataset, None] = None,
        verbose: int = 0,
    ):
        """Trains the model for a specified number of epochs.

        Parameters
        ----------
            trip_dataset : TripDataset
                Dataset of baskets to train the model on.
            val_dataset : TripDataset or None
                Optional validation dataset, forwarded to the base class.
            verbose : int
                Verbosity level, forwarded to the base class.

        Returns
        -------
            Training history returned by the base class `fit`.

        Raises
        ------
            TypeError
                If trip_dataset is not a TripDataset.
            ValueError
                If the dataset is empty or has too few items to draw the
                requested number of negative samples.
        """
        # Validate the type before touching any attribute of the dataset
        if not isinstance(trip_dataset, TripDataset):
            raise TypeError("Dataset must be a TripDataset.")

        if not self.instantiated:
            self.instantiate(n_items=trip_dataset.n_items)

        basket_sizes = [len(trip.purchases) for trip in trip_dataset.trips]
        if not basket_sizes:
            raise ValueError("Dataset must contain at least one trip.")

        if max(basket_sizes) + self.n_negative_samples > self.n_items:
            raise ValueError(
                "The number of items in the dataset is less than the number of negative samples."
            )

        if self.nce_distribution == "natural":
            self.negative_samples_distribution = self._get_items_frequencies(trip_dataset)
        else:
            # float32 tensor, consistent with the 'natural' branch
            self.negative_samples_distribution = tf.constant(
                (1 / trip_dataset.n_items) * np.ones((trip_dataset.n_items,)),
                dtype=tf.float32,
            )

        history = super().fit(trip_dataset=trip_dataset, val_dataset=val_dataset, verbose=verbose)

        self.is_trained = True

        return history

train_iter_method: str property

Method used to generate sub-baskets from a purchased one.

Available methods are:

- 'shopper': randomly orders the purchases and creates the ordered sub-baskets: (1|0); (2|1); (3|1,2); (4|1,2,3); etc.
- 'aleacarta': creates all the sub-baskets with N-1 items: (4|1,2,3); (3|1,2,4); (2|1,3,4); (1|2,3,4)

Returns:

Type Description
str

Data generation method.

trainable_weights property

Return the trainable weights of the model.

Returns:

Type Description
list

List of trainable weights (Wi, wa, Wo, empty_context_embedding).

__init__(latent_size=4, n_negative_samples=2, nce_distribution='natural', optimizer='adam', callbacks=None, lr=0.001, epochs=10, batch_size=32, grad_clip_value=None, weight_decay=None, momentum=0.0, **kwargs)

Initialize the model with hyperparameters.

Parameters:

Name Type Description Default
epochs int

Number of training epochs.

10
lr float

Learning rate for the optimizer.

0.001
latent_size int

Size of the item embeddings.

4
n_negative_samples int

Number of negative samples to use in training.

2
batch_size int

Size of the batches for training. Default is 32.

32
optimizer str

Optimizer to use for training. Default is "Adam".

'adam'
nce_distribution

Items distribution to be used to compute the NCE loss. Currently available: 'natural', which estimates the distribution from the train dataset, and 'uniform', where all items have the same probability, 1/n_items. Default is 'natural'.

'natural'
Source code in choice_learn/basket_models/basic_attention_model.py
def __init__(
    self,
    latent_size: int = 4,
    n_negative_samples: int = 2,
    nce_distribution="natural",
    optimizer: str = "adam",
    callbacks: Union[tf.keras.callbacks.CallbackList, None] = None,
    lr: float = 1e-3,
    epochs: int = 10,
    batch_size: int = 32,
    grad_clip_value: Union[float, None] = None,
    weight_decay: Union[float, None] = None,
    momentum: float = 0.0,
    **kwargs,
) -> None:
    """Store the model hyperparameters and configure the base class.

    Parameters
    ----------
    latent_size : int
        Size of the item embeddings. Default is 4.
    n_negative_samples : int
        Number of negative samples to use in training. Default is 2.
    nce_distribution : str
        Items distribution used to compute the NCE loss: 'natural' to
        estimate it from the train dataset, 'uniform' to give every item
        the probability 1/n_items. Default is 'natural'.
    optimizer : str
        Optimizer to use for training. Default is "adam".
    callbacks : tf.keras.callbacks.CallbackList or None
        Forwarded to the base class. Default is None.
    lr : float
        Learning rate for the optimizer. Default is 1e-3.
    epochs : int
        Number of training epochs. Default is 10.
    batch_size : int
        Size of the batches for training. Default is 32.
    grad_clip_value : float or None
        Forwarded to the base class. Default is None.
    weight_decay : float or None
        Forwarded to the base class. Default is None.
    momentum : float
        Forwarded to the base class. Default is 0.0.
    **kwargs
        Extra keyword arguments forwarded to the base class.
    """
    # Model-specific state is set before delegating to the base class
    self.instantiated = False
    self.latent_size = latent_size
    self.n_negative_samples = n_negative_samples
    self.nce_distribution = nce_distribution

    # Generic training options handled by the base class
    training_options = {
        "optimizer": optimizer,
        "callbacks": callbacks,
        "lr": lr,
        "epochs": epochs,
        "batch_size": batch_size,
        "grad_clip_value": grad_clip_value,
        "weight_decay": weight_decay,
        "momentum": momentum,
    }
    super().__init__(**training_options, **kwargs)

compute_batch_loss(item_batch, basket_batch, future_batch, store_batch, week_batch, price_batch, available_item_batch)

Compute log-likelihood and loss for one batch of items.

Parameters:

Name Type Description Default
item_batch ndarray

Batch of purchased items ID (integers) Shape must be (batch_size,)

required
basket_batch ndarray

Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item Shape must be (batch_size, max_basket_size)

required
future_batch ndarray

Batch of items to be purchased in the future (ID of items not yet in the basket) (arrays) for each purchased item Shape must be (batch_size, max_basket_size) Here for signature reasons, unused for this model

required
store_batch ndarray

Batch of store IDs (integers) for each purchased item Shape must be (batch_size,)

required
week_batch ndarray

Batch of week numbers (integers) for each purchased item Shape must be (batch_size,)

required
price_batch ndarray

Batch of prices (floats) for each purchased item Shape must be (batch_size,)

required
available_item_batch ndarray

List of availability matrices (indicating the availability (1) or not (0) of the products) (arrays) for each purchased item Shape must be (batch_size, n_items)

required

Returns:

Name Type Description
Variable

Value of the loss for the batch (binary cross-entropy), Shape must be (1,)

loglikelihood Variable

Placeholder for the log-likelihood of the batch of items. The current implementation does not compute it and always returns the constant 1e-10.

Source code in choice_learn/basket_models/basic_attention_model.py
def compute_batch_loss(
    self,
    item_batch: np.ndarray,
    basket_batch: np.ndarray,
    future_batch: np.ndarray,
    store_batch: np.ndarray,
    week_batch: np.ndarray,
    price_batch: np.ndarray,
    available_item_batch: np.ndarray,
) -> tuple[tf.Tensor, float]:
    """Compute the NCE loss for one batch of items.

    Parameters
    ----------
    item_batch: np.ndarray
        Batch of purchased items ID (integers)
        Shape must be (batch_size,)
    basket_batch: np.ndarray
        Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
        Shape must be (batch_size, max_basket_size)
    future_batch: np.ndarray
        Batch of items to be purchased in the future (ID of items not yet in the
        basket) (arrays) for each purchased item
        Shape must be (batch_size, max_basket_size)
        Here for signature reasons, unused for this model
    store_batch: np.ndarray
        Batch of store IDs (integers) for each purchased item
        Shape must be (batch_size,)
    week_batch: np.ndarray
        Batch of week numbers (integers) for each purchased item
        Shape must be (batch_size,)
    price_batch: np.ndarray
        Batch of prices (floats) for each purchased item
        Shape must be (batch_size,)
    available_item_batch: np.ndarray
        List of availability matrices (indicating the availability (1) or not (0)
        of the products) (arrays) for each purchased item
        Shape must be (batch_size, n_items)

    Returns
    -------
    loss
        Value returned by `self.loss` for the batch
    float
        Placeholder returned in the log-likelihood slot: this model does not
        compute a log-likelihood and always returns the constant 1e-10
    """
    _ = future_batch
    # One row of negatives per sample, transposed to (n_negative_samples, batch_size)
    # so that each row can be scored against the whole batch of baskets
    negative_samples = tf.transpose(
        tf.stack(
            [
                self.get_negative_samples(
                    available_items=available_item_batch[idx],
                    purchased_items=basket_batch[idx],
                    next_item=next_item,
                    n_samples=self.n_negative_samples,
                )
                for idx, next_item in enumerate(item_batch)
            ],
            axis=0,
        ),
    )
    pos_score = self.compute_batch_utility(
        item_batch, basket_batch, store_batch, week_batch, price_batch, available_item_batch
    )
    neg_scores = tf.map_fn(
        lambda neg_items: self.compute_batch_utility(
            item_batch=neg_items,
            basket_batch=basket_batch,
            store_batch=store_batch,
            week_batch=week_batch,
            price_batch=price_batch,
            available_item_batch=available_item_batch,
        ),
        negative_samples,
        fn_output_signature=tf.float32,
    )
    # Back to (batch_size, n_negative_samples) for the loss
    loss_value = self.loss(
        logit_true=pos_score,
        logit_negative=tf.transpose(neg_scores),
        freq_true=tf.gather(self.negative_samples_distribution, tf.cast(item_batch, tf.int32)),
        freq_negative=tf.gather(
            self.negative_samples_distribution,
            tf.cast(tf.transpose(negative_samples), tf.int32),
        ),
    )
    return loss_value, 1e-10

compute_batch_utility(item_batch, basket_batch, store_batch, week_batch, price_batch, available_item_batch)

Compute the utility of all the items in item_batch given the items in basket_batch.

Parameters:

Name Type Description Default
item_batch Union[ndarray, Tensor]

Batch of the purchased items ID (integers) for which to compute the utility Shape must be (batch_size,) (positive and negative samples concatenated together)

required
basket_batch ndarray

Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item Shape must be (batch_size, max_basket_size)

required
store_batch ndarray

Batch of store IDs (integers) for each purchased item Shape must be (batch_size,)

required
week_batch ndarray

Batch of week numbers (integers) for each purchased item Shape must be (batch_size,)

required
price_batch ndarray

Batch of prices (floats) for each purchased item Shape must be (batch_size,)

required
available_item_batch ndarray

Batch of availability matrices (indicating the availability (1) or not (0) of the products) (arrays) for each purchased item Shape must be (batch_size, n_items)

required

Returns:

Name Type Description
item_utilities Tensor

Utility of all the items in item_batch Shape must be (batch_size,)

Source code in choice_learn/basket_models/basic_attention_model.py
def compute_batch_utility(
    self,
    item_batch: Union[np.ndarray, tf.Tensor],
    basket_batch: np.ndarray,
    store_batch: np.ndarray,
    week_batch: np.ndarray,
    price_batch: np.ndarray,
    available_item_batch: np.ndarray,
) -> tf.Tensor:
    """Score each item of item_batch against the context given by its basket.

    The score is the dot product between the item's row of `Wo` and the
    context embedding of the corresponding basket.

    Parameters
    ----------
    item_batch: np.ndarray or tf.Tensor
        Batch of item IDs (integers) for which to compute the utility
        Shape must be (batch_size,)
    basket_batch: np.ndarray
        Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
        Entries equal to -1 are masked out before embedding
        Shape must be (batch_size, max_basket_size)
    store_batch: np.ndarray
        Batch of store IDs (integers) for each purchased item
        Shape must be (batch_size,). Unused by this model
    week_batch: np.ndarray
        Batch of week numbers (integers) for each purchased item
        Shape must be (batch_size,). Unused by this model
    price_batch: np.ndarray
        Batch of prices (floats) for each purchased item
        Shape must be (batch_size,). Unused by this model
    available_item_batch: np.ndarray
        Batch of availability matrices (indicating the availability (1) or not (0)
        of the products) (arrays) for each purchased item
        Shape must be (batch_size, n_items). Unused by this model

    Returns
    -------
    item_utilities: tf.Tensor
        Utility of all the items in item_batch
        Shape must be (batch_size,)
    """
    # These inputs are part of the shared signature but play no role here
    _ = (store_batch, price_batch, week_batch, available_item_batch)

    # Keep only the real items of each basket (-1 marks an empty slot)
    is_real_item = basket_batch != -1
    context_items = tf.cast(
        tf.ragged.boolean_mask(basket_batch, is_real_item),
        dtype=tf.int32,
    )
    context_vectors = self.embed_context(context_items)

    target_vectors = tf.gather(self.Wo, tf.cast(item_batch, tf.int32))
    # Row-wise dot product between target and context embeddings
    return tf.reduce_sum(tf.multiply(target_vectors, context_vectors), axis=1)

embed_context(context_items)

Return the context embedding matrix.

Returns:

Type Description
tf.Tensor

[batch_size, latent_size] tf.Tensor Tensor containing the matrix of contexts embeddings.

Source code in choice_learn/basket_models/basic_attention_model.py
def embed_context(self, context_items: tf.Tensor) -> tf.Tensor:
    """Pool the embeddings of each context into a single vector.

    Parameters
    ----------
        context_items : tf.Tensor
            [batch_size, variable_length] tf.RaggedTensor
            Tensor containing the list of the context items.

    Returns
    -------
        tf.Tensor
            [batch_size, latent_size] tf.Tensor
            Tensor containing the matrix of contexts embeddings.
    """
    embedded_items = tf.gather(self.Wi, tf.cast(context_items, tf.int32), axis=0)

    def attention_pool(item_embeddings):
        # Softmax attention weights over the items of one context
        attention = tf.nn.softmax(tf.tensordot(item_embeddings, self.wa, axes=1))
        # Weighted sum of the item embeddings
        return tf.reduce_sum(tf.transpose(item_embeddings) * attention, axis=1)

    def embed_one_context(item_embeddings):
        # Contexts without any item use the dedicated learned embedding
        return tf.cond(
            tf.equal(tf.shape(item_embeddings)[0], 0),
            lambda: self.empty_context_embedding,
            lambda: attention_pool(item_embeddings),
        )

    return tf.map_fn(
        embed_one_context,
        embedded_items,
        fn_output_signature=tf.float32,
    )

fit(trip_dataset, val_dataset=None, verbose=0)

Trains the model for a specified number of epochs.

Source code in choice_learn/basket_models/basic_attention_model.py
def fit(
    self,
    trip_dataset: TripDataset,
    val_dataset: Union[TripDataset, None] = None,
    verbose: int = 0,
):
    """Trains the model for a specified number of epochs.

    Parameters
    ----------
        trip_dataset : TripDataset
            Dataset of baskets to train the model on.
        val_dataset : TripDataset or None
            Optional validation dataset, forwarded to the base class.
        verbose : int
            Verbosity level, forwarded to the base class.

    Returns
    -------
        Training history returned by the base class `fit`.

    Raises
    ------
        TypeError
            If trip_dataset is not a TripDataset.
        ValueError
            If the dataset is empty or has too few items to draw the
            requested number of negative samples.
    """
    # Validate the type before touching any attribute of the dataset
    if not isinstance(trip_dataset, TripDataset):
        raise TypeError("Dataset must be a TripDataset.")

    if not self.instantiated:
        self.instantiate(n_items=trip_dataset.n_items)

    basket_sizes = [len(trip.purchases) for trip in trip_dataset.trips]
    if not basket_sizes:
        raise ValueError("Dataset must contain at least one trip.")

    if max(basket_sizes) + self.n_negative_samples > self.n_items:
        raise ValueError(
            "The number of items in the dataset is less than the number of negative samples."
        )

    if self.nce_distribution == "natural":
        self.negative_samples_distribution = self._get_items_frequencies(trip_dataset)
    else:
        # Stored as a float32 tensor so that the frequencies gathered from it
        # do not end up as float64
        self.negative_samples_distribution = tf.constant(
            (1 / trip_dataset.n_items) * np.ones((trip_dataset.n_items,)),
            dtype=tf.float32,
        )

    history = super().fit(trip_dataset=trip_dataset, val_dataset=val_dataset, verbose=verbose)

    self.is_trained = True

    return history

get_negative_samples(available_items, purchased_items, next_item, n_samples)

Sample randomly a set of items.

(set of items not already purchased and not necessarily from the basket)

Parameters:

Name Type Description Default
available_items ndarray

Matrix indicating the availability (1) or not (0) of the products Shape must be (n_items,)

required
purchased_items ndarray

List of items already purchased (already in the basket)

required
next_item int

Next item (to be added in the basket)

required
n_samples int

Number of samples to draw

required

Returns:

Type Description
list[int]

Random sample of items, each of them distinct from the next item and from the items already in the basket

Source code in choice_learn/basket_models/basic_attention_model.py
def get_negative_samples(
    self,
    available_items: np.ndarray,
    purchased_items: np.ndarray,
    next_item: int,
    n_samples: int,
) -> list[int]:
    """Draw random negative items for one (basket, next item) pair.

    Candidates are the available items that are neither in the basket nor
    equal to the next item.

    Parameters
    ----------
    available_items: np.ndarray
        Matrix indicating the availability (1) or not (0) of the products
        Shape must be (n_items,)
    purchased_items: np.ndarray
        List of items already purchased (already in the basket)
    next_item: int
        Next item (to be added in the basket)
    n_samples: int
        Number of samples to draw

    Returns
    -------
    list[int]
        Random sample of items, each of them distinct from
        the next item and from the items already in the basket
    """
    availability = tf.cast(tf.convert_to_tensor(available_items), dtype=tf.int32)
    basket = tf.cast(tf.convert_to_tensor(purchased_items), dtype=tf.int32)
    target = tf.cast(tf.convert_to_tensor(next_item), dtype=tf.int32)

    # IDs of the items flagged as available
    assortment = tf.boolean_mask(tf.range(self.n_items), tf.equal(availability, 1))

    # Items that must never be drawn: the basket content and the next item
    excluded = tf.concat([basket, tf.expand_dims(target, axis=0)], axis=0)

    # Compare every available item with every excluded one, then collapse
    # the comparison matrix into one flag per available item
    is_excluded = tf.reduce_any(
        tf.equal(tf.expand_dims(assortment, axis=1), excluded), axis=1
    )
    candidates = tf.boolean_mask(tensor=assortment, mask=tf.logical_not(is_excluded))

    # Fail when there are fewer candidates than requested samples
    tf.debugging.assert_greater_equal(
        tf.size(candidates),
        n_samples,
        message=(
            "The number of negative samples to draw must be less than "
            "the number of available items not already purchased and "
            "distinct from the next item."
        ),
    )

    # Shuffle, then keep the first n_samples candidates
    return tf.random.shuffle(candidates)[:n_samples]

instantiate(n_items)

Initialize the model parameters.

Parameters:

Name Type Description Default
n_items int

Number of unique items in the dataset.

required
Source code in choice_learn/basket_models/basic_attention_model.py
def instantiate(
    self,
    n_items: int,
) -> None:
    """Create the trainable variables and the loss of the model.

    All variables are drawn from a normal distribution with a standard
    deviation of 0.1 and an operation seed of 42.

    Parameters
    ----------
    n_items : int
        Number of unique items in the dataset.
    """
    self.n_items = n_items

    # NOTE(review): the random draws below are seeded, so their order presumably
    # determines the initial values — keep the statement order stable.
    # Item embeddings looked up for the items of the context (see embed_context)
    self.Wi = tf.Variable(
        tf.random.normal((self.n_items, self.latent_size), stddev=0.1, seed=42), name="Wi"
    )
    # Item embeddings looked up for the scored items (see compute_batch_utility)
    self.Wo = tf.Variable(
        tf.random.normal((self.n_items, self.latent_size), stddev=0.1, seed=42), name="Wo"
    )
    # Vector used to compute the attention score of each context item
    self.wa = tf.Variable(tf.random.normal((self.latent_size,), stddev=0.1, seed=42), name="wa")

    # Embedding used in place of the attention pooling when a context is empty
    self.empty_context_embedding = tf.Variable(
        tf.random.normal((self.latent_size,), stddev=0.1, seed=42),
        name="empty_context_embedding",
    )

    self.loss = NoiseConstrastiveEstimation()
    self.is_trained = False
    # Lets fit() skip the instantiation on later calls
    self.instantiated = True