Skip to content

SHOPPER Model

Implementation of the Shopper model.

Shopper

Bases: BaseBasketModel

Class for the Shopper model.

SHOPPER: A Probabilistic Model of Consumer Choice with Substitutes and Complements, Ruiz, F. J. R.; Athey, S.; Blei, D. M. (2019)

Source code in choice_learn/basket_models/shopper.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
class Shopper(BaseBasketModel):
    """Class for the Shopper model.

    SHOPPER: A Probabilistic Model of Consumer Choice with Substitutes and Complements,
    Ruiz, F. J. R.; Athey, S.; Blei, D. M. (2019)
    """

    def __init__(
        self,
        item_intercept: bool = True,
        price_effects: bool = False,
        seasonal_effects: bool = False,
        think_ahead: bool = False,
        latent_sizes: Union[dict[str, int], None] = None,
        n_negative_samples: int = 2,
        optimizer: str = "adam",
        callbacks: Union[tf.keras.callbacks.CallbackList, None] = None,
        lr: float = 1e-3,
        epochs: int = 10,
        batch_size: int = 32,
        grad_clip_value: Union[float, None] = None,
        weight_decay: Union[float, None] = None,
        momentum: float = 0.0,
        epsilon_price: float = 1e-5,
        **kwargs,
    ) -> None:
        """Initialize the Shopper model.

        Parameters
        ----------
        item_intercept: bool, optional
            Whether to include item intercept in the model, by default True
            Corresponds to the item intercept
        price_effects: bool, optional
            Whether to include price effects in the model, by default False
        seasonal_effects: bool, optional
            Whether to include seasonal effects in the model, by default False
        think_ahead: bool, optional
            Whether to include "thinking ahead" in the model, by default False
        latent_sizes: dict[str, int], optional
            Lengths of the vector representation of the latent parameters
            latent_sizes["preferences"]: length of one vector of theta, alpha, rho
            latent_sizes["price"]: length of one vector of gamma, beta
            latent_sizes["season"]: length of one vector of delta, mu
            by default None, which stands for
            {"preferences": 4, "price": 4, "season": 4}
            The given dict is copied: it is never modified by the model
        n_negative_samples: int, optional
            Number of negative samples to draw for each positive sample for the training,
            by default 2
            Must be > 0
        optimizer: str, optional
            Optimizer to use for training, by default "adam"
        callbacks: tf.keras.callbacks.Callbacklist, optional
            List of callbacks to add to model.fit, by default None and only add History
        lr: float, optional
            Learning rate, by default 1e-3
        epochs: int, optional
            Number of epochs, by default 10
        batch_size: int, optional
            Batch size, by default 32
        grad_clip_value: float, optional
            Value to clip the gradient, by default None
        weight_decay: float, optional
            Weight decay, by default None
        momentum: float, optional
            Momentum for the optimizer, by default 0. For SGD only
        epsilon_price: float, optional
            Epsilon value to add to prices to avoid NaN values (log(0)), by default 1e-5
        **kwargs
            Extra keyword arguments forwarded unchanged to the parent constructor

        Raises
        ------
        ValueError
            If latent_sizes contains a key other than "preferences", "price" or "season",
            or if n_negative_samples is not > 0
        """
        self.item_intercept = item_intercept
        self.price_effects = price_effects
        self.seasonal_effects = seasonal_effects
        self.think_ahead = think_ahead

        # Work on a private copy: missing keys are filled in below, and this must
        # neither leak into the caller's dict nor into a default shared between instances
        if latent_sizes is None:
            latent_sizes = {"preferences": 4, "price": 4, "season": 4}
        else:
            latent_sizes = dict(latent_sizes)

        if "preferences" not in latent_sizes:
            logging.warning(
                "No latent size value has been specified for preferences, "
                "switching to default value 4."
            )
            latent_sizes["preferences"] = 4
        # The price and season sizes are only required when the matching effect is enabled
        if "price" not in latent_sizes and self.price_effects:
            logging.warning(
                "No latent size value has been specified for price_effects, "
                "switching to default value 4."
            )
            latent_sizes["price"] = 4
        if "season" not in latent_sizes and self.seasonal_effects:
            logging.warning(
                "No latent size value has been specified for seasonal_effects, "
                "switching to default value 4."
            )
            latent_sizes["season"] = 4

        for val in latent_sizes:
            if val not in ("preferences", "price", "season"):
                raise ValueError(f"Unknown value for latent_sizes dict: {val}.")

        if n_negative_samples <= 0:
            raise ValueError("n_negative_samples must be > 0.")

        self.latent_sizes = latent_sizes
        self.n_negative_samples = n_negative_samples

        self.epsilon_price = epsilon_price

        super().__init__(
            optimizer=optimizer,
            callbacks=callbacks,
            lr=lr,
            epochs=epochs,
            batch_size=batch_size,
            grad_clip_value=grad_clip_value,
            weight_decay=weight_decay,
            momentum=momentum,
            **kwargs,
        )

        # True when at least one GPU is visible to TensorFlow, False otherwise
        # /!\ If a model trained on GPU is loaded on CPU, self.on_gpu must be set
        # to False manually after loading the model, and vice versa
        self.on_gpu = len(tf.config.get_visible_devices("GPU")) > 0

        # The latent variables are only created by instantiate()
        self.instantiated = False

    def instantiate(
        self,
        n_items: int,
        n_stores: int = 0,
    ) -> None:
        """Create the latent variables of the Shopper model.

        rho, alpha and theta are always created; lambda_, beta/gamma and mu/delta
        are only created when the matching effect is enabled on the model.

        Parameters
        ----------
        n_items: int
            Number of items to consider, i.e. the number of items in the dataset
            (includes the checkout item)
        n_stores: int
            Number of stores in the population
        """

        def seeded_normal(shape: tuple, stddev: float, name: str) -> tf.Variable:
            """Return a trainable variable drawn from a centered normal law (seed 42)."""
            draw = tf.random_normal_initializer(mean=0, stddev=stddev, seed=42)
            return tf.Variable(draw(shape=shape), trainable=True, name=name)

        self.n_items = n_items
        # The price effects need a gamma embedding, hence at least one store
        # (by default, the store id is 0)
        if self.price_effects and n_stores == 0:
            n_stores = 1
        self.n_stores = n_stores

        preference_size = self.latent_sizes["preferences"]
        # One vector of length latent_sizes["preferences"] per item (rho, alpha)
        # and per store (theta)
        self.rho = seeded_normal((n_items, preference_size), stddev=1.0, name="rho")
        self.alpha = seeded_normal((n_items, preference_size), stddev=1.0, name="alpha")
        self.theta = seeded_normal((n_stores, preference_size), stddev=1.0, name="theta")

        if self.item_intercept:
            # One scalar per item, except for the checkout item which gets
            # no lambda (set to 0 later)
            self.lambda_ = seeded_normal((n_items - 1,), stddev=1.0, name="lambda_")

        if self.price_effects:
            # Price sensitivity: one vector of length latent_sizes["price"]
            # per item (beta) and per store (gamma)
            price_size = self.latent_sizes["price"]
            self.beta = seeded_normal((n_items, price_size), stddev=1.0, name="beta")
            self.gamma = seeded_normal((n_stores, price_size), stddev=1.0, name="gamma")

        if self.seasonal_effects:
            # Seasonal effects: one vector of length latent_sizes["season"]
            # per item (mu) and per row of the 52-row delta embedding
            season_size = self.latent_sizes["season"]
            self.mu = seeded_normal((n_items, season_size), stddev=0.1, name="mu")
            self.delta = seeded_normal((52, season_size), stddev=0.1, name="delta")

        self.instantiated = True

    @property
    def trainable_weights(self) -> list[tf.Variable]:
        """Latent parameters of the model that are currently in use.

        rho, alpha and theta always come first, followed by lambda_, then beta and
        gamma, then mu and delta, each group only when its effect is enabled.

        Returns
        -------
        list[tf.Variable]
            Latent parameters of the model
        """
        weight_names = ["rho", "alpha", "theta"]

        # Optional groups are appended in a fixed order; attributes of disabled
        # effects are never looked up
        if self.item_intercept:
            weight_names += ["lambda_"]
        if self.price_effects:
            weight_names += ["beta", "gamma"]
        if self.seasonal_effects:
            weight_names += ["mu", "delta"]

        return [getattr(self, weight_name) for weight_name in weight_names]

    @property
    def train_iter_method(self):
        """Name of the method generating sub-baskets from a purchased basket.

        Two methods are described:
        - 'shopper': the purchases are randomly ordered and the ordered sub-baskets
                        are created: (1|0); (2|1); (3|1,2); (4|1,2,3); etc...
        - 'aleacarta': all the sub-baskets with N-1 items are created:
                        (4|1,2,3); (3|1,2,4); (2|1,3,4); (1|2,3,4)

        Returns
        -------
        str
            Data generation method, always 'shopper' for this model.
        """
        data_generation_method = "shopper"
        return data_generation_method

    def thinking_ahead(
        self,
        item_batch: Union[np.ndarray, tf.Tensor],
        ragged_basket_batch: tf.RaggedTensor,
        price_batch: np.ndarray,
        available_item_batch: np.ndarray,
        theta_store: tf.Tensor,
        gamma_store: tf.Tensor,
        delta_week: tf.Tensor,
    ) -> tf.Tensor:
        """Compute the next step ("thinking ahead") utility of the items in item_batch.

        For each element of the batch, the current item is hypothetically added to
        the basket, and the returned value is the maximum utility (psi + basket
        interaction) among the available items that are not in this hypothetical
        basket. The value is 0 when the basket already ends with the checkout
        item 0 or when no such item is left.

        Parameters
        ----------
        item_batch: np.ndarray or tf.Tensor
            Batch of the purchased items ID (integers) for which to compute the utility
            Shape must be (batch_size,)
            (positive and negative samples concatenated together)
        ragged_basket_batch: tf.RaggedTensor
            Batch of baskets (ID of items already in the baskets) (arrays) without padding
            for each purchased item
            Shape must be (batch_size, None)
        price_batch: np.ndarray
            Batch of prices for each purchased item
            Shape must be (batch_size,)
            Only read if self.price_effects is True
        available_item_batch: np.ndarray
            Batch of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items)
        theta_store: tf.Tensor
            Slices from theta embedding gathered according to the indices that correspond
            to the store of each purchased item in the batch
            Shape must be (batch_size, latent_sizes["preferences"])
        gamma_store: tf.Tensor
            Slices from gamma embedding gathered according to the indices that correspond
            to the store of each purchased item in the batch
            Shape must be (batch_size, latent_sizes["price"])
            Only read if self.price_effects is True
        delta_week: tf.Tensor
            Slices from delta embedding gathered according to the indices that correspond
            to the week of each purchased item in the batch
            Shape must be (batch_size, latent_sizes["season"])
            Only read if self.seasonal_effects is True

        Returns
        -------
        total_next_step_utilities: tf.Tensor
            Next step utility of all the items in item_batch
            Shape must be (batch_size,)
        """
        # Start from zeros: the entries that are explicitly set to 0 below
        # therefore keep their initial value
        total_next_step_utilities = tf.zeros_like(item_batch, dtype=tf.float32)
        # Compute the next step item utility for each element of the batch, one by one
        # TODO: avoid a for loop on ragged_basket_batch at a later stage
        for idx in tf.range(ragged_basket_batch.shape[0]):
            basket = tf.gather(ragged_basket_batch, idx)
            if len(basket) != 0 and basket[-1] == 0:
                # No thinking ahead when the basket ends already with the checkout item 0
                total_next_step_utilities = tf.tensor_scatter_nd_update(
                    tensor=total_next_step_utilities, indices=[[idx]], updates=[0]
                )

            else:
                # Basket with the hypothetical current item
                next_basket = tf.concat([basket, [item_batch[idx]]], axis=0)
                # Get the list of available items based on the availability matrix
                item_ids = tf.range(self.n_items)
                available_mask = tf.equal(available_item_batch[idx], 1)
                assortment = tf.boolean_mask(item_ids, available_mask)
                # Keep the available items that are not in the hypothetical basket:
                # each item of the assortment is compared to all the items of next_basket
                hypothetical_next_purchases = tf.boolean_mask(
                    assortment,
                    ~tf.reduce_any(
                        tf.equal(tf.expand_dims(assortment, axis=1), next_basket), axis=1
                    ),
                )
                # Check if there are still items to purchase during the next step
                if len(hypothetical_next_purchases) == 0:
                    # No more items to purchase: next step impossible
                    total_next_step_utilities = tf.tensor_scatter_nd_update(
                        tensor=total_next_step_utilities, indices=[[idx]], updates=[0]
                    )
                else:
                    # Compute the dot product along the last dimension between the embeddings
                    # of the given store's theta and alpha of all the items
                    hypothetical_store_preferences = tf.reduce_sum(
                        theta_store[idx] * self.alpha, axis=1
                    )

                    if self.item_intercept:
                        # Manually enforce the lambda of the checkout item to be 0
                        # (equivalent to translating the lambda values)
                        hypothetical_item_intercept = tf.concat([[0.0], self.lambda_], axis=0)
                    else:
                        hypothetical_item_intercept = tf.zeros_like(hypothetical_store_preferences)

                    if self.price_effects:
                        # NOTE: the same scalar price_batch[idx] (price attached to the
                        # current element of the batch) is applied to all the items here
                        hypothetical_price_effects = (
                            -1
                            # Compute the dot product along the last dimension between
                            # the embeddings of the given store's gamma and beta
                            # of all the items
                            * tf.reduce_sum(gamma_store[idx] * self.beta, axis=1)
                            * tf.math.log(price_batch[idx] + self.epsilon_price)
                        )
                    else:
                        hypothetical_price_effects = tf.zeros_like(hypothetical_store_preferences)

                    if self.seasonal_effects:
                        # Compute the dot product along the last dimension between the embeddings
                        # of delta of the given week and mu of all the items
                        hypothetical_seasonal_effects = tf.reduce_sum(
                            delta_week[idx] * self.mu, axis=1
                        )
                    else:
                        hypothetical_seasonal_effects = tf.zeros_like(
                            hypothetical_store_preferences
                        )

                    # The effects of item intercept, store preferences, price sensitivity
                    # and seasonal effects are combined in the per-item per-trip latent variable
                    hypothetical_psi = tf.reduce_sum(
                        [
                            hypothetical_item_intercept,  # 0 if self.item_intercept is False
                            hypothetical_store_preferences,
                            hypothetical_price_effects,  # 0 if self.price_effects is False
                            hypothetical_seasonal_effects,  # 0 if self.seasonal_effects is False
                        ],
                        axis=0,
                    )  # Shape: (n_items,)

                    # Shape: (len(hypothetical_next_purchases),)
                    next_psi = tf.gather(hypothetical_psi, indices=hypothetical_next_purchases)

                    # Consider hypothetical "next" item one by one
                    next_step_basket_interaction_utilities = tf.zeros(
                        (len(hypothetical_next_purchases),), dtype=tf.float32
                    )
                    for inner_idx in tf.range(len(hypothetical_next_purchases)):
                        next_item_id = tf.gather(hypothetical_next_purchases, inner_idx)
                        rho_next_item = tf.gather(
                            self.rho, indices=next_item_id
                        )  # Shape: (latent_size,)
                        # NOTE(review): next_alpha_by_basket and next_alpha_average do not
                        # depend on inner_idx and could be computed once before this loop
                        # Gather the embeddings using a tensor of indices
                        # (before ensure that indices are integers)
                        next_alpha_by_basket = tf.gather(
                            self.alpha, indices=tf.cast(next_basket, dtype=tf.int32)
                        )  # Shape: (len(next_basket), latent_size)
                        # Divide the sum of alpha embeddings by the number of items
                        # in the basket of the next step (always > 0)
                        next_alpha_average = tf.reduce_sum(next_alpha_by_basket, axis=0) / tf.cast(
                            len(next_basket), dtype=tf.float32
                        )  # Shape: (latent_size,)
                        next_step_basket_interaction_utilities = tf.tensor_scatter_nd_update(
                            tensor=next_step_basket_interaction_utilities,
                            indices=[[inner_idx]],
                            # Dot product between rho of the next item and the average alpha
                            # (scalar wrapped in a list to get an update of shape (1,))
                            updates=[tf.reduce_sum(rho_next_item * next_alpha_average)],
                        )

                    # Optimal next step: take the maximum utility among all possible next purchases
                    next_step_utility = tf.reduce_max(
                        next_psi + next_step_basket_interaction_utilities, axis=0
                    )  # Scalar (wrapped in a list below to get an update of shape (1,))
                    total_next_step_utilities = tf.tensor_scatter_nd_update(
                        tensor=total_next_step_utilities,
                        indices=[[idx]],
                        updates=[next_step_utility],
                    )

        return total_next_step_utilities  # Shape: (batch_size,)

    def compute_batch_utility(
        self,
        item_batch: Union[np.ndarray, tf.Tensor],
        basket_batch: np.ndarray,
        store_batch: np.ndarray,
        week_batch: np.ndarray,
        price_batch: np.ndarray,
        available_item_batch: np.ndarray,
    ) -> tf.Tensor:
        """Return the utility of each item of item_batch given its basket and context.

        The utility is the sum of psi (item intercept, store preferences, price
        effects and seasonal effects) and of the interaction between the item and
        the items already in the basket. If self.think_ahead is True, the next
        step utility returned by thinking_ahead is added on top.

        Parameters
        ----------
        item_batch: np.ndarray or tf.Tensor
            Batch of the purchased items ID (integers) for which to compute the utility
            Shape must be (batch_size,)
            (positive and negative samples concatenated together)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Padded with the value -1
            Shape must be (batch_size, max_basket_size)
        store_batch: np.ndarray
            Batch of store IDs (integers) for each purchased item
            Shape must be (batch_size,)
        week_batch: np.ndarray
            Batch of week numbers (integers) for each purchased item
            Shape must be (batch_size,)
        price_batch: np.ndarray
            Batch of prices for each purchased item (cast to float32)
            Shape must be (batch_size,)
        available_item_batch: np.ndarray
            Batch of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items)

        Returns
        -------
        item_utilities: tf.Tensor
            Utility of all the items in item_batch
            Shape must be (batch_size,)
        """
        # Ids, weeks and availabilities are handled as int32, prices as float32
        item_batch = tf.cast(item_batch, dtype=tf.int32)
        basket_batch = tf.cast(basket_batch, dtype=tf.int32)
        store_batch = tf.cast(store_batch, dtype=tf.int32)
        week_batch = tf.cast(week_batch, dtype=tf.int32)
        price_batch = tf.cast(price_batch, dtype=tf.float32)
        available_item_batch = tf.cast(available_item_batch, dtype=tf.int32)

        # Store preferences: row-wise dot product between theta of the store
        # and alpha of the item
        theta_store = tf.gather(self.theta, indices=store_batch)
        store_preferences = tf.reduce_sum(
            theta_store * tf.gather(self.alpha, indices=item_batch), axis=1
        )

        if self.item_intercept:
            # A lambda equal to 0 is prepended for the checkout item
            # (equivalent to translating the lambda values)
            lambda_with_checkout = tf.concat([[0.0], self.lambda_], axis=0)
            item_intercept = tf.gather(lambda_with_checkout, indices=item_batch)
        else:
            item_intercept = tf.zeros_like(store_preferences)

        if self.price_effects:
            gamma_store = tf.gather(self.gamma, indices=store_batch)
            # Row-wise dot product between gamma of the store and beta of the item
            price_sensitivity = tf.reduce_sum(
                gamma_store * tf.gather(self.beta, indices=item_batch), axis=1
            )
            # Epsilon is added to the prices to avoid NaN values (log(0))
            log_price = tf.math.log(price_batch + self.epsilon_price)
            price_effects = -1 * price_sensitivity * log_price
        else:
            # Placeholder handed over to thinking_ahead, which does not read it
            # when the price effects are disabled
            gamma_store = tf.zeros_like(store_batch)
            price_effects = tf.zeros_like(store_preferences)

        if self.seasonal_effects:
            delta_week = tf.gather(self.delta, indices=week_batch)
            # Row-wise dot product between delta of the week and mu of the item
            seasonal_effects = tf.reduce_sum(
                delta_week * tf.gather(self.mu, indices=item_batch), axis=1
            )
        else:
            # Placeholder handed over to thinking_ahead, which does not read it
            # when the seasonal effects are disabled
            delta_week = tf.zeros_like(week_batch)
            seasonal_effects = tf.zeros_like(store_preferences)

        # Per-item per-trip latent variable gathering the four effects
        psi = tf.reduce_sum(
            [item_intercept, store_preferences, price_effects, seasonal_effects],
            axis=0,
        )  # Shape: (batch_size,)

        # Size of each basket: 1 for a real item, 0 for the padding value -1
        real_item_flags = tf.where(condition=basket_batch > -1, x=1, y=0)
        count_items_in_basket = tf.reduce_sum(real_item_flags, axis=1)

        # Same baskets as a RaggedTensor, once the padding is dropped
        item_indices_ragged = tf.cast(
            tf.ragged.boolean_mask(basket_batch, basket_batch != -1),
            dtype=tf.int32,
        )

        if tf.size(item_indices_ragged) == 0:
            # Only empty baskets in the batch: nothing to gather from alpha
            # (Kept as a ragged tensor because TF's GraphMode requires the same
            # nested structure to be returned from all branches of a conditional)
            empty_embeddings = tf.zeros((len(item_batch), 0, self.alpha.shape[1]))
            alpha_by_basket = tf.RaggedTensor.from_tensor(empty_embeddings)
        else:
            # One alpha embedding per item of each basket
            alpha_by_basket = tf.ragged.map_flat_values(tf.gather, self.alpha, item_indices_ragged)

        # Sum of the alpha embeddings of each basket
        alpha_sum = tf.reduce_sum(alpha_by_basket, axis=1)

        # Basket sizes as a float column, to be broadcast against alpha_sum
        basket_size_column = tf.expand_dims(tf.cast(count_items_in_basket, dtype=tf.float32), -1)

        # Average of the alpha embeddings of each basket,
        # zeros for the empty baskets (instead of a division by 0)
        alpha_average = tf.where(
            condition=basket_size_column != 0,
            x=alpha_sum / basket_size_column,
            y=tf.zeros_like(alpha_sum),
        )

        # Row-wise dot product between rho of the item and the average alpha of its basket
        rho_item = tf.gather(self.rho, indices=item_batch)
        basket_interaction_utility = tf.reduce_sum(rho_item * alpha_average, axis=1)

        item_utilities = psi + basket_interaction_utility

        if self.think_ahead:
            # Thinking ahead: add the utility of the best next step
            next_step_utilities = self.thinking_ahead(
                item_batch=item_batch,
                ragged_basket_batch=item_indices_ragged,
                price_batch=price_batch,
                available_item_batch=available_item_batch,
                theta_store=theta_store,
                gamma_store=gamma_store,  # 0 if self.price_effects is False
                delta_week=delta_week,  # 0 if self.seasonal_effects is False
            )
            return item_utilities + next_step_utilities

        # No thinking ahead
        return item_utilities

    def get_negative_samples(
        self,
        available_items: np.ndarray,
        purchased_items: np.ndarray,
        future_purchases: np.ndarray,
        next_item: int,
        n_samples: int,
    ) -> tf.Tensor:
        """Sample randomly a set of items.

        (set of available items that are neither already purchased, nor purchased
        in the future, nor the next item; the checkout item 0 remains a candidate
        as long as it is not the next item)

        Parameters
        ----------
        available_items: np.ndarray
            Matrix indicating the availability (1) or not (0) of the products
            Shape must be (n_items,)
        purchased_items: np.ndarray
            List of items already purchased (already in the basket)
        future_purchases: np.ndarray
            List of items to be purchased in the future (not yet in the basket)
        next_item: int
            Next item (to be added in the basket)
        n_samples: int
            Number of samples to draw

        Returns
        -------
        tf.Tensor
            Random sample of n_samples item IDs (int32), each of them distinct from
            the next item and from the items already in the basket
            Shape: (n_samples,)

        Raises
        ------
        tf.errors.InvalidArgumentError
            If fewer than n_samples candidate items are left
            (raised by tf.debugging.assert_greater_equal)
        """
        # Convert inputs to tensors
        available_items = tf.cast(tf.convert_to_tensor(available_items), dtype=tf.int32)
        purchased_items = tf.cast(tf.convert_to_tensor(purchased_items), dtype=tf.int32)
        future_purchases = tf.cast(tf.convert_to_tensor(future_purchases), dtype=tf.int32)
        next_item = tf.cast(tf.convert_to_tensor(next_item), dtype=tf.int32)

        # Get the list of available items based on the availability matrix
        item_ids = tf.range(self.n_items)
        available_mask = tf.equal(available_items, 1)
        assortment = tf.boolean_mask(item_ids, available_mask)

        not_to_be_chosen = tf.concat(
            [purchased_items, future_purchases, tf.expand_dims(next_item, axis=0)], axis=0
        )

        # Ensure that the checkout item 0 can be picked as a negative sample
        # if it is not the next item
        # (otherwise 0 is always in not_to_be_chosen because it's in future_purchases)
        if not tf.equal(next_item, 0):
            not_to_be_chosen = tf.boolean_mask(not_to_be_chosen, not_to_be_chosen != 0)

        # Sample negative items from the assortment excluding not_to_be_chosen
        negative_samples = tf.boolean_mask(
            tensor=assortment,
            # Reduce the 2nd dimension of the boolean mask to get a 1D mask
            mask=~tf.reduce_any(
                tf.equal(tf.expand_dims(assortment, axis=1), not_to_be_chosen), axis=1
            ),
        )

        # Drawing exactly as many samples as there are candidates is allowed,
        # hence "less than or equal to"
        error_message = (
            "The number of negative samples to draw must be less than or equal to "
            "the number of available items not already purchased and "
            "distinct from the next item."
        )
        # Raise an error if n_samples > tf.size(negative_samples)
        tf.debugging.assert_greater_equal(
            tf.size(negative_samples), n_samples, message=error_message
        )

        # Randomize the sampling
        negative_samples = tf.random.shuffle(negative_samples)

        # Keep only n_samples
        return negative_samples[:n_samples]

    @tf.function  # Graph mode
    def compute_batch_loss(
        self,
        item_batch: np.ndarray,
        basket_batch: np.ndarray,
        future_batch: np.ndarray,
        store_batch: np.ndarray,
        week_batch: np.ndarray,
        price_batch: np.ndarray,
        available_item_batch: np.ndarray,
    ) -> tuple[tf.Tensor, tf.Tensor]:
        """Compute log-likelihood and loss for one batch of items.

        For each purchased (positive) item, self.n_negative_samples negative items
        are drawn with get_negative_samples. The log-likelihood is approximated by
        the sum of log(sigmoid(u_positive - u_negative)) over all the
        (positive, negative) pairs.

        Parameters
        ----------
        item_batch: np.ndarray
            Batch of purchased items ID (integers)
            Shape must be (batch_size,)
        basket_batch: np.ndarray
            Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
        future_batch: np.ndarray
            Batch of items to be purchased in the future (ID of items not yet in the
            basket) (arrays) for each purchased item
            Shape must be (batch_size, max_basket_size)
        store_batch: np.ndarray
            Batch of store IDs (integers) for each purchased item
            Shape must be (batch_size,)
        week_batch: np.ndarray
            Batch of week numbers (integers) for each purchased item
            Shape must be (batch_size,)
        price_batch: np.ndarray
            Batch of price arrays for each purchased item, indexed by item ID
            (the price of each positive and negative sample is looked up in it)
            Shape must be (batch_size, n_items)
        available_item_batch: np.ndarray
            List of availability matrices (indicating the availability (1) or not (0)
            of the products) (arrays) for each purchased item
            Shape must be (batch_size, n_items)

        Returns
        -------
        batch_loss: tf.Tensor
            Value of the loss for the batch (negative log-likelihood normalized by
            batch_size * n_negative_samples), scalar
        loglikelihood: tf.Tensor
            Computed log-likelihood of the batch of items
            Approximated by difference of utilities between positive and negative samples,
            scalar
        """
        batch_size = len(item_batch)
        n_negative = self.n_negative_samples
        item_batch = tf.cast(item_batch, dtype=tf.int32)

        # Negative sampling: n_negative items drawn for each positive sample,
        # concatenated sample after sample
        samples_per_positive = tf.concat(
            [
                self.get_negative_samples(
                    available_items=available_item_batch[idx],
                    purchased_items=basket_batch[idx],
                    future_purchases=future_batch[idx],
                    next_item=item_batch[idx],
                    n_samples=n_negative,
                )
                for idx in range(batch_size)
            ],
            axis=0,
        )
        # Reorder to have at the beginning of the array all the first negative samples
        # of all positive samples, then all the second negative samples, etc.
        # (same layout as the one produced by the calls to tf.tile below)
        negative_samples = tf.reshape(
            tf.transpose(tf.reshape(samples_per_positive, [batch_size, n_negative])),
            # Flatten 2D --> 1D
            shape=[-1],
        )

        augmented_item_batch = tf.cast(
            tf.concat([item_batch, negative_samples], axis=0), dtype=tf.int32
        )
        prices_tiled = tf.tile(price_batch, [n_negative + 1, 1])
        # Each time, pick only the price of the item in augmented_item_batch from the
        # corresponding price array
        augmented_price_batch = tf.gather(
            params=prices_tiled,
            indices=augmented_item_batch,
            # batch_dims=1 is equivalent to having an outer loop over
            # the first axis of params and indices
            batch_dims=1,
        )

        # Compute the utility of the positive samples followed by the negative samples
        all_utilities = self.compute_batch_utility(
            item_batch=augmented_item_batch,
            basket_batch=tf.tile(basket_batch, [n_negative + 1, 1]),
            store_batch=tf.tile(store_batch, [n_negative + 1]),
            week_batch=tf.tile(week_batch, [n_negative + 1]),
            price_batch=augmented_price_batch,
            available_item_batch=tf.tile(available_item_batch, [n_negative + 1, 1]),
        )

        # The first batch_size utilities belong to the positive samples,
        # the remaining ones to the negative samples
        positive_samples_utilities = all_utilities[:batch_size]
        negative_samples_utilities = all_utilities[batch_size:]

        # Log-likelihood of a batch = sum of log-likelihoods of its samples
        # log_sigmoid is used instead of log(sigmoid(.)): sigmoid underflows to 0
        # for very negative differences, which would make the log return -inf
        utility_gaps = (
            tf.tile(positive_samples_utilities, [n_negative]) - negative_samples_utilities
        )
        loglikelihood = tf.reduce_sum(tf.math.log_sigmoid(utility_gaps))  # Scalar

        # Maximize the predicted log-likelihood (ie minimize the negative log-likelihood)
        # normalized by the batch size and the number of negative samples
        batch_loss = -1 * loglikelihood / (batch_size * n_negative)

        return batch_loss, loglikelihood

train_iter_method property

Method used to generate sub-baskets from a purchased one.

Available methods are:
- 'shopper': randomly orders the purchases and creates the ordered sub-baskets: (1|0); (2|1); (3|1,2); (4|1,2,3); etc.
- 'aleacarta': creates all the sub-baskets with N-1 items: (4|1,2,3); (3|1,2,4); (2|1,3,4); (1|2,3,4)

Returns:

Type Description
str

Data generation method.

trainable_weights: list[tf.Variable] property

Latent parameters of the model.

Returns:

Type Description
list[Variable]

Latent parameters of the model

__init__(item_intercept=True, price_effects=False, seasonal_effects=False, think_ahead=False, latent_sizes={'preferences': 4, 'price': 4, 'season': 4}, n_negative_samples=2, optimizer='adam', callbacks=None, lr=0.001, epochs=10, batch_size=32, grad_clip_value=None, weight_decay=None, momentum=0.0, epsilon_price=1e-05, **kwargs)

Initialize the Shopper model.

Parameters:

Name Type Description Default
item_intercept bool

Whether to include item intercept in the model, by default True Corresponds to the item intercept

True
price_effects bool

Whether to include price effects in the model, by default False

False
seasonal_effects bool

Whether to include seasonal effects in the model, by default False

False
think_ahead bool

Whether to include "thinking ahead" in the model, by default False

False
latent_sizes dict[str]

Lengths of the vector representation of the latent parameters latent_sizes["preferences"]: length of one vector of theta, alpha, rho latent_sizes["price"]: length of one vector of gamma, beta latent_sizes["season"]: length of one vector of delta, mu by default {"preferences": 4, "price": 4, "season": 4}

{'preferences': 4, 'price': 4, 'season': 4}
n_negative_samples int

Number of negative samples to draw for each positive sample for the training, by default 2 Must be > 0

2
optimizer str

Optimizer to use for training, by default "adam"

'adam'
callbacks Union[CallbackList, None]

List of callbacks to add to model.fit, by default None and only add History

None
lr float

Learning rate, by default 1e-3

0.001
epochs int

Number of epochs, by default 10

10
batch_size int

Batch size, by default 32

32
grad_clip_value Union[float, None]

Value to clip the gradient, by default None

None
weight_decay Union[float, None]

Weight decay, by default None

None
momentum float

Momentum for the optimizer, by default 0. For SGD only

0.0
epsilon_price float

Epsilon value to add to prices to avoid NaN values (log(0)), by default 1e-5

1e-05
Source code in choice_learn/basket_models/shopper.py
def __init__(
    self,
    item_intercept: bool = True,
    price_effects: bool = False,
    seasonal_effects: bool = False,
    think_ahead: bool = False,
    latent_sizes: dict[str, int] = {"preferences": 4, "price": 4, "season": 4},
    n_negative_samples: int = 2,
    optimizer: str = "adam",
    callbacks: Union[tf.keras.callbacks.CallbackList, None] = None,
    lr: float = 1e-3,
    epochs: int = 10,
    batch_size: int = 32,
    grad_clip_value: Union[float, None] = None,
    weight_decay: Union[float, None] = None,
    momentum: float = 0.0,
    epsilon_price: float = 1e-5,
    **kwargs,
) -> None:
    """Initialize the Shopper model.

    Parameters
    ----------
    item_intercept: bool, optional
        Whether to include item intercept in the model, by default True
    price_effects: bool, optional
        Whether to include price effects in the model, by default False
    seasonal_effects: bool, optional
        Whether to include seasonal effects in the model, by default False
    think_ahead: bool, optional
        Whether to include "thinking ahead" in the model, by default False
    latent_sizes: dict[str, int]
        Lengths of the vector representation of the latent parameters
        latent_sizes["preferences"]: length of one vector of theta, alpha, rho
        latent_sizes["price"]: length of one vector of gamma, beta
        latent_sizes["season"]: length of one vector of delta, mu
        by default {"preferences": 4, "price": 4, "season": 4}
        The dictionary is copied: missing sizes are filled in on the copy only
    n_negative_samples: int, optional
        Number of negative samples to draw for each positive sample for the training,
        by default 2
        Must be > 0
    optimizer: str, optional
        Optimizer to use for training, by default "adam"
    callbacks: tf.keras.callbacks.Callbacklist, optional
        List of callbacks to add to model.fit, by default None and only add History
    lr: float, optional
        Learning rate, by default 1e-3
    epochs: int, optional
        Number of epochs, by default 10
    batch_size: int, optional
        Batch size, by default 32
    grad_clip_value: float, optional
        Value to clip the gradient, by default None
    weight_decay: float, optional
        Weight decay, by default None
    momentum: float, optional
        Momentum for the optimizer, by default 0. For SGD only
    epsilon_price: float, optional
        Epsilon value to add to prices to avoid NaN values (log(0)), by default 1e-5

    Raises
    ------
    ValueError
        If latent_sizes contains a key other than "preferences", "price" or "season",
        or if n_negative_samples is not > 0
    """
    self.item_intercept = item_intercept
    self.price_effects = price_effects
    self.seasonal_effects = seasonal_effects
    self.think_ahead = think_ahead

    # Work on a copy: the default sizes filled in below must neither leak into the
    # shared default argument nor modify the dictionary owned by the caller
    latent_sizes = dict(latent_sizes)

    # (key in latent_sizes, name used in the warning, whether the size is needed)
    size_requirements = (
        ("preferences", "preferences", True),
        ("price", "price_effects", self.price_effects),
        ("season", "seasonal_effects", self.seasonal_effects),
    )
    for key, label, needed in size_requirements:
        if needed and key not in latent_sizes:
            logging.warning(
                f"No latent size value has been specified for {label}, "
                "switching to default value 4."
            )
            latent_sizes[key] = 4

    for val in latent_sizes:
        if val not in ["preferences", "price", "season"]:
            raise ValueError(f"Unknown value for latent_sizes dict: {val}.")

    if n_negative_samples <= 0:
        raise ValueError("n_negative_samples must be > 0.")

    self.latent_sizes = latent_sizes
    self.n_negative_samples = n_negative_samples

    self.epsilon_price = epsilon_price

    super().__init__(
        optimizer=optimizer,
        callbacks=callbacks,
        lr=lr,
        epochs=epochs,
        batch_size=batch_size,
        grad_clip_value=grad_clip_value,
        weight_decay=weight_decay,
        momentum=momentum,
        **kwargs,
    )

    # True if at least one GPU is visible, False otherwise
    # /!\ If a model trained on GPU is loaded on CPU, self.on_gpu must be set
    # to False manually after loading the model, and vice versa
    self.on_gpu = len(tf.config.get_visible_devices("GPU")) > 0

    self.instantiated = False

compute_batch_loss(item_batch, basket_batch, future_batch, store_batch, week_batch, price_batch, available_item_batch)

Compute log-likelihood and loss for one batch of items.

Parameters:

Name Type Description Default
item_batch ndarray

Batch of purchased items ID (integers) Shape must be (batch_size,)

required
basket_batch ndarray

Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item Shape must be (batch_size, max_basket_size)

required
future_batch ndarray

Batch of items to be purchased in the future (ID of items not yet in the basket) (arrays) for each purchased item Shape must be (batch_size, max_basket_size)

required
store_batch ndarray

Batch of store IDs (integers) for each purchased item Shape must be (batch_size,)

required
week_batch ndarray

Batch of week numbers (integers) for each purchased item Shape must be (batch_size,)

required
price_batch ndarray

Batch of price arrays for each purchased item, indexed by item ID (the price of each positive and negative sample is looked up in it) Shape must be (batch_size, n_items)

required
available_item_batch ndarray

List of availability matrices (indicating the availability (1) or not (0) of the products) (arrays) for each purchased item Shape must be (batch_size, n_items)

required

Returns:

Name Type Description
batch_loss Variable

Value of the loss for the batch (normalized negative log-likelihood), Shape must be (1,)

loglikelihood Variable

Computed log-likelihood of the batch of items Approximated by difference of utilities between positive and negative samples Shape must be (1,)

Source code in choice_learn/basket_models/shopper.py
@tf.function  # Graph mode
def compute_batch_loss(
    self,
    item_batch: np.ndarray,
    basket_batch: np.ndarray,
    future_batch: np.ndarray,
    store_batch: np.ndarray,
    week_batch: np.ndarray,
    price_batch: np.ndarray,
    available_item_batch: np.ndarray,
) -> tuple[tf.Tensor, tf.Tensor]:
    """Compute log-likelihood and loss for one batch of items.

    For each purchased (positive) item, self.n_negative_samples negative items
    are drawn with get_negative_samples. The log-likelihood is approximated by
    the sum of log(sigmoid(u_positive - u_negative)) over all the
    (positive, negative) pairs.

    Parameters
    ----------
    item_batch: np.ndarray
        Batch of purchased items ID (integers)
        Shape must be (batch_size,)
    basket_batch: np.ndarray
        Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item
        Shape must be (batch_size, max_basket_size)
    future_batch: np.ndarray
        Batch of items to be purchased in the future (ID of items not yet in the
        basket) (arrays) for each purchased item
        Shape must be (batch_size, max_basket_size)
    store_batch: np.ndarray
        Batch of store IDs (integers) for each purchased item
        Shape must be (batch_size,)
    week_batch: np.ndarray
        Batch of week numbers (integers) for each purchased item
        Shape must be (batch_size,)
    price_batch: np.ndarray
        Batch of price arrays for each purchased item, indexed by item ID
        (the price of each positive and negative sample is looked up in it)
        Shape must be (batch_size, n_items)
    available_item_batch: np.ndarray
        List of availability matrices (indicating the availability (1) or not (0)
        of the products) (arrays) for each purchased item
        Shape must be (batch_size, n_items)

    Returns
    -------
    batch_loss: tf.Tensor
        Value of the loss for the batch (negative log-likelihood normalized by
        batch_size * n_negative_samples), scalar
    loglikelihood: tf.Tensor
        Computed log-likelihood of the batch of items
        Approximated by difference of utilities between positive and negative samples,
        scalar
    """
    batch_size = len(item_batch)
    n_negative = self.n_negative_samples
    item_batch = tf.cast(item_batch, dtype=tf.int32)

    # Negative sampling: n_negative items drawn for each positive sample,
    # concatenated sample after sample
    samples_per_positive = tf.concat(
        [
            self.get_negative_samples(
                available_items=available_item_batch[idx],
                purchased_items=basket_batch[idx],
                future_purchases=future_batch[idx],
                next_item=item_batch[idx],
                n_samples=n_negative,
            )
            for idx in range(batch_size)
        ],
        axis=0,
    )
    # Reorder to have at the beginning of the array all the first negative samples
    # of all positive samples, then all the second negative samples, etc.
    # (same layout as the one produced by the calls to tf.tile below)
    negative_samples = tf.reshape(
        tf.transpose(tf.reshape(samples_per_positive, [batch_size, n_negative])),
        # Flatten 2D --> 1D
        shape=[-1],
    )

    augmented_item_batch = tf.cast(
        tf.concat([item_batch, negative_samples], axis=0), dtype=tf.int32
    )
    prices_tiled = tf.tile(price_batch, [n_negative + 1, 1])
    # Each time, pick only the price of the item in augmented_item_batch from the
    # corresponding price array
    augmented_price_batch = tf.gather(
        params=prices_tiled,
        indices=augmented_item_batch,
        # batch_dims=1 is equivalent to having an outer loop over
        # the first axis of params and indices
        batch_dims=1,
    )

    # Compute the utility of the positive samples followed by the negative samples
    all_utilities = self.compute_batch_utility(
        item_batch=augmented_item_batch,
        basket_batch=tf.tile(basket_batch, [n_negative + 1, 1]),
        store_batch=tf.tile(store_batch, [n_negative + 1]),
        week_batch=tf.tile(week_batch, [n_negative + 1]),
        price_batch=augmented_price_batch,
        available_item_batch=tf.tile(available_item_batch, [n_negative + 1, 1]),
    )

    # The first batch_size utilities belong to the positive samples,
    # the remaining ones to the negative samples
    positive_samples_utilities = all_utilities[:batch_size]
    negative_samples_utilities = all_utilities[batch_size:]

    # Log-likelihood of a batch = sum of log-likelihoods of its samples
    # log_sigmoid is used instead of log(sigmoid(.)): sigmoid underflows to 0
    # for very negative differences, which would make the log return -inf
    utility_gaps = (
        tf.tile(positive_samples_utilities, [n_negative]) - negative_samples_utilities
    )
    loglikelihood = tf.reduce_sum(tf.math.log_sigmoid(utility_gaps))  # Scalar

    # Maximize the predicted log-likelihood (ie minimize the negative log-likelihood)
    # normalized by the batch size and the number of negative samples
    batch_loss = -1 * loglikelihood / (batch_size * n_negative)

    return batch_loss, loglikelihood

compute_batch_utility(item_batch, basket_batch, store_batch, week_batch, price_batch, available_item_batch)

Compute the utility of all the items in item_batch.

Parameters:

Name Type Description Default
item_batch Union[ndarray, Tensor]

Batch of the purchased items ID (integers) for which to compute the utility Shape must be (batch_size,) (positive and negative samples concatenated together)

required
basket_batch ndarray

Batch of baskets (ID of items already in the baskets) (arrays) for each purchased item Shape must be (batch_size, max_basket_size)

required
store_batch ndarray

Batch of store IDs (integers) for each purchased item Shape must be (batch_size,)

required
week_batch ndarray

Batch of week numbers (integers) for each purchased item Shape must be (batch_size,)

required
price_batch ndarray

Batch of prices (integers) for each purchased item Shape must be (batch_size,)

required
available_item_batch ndarray

Batch of availability matrices (indicating the availability (1) or not (0) of the products) (arrays) for each purchased item Shape must be (batch_size, n_items)

required

Returns:

Name Type Description
item_utilities Tensor

Utility of all the items in item_batch Shape must be (batch_size,)

Source code in choice_learn/basket_models/shopper.py
def compute_batch_utility(
    self,
    item_batch: Union[np.ndarray, tf.Tensor],
    basket_batch: np.ndarray,
    store_batch: np.ndarray,
    week_batch: np.ndarray,
    price_batch: np.ndarray,
    available_item_batch: np.ndarray,
) -> tf.Tensor:
    """Return the utility of every item of item_batch given its basket.

    The utility is the sum of the per-item per-trip latent variable psi (item
    intercept, store preferences, price effects, seasonal effects) and of the
    interaction between the item and the average embedding of the items already
    in the basket. When self.think_ahead is True, the output of
    self.thinking_ahead is added on top.

    Parameters
    ----------
    item_batch: np.ndarray or tf.Tensor
        Batch of the purchased items ID (integers) for which to compute the utility
        Shape must be (batch_size,)
        (positive and negative samples concatenated together)
    basket_batch: np.ndarray
        Batch of baskets (ID of items already in the baskets, padded with -1)
        for each purchased item
        Shape must be (batch_size, max_basket_size)
    store_batch: np.ndarray
        Batch of store IDs (integers) for each purchased item
        Shape must be (batch_size,)
    week_batch: np.ndarray
        Batch of week numbers (integers) for each purchased item
        Shape must be (batch_size,)
    price_batch: np.ndarray
        Batch of prices for each purchased item (cast to float32)
        Shape must be (batch_size,)
    available_item_batch: np.ndarray
        Batch of availability matrices (indicating the availability (1) or not (0)
        of the products) (arrays) for each purchased item
        Shape must be (batch_size, n_items)

    Returns
    -------
    item_utilities: tf.Tensor
        Utility of all the items in item_batch
        Shape must be (batch_size,)
    """
    # Uniformize the dtypes of the inputs
    item_batch = tf.cast(item_batch, dtype=tf.int32)
    basket_batch = tf.cast(basket_batch, dtype=tf.int32)
    store_batch = tf.cast(store_batch, dtype=tf.int32)
    week_batch = tf.cast(week_batch, dtype=tf.int32)
    price_batch = tf.cast(price_batch, dtype=tf.float32)
    available_item_batch = tf.cast(available_item_batch, dtype=tf.int32)

    # ---- Store preferences: dot product theta_store . alpha_item
    theta_store = tf.gather(self.theta, indices=store_batch)
    store_term = tf.reduce_sum(
        theta_store * tf.gather(self.alpha, indices=item_batch), axis=1
    )

    # ---- Item intercept
    if self.item_intercept:
        # The lambda of the checkout item (ID 0) is forced to 0 by prepending it
        # (equivalent to translating the lambda values)
        lambda_with_checkout = tf.concat([[0.0], self.lambda_], axis=0)
        intercept_term = tf.gather(lambda_with_checkout, indices=item_batch)
    else:
        intercept_term = tf.zeros_like(store_term)

    # ---- Price effects: -(gamma_store . beta_item) * log(price + epsilon)
    if self.price_effects:
        gamma_store = tf.gather(self.gamma, indices=store_batch)
        price_sensitivity = tf.reduce_sum(
            gamma_store * tf.gather(self.beta, indices=item_batch), axis=1
        )
        # epsilon_price avoids NaN values (log(0))
        log_price = tf.math.log(price_batch + self.epsilon_price)
        price_term = -1 * price_sensitivity * log_price
    else:
        gamma_store = tf.zeros_like(store_batch)
        price_term = tf.zeros_like(store_term)

    # ---- Seasonal effects: dot product delta_week . mu_item
    if self.seasonal_effects:
        delta_week = tf.gather(self.delta, indices=week_batch)
        season_term = tf.reduce_sum(
            delta_week * tf.gather(self.mu, indices=item_batch), axis=1
        )
    else:
        delta_week = tf.zeros_like(week_batch)
        season_term = tf.zeros_like(store_term)

    # Per-item per-trip latent variable gathering the four effects above
    psi = tf.reduce_sum(
        [intercept_term, store_term, price_term, season_term],
        axis=0,
    )  # Shape: (batch_size,)

    # ---- Interaction with the items already in the basket
    # Number of real items (ie not the padding value -1) in each basket
    basket_sizes = tf.reduce_sum(
        tf.where(condition=basket_batch > -1, x=1, y=0),
        axis=1,
    )

    # Basket item IDs with the padding removed, one ragged row per basket
    item_indices_ragged = tf.cast(
        tf.ragged.boolean_mask(basket_batch, basket_batch != -1),
        dtype=tf.int32,
    )

    if tf.size(item_indices_ragged) == 0:
        # All the baskets are empty: there is no alpha embedding to gather
        # (A ragged tensor is built anyway because TF's GraphMode requires the same
        # nested structure to be returned from all branches of a conditional)
        alpha_by_basket = tf.RaggedTensor.from_tensor(
            tf.zeros((len(item_batch), 0, self.alpha.shape[1]))
        )
    else:
        # Look up the alpha embedding of every item of every basket
        alpha_by_basket = tf.ragged.map_flat_values(tf.gather, self.alpha, item_indices_ragged)

    # Sum of the alpha embeddings of each basket
    alpha_sum = tf.reduce_sum(alpha_by_basket, axis=1)

    rho_item = tf.gather(self.rho, indices=item_batch)

    # Average the alpha embeddings over the basket; empty baskets get a zero
    # vector instead of NaN values (division by 0)
    basket_sizes_column = tf.expand_dims(tf.cast(basket_sizes, dtype=tf.float32), -1)
    alpha_average = tf.where(
        condition=basket_sizes_column != 0,
        x=alpha_sum / basket_sizes_column,
        y=tf.zeros_like(alpha_sum),
    )

    # Dot product rho_item . alpha_average along the last dimension
    basket_interaction_utility = tf.reduce_sum(rho_item * alpha_average, axis=1)

    item_utilities = psi + basket_interaction_utility

    if self.think_ahead:
        # Thinking ahead: add the utility of the next step
        next_step_utilities = self.thinking_ahead(
            item_batch=item_batch,
            ragged_basket_batch=item_indices_ragged,
            price_batch=price_batch,
            available_item_batch=available_item_batch,
            theta_store=theta_store,
            gamma_store=gamma_store,  # 0 if self.price_effects is False
            delta_week=delta_week,  # 0 if self.seasonal_effects is False
        )
        return item_utilities + next_step_utilities

    # No thinking ahead
    return item_utilities

get_negative_samples(available_items, purchased_items, future_purchases, next_item, n_samples)

Sample randomly a set of items.

(set of items not already purchased and not necessarily from the basket)

Parameters:

Name Type Description Default
available_items ndarray

Matrix indicating the availability (1) or not (0) of the products Shape must be (n_items,)

required
purchased_items ndarray

List of items already purchased (already in the basket)

required
future_purchases ndarray

List of items to be purchased in the future (not yet in the basket)

required
next_item int

Next item (to be added in the basket)

required
n_samples int

Number of samples to draw

required

Returns:

Type Description
list[int]

Random sample of items, each of them distinct from the next item and from the items already in the basket

Source code in choice_learn/basket_models/shopper.py
def get_negative_samples(
    self,
    available_items: np.ndarray,
    purchased_items: np.ndarray,
    future_purchases: np.ndarray,
    next_item: int,
    n_samples: int,
) -> list[int]:
    """Draw random negative items for one positive sample.

    The candidates are the available items that are neither in the basket, nor
    among the future purchases, nor the next item. The checkout item 0 stays a
    candidate unless it is the next item.

    Parameters
    ----------
    available_items: np.ndarray
        Matrix indicating the availability (1) or not (0) of the products
        Shape must be (n_items,)
    purchased_items: np.ndarray
        List of items already purchased (already in the basket)
    future_purchases: np.ndarray
        List of items to be purchased in the future (not yet in the basket)
    next_item: int
        Next item (to be added in the basket)
    n_samples: int
        Number of samples to draw

    Returns
    -------
    list[int]
        Random sample of n_samples item IDs (returned as a 1D int32 tensor),
        each of them distinct from the next item and from the items already
        in the basket
    """
    # Work with int32 tensors only
    available_items = tf.cast(tf.convert_to_tensor(available_items), dtype=tf.int32)
    purchased_items = tf.cast(tf.convert_to_tensor(purchased_items), dtype=tf.int32)
    future_purchases = tf.cast(tf.convert_to_tensor(future_purchases), dtype=tf.int32)
    next_item = tf.cast(tf.convert_to_tensor(next_item), dtype=tf.int32)

    # IDs of the items flagged as available (1) in the availability matrix
    assortment = tf.boolean_mask(tf.range(self.n_items), tf.equal(available_items, 1))

    # Items that must not be drawn as negative samples
    excluded_items = tf.concat(
        [purchased_items, future_purchases, tf.expand_dims(next_item, axis=0)], axis=0
    )

    # The checkout item 0 is always in future_purchases: remove it from the
    # excluded items so that it can be drawn, unless it is the next item
    if not tf.equal(next_item, 0):
        excluded_items = tf.boolean_mask(excluded_items, excluded_items != 0)

    # Compare every item of the assortment with every excluded item, then
    # reduce the 2nd dimension to get a 1D mask over the assortment
    is_excluded = tf.reduce_any(
        tf.equal(tf.expand_dims(assortment, axis=1), excluded_items), axis=1
    )
    candidates = tf.boolean_mask(tensor=assortment, mask=tf.logical_not(is_excluded))

    # Raise an error if n_samples > tf.size(candidates)
    tf.debugging.assert_greater_equal(
        tf.size(candidates),
        n_samples,
        message=(
            "The number of negative samples to draw must be less than "
            "the number of available items not already purchased and "
            "distinct from the next item."
        ),
    )

    # Shuffle the candidates and keep only the first n_samples
    return tf.random.shuffle(candidates)[:n_samples]

instantiate(n_items, n_stores=0)

Instantiate the Shopper model.

Parameters:

Name Type Description Default
n_items int

Number of items to consider, i.e. the number of items in the dataset (includes the checkout item)

required
n_stores int

Number of stores in the population

0
Source code in choice_learn/basket_models/shopper.py
def instantiate(
    self,
    n_items: int,
    n_stores: int = 0,
) -> None:
    """Create the trainable embeddings of the Shopper model.

    Parameters
    ----------
    n_items: int
        Number of items to consider, i.e. the number of items in the dataset
        (includes the checkout item)
    n_stores: int
        Number of stores in the population.
        When it is 0 and price effects are enabled, a single store is used
        so that the gamma embedding has at least one row.
    """

    def _normal_variable(shape, name, stddev=1.0):
        # Trainable variable drawn from a centered normal distribution.
        # NOTE(review): every embedding is drawn with the same seed (42);
        # embeddings of identical shape (e.g. rho and alpha) may therefore
        # start with identical values -- confirm this is intended.
        draw = tf.random_normal_initializer(mean=0, stddev=stddev, seed=42)
        return tf.Variable(draw(shape=shape), trainable=True, name=name)

    self.n_items = n_items

    # Price effects need a gamma embedding with at least one store
    # (the default store id is 0)
    store_count = 1 if (n_stores == 0 and self.price_effects) else n_stores
    self.n_stores = store_count

    # Item embeddings (rho, alpha) and store embedding (theta),
    # all of width latent_sizes["preferences"]
    self.rho = _normal_variable((n_items, self.latent_sizes["preferences"]), "rho")
    self.alpha = _normal_variable((n_items, self.latent_sizes["preferences"]), "alpha")
    self.theta = _normal_variable((store_count, self.latent_sizes["preferences"]), "theta")

    if self.item_intercept:
        # One scalar intercept per item, except for the checkout item
        # (no lambda for the checkout item: it is set to 0 later)
        self.lambda_ = _normal_variable((n_items - 1,), "lambda_")

    if self.price_effects:
        # Price sensitivity: per-item beta and per-store gamma,
        # both of width latent_sizes["price"]
        self.beta = _normal_variable((n_items, self.latent_sizes["price"]), "beta")
        self.gamma = _normal_variable((store_count, self.latent_sizes["price"]), "gamma")

    if self.seasonal_effects:
        # Seasonal effects: per-item mu and a delta with 52 rows,
        # both of width latent_sizes["season"] and a smaller stddev (0.1)
        self.mu = _normal_variable((n_items, self.latent_sizes["season"]), "mu", stddev=0.1)
        self.delta = _normal_variable((52, self.latent_sizes["season"]), "delta", stddev=0.1)

    self.instantiated = True

thinking_ahead(item_batch, ragged_basket_batch, price_batch, available_item_batch, theta_store, gamma_store, delta_week)

Compute the next-step (thinking-ahead) utility of all the items in item_batch.

Parameters:

Name Type Description Default
item_batch Union[ndarray, Tensor]

Batch of the purchased items ID (integers) for which to compute the utility Shape must be (batch_size,) (positive and negative samples concatenated together)

required
ragged_basket_batch RaggedTensor

Batch of baskets (ID of items already in the baskets) (arrays) without padding for each purchased item Shape must be (batch_size, None)

required
price_batch ndarray

Batch of prices (integers) for each purchased item Shape must be (batch_size,)

required
available_item_batch ndarray

Batch of availability matrices (indicating the availability (1) or not (0) of the products) (arrays) for each purchased item Shape must be (batch_size, n_items)

required
theta_store Tensor

Slices from theta embedding gathered according to the indices that correspond to the store of each purchased item in the batch Shape must be (batch_size, latent_sizes["preferences"])

required
gamma_store Tensor

Slices from gamma embedding gathered according to the indices that correspond to the store of each purchased item in the batch Shape must be (batch_size, latent_sizes["price"])

required
delta_week Tensor

Slices from delta embedding gathered according to the indices that correspond to the week of each purchased item in the batch Shape must be (batch_size, latent_sizes["season"])

required

Returns:

Name Type Description
total_next_step_utilities Tensor

Next-step utility of all the items in item_batch. Shape must be (batch_size,)

Source code in choice_learn/basket_models/shopper.py
def thinking_ahead(
    self,
    item_batch: Union[np.ndarray, tf.Tensor],
    ragged_basket_batch: tf.RaggedTensor,
    price_batch: np.ndarray,
    available_item_batch: np.ndarray,
    theta_store: tf.Tensor,
    gamma_store: tf.Tensor,
    delta_week: tf.Tensor,
) -> tf.Tensor:
    """Compute the next-step (thinking-ahead) utility of the items in item_batch.

    For each element of the batch, the current item is hypothetically added
    to the basket and the best utility reachable at the following step
    (maximum over the available items not yet in that basket) is returned.
    The value is 0 when the basket already ends with the checkout item 0
    or when no item is left to purchase.

    Parameters
    ----------
    item_batch: np.ndarray or tf.Tensor
        Batch of the purchased items ID (integers) for which to compute the utility
        Shape must be (batch_size,)
        (positive and negative samples concatenated together)
    ragged_basket_batch: tf.RaggedTensor
        Batch of baskets (ID of items already in the baskets) (arrays) without padding
        for each purchased item
        Shape must be (batch_size, None)
    price_batch: np.ndarray
        Batch of prices (integers) for each purchased item
        Shape must be (batch_size,)
    available_item_batch: np.ndarray
        Batch of availability matrices (indicating the availability (1) or not (0)
        of the products) (arrays) for each purchased item
        Shape must be (batch_size, n_items)
    theta_store: tf.Tensor
        Slices from theta embedding gathered according to the indices that correspond
        to the store of each purchased item in the batch
        Shape must be (batch_size, latent_sizes["preferences"])
    gamma_store: tf.Tensor
        Slices from gamma embedding gathered according to the indices that correspond
        to the store of each purchased item in the batch
        Shape must be (batch_size, latent_sizes["price"])
    delta_week: tf.Tensor
        Slices from delta embedding gathered according to the indices that correspond
        to the week of each purchased item in the batch
        Shape must be (batch_size, latent_sizes["season"])

    Returns
    -------
    total_next_step_utilities: tf.Tensor
        Next step utility of all the items in item_batch
        Shape must be (batch_size,)
    """
    total_next_step_utilities = tf.zeros_like(item_batch, dtype=tf.float32)
    # The item IDs do not depend on the batch element: build them only once
    item_ids = tf.range(self.n_items)

    # Compute the next step item utility for each element of the batch, one by one
    # TODO: avoid a for loop on ragged_basket_batch at a later stage
    for idx in tf.range(ragged_basket_batch.shape[0]):
        basket = tf.gather(ragged_basket_batch, idx)
        if len(basket) != 0 and basket[-1] == 0:
            # No thinking ahead when the basket ends already with the checkout item 0
            total_next_step_utilities = tf.tensor_scatter_nd_update(
                tensor=total_next_step_utilities, indices=[[idx]], updates=[0]
            )

        else:
            # Basket with the hypothetical current item
            next_basket = tf.concat([basket, [item_batch[idx]]], axis=0)
            # Get the list of available items based on the availability matrix
            available_mask = tf.equal(available_item_batch[idx], 1)
            assortment = tf.boolean_mask(item_ids, available_mask)
            # Keep the available items that are not in the hypothetical basket
            hypothetical_next_purchases = tf.boolean_mask(
                assortment,
                ~tf.reduce_any(
                    tf.equal(tf.expand_dims(assortment, axis=1), next_basket), axis=1
                ),
            )
            # Check if there are still items to purchase during the next step
            if len(hypothetical_next_purchases) == 0:
                # No more items to purchase: next step impossible
                total_next_step_utilities = tf.tensor_scatter_nd_update(
                    tensor=total_next_step_utilities, indices=[[idx]], updates=[0]
                )
            else:
                # Compute the dot product along the last dimension between the embeddings
                # of the given store's theta and alpha of all the items
                hypothetical_store_preferences = tf.reduce_sum(
                    theta_store[idx] * self.alpha, axis=1
                )

                if self.item_intercept:
                    # Manually enforce the lambda of the checkout item to be 0
                    # (equivalent to translating the lambda values)
                    hypothetical_item_intercept = tf.concat([[0.0], self.lambda_], axis=0)
                else:
                    hypothetical_item_intercept = tf.zeros_like(hypothetical_store_preferences)

                if self.price_effects:
                    # NOTE(review): the same price_batch[idx] is applied to every
                    # hypothetical item -- confirm this is the intended approximation
                    hypothetical_price_effects = (
                        -1
                        # Compute the dot product along the last dimension between
                        # the embeddings of the given store's gamma and beta
                        # of all the items
                        * tf.reduce_sum(gamma_store[idx] * self.beta, axis=1)
                        * tf.math.log(price_batch[idx] + self.epsilon_price)
                    )
                else:
                    hypothetical_price_effects = tf.zeros_like(hypothetical_store_preferences)

                if self.seasonal_effects:
                    # Compute the dot product along the last dimension between the embeddings
                    # of delta of the given week and mu of all the items
                    hypothetical_seasonal_effects = tf.reduce_sum(
                        delta_week[idx] * self.mu, axis=1
                    )
                else:
                    hypothetical_seasonal_effects = tf.zeros_like(
                        hypothetical_store_preferences
                    )

                # The effects of item intercept, store preferences, price sensitivity
                # and seasonal effects are combined in the per-item per-trip latent variable
                hypothetical_psi = tf.reduce_sum(
                    [
                        hypothetical_item_intercept,  # 0 if self.item_intercept is False
                        hypothetical_store_preferences,
                        hypothetical_price_effects,  # 0 if self.price_effects is False
                        hypothetical_seasonal_effects,  # 0 if self.seasonal_effects is False
                    ],
                    axis=0,
                )  # Shape: (n_items,)

                # Shape: (len(hypothetical_next_purchases),)
                next_psi = tf.gather(hypothetical_psi, indices=hypothetical_next_purchases)

                # The average alpha of the next-step basket is the same for every
                # hypothetical "next" item, so it is computed only once
                # (before ensure that indices are integers)
                next_alpha_by_basket = tf.gather(
                    self.alpha, indices=tf.cast(next_basket, dtype=tf.int32)
                )  # Shape: (len(next_basket), latent_size)
                # Divide the sum of alpha embeddings by the number of items
                # in the basket of the next step (always > 0)
                next_alpha_average = tf.reduce_sum(next_alpha_by_basket, axis=0) / tf.cast(
                    len(next_basket), dtype=tf.float32
                )  # Shape: (latent_size,)

                # Basket interaction utility of all the hypothetical "next" items
                # at once: dot product between the rho of each candidate item and
                # the average alpha of the basket
                rho_next_items = tf.gather(
                    self.rho, indices=hypothetical_next_purchases
                )  # Shape: (len(hypothetical_next_purchases), latent_size)
                next_step_basket_interaction_utilities = tf.reduce_sum(
                    rho_next_items * next_alpha_average, axis=1
                )  # Shape: (len(hypothetical_next_purchases),)

                # Optimal next step: take the maximum utility among all possible next purchases
                next_step_utility = tf.reduce_max(
                    next_psi + next_step_basket_interaction_utilities, axis=0
                )  # Scalar
                total_next_step_utilities = tf.tensor_scatter_nd_update(
                    tensor=total_next_step_utilities,
                    indices=[[idx]],
                    updates=[next_step_utility],
                )

    return total_next_step_utilities  # Shape: (batch_size,)