SHOPPER model Usage

Introduction to basket modelling with SHOPPER

We use a synthetic dataset to demonstrate how to use the SHOPPER model.

# Install necessary requirements

# If you run this notebook on Google Colab, or in standalone mode, you need to install the required packages.
# Uncomment the following lines:

# !pip install choice-learn

# If you run the notebook from within the GitHub repository, you need to run the following lines; they can be skipped otherwise:
import sys

# Add the directory two levels above this notebook (presumably the repository
# root) to the import path so that the local package can be imported.
sys.path.append("../../")

import os
# An empty CUDA_VISIBLE_DEVICES hides every CUDA device from this process,
# i.e. the notebook runs on CPU. Remove or edit this line to enable GPU use.
# NOTE(review): presumably this must stay before the choice_learn import below.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import matplotlib.pyplot as plt
import numpy as np

from choice_learn.basket_models import Shopper, Trip

Dataset

For simplicity, we load a trip dataset whose creation is detailed in the notebook data.ipynb (link).

Please run the notebook data.ipynb to know more about this dataset.

from synthetic_dataset import get_dataset

# Load the synthetic trip dataset used throughout this notebook.
data = get_dataset()

print(data)

# One-sentence summary of the dataset dimensions exposed by the dataset object.
dataset_summary = (
    f"\nThe TripDataset 'data' contains {data.n_items} distinct items "
    f"that appear in {data.n_samples} transactions carried out at "
    f"{data.n_stores} point of sale with {data.n_assortments} different assortments."
)
print(dataset_summary)

Training SHOPPER model

Now we can fit a SHOPPER model.

# Hyperparameters

# Sizes of the latent variables: 4 for the preferences and 3 for the price effects.
latent_sizes = dict(preferences=4, price=3)
# Each positive sample is paired with 1 negative sample during the training phase.
n_negative_samples = 1
# Optimisation settings.
optimizer, lr = "adam", 1e-2
epochs, batch_size = 500, 128

# Model configuration: price effects enabled, seasonal effects disabled.
# NOTE(review): this cell was previously described as including an items fixed
# effect, but item_intercept=False is passed — confirm the intended setting.
shopper_settings = {
    "item_intercept": False,
    "price_effects": True,
    "seasonal_effects": False,
    "latent_sizes": latent_sizes,
    "n_negative_samples": n_negative_samples,
    "optimizer": optimizer,
    "lr": lr,
    "epochs": epochs,
    "batch_size": batch_size,
}
shopper = Shopper(**shopper_settings)
# Feel free to explore other models by changing the hyperparameters!

The SHOPPER model can integrate store effects as well as seasonality. Check the documentation if you want to know more about it.

# Instantiate the model
shopper.instantiate(n_items=data.n_items)

# Train the model
history = shopper.fit(trip_dataset=data)

# Plot the loss stored under history["train_loss"]. The curve carries a label
# so that plt.legend() has an entry to display (calling legend() without any
# labelled artist only triggers a warning and draws nothing).
plt.plot(history["train_loss"], label="Training loss")
plt.xlabel("Epoch")
plt.ylabel("Training Loss")
plt.legend()
plt.title("Training of SHOPPER model")
plt.show()

Inference with SHOPPER model

We evaluate the model on the same dataset that was used for training (no separate validation split is created in this notebook).

# Number of basket permutations used to evaluate the model (you can change it).
n_permutations = 2

nll = shopper.evaluate(data, n_permutations)

# NOTE(review): only one model is evaluated in this notebook, so the second
# sentence printed below (about more complex models) cannot be checked here.
print(
    f"Mean negative log-likelihood on the dataset: {nll:.4f}.",
    "\nWe can see that the more complex the model, the lower the negative log-likelihood.",
    sep="\n",
)

We can also compute various utilities and probabilities.

# Items whose utility is queried, the basket they would be added to, and the
# same basket followed by item 0 (reported as the checkout item in the
# printouts of this notebook).
item_batch_inference = np.array([2, 0, 4])
basket_inference = np.array([1, 3])
full_basket_inference = np.array([1, 3, 0])
# Single price applied to every item during inference.
price_inference = 5.0
# Availability mask over all items: 1 = available, 0 = not available.
available_items_inference = np.ones(data.n_items)
available_items_inference[4] = 0  # Consider that item 4 is not available during inference
# Ids of the available items, only used to describe the assortment in the printouts.
assortment_inference = np.array(
    [
        item_id for item_id in data.get_all_items() if available_items_inference[item_id] == 1
    ]
)

trip_inference = Trip(
    purchases=full_basket_inference,
    # Every item is priced at price_inference so that the trip matches the
    # prices announced in the printed descriptions and used for the utility
    # batch, instead of depending on an unseeded random draw.
    prices=np.full(data.n_items, price_inference),
    assortment=available_items_inference,
    store=0,  # 0 if not defined
    week=0,  # 0 if not defined
)

# Item utilities: one utility per queried item, every query sharing the same
# basket, store, week, price and availability mask.
n_queries = len(item_batch_inference)
item_utilities = shopper.compute_batch_utility(
    item_batch=item_batch_inference,
    basket_batch=np.tile(basket_inference, (n_queries, 1)),
    store_batch=np.array([0] * n_queries),  # 0 if not defined
    week_batch=np.array([0] * n_queries),  # 0 if not defined
    price_batch=np.array([price_inference] * n_queries),
    available_item_batch=np.tile(available_items_inference, (n_queries, 1)),
)

utility_header = (
    f"Considering the assortment (ie the set of available items) {assortment_inference} with prices {price_inference},",
    f"and a basket with the items {basket_inference}.\n",
    f"Under these circumstances, the utility of the selected items are:",
)
print(*utility_header)

# Item 0 is reported with an explicit "checkout item" mention.
for position, item_id in enumerate(item_batch_inference):
    utility = item_utilities[position]
    line = (
        f"    - Item {item_id} (checkout item): {utility:.4f}"
        if item_id == 0
        else f"    - Item {item_id}: {utility:.4f}"
    )
    print(line)

# Likelihood of each item being the next one added to the basket.
# NOTE(review): trip_inference was built with purchases=full_basket_inference,
# while the message below prints basket_inference — confirm which basket the
# likelihoods are conditioned on.
item_likelihoods = shopper.compute_item_likelihood(trip=trip_inference)

likelihood_header = (
    f"Considering the assortment (ie the set of available items) {assortment_inference} with prices {price_inference},",
    f"and a basket with the items {basket_inference}.\n",
    f"Under these circumstances, the likelihoods that each item will be the next item added to the basket are:",
)
print(*likelihood_header)

for position, item_id in enumerate(data.get_all_items()):
    likelihood = item_likelihoods[position]
    if item_id != 0:
        print(f"    - Item {item_id}: {likelihood:.4f}")
        continue
    # Item 0 is reported as the checkout item.
    print(f"    - Item {item_id} (checkout item, the customer decides to end his shopping trip): {likelihood:.4f}")

total_likelihood = np.sum(item_likelihoods)
print(f"\nN.B.: The item likelihoods sum to {total_likelihood:.4f}.")

# (Unordered) basket likelihood, approximated with n_permutations orderings of the basket
n_permutations = 2
basket_likelihood = shopper.compute_basket_likelihood(
    trip=trip_inference,
    n_permutations=n_permutations,
)
unordered_message = f"Likelihood for (unordered) basket {full_basket_inference}: {basket_likelihood:.4f} (with {n_permutations} permutations to approximate all possible orders)."
print(unordered_message)

# Ordered basket likelihood, using the purchase order stored in the trip
basket_ordered_likelihood = shopper.compute_ordered_basket_likelihood(
    trip=trip_inference,
)
ordered_message = f"Likelihood for ordered basket {full_basket_inference}: {basket_ordered_likelihood:.4f}."
print(ordered_message)

# Ordered basket likelihood of the other permutation of the basket.
# The trip context (assortment, store, week, prices) is reused from
# trip_inference; only the purchase order changes.
permuted_basket = [3, 1, 0]
basket_ordered_likelihood = shopper.compute_ordered_basket_likelihood(
    basket=np.array(permuted_basket),
    available_items=trip_inference.assortment,
    store=trip_inference.store,
    # Use the trip's week: the store attribute was passed here by mistake.
    week=trip_inference.week,
    prices=trip_inference.prices,
)

# permuted_basket stays a plain list so that it is displayed as "[3, 1, 0]".
print(f"Likelihood for ordered basket {permuted_basket}: {basket_ordered_likelihood:.4f}.")