
Example of use of Latent Class MNL


# Install necessary requirements

# If you run this notebook on Google Colab, or in standalone mode, you need to install the required packages.
# Uncomment the following line:

# !pip install choice-learn

# If you run the notebook within the GitHub repository, you need to run the following lines, which can be skipped otherwise:
import os
import sys

sys.path.append("../../")

os.environ["CUDA_VISIBLE_DEVICES"] = ""

import matplotlib as mpl
import numpy as np
import pandas as pd

import tensorflow as tf

Let's use the Electricity dataset, which ships with choice_learn.datasets.

from choice_learn.datasets import load_electricity

elec_dataset = load_electricity(as_frame=False)
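Before building the model, it can help to take a quick look at the data. A minimal check, under the assumption that as_frame=True returns the raw pandas DataFrame:

# Assumption: as_frame=True returns the dataset as a pandas DataFrame
elec_df = load_electricity(as_frame=True)
print(elec_df.head())
print("Number of recorded choices:", len(elec_dataset))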
from choice_learn.models.simple_mnl import SimpleMNL
from choice_learn.models.latent_class_mnl import LatentClassSimpleMNL

lc_model = LatentClassSimpleMNL(
    n_latent_classes=3,
    fit_method="mle",
    optimizer="lbfgs",
    epochs=1000,
    lbfgs_tolerance=1e-20,
)
hist, results = lc_model.fit(elec_dataset, verbose=1)
print("Latent Class Model weights:")
print("Classes Logits:", lc_model.latent_logits)
for i in range(3):
    print("\n")
    print(f"Model Nb {i}, weights:", lc_model.models[i].weights)
nll = (lc_model.evaluate(elec_dataset) * len(elec_dataset)).numpy()
print(f"Negative Log-Likelihood: {nll}")
report = lc_model.compute_report(elec_dataset)

def format_color_groups(df):
    # Assign one background color per latent class to ease reading.
    cmap = mpl.colormaps["Set1"]
    colors = [mpl.colors.rgb2hex(cmap(i)) for i in range(cmap.N)]
    x = df.copy()
    for i, factor in enumerate(x["Latent Class"].unique()):
        x.loc[x["Latent Class"] == factor, :] = f"background-color: {colors[i]}"
    return x

report.style.apply(format_color_groups, axis=None)
Using L-BFGS optimizer, setting up .fit() function
Using L-BFGS optimizer, setting up .fit() function
Using L-BFGS optimizer, setting up .fit() function


| Latent Class | Coefficient Name | Coefficient Estimation | Std. Err | z_value | P(.>z) |
|---|---|---|---|---|---|
| 0 | Weights_items_features_0 | -0.675645 | 0.023987 | -28.167109 | 0.000000 |
| 0 | Weights_items_features_1 | -0.060604 | 0.008162 | -7.424849 | 0.000000 |
| 0 | Weights_items_features_2 | 1.851951 | 0.054914 | 33.724579 | 0.000000 |
| 0 | Weights_items_features_3 | 1.322549 | 0.048159 | 27.462420 | 0.000000 |
| 0 | Weights_items_features_4 | -5.857089 | 0.191162 | -30.639460 | 0.000000 |
| 0 | Weights_items_features_5 | -6.513206 | 0.195680 | -33.285046 | 0.000000 |
| 1 | Weights_items_features_0 | -1.817566 | 0.077771 | -23.370796 | 0.000000 |
| 1 | Weights_items_features_1 | -1.726365 | 0.058838 | -29.340986 | 0.000000 |
| 1 | Weights_items_features_2 | 3.696567 | 0.160258 | 23.066404 | 0.000000 |
| 1 | Weights_items_features_3 | 4.111840 | 0.157179 | 26.160225 | 0.000000 |
| 1 | Weights_items_features_4 | -26.693516 | 3.274723 | -8.151381 | 0.000000 |
| 1 | Weights_items_features_5 | -14.925840 | 0.634699 | -23.516403 | 0.000000 |
| 2 | Weights_items_features_0 | -2.104791 | 0.104296 | -20.181009 | 0.000000 |
| 2 | Weights_items_features_1 | -1.652622 | 0.073820 | -22.387188 | 0.000000 |
| 2 | Weights_items_features_2 | -5.554287 | 0.245318 | -22.641151 | 0.000000 |
| 2 | Weights_items_features_3 | -13.565555 | 0.544168 | -24.928965 | 0.000000 |
| 2 | Weights_items_features_4 | -9.794930 | 0.631004 | -15.522781 | 0.000000 |
| 2 | Weights_items_features_5 | -12.126673 | 0.681118 | -17.804060 | 0.000000 |

Latent Conditional Logit

We used a very simple MNL. Here, we estimate the same model using the Conditional Logit formulation. Don't hesitate to read the conditional-MNL tutorial to better understand how to use this formulation.

from choice_learn.models.latent_class_mnl import LatentClassConditionalLogit
lc_model_2 = LatentClassConditionalLogit(
    n_latent_classes=3,
    fit_method="mle",
    optimizer="lbfgs",
    epochs=1000,
    lbfgs_tolerance=1e-12,
)

For each feature, let's add a coefficient that is shared by all items:

lc_model_2.add_shared_coefficient(coefficient_name="pf",
                                  feature_name="pf",
                                  items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="cl",
                                  feature_name="cl",
                                    items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="loc",
                                  feature_name="loc",
                                  items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="wk",
                                feature_name="wk",
                                items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="tod",
                                  feature_name="tod",
                                  items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="seas",
                                  feature_name="seas",
                                  items_indexes=[0, 1, 2, 3])
# Fit
hist2 = lc_model_2.fit(elec_dataset, verbose=1)
print("Negative Log-Likelihood:", lc_model_2.evaluate(elec_dataset)*len(elec_dataset))
print("Latent Class Model weights:")
print("Classes Logits:", lc_model_2.latent_logits)
for i in range(3):
    print("\n")
    print(f"Model Nb {i}, weights:", lc_model_2.models[i].trainable_weights)

Just like with any ChoiceModel, you can get the choice probabilities:

lc_model.predict_probas(elec_dataset[:4])
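For each choice situation, the probabilities should sum to one over the four alternatives. A quick sanity check, assuming predict_probas returns a tensor of shape (n_choices, n_items):

probas = lc_model.predict_probas(elec_dataset[:4])
# Each row should sum to (approximately) one over the alternatives.
print(tf.reduce_sum(probas, axis=-1))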

If you want to use more complex formulations of Latent Class models, you can directly use the BaseLatentClassModel from choice_learn.models.latent_class_base_model:

from choice_learn.models.latent_class_base_model import BaseLatentClassModel
manual_lc = BaseLatentClassModel(
    model_class=SimpleMNL,
    model_parameters={"add_exit_choice": False},
    n_latent_classes=3,
    fit_method="mle",
    epochs=1000,
    optimizer="lbfgs",
    lbfgs_tolerance=1e-12,
)
manual_lc.instantiate(n_items=4,
                      n_shared_features=0,
                      n_items_features=6)
manual_hist = manual_lc.fit(elec_dataset, verbose=1)
print(manual_lc.evaluate(elec_dataset) * len(elec_dataset))
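The manually specified model can be inspected the same way as before. A sketch, assuming BaseLatentClassModel exposes latent_logits and models just like LatentClassSimpleMNL does:

# Inspect the manually built model, mirroring the earlier prints.
print("Classes Logits:", manual_lc.latent_logits)
for i in range(3):
    print(f"Model Nb {i}, weights:", manual_lc.models[i].weights)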

If you need to go deeper, you can look at the different latent class implementations in choice_learn.models to see how they could be adapted to your needs.