The Learning-MNL model

In this notebook we use choice-learn implementation of the L-MNL model (from the paper Enhance Discrete Choice Models with Representation Learning) to obtain the same results as presented by the authors.

# Install necessary requirements

# If you run this notebook on Google Colab, or in standalone mode, you need to install the required packages.
# Uncomment the following lines:

# !pip install choice-learn

# If you run the notebook within the GitHub repository, you need to run the following lines, that can skipped otherwise:
import os
import sys

sys.path.append("../../")

import os

os.environ["CUDA_VISIBLE_DEVICES"] = ""

import numpy as np
import pandas as pd

from choice_learn.datasets import load_swissmetro
from choice_learn.data import ChoiceDataset

test_df = pd.read_csv("https://raw.githubusercontent.com/BSifringer/EnhancedDCM/refs/heads/master/ready_example/swissmetro_paper/swissmetro_test.dat", sep="\t")
train_df = pd.read_csv("https://raw.githubusercontent.com/BSifringer/EnhancedDCM/refs/heads/master/ready_example/swissmetro_paper/swissmetro_train.dat", sep="\t")

test_df.head()

train_df.head()

# Preprocessing the dataset

test_df = test_df.loc[test_df.CAR_AV == 1]
test_df = test_df.loc[test_df.SM_AV == 1]
test_df = test_df.loc[test_df.TRAIN_AV == 1]

train_df = train_df.loc[train_df.CAR_AV == 1]
train_df = train_df.loc[train_df.SM_AV == 1]
train_df = train_df.loc[train_df.TRAIN_AV == 1]

# Normalizing values by 100
train_df[["TRAIN_TT", "SM_TT", "CAR_TT"]] = (
    train_df[["TRAIN_TT", "SM_TT", "CAR_TT"]] / 100.0
)

train_df[["TRAIN_HE", "SM_HE"]] = (
    train_df[["TRAIN_HE", "SM_HE"]] / 100.0
)

train_df["train_free_ticket"] = train_df.apply(
    lambda row: (row["GA"] == 1).astype(int), axis=1
)
train_df["sm_free_ticket"] = train_df.apply(
    lambda row: (row["GA"] == 1).astype(int), axis=1
)

train_df["TRAIN_travel_cost"] = train_df.apply(
    lambda row: (row["TRAIN_CO"] * (1 - row["train_free_ticket"])) / 100, axis=1
)
train_df["SM_travel_cost"] = train_df.apply(
    lambda row: (row["SM_CO"] * (1 - row["sm_free_ticket"])) / 100, axis=1
)
train_df["CAR_travel_cost"] = train_df.apply(lambda row: row["CAR_CO"] / 100, axis=1)


# Normalizing values by 100
test_df[["TRAIN_TT", "SM_TT", "CAR_TT"]] = (
    test_df[["TRAIN_TT", "SM_TT", "CAR_TT"]] / 100.0
)

test_df[["TRAIN_HE", "SM_HE"]] = (
    test_df[["TRAIN_HE", "SM_HE"]] / 100.0
)

test_df["train_free_ticket"] = test_df.apply(
    lambda row: (row["GA"] == 1).astype(int), axis=1
)
test_df["sm_free_ticket"] = test_df.apply(
    lambda row: (row["GA"] == 1).astype(int), axis=1
)

test_df["TRAIN_travel_cost"] = test_df.apply(
    lambda row: (row["TRAIN_CO"] * (1 - row["train_free_ticket"])) / 100, axis=1
)
test_df["SM_travel_cost"] = test_df.apply(
    lambda row: (row["SM_CO"] * (1 - row["sm_free_ticket"])) / 100, axis=1
)
test_df["CAR_travel_cost"] = test_df.apply(lambda row: row["CAR_CO"] / 100, axis=1)

swiss_df.SM_SEATS = swiss_df.SM_SEATS.astype("float32")
train_df.SM_SEATS = train_df.SM_SEATS.astype("float32")
test_df.SM_SEATS = test_df.SM_SEATS.astype("float32")

train_df.CHOICE = train_df.CHOICE - 1
test_df.CHOICE = test_df.CHOICE - 1

dataset = ChoiceDataset.from_single_wide_df(df=swiss_df, choices_column="CHOICE", items_id=["TRAIN", "SM", "CAR"],
shared_features_columns=["GA", "AGE", "SM_SEATS", "LUGGAGE", "SM_SEATS", 'PURPOSE', 'FIRST', 'TICKET', 'WHO', 'MALE', 'INCOME', 'ORIGIN', 'DEST'],
items_features_suffixes=["TT", "travel_cost", "HE"], choice_format="items_index")

train_dataset = ChoiceDataset.from_single_wide_df(df=train_df, choices_column="CHOICE", items_id=["TRAIN", "SM", "CAR"],
shared_features_columns=["GA", "AGE", "SM_SEATS", "LUGGAGE", "SM_SEATS", 'PURPOSE', 'FIRST', 'TICKET', 'WHO', 'MALE', 'INCOME', 'ORIGIN', 'DEST'],
items_features_suffixes=["TT", "travel_cost", "HE"], choice_format="items_index")

test_dataset = ChoiceDataset.from_single_wide_df(df=test_df, choices_column="CHOICE", items_id=["TRAIN", "SM", "CAR"],
shared_features_columns=["GA", "AGE", "SM_SEATS", "LUGGAGE", "SM_SEATS", 'PURPOSE', 'FIRST', 'TICKET', 'WHO', 'MALE', 'INCOME', 'ORIGIN', 'DEST'],
items_features_suffixes=["TT", "travel_cost", "HE"], choice_format="items_index")

print(len(train_dataset), len(test_dataset))

from choice_learn.models import ConditionalLogit

clogit = ConditionalLogit()
clogit.add_shared_coefficient(feature_name="TT", coefficient_name="beta_time",  items_indexes=[0, 1, 2])
clogit.add_shared_coefficient(feature_name="travel_cost", coefficient_name="beta_cost", items_indexes=[0, 1, 2])
clogit.add_shared_coefficient(feature_name="HE",  coefficient_name="beta_freq",items_indexes=[0, 1])
clogit.add_shared_coefficient(feature_name="GA",  coefficient_name="beta_GA",items_indexes=[0, 1])
clogit.add_shared_coefficient(feature_name="AGE", coefficient_name="beta_age", items_indexes=[0])
clogit.add_shared_coefficient(feature_name="LUGGAGE", coefficient_name="beta_luggage", items_indexes=[2])
clogit.add_shared_coefficient(feature_name="SM_SEATS", coefficient_name="beta_seats", items_indexes=[1])
clogit.add_coefficients(feature_name="intercept", items_indexes=[1, 2])

clogit.fit(train_dataset)

clogit.trainable_weights

[<tf.Variable 'beta_time:0' shape=(1, 1) dtype=float32, numpy=array([[-1.2917504]], dtype=float32)>,
 <tf.Variable 'beta_cost:0' shape=(1, 1) dtype=float32, numpy=array([[-0.69039553]], dtype=float32)>,
 <tf.Variable 'beta_freq:0' shape=(1, 1) dtype=float32, numpy=array([[-0.7038978]], dtype=float32)>,
 <tf.Variable 'beta_GA:0' shape=(1, 1) dtype=float32, numpy=array([[1.5400317]], dtype=float32)>,
 <tf.Variable 'beta_age:0' shape=(1, 1) dtype=float32, numpy=array([[0.17458557]], dtype=float32)>,
 <tf.Variable 'beta_luggage:0' shape=(1, 1) dtype=float32, numpy=array([[-0.11316068]], dtype=float32)>,
 <tf.Variable 'beta_seats:0' shape=(1, 1) dtype=float32, numpy=array([[0.21595138]], dtype=float32)>,
 <tf.Variable 'beta_intercept:0' shape=(1, 2) dtype=float32, numpy=array([[1.1861055, 1.203332 ]], dtype=float32)>]

clogit.evaluate(train_dataset) * len(train_dataset), clogit.evaluate(test_dataset) * len(test_dataset)

(<tf.Tensor: shape=(), dtype=float32, numpy=5766.751>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1433.0833>)

from choice_learn.models.learning_mnl import LearningMNL

swiss_model = LearningMNL(optimizer="Adam", lr=0.005,
nn_features=['PURPOSE', 'FIRST', 'TICKET', 'WHO', 'MALE', 'INCOME', 'ORIGIN', 'DEST'], nn_layers_widths=[200], epochs=200, batch_size=32)
swiss_model.add_shared_coefficient(feature_name="TT", items_indexes=[0, 1, 2])
swiss_model.add_shared_coefficient(feature_name="travel_cost", items_indexes=[0, 1, 2])
swiss_model.add_shared_coefficient(feature_name="HE", items_indexes=[0, 1])
swiss_model.add_shared_coefficient(feature_name="GA", items_indexes=[0, 1])
swiss_model.add_shared_coefficient(feature_name="AGE", items_indexes=[0])
swiss_model.add_shared_coefficient(feature_name="LUGGAGE", items_indexes=[2])
swiss_model.add_shared_coefficient(feature_name="SM_SEATS", items_indexes=[1])
swiss_model.add_coefficients(feature_name="intercept", items_indexes=[1, 2])

hist = swiss_model.fit(train_dataset, val_dataset=test_dataset, verbose=1)

swiss_model.assign_lr(0.001)
hist2 = swiss_model.fit(train_dataset, val_dataset=test_dataset, verbose=1)

 swiss_model.evaluate(train_dataset, batch_size=32) * 7234

<tf.Tensor: shape=(), dtype=float32, numpy=4485.126>

swiss_model.evaluate(test_dataset, batch_size=32) * 1802

<tf.Tensor: shape=(), dtype=float32, numpy=1165.2987>

# Estimated parameters:
swiss_model.trainable_weights[:8]

[<tf.Variable 'beta_TT:0' shape=(1, 1) dtype=float32, numpy=array([[-1.6142633]], dtype=float32)>,
 <tf.Variable 'beta_travel_cost:0' shape=(1, 1) dtype=float32, numpy=array([[-1.536603]], dtype=float32)>,
 <tf.Variable 'beta_HE:0' shape=(1, 1) dtype=float32, numpy=array([[-0.9000557]], dtype=float32)>,
 <tf.Variable 'beta_GA:0' shape=(1, 1) dtype=float32, numpy=array([[0.7521398]], dtype=float32)>,
 <tf.Variable 'beta_AGE:0' shape=(1, 1) dtype=float32, numpy=array([[0.37502143]], dtype=float32)>,
 <tf.Variable 'beta_LUGGAGE:0' shape=(1, 1) dtype=float32, numpy=array([[0.17362104]], dtype=float32)>,
 <tf.Variable 'beta_SM_SEATS:0' shape=(1, 1) dtype=float32, numpy=array([[0.01463094]], dtype=float32)>,
 <tf.Variable 'beta_intercept:0' shape=(1, 2) dtype=float32, numpy=array([[0.38485298, 0.13572218]], dtype=float32)>]

Results are very similar to the one presented in the paper (see Table 7).