import arviz as az
import numpy as np  # For vectorized math operations
import pandas as pd  # For file input/output
import pymc as pm
import pytensor.tensor as pt

from matplotlib import pyplot as plt
from matplotlib.lines import Line2D


# IPython magic: ask the inline matplotlib backend for 'retina' figures.
%config InlineBackend.figure_format = 'retina'  # high resolution figures
# Apply the ArviZ dark-grid style to the plots that follow.
az.style.use("arviz-darkgrid")
# NumPy random generator seeded with 42.
rng = np.random.default_rng(42)


# Load the wide heating data: prefer the local CSV and fall back to the copy
# resolved by `pm.get_data` only when the local file is missing. Any other
# failure (malformed CSV, interrupt, ...) is allowed to surface instead of
# being silently swallowed.
try:
    wide_heating_df = pd.read_csv("../data/heating_data_r.csv")
except FileNotFoundError:
    wide_heating_df = pd.read_csv(pm.get_data("heating_data_r.csv"))

# Preview the rows belonging to idcase 1.
wide_heating_df[wide_heating_df["idcase"] == 1]


# Load the long heating data: prefer the local CSV and fall back to the copy
# resolved by `pm.get_data` only when the local file is missing. Other errors
# propagate rather than being masked by a bare `except`.
try:
    long_heating_df = pd.read_csv("../data/long_heating_data.csv")
except FileNotFoundError:
    long_heating_df = pd.read_csv(pm.get_data("long_heating_data.csv"))

# Drop the "Unnamed: 0" column (if present) from the preview of idcase 1.
columns = [c for c in long_heating_df.columns if c != "Unnamed: 0"]
long_heating_df[long_heating_df["idcase"] == 1][columns]


# Model 1: multinomial logit with shared installation-cost (ic) and
# operating-cost (oc) coefficients and no alternative-specific intercepts.
N = len(wide_heating_df)
# Integer codes of the chosen system; inferred categories are sorted, which
# matches the ec/er/gc/gr/hp column order used below.
observed = pd.Categorical(wide_heating_df["depvar"]).codes
coords = dict(
    alts_probs=["ec", "er", "gc", "gr", "hp"],
    obs=range(N),
)

with pm.Model(coords=coords) as model_1:
    beta_ic = pm.Normal("beta_ic", 0, 1)
    beta_oc = pm.Normal("beta_oc", 0, 1)

    ## Utility of each priced alternative; "hp" is the zero-utility pivot
    u0, u1, u2, u3 = (
        beta_ic * wide_heating_df[f"ic.{alt}"] + beta_oc * wide_heating_df[f"oc.{alt}"]
        for alt in ("ec", "er", "gc", "gr")
    )
    u4 = np.zeros(N)  # Outside Good
    s = pm.math.stack([u0, u1, u2, u3, u4]).T

    ## Softmax across alternatives turns utilities into choice probabilities
    p_ = pm.Deterministic("p", pm.math.softmax(s, axis=1), dims=("obs", "alts_probs"))

    ## Likelihood of the observed choices
    choice_obs = pm.Categorical("y_cat", p=p_, observed=observed, dims="obs")

    ## Prior predictive, posterior (with pointwise log-likelihood), posterior predictive
    idata_m1 = pm.sample_prior_predictive()
    posterior_m1 = pm.sample(
        nuts_sampler="numpyro", idata_kwargs={"log_likelihood": True}, random_seed=101
    )
    idata_m1.extend(posterior_m1)
    idata_m1.extend(pm.sample_posterior_predictive(idata_m1))

pm.model_to_graphviz(model_1)

Sampling: [beta_ic, beta_oc, y_cat]
/Users/nathanielforde/mambaforge/envs/pymc_examples_new/lib/python3.9/site-packages/pymc/sampling/mcmc.py:243: UserWarning: Use of external NUTS sampler is still experimental
  warnings.warn("Use of external NUTS sampler is still experimental", UserWarning)

Compiling...
Compilation time =  0:00:01.199248
Sampling...

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Sampling time =  0:00:02.165671
Transforming variables...
Transformation time =  0:00:00.404064
Computing Log Likelihood...


# Display the inference data collected for model 1.
idata_m1

<xarray.Dataset>
Dimensions:     (chain: 4, draw: 1000, obs: 900, alts_probs: 5)
Coordinates:
  * chain       (chain) int64 0 1 2 3
  * draw        (draw) int64 0 1 2 3 4 5 6 7 ... 992 993 994 995 996 997 998 999
  * obs         (obs) int64 0 1 2 3 4 5 6 7 ... 892 893 894 895 896 897 898 899
  * alts_probs  (alts_probs) <U2 'ec' 'er' 'gc' 'gr' 'hp'
Data variables:
    beta_ic     (chain, draw) float64 0.001854 0.001794 ... 0.002087 0.001928
    beta_oc     (chain, draw) float64 -0.003802 -0.003846 ... -0.00447 -0.004533
    p           (chain, draw, obs, alts_probs) float64 0.07322 0.1129 ... 0.1111
Attributes:
    created_at:     2023-07-13T17:29:12.988497
    arviz_version:  0.15.1

array([0, 1, 2, 3])

array([  0,   1,   2, ..., 997, 998, 999])

array([  0,   1,   2, ..., 897, 898, 899])

array(['ec', 'er', 'gc', 'gr', 'hp'], dtype='<U2')

array([[0.00185432, 0.0017936 , 0.00168995, ..., 0.00179124, 0.0017465 ,
        0.00167666],
       [0.00177376, 0.00165435, 0.00215904, ..., 0.00198785, 0.00193043,
        0.00181715],
       [0.00185793, 0.00232339, 0.00194828, ..., 0.00203171, 0.0021245 ,
        0.00171454],
       [0.0018481 , 0.00185684, 0.00183757, ..., 0.00195163, 0.00208711,
        0.00192813]])

array([[-0.00380192, -0.00384633, -0.00387485, ..., -0.00462905,
        -0.00462057, -0.00383906],
       [-0.00407646, -0.00419749, -0.00384315, ..., -0.00435413,
        -0.00422983, -0.00419225],
       [-0.00389835, -0.00446452, -0.00464509, ..., -0.00496631,
        -0.00477238, -0.00364794],
       [-0.00398744, -0.00407702, -0.00393378, ..., -0.00416899,
        -0.00447043, -0.00453276]])

array([[[[0.07322309, 0.11294964, 0.2841128 , 0.40787089, 0.12184357],
         [0.09165709, 0.1249422 , 0.30704356, 0.32518664, 0.1511705 ],
         [0.10218969, 0.16264231, 0.23121714, 0.36117826, 0.14277259],
         ...,
         [0.08249413, 0.13373172, 0.23616285, 0.44085863, 0.10675267],
         [0.06839877, 0.17788318, 0.22552951, 0.42617996, 0.10200859],
         [0.08482953, 0.13414628, 0.27236361, 0.40738278, 0.1012778 ]],

        [[0.07191539, 0.11025465, 0.28335124, 0.40526019, 0.12921853],
         [0.08962555, 0.12163123, 0.30624134, 0.32372789, 0.15877399],
         [0.10060054, 0.15861184, 0.23209332, 0.35897101, 0.14972329],
         ...,
         [0.08155359, 0.13079867, 0.23744346, 0.43673993, 0.11346434],
         [0.06799272, 0.174111  , 0.2262014 , 0.42287908, 0.1088158 ],
         [0.08351165, 0.1314237 , 0.27347175, 0.40338936, 0.10820353]],

        [[0.0707887 , 0.10715561, 0.28156113, 0.39923231, 0.14126225],
         [0.08744061, 0.11758153, 0.30394884, 0.32027513, 0.17075389],
         [0.09898362, 0.15332404, 0.23304048, 0.35392342, 0.16072844],
         ...,
...
         ...,
         [0.07434766, 0.12424822, 0.23666514, 0.45907092, 0.10566806],
         [0.06105228, 0.16958401, 0.22476353, 0.4435739 , 0.10102629],
         [0.07642407, 0.12504988, 0.27629164, 0.42177755, 0.10045687]],

        [[0.05882476, 0.0966925 , 0.28953088, 0.43895928, 0.11599257],
         [0.07701313, 0.10985028, 0.3211502 , 0.34258706, 0.14939933],
         [0.08840643, 0.15014045, 0.23352096, 0.38783924, 0.14009291],
         ...,
         [0.06805284, 0.11789883, 0.2356187 , 0.4787461 , 0.09968353],
         [0.05517125, 0.164697  , 0.22310792, 0.46191585, 0.095108  ],
         [0.07017311, 0.11890053, 0.2785132 , 0.43781697, 0.0945962 ]],

        [[0.05711837, 0.09215532, 0.28711905, 0.42994953, 0.13365773],
         [0.0737155 , 0.10374125, 0.31766931, 0.33720999, 0.16766396],
         [0.08587985, 0.14203293, 0.23520541, 0.38005886, 0.15682296],
         ...,
         [0.06705557, 0.11277456, 0.238708  , 0.46575398, 0.11570788],
         [0.05516607, 0.15734329, 0.22488893, 0.45122393, 0.11137777],
         [0.06842681, 0.11410865, 0.28072918, 0.42561475, 0.11112061]]]])

PandasIndex(Int64Index([0, 1, 2, 3], dtype='int64', name='chain'))

PandasIndex(Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            990, 991, 992, 993, 994, 995, 996, 997, 998, 999],
           dtype='int64', name='draw', length=1000))


# Posterior summary table for the two cost coefficients of model 1.
cost_coefficients = ["beta_ic", "beta_oc"]
summaries = az.summary(idata_m1, var_names=cost_coefficients)
summaries


## marginal rate of substitution for a reduction in installation costs:
## ratio of the operating-cost to the installation-cost coefficient per draw
post = az.extract(idata_m1)
oc_draws, ic_draws = post["beta_oc"], post["beta_ic"]
substitution_rate = oc_draws / ic_draws
substitution_rate.mean().item()

-2.215565260325244


# Histogram of the per-draw beta_oc / beta_ic ratio computed above.
fig, ax = plt.subplots(figsize=(20, 10))

hist_style = {"bins": 30, "ec": "black"}
ax.hist(substitution_rate, **hist_style)
ax.set_title("Uncertainty in Marginal Rate of Substitution \n Operating Costs / Installation Costs");


# Average choice probability per alternative, pooled over chains, draws and observations.
idata_m1["posterior"]["p"].mean(dim=["chain", "draw", "obs"])

<xarray.DataArray 'p' (alts_probs: 5)>
array([0.08414602, 0.13748865, 0.26918857, 0.38213088, 0.12704589])
Coordinates:
  * alts_probs  (alts_probs) <U2 'ec' 'er' 'gc' 'gr' 'hp'

array([0.08414602, 0.13748865, 0.26918857, 0.38213088, 0.12704589])

array(['ec', 'er', 'gc', 'gr', 'hp'], dtype='<U2')

PandasIndex(Index(['ec', 'er', 'gc', 'gr', 'hp'], dtype='object', name='alts_probs'))


# Left panel: observed shares vs. the 2.5%-97.5% range of model-1 choice
# probabilities (pooled over chains, draws and observations).
# Right panel: ArviZ posterior predictive check.
alternatives = ["ec", "er", "gc", "gr", "hp"]
pooled_dims = ["chain", "draw", "obs"]
p_post = idata_m1["posterior"]["p"]

fig, axs = plt.subplots(1, 2, figsize=(20, 10))
ax = axs[0]
counts = wide_heating_df.groupby("depvar")["idcase"].count()
predicted_shares = p_post.mean(dim=pooled_dims)
ci_lb = p_post.quantile(0.025, dim=pooled_dims)
ci_ub = p_post.quantile(0.975, dim=pooled_dims)

# Mark both interval end points, then the observed shares and the interval itself.
for bound in (ci_lb, ci_ub):
    ax.scatter(bound, alternatives, color="k", s=2)
ax.scatter(counts / counts.sum(), alternatives, label="Observed Shares", color="red", s=100)
ax.hlines(alternatives, ci_lb, ci_ub, label="Predicted 95% Interval", color="black")
ax.legend()
ax.set_title("Observed V Predicted Shares")
ax.set_xlabel("Shares")
ax.set_ylabel("Heating System")

az.plot_ppc(idata_m1, ax=axs[1])
axs[1].set_title("Posterior Predictive Checks");


# Model 2: as model 1, plus one intercept per non-pivot alternative.
N = len(wide_heating_df)
observed = pd.Categorical(wide_heating_df["depvar"]).codes

coords = dict(
    alts_intercepts=["ec", "er", "gc", "gr"],
    alts_probs=["ec", "er", "gc", "gr", "hp"],
    obs=range(N),
)
with pm.Model(coords=coords) as model_2:
    beta_ic = pm.Normal("beta_ic", 0, 1)
    beta_oc = pm.Normal("beta_oc", 0, 1)
    alphas = pm.Normal("alpha", 0, 1, dims="alts_intercepts")

    ## Utility per priced alternative: own intercept plus the shared cost terms
    u0, u1, u2, u3 = (
        alphas[k] + beta_ic * wide_heating_df[f"ic.{alt}"] + beta_oc * wide_heating_df[f"oc.{alt}"]
        for k, alt in enumerate(("ec", "er", "gc", "gr"))
    )
    u4 = np.zeros(N)  # Outside Good
    s = pm.math.stack([u0, u1, u2, u3, u4]).T

    ## Softmax across alternatives turns utilities into choice probabilities
    p_ = pm.Deterministic("p", pm.math.softmax(s, axis=1), dims=("obs", "alts_probs"))

    ## Likelihood of the observed choices
    choice_obs = pm.Categorical("y_cat", p=p_, observed=observed, dims="obs")

    ## Prior predictive, posterior (with pointwise log-likelihood), posterior predictive
    idata_m2 = pm.sample_prior_predictive()
    posterior_m2 = pm.sample(
        nuts_sampler="numpyro", idata_kwargs={"log_likelihood": True}, random_seed=103
    )
    idata_m2.extend(posterior_m2)
    idata_m2.extend(pm.sample_posterior_predictive(idata_m2))


pm.model_to_graphviz(model_2)

Sampling: [alpha, beta_ic, beta_oc, y_cat]
/Users/nathanielforde/mambaforge/envs/pymc_examples_new/lib/python3.9/site-packages/pymc/sampling/mcmc.py:243: UserWarning: Use of external NUTS sampler is still experimental
  warnings.warn("Use of external NUTS sampler is still experimental", UserWarning)

Compiling...
Compilation time =  0:00:01.339706
Sampling...

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Sampling time =  0:00:15.309789
Transforming variables...
Transformation time =  0:00:00.289041
Computing Log Likelihood...


# Posterior summary of the cost coefficients and the alternative-specific intercepts of model 2.
az.summary(idata_m2, var_names=["beta_ic", "beta_oc", "alpha"])


# Left panel: observed shares vs. the 2.5%-97.5% range of model-2 choice
# probabilities (pooled over chains, draws and observations).
# Right panel: ArviZ posterior predictive check.
alternatives = ["ec", "er", "gc", "gr", "hp"]
pooled_dims = ["chain", "draw", "obs"]
p_post = idata_m2["posterior"]["p"]

fig, axs = plt.subplots(1, 2, figsize=(20, 10))
ax = axs[0]
counts = wide_heating_df.groupby("depvar")["idcase"].count()
predicted_shares = p_post.mean(dim=pooled_dims)
ci_lb = p_post.quantile(0.025, dim=pooled_dims)
ci_ub = p_post.quantile(0.975, dim=pooled_dims)

# Mark both interval end points, then the observed shares and the interval itself.
for bound in (ci_lb, ci_ub):
    ax.scatter(bound, alternatives, color="k", s=2)
ax.scatter(counts / counts.sum(), alternatives, label="Observed Shares", color="red", s=100)
ax.hlines(alternatives, ci_lb, ci_ub, label="Predicted 95% Interval", color="black")
ax.legend()
ax.set_title("Observed V Predicted Shares")
ax.set_xlabel("Shares")
ax.set_ylabel("Heating System")

az.plot_ppc(idata_m2, ax=axs[1])
axs[1].set_title("Posterior Predictive Checks");


# Model 3: intercepts drawn from a multivariate normal with an LKJ-Cholesky
# covariance, plus an income effect per non-pivot alternative.
coords = dict(
    alts_intercepts=["ec", "er", "gc", "gr"],
    alts_probs=["ec", "er", "gc", "gr", "hp"],
    obs=range(N),
)
with pm.Model(coords=coords) as model_3:
    ## Costs of the electric options are registered as mutable data so they
    ## can be swapped out with pm.set_data later.
    ic_ec = pm.MutableData("ic_ec", wide_heating_df["ic.ec"])
    oc_ec = pm.MutableData("oc_ec", wide_heating_df["oc.ec"])
    ic_er = pm.MutableData("ic_er", wide_heating_df["ic.er"])
    oc_er = pm.MutableData("oc_er", wide_heating_df["oc.er"])

    beta_ic = pm.Normal("beta_ic", 0, 1)
    beta_oc = pm.Normal("beta_oc", 0, 1)
    beta_income = pm.Normal("beta_income", 0, 1, dims="alts_intercepts")
    chol, corr, stds = pm.LKJCholeskyCov(
        "chol", n=4, eta=2.0, sd_dist=pm.Exponential.dist(1.0, shape=4)
    )
    alphas = pm.MvNormal("alpha", mu=0, chol=chol, dims="alts_intercepts")

    income = wide_heating_df["income"]
    ## Electric options use the mutable cost containers ...
    u0 = alphas[0] + beta_ic * ic_ec + beta_oc * oc_ec + beta_income[0] * income
    u1 = alphas[1] + beta_ic * ic_er + beta_oc * oc_er + beta_income[1] * income
    ## ... while the gas options read their costs straight from the data frame.
    u2, u3 = (
        alphas[k]
        + beta_ic * wide_heating_df[f"ic.{alt}"]
        + beta_oc * wide_heating_df[f"oc.{alt}"]
        + beta_income[k] * income
        for k, alt in ((2, "gc"), (3, "gr"))
    )
    u4 = np.zeros(N)  # pivot
    s = pm.math.stack([u0, u1, u2, u3, u4]).T

    p_ = pm.Deterministic("p", pm.math.softmax(s, axis=1), dims=("obs", "alts_probs"))
    choice_obs = pm.Categorical("y_cat", p=p_, observed=observed, dims="obs")

    ## Prior predictive, posterior (with pointwise log-likelihood), posterior predictive
    idata_m3 = pm.sample_prior_predictive()
    posterior_m3 = pm.sample(
        nuts_sampler="numpyro", idata_kwargs={"log_likelihood": True}, random_seed=100
    )
    idata_m3.extend(posterior_m3)
    idata_m3.extend(pm.sample_posterior_predictive(idata_m3))


pm.model_to_graphviz(model_3)

Sampling: [alpha, beta_ic, beta_income, beta_oc, chol, y_cat]
/Users/nathanielforde/mambaforge/envs/pymc_examples_new/lib/python3.9/site-packages/pymc/sampling/mcmc.py:243: UserWarning: Use of external NUTS sampler is still experimental
  warnings.warn("Use of external NUTS sampler is still experimental", UserWarning)

Compiling...
Compilation time =  0:00:04.533953
Sampling...

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Sampling time =  0:00:30.042792
Transforming variables...
Transformation time =  0:00:00.594139
Computing Log Likelihood...


# Left panel: observed shares vs. the 2.5%-97.5% range of model-3 choice
# probabilities (pooled over chains, draws and observations).
# Right panel: ArviZ posterior predictive check.
alternatives = ["ec", "er", "gc", "gr", "hp"]
pooled_dims = ["chain", "draw", "obs"]
p_post = idata_m3["posterior"]["p"]

fig, axs = plt.subplots(1, 2, figsize=(20, 10))
ax = axs[0]
counts = wide_heating_df.groupby("depvar")["idcase"].count()
predicted_shares = p_post.mean(dim=pooled_dims)
ci_lb = p_post.quantile(0.025, dim=pooled_dims)
ci_ub = p_post.quantile(0.975, dim=pooled_dims)

# Mark both interval end points, then the observed shares and the interval itself.
for bound in (ci_lb, ci_ub):
    ax.scatter(bound, alternatives, color="k", s=2)
ax.scatter(counts / counts.sum(), alternatives, label="Observed Shares", color="red", s=100)
ax.hlines(alternatives, ci_lb, ci_ub, label="Predicted 95% Interval", color="black")
ax.legend()
ax.set_title("Observed V Predicted Shares")
ax.set_xlabel("Shares")
ax.set_ylabel("Heating System")

az.plot_ppc(idata_m3, ax=axs[1])
axs[1].set_title("Posterior Predictive Checks");


# Posterior summary for model 3 (income effects, cost coefficients, intercepts
# and the "chol_corr" entries), rounded to 4 decimals.
az.summary(
    idata_m3, var_names=["beta_income", "beta_ic", "beta_oc", "alpha", "chol_corr"], round_to=4
)

/Users/nathanielforde/mambaforge/envs/pymc_examples_new/lib/python3.9/site-packages/arviz/stats/diagnostics.py:592: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


# Mean of the per-draw beta_oc / beta_ic ratio under model 3.
post = az.extract(idata_m3)
oc_draws, ic_draws = post["beta_oc"], post["beta_ic"]
substitution_rate = oc_draws / ic_draws
substitution_rate.mean().item()

17.581376035151784


# Counterfactual: raise the operating costs of both electric options by 20%
# and re-predict probabilities and choices from the model-3 posterior.
with model_3:
    # replace the mutable operating-cost inputs with the inflated values
    inflated_costs = {
        "oc_ec": wide_heating_df["oc.ec"] * 1.2,
        "oc_er": wide_heating_df["oc.er"] * 1.2,
    }
    pm.set_data(inflated_costs)
    # results are returned as a separate `predictions` group instead of
    # being merged into idata_m3
    idata_new_policy = pm.sample_posterior_predictive(
        idata_m3,
        var_names=["p", "y_cat"],
        return_inferencedata=True,
        predictions=True,
        extend_inferencedata=False,
        random_seed=100,
    )

idata_new_policy

<xarray.Dataset>
Dimensions:     (chain: 4, draw: 1000, obs: 900, alts_probs: 5)
Coordinates:
  * chain       (chain) int64 0 1 2 3
  * draw        (draw) int64 0 1 2 3 4 5 6 7 ... 992 993 994 995 996 997 998 999
  * obs         (obs) int64 0 1 2 3 4 5 6 7 ... 892 893 894 895 896 897 898 899
  * alts_probs  (alts_probs) <U2 'ec' 'er' 'gc' 'gr' 'hp'
Data variables:
    p           (chain, draw, obs, alts_probs) float64 0.04832 ... 0.05605
    y_cat       (chain, draw, obs) int64 2 4 4 2 1 2 2 2 2 ... 2 3 3 3 2 1 2 4 2
Attributes:
    created_at:                 2023-07-13T17:30:54.332027
    arviz_version:              0.15.1
    inference_library:          pymc
    inference_library_version:  5.3.0

array([0, 1, 2, 3])

array([  0,   1,   2, ..., 997, 998, 999])

array([  0,   1,   2, ..., 897, 898, 899])

array(['ec', 'er', 'gc', 'gr', 'hp'], dtype='<U2')

array([[[[0.04832002, 0.0752078 , 0.68683941, 0.12152489, 0.06810788],
         [0.04476203, 0.07039096, 0.68099044, 0.13176806, 0.07208852],
         [0.05135906, 0.08271831, 0.64560802, 0.14763801, 0.07267661],
         ...,
         [0.05014134, 0.07771716, 0.66259606, 0.13810671, 0.07143873],
         [0.05050141, 0.09423651, 0.637604  , 0.14221295, 0.07544514],
         [0.03778261, 0.06934743, 0.64020447, 0.16589454, 0.08677095]],

        [[0.04233181, 0.06143711, 0.7111071 , 0.12979504, 0.05532895],
         [0.03947445, 0.05478907, 0.70784896, 0.13031194, 0.06757557],
         [0.05063401, 0.07792033, 0.62771886, 0.17004312, 0.07368368],
         ...,
         [0.04599708, 0.06829343, 0.65431144, 0.17033189, 0.06106616],
         [0.04250738, 0.09758275, 0.62190542, 0.17427809, 0.06372637],
         [0.02895578, 0.05159981, 0.63847768, 0.20485859, 0.07610814]],

        [[0.03431323, 0.05354092, 0.73089291, 0.12294999, 0.05830295],
         [0.03771595, 0.04837363, 0.71414228, 0.12773264, 0.0720355 ],
         [0.05356146, 0.07272789, 0.6164005 , 0.17813399, 0.07917616],
         ...,
...
         ...,
         [0.05726533, 0.06342393, 0.66066572, 0.1516834 , 0.06696161],
         [0.05086387, 0.08283043, 0.64817392, 0.15153051, 0.06660127],
         [0.03979472, 0.07152408, 0.65481527, 0.17632445, 0.05754148]],

        [[0.05094682, 0.07403621, 0.67371889, 0.13510685, 0.06619123],
         [0.05190867, 0.0751801 , 0.66746741, 0.14668949, 0.05875433],
         [0.05890911, 0.08693934, 0.63771287, 0.161988  , 0.05445068],
         ...,
         [0.05589228, 0.08072595, 0.65194827, 0.15355613, 0.05787737],
         [0.0557648 , 0.09253297, 0.63544208, 0.15638201, 0.05987814],
         [0.05083981, 0.08123078, 0.63433452, 0.1824528 , 0.0511421 ]],

        [[0.05265113, 0.05813257, 0.68305352, 0.15017725, 0.05598552],
         [0.04728697, 0.05506159, 0.68930742, 0.15935793, 0.0489861 ],
         [0.05892811, 0.06923376, 0.65607418, 0.1716434 , 0.04412055],
         ...,
         [0.05729577, 0.06241275, 0.67345633, 0.15633063, 0.05050452],
         [0.06081723, 0.0825264 , 0.63398358, 0.16625131, 0.05642149],
         [0.03967079, 0.05927182, 0.66208718, 0.18291914, 0.05605107]]]])

array([[[2, 4, 4, ..., 3, 2, 2],
        [2, 0, 0, ..., 2, 2, 2],
        [2, 2, 2, ..., 2, 2, 2],
        ...,
        [3, 2, 1, ..., 2, 2, 2],
        [2, 2, 2, ..., 2, 2, 2],
        [0, 2, 2, ..., 2, 2, 2]],

       [[0, 2, 2, ..., 2, 0, 2],
        [2, 0, 2, ..., 2, 0, 1],
        [4, 2, 2, ..., 2, 3, 4],
        ...,
        [2, 2, 2, ..., 2, 2, 2],
        [2, 3, 2, ..., 3, 0, 2],
        [2, 2, 2, ..., 2, 2, 2]],

       [[2, 2, 1, ..., 2, 2, 3],
        [2, 2, 4, ..., 1, 0, 1],
        [2, 0, 4, ..., 2, 4, 3],
        ...,
        [4, 4, 2, ..., 2, 1, 2],
        [2, 4, 2, ..., 2, 2, 2],
        [3, 3, 3, ..., 2, 4, 2]],

       [[2, 2, 3, ..., 1, 2, 2],
        [2, 2, 2, ..., 2, 2, 1],
        [2, 2, 2, ..., 2, 2, 2],
        ...,
        [2, 0, 2, ..., 2, 1, 2],
        [2, 2, 2, ..., 3, 2, 3],
        [2, 3, 1, ..., 2, 4, 2]]])

PandasIndex(Int64Index([0, 1, 2, 3], dtype='int64', name='chain'))

PandasIndex(Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            990, 991, 992, 993, 994, 995, 996, 997, 998, 999],
           dtype='int64', name='draw', length=1000))

PandasIndex(Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            890, 891, 892, 893, 894, 895, 896, 897, 898, 899],
           dtype='int64', name='obs', length=900))


# Average predicted choice probability per alternative under the new policy,
# pooled over chains, draws and observations.
idata_new_policy["predictions"]["p"].mean(dim=["chain", "draw", "obs"])

<xarray.DataArray 'p' (alts_probs: 5)>
array([0.05383866, 0.07239016, 0.66253495, 0.1482966 , 0.06293963])
Coordinates:
  * alts_probs  (alts_probs) <U2 'ec' 'er' 'gc' 'gr' 'hp'

array([0.05383866, 0.07239016, 0.66253495, 0.1482966 , 0.06293963])

array(['ec', 'er', 'gc', 'gr', 'hp'], dtype='<U2')

PandasIndex(Index(['ec', 'er', 'gc', 'gr', 'hp'], dtype='object', name='alts_probs'))


# Compare observed shares, the new-policy mean predictions, and the
# 2.5%-97.5% range of the model-3 (old policy) choice probabilities.
alternatives = ["ec", "er", "gc", "gr", "hp"]
pooled_dims = ["chain", "draw", "obs"]
old_policy_p = idata_m3["posterior"]["p"]

fig, ax = plt.subplots(1, figsize=(20, 10))
counts = wide_heating_df.groupby("depvar")["idcase"].count()
new_predictions = idata_new_policy["predictions"]["p"].mean(dim=pooled_dims).values
ci_lb = old_policy_p.quantile(0.025, dim=pooled_dims)
ci_ub = old_policy_p.quantile(0.975, dim=pooled_dims)

# interval end points first, then the two sets of point estimates
for bound in (ci_lb, ci_ub):
    ax.scatter(bound, alternatives, color="k", s=2)
point_layers = [
    (new_predictions, "green", "New Policy Predicted Share"),
    (counts / counts.sum(), "red", "Observed Shares"),
]
for values, colour, legend_label in point_layers:
    ax.scatter(values, alternatives, color=colour, label=legend_label, s=100)
ax.hlines(
    alternatives,
    ci_lb,
    ci_ub,
    label="Predicted 95% Credible Interval Old Policy",
    color="black",
)
ax.set_title("Predicted Market Shares under Old and New Pricing Policy", fontsize=20)
ax.set_xlabel("Market Share")
ax.legend();


# Rank the three heating models with ArviZ's model comparison and show the table.
fitted_models = {"m1": idata_m1, "m2": idata_m2, "m3": idata_m3}
compare = az.compare(fitted_models)
compare


# Plot the comparison table.
az.plot_compare(compare)

<Axes: title={'center': 'Model comparison\nhigher is better'}, xlabel='elpd_loo (log)', ylabel='ranked models'>


# Load the cracker choice data: prefer the local CSV and fall back to the copy
# resolved by `pm.get_data` only when the local file is missing. Other errors
# propagate rather than being masked by a bare `except`.
try:
    c_df = pd.read_csv("../data/cracker_choice_short.csv")
except FileNotFoundError:
    c_df = pd.read_csv(pm.get_data("cracker_choice_short.csv"))
# Drop the "Unnamed: 0" column (if present) from the preview.
columns = [c for c in c_df.columns if c != "Unnamed: 0"]
c_df[columns]


# Number of recorded choices per person, shown as a single wide row.
c_df.groupby("personId")[["choiceId"]].count().T


# For every person, fit a degree-1 (left panel) and degree-2 (right panel)
# polynomial of each brand's offered price against the person's decision index
# and draw the fitted curve over decision points 0..24.
fig, axs = plt.subplots(1, 2, figsize=(20, 10))
axs = axs.flatten()
# NOTE(review): not used by the curves below (which use the colours in
# `brand_styles`); kept in case other cells rely on it.
map_color = {"nabisco": "red", "keebler": "blue", "sunshine": "purple", "private": "orange"}

# (price column suffix, line colour, legend label), in drawing order
brand_styles = [
    ("sunshine", "red", "Sunshine"),
    ("keebler", "blue", "Keebler"),
    ("nabisco", "grey", "Nabisco"),
]
decision_points = range(25)

for i in c_df["personId"].unique():
    # Only read from, so no copy of the person's rows is needed.
    temp = c_df[c_df["personId"] == i]
    for panel, degree in zip(axs, (1, 2)):
        for brand, colour, legend_label in brand_styles:
            predict = np.poly1d(
                np.polyfit(temp["personChoiceId"], temp[f"price.{brand}"], deg=degree)
            )
            panel.plot(predict(decision_points), color=colour, label=legend_label, alpha=0.4)

axs[0].set_title("Linear Regression Fit \n Customer Price Exposure over Time", fontsize=20)
axs[1].set_title("Polynomial^(2) Regression Fit \n Customer Price Exposure over Time", fontsize=20)
axs[0].set_xlabel("Nth Decision/Time point")
axs[1].set_xlabel("Nth Decision/Time point")
axs[0].set_ylabel("Product Price Offered")
axs[1].set_ylim(0, 2)
axs[0].set_ylim(0, 2)

# One legend entry per brand instead of one per plotted person.
colors = [colour for _, colour, _ in brand_styles]
lines = [Line2D([0], [0], color=c, linewidth=3, linestyle="-") for c in colors]
labels = [legend_label for _, _, legend_label in brand_styles]
axs[0].legend(lines, labels)
axs[1].legend(lines, labels);


# Model 4: cracker brand choice with brand intercepts, person-by-brand
# offsets, and shared display / feature / price coefficients.
N = c_df.shape[0]
# Column order of the utility (and probability) matrix built below.
alternatives = ["sunshine", "keebler", "nabisco", "private"]
# The observed codes index the columns of `p`, so they must follow
# `alternatives`. Letting pd.Categorical infer the categories would sort them
# (keebler, nabisco, private, sunshine) and score every choice against another
# brand's utility. Anything that labels the columns of `p` (plots, tables)
# must likewise use the `alts_probs` coordinate, not alphabetical order.
observed = pd.Categorical(c_df["choice"], categories=alternatives).codes
if (observed < 0).any():
    # Unknown labels are coded -1 and would silently corrupt the likelihood.
    raise ValueError("c_df['choice'] contains values outside the modelled alternatives")
person_indx, uniques = pd.factorize(c_df["personId"])

coords = {
    "alts_intercepts": alternatives[:-1],
    "alts_probs": alternatives,
    "individuals": uniques,
    "obs": range(N),
}
with pm.Model(coords=coords) as model_4:
    beta_feat = pm.TruncatedNormal("beta_feat", 0, 1, upper=10, lower=0)
    beta_disp = pm.TruncatedNormal("beta_disp", 0, 1, upper=10, lower=0)
    ## Stronger Prior on Price to ensure an increase in price negatively impacts utility
    beta_price = pm.TruncatedNormal("beta_price", 0, 1, upper=0, lower=-10)
    alphas = pm.Normal("alpha", 0, 1, dims="alts_intercepts")
    beta_individual = pm.Normal("beta_individual", 0, 0.05, dims=("individuals", "alts_intercepts"))

    ## Utility per branded alternative: brand intercept + the person's offset
    ## for that brand + display, feature and price effects.
    u0 = (
        (alphas[0] + beta_individual[person_indx, 0])
        + beta_disp * c_df["disp.sunshine"]
        + beta_feat * c_df["feat.sunshine"]
        + beta_price * c_df["price.sunshine"]
    )
    u1 = (
        (alphas[1] + beta_individual[person_indx, 1])
        + beta_disp * c_df["disp.keebler"]
        + beta_feat * c_df["feat.keebler"]
        + beta_price * c_df["price.keebler"]
    )
    u2 = (
        (alphas[2] + beta_individual[person_indx, 2])
        + beta_disp * c_df["disp.nabisco"]
        + beta_feat * c_df["feat.nabisco"]
        + beta_price * c_df["price.nabisco"]
    )
    u3 = np.zeros(N)  # Outside Good ("private")
    s = pm.math.stack([u0, u1, u2, u3]).T

    ## Apply Softmax Transform
    p_ = pm.Deterministic("p", pm.math.softmax(s, axis=1), dims=("obs", "alts_probs"))

    ## Likelihood
    choice_obs = pm.Categorical("y_cat", p=p_, observed=observed, dims="obs")

    ## Prior predictive, posterior (with pointwise log-likelihood), posterior predictive
    idata_m4 = pm.sample_prior_predictive()
    idata_m4.extend(
        pm.sample(nuts_sampler="numpyro", idata_kwargs={"log_likelihood": True}, random_seed=103)
    )
    idata_m4.extend(pm.sample_posterior_predictive(idata_m4))


pm.model_to_graphviz(model_4)

Sampling: [alpha, beta_disp, beta_feat, beta_individual, beta_price, y_cat]
/Users/nathanielforde/mambaforge/envs/pymc_examples_new/lib/python3.9/site-packages/pymc/sampling/mcmc.py:243: UserWarning: Use of external NUTS sampler is still experimental
  warnings.warn("Use of external NUTS sampler is still experimental", UserWarning)

Compiling...
Compilation time =  0:00:02.628050
Sampling...

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

Sampling time =  0:00:11.040460
Transforming variables...
Transformation time =  0:00:01.419801
Computing Log Likelihood...


# Posterior summary of the model-4 coefficients, brand intercepts and per-person offsets.
az.summary(idata_m4, var_names=["beta_disp", "beta_feat", "beta_price", "alpha", "beta_individual"])


# ArviZ distribution comparison plot for the price coefficient.
az.plot_dist_comparison(idata_m4, var_names=["beta_price"]);


# Left panel: observed shares vs. the mean and 2.5%-97.5% range of model-4
# choice probabilities (pooled over chains, draws and observations).
# Right panel: ArviZ posterior predictive check.
fig, axs = plt.subplots(1, 2, figsize=(20, 10))
ax = axs[0]
pooled_dims = ["chain", "draw", "obs"]
p_post = idata_m4["posterior"]["p"]
# Label each column of `p` with the model's own `alts_probs` coordinate and
# put the observed counts in that same order. The groupby index is sorted
# alphabetically and need not match the column order of `p`.
labels = p_post.coords["alts_probs"].values.tolist()
counts = c_df.groupby("choice")["choiceId"].count().reindex(labels, fill_value=0)
predicted_shares = p_post.mean(dim=pooled_dims)
ci_lb = p_post.quantile(0.025, dim=pooled_dims)
ci_ub = p_post.quantile(0.975, dim=pooled_dims)
ax.scatter(ci_lb, labels, color="k", s=2)
ax.scatter(ci_ub, labels, color="k", s=2)
ax.scatter(
    counts / counts.sum(),
    labels,
    label="Observed Shares",
    color="red",
    s=100,
)
ax.scatter(
    predicted_shares,
    labels,
    label="Predicted Mean",
    color="green",
    s=100,
)
ax.hlines(
    labels,
    ci_lb,
    ci_ub,
    label="Predicted 95% Interval",
    color="black",
)
ax.legend()
ax.set_title("Observed V Predicted Shares")
az.plot_ppc(idata_m4, ax=axs[1])
axs[1].set_title("Posterior Predictive Checks")
ax.set_xlabel("Shares")
ax.set_ylabel("Crackers");


# Display the inference data collected for model 4.
idata_m4

<xarray.Dataset>
Dimensions:          (chain: 4, draw: 1000, alts_intercepts: 3,
                      individuals: 136, obs: 3156, alts_probs: 4)
Coordinates:
  * chain            (chain) int64 0 1 2 3
  * draw             (draw) int64 0 1 2 3 4 5 6 ... 993 994 995 996 997 998 999
  * alts_intercepts  (alts_intercepts) <U8 'sunshine' 'keebler' 'nabisco'
  * individuals      (individuals) int64 1 2 3 4 5 6 ... 131 132 133 134 135 136
  * obs              (obs) int64 0 1 2 3 4 5 6 ... 3150 3151 3152 3153 3154 3155
  * alts_probs       (alts_probs) <U8 'sunshine' 'keebler' 'nabisco' 'private'
Data variables:
    alpha            (chain, draw, alts_intercepts) float64 -0.03604 ... 1.472
    beta_individual  (chain, draw, individuals, alts_intercepts) float64 -0.0...
    beta_feat        (chain, draw) float64 0.008284 0.00381 ... 0.06727 0.0183
    beta_disp        (chain, draw) float64 0.02731 0.02334 ... 0.003561 0.001434
    beta_price       (chain, draw) float64 -0.01967 -0.08042 ... -0.01557
    p                (chain, draw, obs, alts_probs) float64 0.05909 ... 0.07105
Attributes:
    created_at:     2023-07-13T17:31:22.499390
    arviz_version:  0.15.1

array([0, 1, 2, 3])

array([  0,   1,   2, ..., 997, 998, 999])

array(['sunshine', 'keebler', 'nabisco'], dtype='<U8')

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,
        29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,
        43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,
        57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,
        71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
        85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,
        99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
       113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
       127, 128, 129, 130, 131, 132, 133, 134, 135, 136])

array([   0,    1,    2, ..., 3153, 3154, 3155])

array(['sunshine', 'keebler', 'nabisco', 'private'], dtype='<U8')

array([[[-3.60354089e-02,  2.14480430e+00,  1.68506815e+00],
        [ 1.55227190e-01,  2.13629995e+00,  1.52518135e+00],
        [ 1.35868068e-02,  2.10626238e+00,  1.51838973e+00],
        ...,
        [ 7.54591026e-02,  2.10467698e+00,  1.54587101e+00],
        [ 4.04155687e-02,  2.04643441e+00,  1.50894838e+00],
        [ 9.31199468e-02,  2.10627525e+00,  1.56864008e+00]],

       [[-4.71215832e-02,  1.96048359e+00,  1.44187627e+00],
        [-4.12740981e-02,  2.05116312e+00,  1.48762707e+00],
        [-6.39949887e-02,  2.00165455e+00,  1.47207154e+00],
        ...,
        [ 7.53096155e-02,  2.05073796e+00,  1.53612143e+00],
        [-2.04418548e-01,  1.91268460e+00,  1.47868264e+00],
        [ 1.31145017e-03,  2.15266472e+00,  1.52350357e+00]],

       [[-1.67417178e-01,  1.93567031e+00,  1.36223077e+00],
        [-2.05434891e-03,  2.04217404e+00,  1.49425649e+00],
        [-9.62584618e-02,  1.97378688e+00,  1.46211638e+00],
        ...,
        [-1.78130585e-01,  1.89683295e+00,  1.42686467e+00],
        [-1.13593699e-01,  1.94827950e+00,  1.34688070e+00],
        [-1.00997139e-01,  2.03502095e+00,  1.42026456e+00]],

       [[-5.14730394e-02,  1.97567274e+00,  1.40884364e+00],
        [ 1.36100414e-02,  2.05271250e+00,  1.53461898e+00],
        [-1.28874270e-01,  1.94729367e+00,  1.47803578e+00],
        ...,
        [-1.89498299e-01,  1.92867822e+00,  1.42140180e+00],
        [-1.89138888e-01,  2.01432122e+00,  1.40835152e+00],
        [-1.60098982e-02,  2.05420574e+00,  1.47237351e+00]]])

array([[[[-7.10560114e-02,  1.22929332e-02, -1.46737437e-01],
         [ 8.52822212e-02,  3.32629274e-03, -8.20605473e-02],
         [ 4.78202552e-02, -2.18693778e-02, -3.68227935e-02],
         ...,
         [ 4.29781265e-02,  8.73897036e-02, -1.53910413e-02],
         [ 4.29146940e-03, -8.76279211e-03,  3.95867252e-02],
         [ 2.55974630e-02, -4.12636767e-02, -2.37613477e-02]],

        [[ 4.41778679e-02, -1.68296356e-02, -8.99795648e-02],
         [-1.29898100e-02, -7.51922913e-03, -3.36075829e-02],
         [ 5.36816645e-02, -5.50745672e-02,  8.18963024e-02],
         ...,
         [ 9.64938849e-03,  2.70293983e-02,  1.00558230e-02],
         [-8.83822929e-02,  8.21913566e-02,  3.26926018e-02],
         [-1.90090484e-02,  3.87284664e-02,  5.05434664e-02]],

        [[-5.77427761e-02, -1.70018539e-02,  4.47958404e-02],
         [ 5.26355345e-02, -2.71295228e-02, -2.47569760e-02],
         [-2.19562549e-02,  3.73185473e-02, -3.60250340e-02],
         ...,
...
         ...,
         [-9.61670811e-02,  8.47178812e-02,  6.74259652e-02],
         [ 7.27041214e-03, -7.87551611e-03, -4.26231534e-02],
         [-1.76720367e-02,  2.48984235e-02, -2.90015112e-02]],

        [[ 5.88698708e-02, -4.05063495e-02, -5.33633974e-03],
         [ 4.83901677e-02, -1.02670413e-01,  9.65700367e-03],
         [-8.92126797e-02,  5.18146129e-02,  4.22038044e-03],
         ...,
         [-1.34862027e-01,  2.07118333e-02, -7.52939259e-02],
         [ 4.88489501e-02, -4.17625557e-02, -4.55311955e-03],
         [-1.02991936e-01,  6.47643005e-02, -2.80800993e-02]],

        [[-1.04878612e-01, -7.22608778e-03, -6.50507719e-02],
         [-7.63817056e-02,  1.31133704e-02, -8.51631458e-02],
         [-7.41760347e-02, -7.44501815e-02,  4.95712014e-03],
         ...,
         [ 4.95819664e-02, -4.96280002e-02, -5.64554584e-02],
         [ 2.28594296e-02,  4.18671514e-02, -2.56275232e-02],
         [ 9.03650352e-02,  2.39682361e-02, -2.85149180e-02]]]])

array([[0.00828351, 0.00380971, 0.03502916, ..., 0.00189139, 0.01810111,
        0.01654725],
       [0.0039898 , 0.04145032, 0.01360652, ..., 0.01273682, 0.00126043,
        0.00874647],
       [0.01200379, 0.03659929, 0.00521525, ..., 0.00193413, 0.02779925,
        0.00242009],
       [0.02490679, 0.00953517, 0.03384639, ..., 0.02284003, 0.06727227,
        0.01829692]])


# Summarise the posterior of the individual-level intercept modifications:
# the posterior mean plus the 2.5% and 97.5% quantiles over all samples.
beta_individual = idata_m4["posterior"]["beta_individual"]
sample_dims = ("chain", "draw")
posterior_mean = beta_individual.mean(dim=sample_dims)

# Every summary is ordered by the posterior-mean nabisco modification, so the
# same individual sits at the same position in all three arrays.
# NOTE(review): `sortby` aligns the key with the array by coordinate label
# before sorting, so this assumes the individual dimension carries an index.
nabisco_order = posterior_mean.sel(alts_intercepts="nabisco")

predicted = posterior_mean.sortby(nabisco_order)
ci_lb = beta_individual.quantile(0.025, dim=sample_dims).sortby(nabisco_order)
ci_ub = beta_individual.quantile(0.975, dim=sample_dims).sortby(nabisco_order)


def _plot_individual_panel(
    fig, gs, column, brand, means, lower, upper, band_half_width=0.03, band_label=None
):
    """Draw one column of the figure: a histogram above an interval plot.

    Parameters
    ----------
    fig, gs
        The figure and its 2x3 grid spec; row 0 holds the histogram and
        row 1 the per-individual interval plot.
    column
        Grid column (0, 1 or 2) to draw into.
    brand
        Label selected along ``alts_intercepts``; also used, capitalised,
        in the panel title.
    means, lower, upper
        Posterior mean and interval bounds. Assumed to be ordered identically
        with one entry per individual along the first dimension
        (TODO confirm against the cell that builds them).
    band_half_width
        Half-width of the shaded band centred on zero.
    band_label
        Optional legend label for the shaded band.

    Returns
    -------
    tuple
        ``(main_ax, hist_ax)`` for further per-panel decoration.
    """
    # One row per individual. The shaded band uses the same extent as the
    # points and intervals instead of a hard-coded length, so it cannot drift
    # out of sync with the data.
    y_positions = range(len(means))
    brand_means = means.sel(alts_intercepts=brand)

    main_ax = fig.add_subplot(gs[1, column])
    main_ax.set_yticklabels([])

    hist_ax = fig.add_subplot(gs[0, column], sharex=main_ax)
    hist_ax.set_title(f"Expected Modifications \n to {brand.capitalize()} Baseline", fontsize=10)
    hist_ax.hist(brand_means, bins=30, ec="black", color="red")
    hist_ax.set_yticklabels([])
    hist_ax.tick_params(labelsize=8)

    main_ax.hlines(
        y_positions,
        lower.sel(alts_intercepts=brand),
        upper.sel(alts_intercepts=brand),
        color="black",
        alpha=0.3,
    )
    main_ax.scatter(brand_means, y_positions, color="red", ec="white")
    # Only pass a label when one is requested, so unlabelled panels keep
    # matplotlib's default (no legend entry).
    band_kwargs = {} if band_label is None else {"label": band_label}
    main_ax.fill_betweenx(
        y_positions, -band_half_width, band_half_width, alpha=0.2, color="red", **band_kwargs
    )
    main_ax.tick_params(labelsize=8)
    return main_ax, hist_ax


fig = plt.figure(figsize=(10, 9))
gs = fig.add_gridspec(
    2,
    3,
    width_ratios=(4, 4, 4),
    height_ratios=(1, 7),
    left=0.1,
    right=0.9,
    bottom=0.1,
    top=0.9,
    wspace=0.05,
    hspace=0.05,
)

# Left panel: nabisco, which also carries the shared y-axis label.
ax, ax_histx = _plot_individual_panel(fig, gs, 0, "nabisco", predicted, ci_lb, ci_ub)
ax.set_ylabel("Individuals", fontsize=10)

# Middle panel: keebler, which carries the shared x-axis label and the legend.
ax1, ax_histx = _plot_individual_panel(
    fig, gs, 1, "keebler", predicted, ci_lb, ci_ub, band_label="Negligible \n Region"
)
ax1.set_xlabel("Individual Modifications to the Product Intercept", fontsize=10)
ax1.legend(fontsize=10)

# Right panel: sunshine.
ax2, ax_histx = _plot_individual_panel(fig, gs, 2, "sunshine", predicted, ci_lb, ci_ub)

plt.suptitle("Individual Differences by Product", fontsize=20);


# Load the watermark IPython extension and print the environment this notebook
# was run in (timestamp, Python/IPython versions, versions of the imported
# packages and of pytensor) so the results can be reproduced.
%load_ext watermark
%watermark -n -u -v -iv -w -p pytensor

Last updated: Thu Jul 13 2023

Python implementation: CPython
Python version       : 3.9.16
IPython version      : 8.11.0

pytensor: 2.11.1

numpy     : 1.23.5
matplotlib: 3.7.1
pytensor  : 2.11.1
pandas    : 1.5.3
arviz     : 0.15.1
pymc      : 5.3.0

Watermark: 2.3.1

	idcase	alt_id	choice	depvar	income	agehed	rooms	region	installation_costs	operating_costs
0	1	1	1	gc	7	25	6	ncostl	866.00	199.69
1	1	2	0	gc	7	25	6	ncostl	962.64	151.72
2	1	3	0	gc	7	25	6	ncostl	859.90	553.34
3	1	4	0	gc	7	25	6	ncostl	995.76	505.60
4	1	5	0	gc	7	25	6	ncostl	1135.50	237.88

	mean	sd	hdi_3%	hdi_97%	mcse_mean	mcse_sd	ess_bulk	ess_tail	r_hat
beta_ic	0.001	0.000	-0.000	0.001	0.000	0.000	1215.0	1612.0	1.00
beta_oc	-0.003	0.001	-0.005	-0.001	0.000	0.000	1379.0	1743.0	1.00
alpha[ec]	1.039	0.497	0.076	1.936	0.016	0.012	908.0	1054.0	1.00
alpha[er]	1.077	0.474	0.216	1.988	0.016	0.012	839.0	991.0	1.00
alpha[gc]	2.376	0.309	1.789	2.953	0.011	0.008	814.0	836.0	1.01
alpha[gr]	0.733	0.373	-0.031	1.374	0.013	0.009	854.0	947.0	1.01

	mean	sd	hdi_3%	hdi_97%	mcse_mean	mcse_sd	ess_bulk	ess_tail	r_hat
beta_income[ec]	0.0971	0.1074	-0.1025	0.3046	0.0035	0.0025	936.3265	1900.0530	1.0033
beta_income[er]	0.0655	0.1047	-0.1187	0.2695	0.0036	0.0025	839.1058	1613.9147	1.0017
beta_income[gc]	0.0673	0.0867	-0.1058	0.2202	0.0032	0.0023	722.9224	1321.0255	1.0028
beta_income[gr]	-0.0318	0.0977	-0.2220	0.1441	0.0034	0.0024	807.8161	1624.6096	1.0020
beta_ic	0.0004	0.0007	-0.0009	0.0016	0.0000	0.0000	752.9909	914.0799	1.0019
beta_oc	-0.0035	0.0015	-0.0064	-0.0007	0.0000	0.0000	1436.0405	2066.3187	1.0015
alpha[ec]	1.0354	1.0479	-0.4211	3.0541	0.0470	0.0333	520.2449	1178.1694	1.0063
alpha[er]	1.2391	1.0751	-0.3175	3.2426	0.0507	0.0358	441.6820	991.4928	1.0064
alpha[gc]	2.3718	0.7613	1.1220	3.7710	0.0366	0.0259	414.8905	699.3486	1.0073
alpha[gr]	1.2014	0.8524	-0.0952	2.8006	0.0402	0.0284	442.3913	1198.3044	1.0053
chol_corr[0, 0]	1.0000	0.0000	1.0000	1.0000	0.0000	0.0000	4000.0000	4000.0000	NaN
chol_corr[0, 1]	0.1184	0.3671	-0.5402	0.7923	0.0074	0.0062	2518.0052	2043.9328	1.0015
chol_corr[0, 2]	0.1427	0.3705	-0.5480	0.7769	0.0093	0.0066	1673.7845	1975.7307	1.0020
chol_corr[0, 3]	0.1157	0.3753	-0.5676	0.7683	0.0079	0.0056	2319.4753	2119.7780	1.0012
chol_corr[1, 0]	0.1184	0.3671	-0.5402	0.7923	0.0074	0.0062	2518.0052	2043.9328	1.0015
chol_corr[1, 1]	1.0000	0.0000	1.0000	1.0000	0.0000	0.0000	4239.6296	4000.0000	0.9996
chol_corr[1, 2]	0.1675	0.3483	-0.4430	0.8095	0.0079	0.0056	1978.9399	1538.2851	1.0011
chol_corr[1, 3]	0.1526	0.3561	-0.4722	0.7963	0.0070	0.0050	2595.1991	3126.5524	1.0014
chol_corr[2, 0]	0.1427	0.3705	-0.5480	0.7769	0.0093	0.0066	1673.7845	1975.7307	1.0020
chol_corr[2, 1]	0.1675	0.3483	-0.4430	0.8095	0.0079	0.0056	1978.9399	1538.2851	1.0011
chol_corr[2, 2]	1.0000	0.0000	1.0000	1.0000	0.0000	0.0000	3929.0431	4000.0000	1.0007
chol_corr[2, 3]	0.1757	0.3411	-0.4384	0.7867	0.0071	0.0051	2260.6724	2564.2728	1.0017
chol_corr[3, 0]	0.1157	0.3753	-0.5676	0.7683	0.0079	0.0056	2319.4753	2119.7780	1.0012
chol_corr[3, 1]	0.1526	0.3561	-0.4722	0.7963	0.0070	0.0050	2595.1991	3126.5524	1.0014
chol_corr[3, 2]	0.1757	0.3411	-0.4384	0.7867	0.0071	0.0051	2260.6724	2564.2728	1.0017
chol_corr[3, 3]	1.0000	0.0000	1.0000	1.0000	0.0000	0.0000	3954.4789	3702.0363	1.0001

	rank	elpd_loo	p_loo	elpd_diff	weight	se	dse	warning	scale
m2	0	-1023.600927	4.964862	0.000000	1.000000e+00	27.802379	0.000000	False	log
m3	1	-1025.830780	9.954792	2.229854	2.220446e-16	28.086804	2.070976	False	log
m1	2	-1309.610895	1.196878	286.009968	0.000000e+00	12.933024	22.677606	False	log

	mean	sd	hdi_3%	hdi_97%	mcse_mean	mcse_sd	ess_bulk	ess_tail	r_hat
beta_disp	0.023	0.021	0.000	0.061	0.000	0.000	5386.0	2248.0	1.0
beta_feat	0.019	0.018	0.000	0.052	0.000	0.000	6417.0	2283.0	1.0
beta_price	-0.021	0.020	-0.059	-0.000	0.000	0.000	4281.0	2277.0	1.0
alpha[sunshine]	-0.054	0.096	-0.243	0.119	0.002	0.001	3632.0	3220.0	1.0
alpha[keebler]	2.023	0.073	1.886	2.159	0.001	0.001	3302.0	2894.0	1.0
...	...	...	...	...	...	...	...	...	...
beta_individual[135, keebler]	0.012	0.048	-0.077	0.104	0.000	0.001	10419.0	3135.0	1.0
beta_individual[135, nabisco]	-0.009	0.051	-0.108	0.081	0.000	0.001	10504.0	2555.0	1.0
beta_individual[136, sunshine]	-0.002	0.049	-0.089	0.096	0.001	0.001	7867.0	2729.0	1.0
beta_individual[136, keebler]	-0.018	0.048	-0.112	0.071	0.001	0.001	7684.0	2845.0	1.0
beta_individual[136, nabisco]	0.020	0.050	-0.072	0.114	0.001	0.001	8252.0	2559.0	1.0

Discrete Choice and Random Utility Models¶

Discrete Choice Modelling: The Idea¶

The Data¶

Digression on Data Formats¶

The Basic Model¶

Improved Model: Adding Alternative Specific Intercepts¶

Experimental Model: Adding Correlation Structure¶

Compare Models¶

Choosing Crackers over Repeated Choices: Mixed Logit Model¶

Conclusion¶

Authors¶

References¶

Watermark¶

	personId	disp.sunshine	disp.keebler	disp.nabisco	disp.private	feat.sunshine	feat.keebler	feat.nabisco	feat.private	price.sunshine	price.keebler	price.nabisco	price.private	choice	lastChoice	personChoiceId	choiceId
0	1	0	0	0	0	0	0	0	0	0.99	1.09	0.99	0.71	nabisco	nabisco	1	1
1	1	1	0	0	0	0	0	0	0	0.49	1.09	1.09	0.78	sunshine	nabisco	2	2
2	1	0	0	0	0	0	0	0	0	1.03	1.09	0.89	0.78	nabisco	sunshine	3	3
3	1	0	0	0	0	0	0	0	0	1.09	1.09	1.19	0.64	nabisco	nabisco	4	4
4	1	0	0	0	0	0	0	0	0	0.89	1.09	1.19	0.84	nabisco	nabisco	5	5
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
3151	136	0	0	0	0	0	0	0	0	1.09	1.19	0.99	0.55	private	private	9	3152
3152	136	0	0	0	1	0	0	0	0	0.78	1.35	1.04	0.65	private	private	10	3153
3153	136	0	0	0	0	0	0	0	0	1.09	1.17	1.29	0.59	private	private	11	3154
3154	136	0	0	0	0	0	0	0	0	1.09	1.22	1.29	0.59	private	private	12	3155
3155	136	0	0	0	0	0	0	0	0	1.29	1.04	1.23	0.59	private	private	13	3156

Discrete Choice and Random Utility Models¶

Discrete Choice Modelling: The Idea¶

The Data¶

Digression on Data Formats¶

The Basic Model¶

Improved Model: Adding Alternative Specific Intercepts¶

Experimental Model: Adding Correlation Structure¶

Market Interventions and Predicting Market Share¶

Compare Models¶

Choosing Crackers over Repeated Choices: Mixed Logit Model¶

Conclusion¶

Authors¶

References¶

Watermark¶