---
title: "Multilevel Regression and Post-Stratification"
subtitle: "Weighting as a Corrective for Biased Samples"
author: "Nathaniel Forde"
date: today
format:
  html:
    theme: cosmo
    toc: true
    toc-depth: 3
    toc-location: left
    code-fold: false
    code-tools: true
    number-sections: true
    fig-width: 10
    fig-height: 6
execute:
  warning: false
  message: false
bibliography: references.bib
jupyter: applied-bayesian-regression-modeling-env
embed-resources: true
keep-ipynb: true
---


In [1]:
#| echo: false
#| include: false
import warnings
warnings.filterwarnings('ignore')

Introduction: Weighting as a Corrective¶

Learning Objectives¶

By the end of this notebook, you will be able to:

  1. Explain why regression models automatically perform stratification, and why this is insufficient when samples are non-representative
  2. Identify the sources and consequences of sampling bias in survey data
  3. Construct a deliberate bias scenario to test MRP corrections
  4. Specify multilevel regression models with random intercepts and interactions using Bambi
  5. Implement post-stratification by reweighting posterior predictions with census population counts
  6. Evaluate MRP-corrected estimates against census ground truth
  7. Assess the conditions under which MRP is most and least effective

The Central Insight¶

What are we really doing when we fit a regression model? The question arises when first learning the tools of the trade, and again when debugging the strange results of your thousandth logistic regression. We take the view that regression is stratification automated: a regression model computes stratum-specific conditional effects and combines them, weighted by the prevalence of each stratum in the sample data. This is simultaneously regression's great power and its great vulnerability.

When the sample reflects the population, regression gives us the right answer. When the sample is biased --- and samples are almost always biased in some direction --- regression faithfully adjusts to the wrong weights. The model doesn't know or care whether its training data is representative.

Multilevel Regression and Post-Stratification (MRP) is the corrective. The idea is beautifully simple: fit a model to learn how outcomes vary across demographic groups, then re-assemble the predictions using the population's demographic weights rather than the sample's. This notebook walks through the entire MRP workflow, from motivating example through implementation and evaluation.

The Unifying Identity¶

Every method in this notebook is a special case of the law of total expectation:

$$E[Y] = \sum_{j} P(\text{cell}_j) \cdot E[Y \mid \text{cell}_j]$$

This identity decomposes a population-level quantity into two ingredients: how people in each cell behave ($E[Y \mid \text{cell}_j]$) and how prevalent each cell is ($P(\text{cell}_j)$). The methods we encounter through this notebook differ only in how they estimate these two pieces:

| Method | $E[Y \mid \text{cell}_j]$ | $P(\text{cell}_j)$ |
|---|---|---|
| Manual stratification | Raw group means | Hand-computed shares |
| Regression | Model-estimated conditional means | Implicit: sample proportions |
| Survey weighting / raking | Raw group means | Census / design weights |
| MRP | Model-smoothed estimates (partial pooling) | Census population shares |

Read left to right, the table tells you what each method does. Read top to bottom, it tells a story of progressive refinement: from manual calculation, to model-assisted estimation, to model-assisted estimation with corrected weights. MRP sits at the end of this progression --- it combines the best available estimate of cell-level behaviour with the best available knowledge of cell-level prevalence. It is not a separate technique bolted onto regression; it is the natural completion of the logic that regression already embodies.
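The identity can be made concrete with a two-cell toy example (all numbers here are illustrative, not drawn from the survey data). The same conditional means aggregated under different cell shares give different population estimates:

```python
import numpy as np

# Illustrative two-cell population: E[Y | cell] = 0.2 and 0.6
cell_means = np.array([0.2, 0.6])

# True census shares vs. the shares implied by a biased sample
census_shares = np.array([0.3, 0.7])
sample_shares = np.array([0.6, 0.4])

true_mean = np.sum(census_shares * cell_means)   # law of total expectation
naive_mean = np.sum(sample_shares * cell_means)  # what a biased sample implies

print(f"Census-weighted: {true_mean:.2f}")   # 0.48
print(f"Sample-weighted: {naive_mean:.2f}")  # 0.36
```

The conditional means are identical in both calculations; only the weights differ. Everything that follows is about estimating each ingredient well.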

Basu's Elephant¶

::: {.callout-note}

The Parable¶

The statistician Debabrata Basu told a story about weighing elephants in a circus [@basu1971essay]. Asked to estimate the average weight, the statistician was offered free choice of which elephants to weigh. Naturally, the friendliest and most accessible animals were selected. The estimate was wildly wrong --- the docile elephants happened to be the smallest.

The moral: cooperation is not representativeness. Who responds to your survey is not random, and the pattern of non-response carries information about the quantity you're trying to measure. Basu's parable reminds us that the sampling mechanism matters as much as the measurement instrument. A perfectly calibrated scale applied to a biased sample still produces a biased estimate.

This is the problem MRP was designed to solve. Even if we cannot control who responds to our survey, we can correct our estimates using external knowledge about the population's composition.

:::
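The parable is easy to simulate. In this sketch (entirely synthetic numbers) the most docile elephants are the smallest, so a convenience sample of the friendliest animals is biased low:

```python
import numpy as np

rng = np.random.default_rng(0)

# Hypothetical herd: 50 elephants whose weights (kg) are inversely
# related to docility -- the friendliest animals are the smallest
weights = rng.normal(4000, 800, size=50)
docility = -weights + rng.normal(0, 200, size=50)

# "Convenience sample": weigh only the 10 most docile elephants
convenient = weights[np.argsort(docility)[-10:]]

print(f"True mean weight:   {weights.mean():.0f} kg")
print(f"Convenience sample: {convenient.mean():.0f} kg")
```

A perfectly accurate scale cannot rescue this estimate; the selection mechanism has already done the damage.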

In [2]:
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd

plt.rcParams.update({
    'figure.facecolor': 'white',
    'axes.facecolor': 'white',
    'font.size': 12,
    'axes.spines.top': False,
    'axes.spines.right': False,
})

Regression as Stratification¶

The connection between regression and stratification is fundamental and often underappreciated. To make it concrete, we begin with a small, fully transparent example before scaling up to the real-world application.

The Heart Transplant Example¶

Consider this dataset of heart transplant patients, adapted from Hernán and Robins' Causal Inference: What If [@hernan2020causal]. We have 20 patients with binary indicators for treatment status, risk level, and outcome (death).

In [3]:
df = pd.DataFrame(
    {
        "name": [
            "Rheia", "Kronos", "Demeter", "Hades", "Hestia", "Poseidon",
            "Hera", "Zeus", "Artemis", "Apollo", "Leto", "Ares",
            "Athena", "Hephaestus", "Aphrodite", "Cyclope", "Persephone",
            "Hermes", "Hebe", "Dionysus",
        ],
        "Risk_Strata": [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        "Treatment": [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        "Outcome": [0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0],
    }
)

df["Treatment_x_Risk_Strata"] = df.Treatment * df.Risk_Strata
df
Out[3]:
name Risk_Strata Treatment Outcome Treatment_x_Risk_Strata
0 Rheia 0 0 0 0
1 Kronos 0 0 1 0
2 Demeter 0 0 0 0
3 Hades 0 0 0 0
4 Hestia 0 1 0 0
5 Poseidon 0 1 0 0
6 Hera 0 1 0 0
7 Zeus 0 1 1 0
8 Artemis 1 0 1 0
9 Apollo 1 0 1 0
10 Leto 1 0 0 0
11 Ares 1 1 1 1
12 Athena 1 1 1 1
13 Hephaestus 1 1 1 1
14 Aphrodite 1 1 1 1
15 Cyclope 1 1 1 1
16 Persephone 1 1 1 1
17 Hermes 1 1 0 1
18 Hebe 1 1 0 1
19 Dionysus 1 1 0 1

If treatment assignment were completely randomised, we'd expect a reasonable balance of risk levels across treated and untreated groups. Computing a simple average, the treated group appears to do worse:

In [4]:
simple_average = (
    df.groupby("Treatment")[["Outcome"]]
    .mean()
    .rename({"Outcome": "Share"}, axis=1)
)
simple_average
Out[4]:
Share
Treatment
0 0.428571
1 0.538462

This suggests an alarming causal effect: treatment seems to increase the risk of death.

In [5]:
causal_risk_ratio = simple_average.iloc[1]["Share"] / simple_average.iloc[0]["Share"]
print(f"Naive Causal Risk Ratio: {causal_risk_ratio:.2f}")
Naive Causal Risk Ratio: 1.26

Stratification Dissolves the Paradox¶

The paradox is driven by an imbalance in risk levels across treatment groups. High-risk patients are disproportionately assigned treatment:

In [6]:
df.groupby("Risk_Strata")[["Treatment"]].count().assign(
    proportion=lambda x: x["Treatment"] / len(df)
)
Out[6]:
Treatment proportion
Risk_Strata
0 8 0.4
1 12 0.6

When we condition on risk stratum --- looking at the effect of treatment within each risk level --- the paradox vanishes:

In [7]:
outcomes_controlled = (
    df.groupby(["Risk_Strata", "Treatment"])[["Outcome"]]
    .mean()
    .reset_index()
    .pivot(index="Treatment", columns=["Risk_Strata"], values="Outcome")
)
outcomes_controlled
Out[7]:
Risk_Strata 0 1
Treatment
0 0.25 0.666667
1 0.25 0.666667

Within each stratum, treated and untreated patients have identical death rates. The naive comparison was confounded by the unequal distribution of risk levels. We can recover the correct aggregate by weighting each stratum by its population share --- this is the law of total expectation in action, with two strata standing in for the cells:

In [8]:
n_low = (df["Risk_Strata"] == 0).sum() / len(df)
n_high = (df["Risk_Strata"] == 1).sum() / len(df)

weighted_avg = outcomes_controlled.assign(
    weighted_average=lambda x: x[0] * n_low + x[1] * n_high
)
weighted_avg
Out[8]:
Risk_Strata 0 1 weighted_average
Treatment
0 0.25 0.666667 0.5
1 0.25 0.666667 0.5
In [9]:
corrected_ratio = (
    weighted_avg.iloc[1]["weighted_average"] / weighted_avg.iloc[0]["weighted_average"]
)
print(f"Corrected Causal Risk Ratio: {corrected_ratio:.2f}")
Corrected Causal Risk Ratio: 1.00

A risk ratio of 1.0 confirms that treatment has no net effect once we properly account for the confounding variable.

Regression Does This Automatically¶

The manual stratification above involved grouping, computing conditional means, and weighting. Regression automates all three steps. Compare a naive model (ignoring risk) with a stratified model (controlling for risk and its interaction with treatment):

In [10]:
reg = bmb.Model("Outcome ~ 1 + Treatment", df)
results = reg.fit(random_seed=42)

reg_strata = bmb.Model(
    "Outcome ~ 1 + Treatment + Risk_Strata + Treatment_x_Risk_Strata", df
)
results_strata = reg_strata.fit(random_seed=42)
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, Intercept, Treatment]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, Intercept, Treatment, Risk_Strata, Treatment_x_Risk_Strata]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 1 seconds.
In [11]:
az.summary(results, var_names=["Treatment"])
Out[11]:
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
Treatment 0.112 0.262 -0.388 0.59 0.004 0.005 3888.0 2375.0 1.0
In [12]:
az.summary(results_strata, var_names=["Treatment"])
Out[12]:
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
Treatment -0.0 0.371 -0.701 0.688 0.008 0.006 2161.0 2415.0 1.0

The treatment effect shrinks toward zero in the stratified model. We can visualise this directly:

In [13]:
ax = az.plot_forest(
    [results, results_strata],
    model_names=["Naive Model", "Stratified Model"],
    var_names=["Treatment"],
    kind="ridgeplot",
    ridgeplot_alpha=0.4,
    combined=True,
    figsize=(10, 5),
)
ax[0].axvline(0, color="black", linestyle="--")
ax[0].set_title("Treatment Effects: Naive vs. Stratified Models");

::: {.callout-important}

The Catch¶

Regression adjusts for the variables you include, weighted by their prevalence in the sample. If your sample doesn't reflect the population, the regression will faithfully stratify according to the wrong weights. The model has no way to know that its training data is non-representative.

This is where post-stratification enters.

:::

::: {.callout-note}

Weighting Was Always There: From OLS to WLS to MRP¶

The idea that regression involves weighting is not a metaphor --- it is literal. Ordinary least squares minimises $\sum_i (y_i - \hat{y}_i)^2$, which assigns equal weight to every observation. This is itself a weighting choice: it assumes each data point is equally informative about the population quantity of interest. When that assumption fails --- because observations have unequal variance, or because some subgroups are over-represented --- the estimates inherit the distortion.

Weighted least squares (WLS) makes the weighting explicit: it minimises $\sum_i w_i (y_i - \hat{y}_i)^2$, where $w_i$ can reflect precision (inverse-variance weighting) or sampling design (survey weights). Classical survey statisticians have long used design weights $w_i = 1/\pi_i$ (the inverse of an observation's probability of inclusion) to correct for unequal sampling --- this is the Horvitz-Thompson estimator, and it is the direct frequentist ancestor of what MRP does.

The progression is:

  1. OLS: equal weights (assumes representative sample)
  2. WLS / survey weighting: observation-level weights correct for known sampling imbalance
  3. MRP: a model estimates cell-level expectations, then census weights correct the aggregation

Each step refines the same underlying operation --- the weighted sum in the law of total expectation. MRP's contribution is recognising that you can do both at once: use a hierarchical model to estimate cell-level quantities well (partial pooling, interactions), and use external population data to weight the aggregation correctly. It is not a departure from regression; it is regression with the weighting made honest.

:::
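The design-weighted correction in step 2 can be sketched with synthetic data (not the CCES survey), using the ratio (Hájek) form of the inverse-probability estimator:

```python
import numpy as np

rng = np.random.default_rng(42)

# Population of 10,000 with a binary trait whose prevalence differs by group
group = rng.binomial(1, 0.3, size=10_000)            # 30% belong to group 1
y = rng.binomial(1, np.where(group == 1, 0.8, 0.2))  # group 1 supports at 80%

# Unequal inclusion probabilities: group 1 is 5x as likely to respond
pi = np.where(group == 1, 0.25, 0.05)
sampled = rng.random(10_000) < pi

# Naive sample mean vs. inverse-probability (1/pi) weighted estimate
naive = y[sampled].mean()
ht = np.sum(y[sampled] / pi[sampled]) / np.sum(1 / pi[sampled])

print(f"True mean:   {y.mean():.3f}")
print(f"Naive:       {naive:.3f}")
print(f"IP-weighted: {ht:.3f}")
```

The naive mean is pulled toward the over-sampled group; weighting each respondent by $1/\pi_i$ undoes the distortion, which is exactly what MRP later achieves at the cell level with census counts.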

We can also confirm the adjustment by looking at predicted outcomes under each model. The plot_predictions function shows how the stratified model reveals the equal outcomes within risk groups:

In [14]:
fig, axs = plt.subplots(1, 2, figsize=(16, 5))
bmb.interpret.plot_predictions(reg, results, ["Treatment"], ax=axs[0])
bmb.interpret.plot_predictions(reg_strata, results_strata, ["Treatment"], ax=axs[1])
axs[0].set_title("Non-Stratified Regression\nModel Predictions")
axs[1].set_title("Stratified Regression\nModel Predictions");
Default computed for conditional variable: Treatment
Default computed for conditional variable: Treatment
Default computed for unspecified variable: Risk_Strata, Treatment_x_Risk_Strata

The Need for Post-Stratification¶

We've established that regression automates stratification within the data it sees. But what if the data itself is skewed? In the context of national surveys, there is always a concern that respondents may not reflect the population across key demographics. It would be hard to put much faith in a survey's accuracy if 90% of respondents were male on a question about the lived experience of women.

Recall the unifying identity: $E[Y] = \sum_j P(\text{cell}_j) \cdot E[Y \mid \text{cell}_j]$. Regression gives us good estimates of the conditional expectations $E[Y \mid \text{cell}_j]$, but it uses the sample's cell proportions $P_{\text{sample}}(\text{cell}_j)$ for aggregation. When we know that these sample proportions are wrong, we can substitute the census proportions instead. This is Multilevel Regression and Post-Stratification (MRP), a technique widely applied in political polling, market research, public health estimation, and beyond [@park2004bayesian].

Loading the Survey Data¶

We use data from the Cooperative Congressional Election Study (CCES) 2018 [@schaffner2019cces], a US nationwide survey administered by YouGov. The outcome of interest is a binary question: Should employers be able to decline coverage of abortions in insurance plans?

Following the workflow in @martin2020mrp, we clean the raw survey data, collapsing fine-grained demographic categories into broader groupings suitable for modeling.

In [15]:
cces_all_df = pd.read_csv("../../data/mr_p_cces18_common_vv.csv.gz", low_memory=False)
print(f"Full dataset: {cces_all_df.shape[0]:,} respondents, {cces_all_df.shape[1]} columns")
Full dataset: 60,000 respondents, 526 columns
In [16]:
states = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
]

lkup_states = dict(zip(range(1, 56), states))

ethnicity = [
    "White", "Black", "Hispanic", "Asian",
    "Native American", "Mixed", "Other", "Middle Eastern",
]
lkup_ethnicity = dict(zip(range(1, 9), ethnicity))

edu = ["No HS", "HS", "Some college", "Associates", "4-Year College", "Post-grad"]
lkup_edu = dict(zip(range(1, 7), edu))


def clean_df(df):
    """Clean CCES data into demographic strata suitable for MRP."""
    df = df.copy()
    # Binary outcome: 0 = oppose, 1 = support
    df["abortion"] = np.abs(df["CC18_321d"] - 2)
    df["state"] = df["inputstate"].map(lkup_states)
    # Gender: centered contrast coding (-0.5 = female, +0.5 = male)
    df["male"] = np.abs(df["gender"] - 2) - 0.5
    # Ethnicity: collapse small groups into "Other"
    df["eth"] = df["race"].map(lkup_ethnicity)
    df["eth"] = np.where(
        df["eth"].isin(["Asian", "Other", "Middle Eastern", "Mixed", "Native American"]),
        "Other",
        df["eth"],
    )
    # Age brackets
    df["age"] = 2018 - df["birthyr"]
    df["age"] = pd.cut(
        df["age"].astype(int),
        [0, 29, 39, 49, 59, 69, 120],
        labels=["18-29", "30-39", "40-49", "50-59", "60-69", "70+"],
        ordered=True,
    )
    # Education: collapse Associates into Some college
    df["edu"] = df["educ"].map(lkup_edu)
    df["edu"] = np.where(
        df["edu"].isin(["Some college", "Associates"]), "Some college", df["edu"]
    )
    df = df[["abortion", "state", "eth", "male", "age", "edu", "caseid"]]
    return df.dropna()


statelevel_predictors_df = pd.read_csv("../../data/mr_p_statelevel_predictors.csv")

cces_all_df = clean_df(cces_all_df)
print(f"After cleaning: {cces_all_df.shape[0]:,} respondents")
cces_all_df.head()
After cleaning: 55,035 respondents
Out[16]:
abortion state eth male age edu caseid
0 1.0 MS Other -0.5 50-59 Some college 123464282
1 1.0 WA White -0.5 40-49 HS 170169205
2 1.0 RI White -0.5 60-69 Some college 175996005
3 0.0 CO Other -0.5 70+ Post-grad 176818556
4 1.0 MA White -0.5 40-49 HS 202120533

::: {.callout-note}

Design Choices in Cleaning¶

Even the cleaning step involves consequential modeling decisions. We collapse 8 ethnicity categories into 4, merge education levels, and bin continuous age into 6 brackets. Each choice determines the granularity of the post-stratification cells. Finer strata provide more precise adjustments but require more data to estimate reliably. Coarser strata are easier to estimate but may miss important within-group variation.

Gender is coded as a centered contrast ($-0.5$ for female, $+0.5$ for male) rather than a 0/1 indicator. This means the model intercept represents the average across genders rather than the female-specific baseline, which is often more interpretable.

:::
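The effect of the centered contrast on the intercept is easy to verify with a tiny linear fit (illustrative numbers, not the survey data):

```python
import numpy as np

# Illustrative group means: 0.40 support among women, 0.50 among men
y = np.array([0.4, 0.4, 0.5, 0.5, 0.4, 0.5])
male_01 = np.array([0, 0, 1, 1, 0, 1])  # 0/1 dummy coding
male_c = male_01 - 0.5                  # centered contrast (-0.5 / +0.5)

# Least-squares fits under each coding; only the intercept changes meaning
X01 = np.column_stack([np.ones(6), male_01])
Xc = np.column_stack([np.ones(6), male_c])
b01, *_ = np.linalg.lstsq(X01, y, rcond=None)
bc, *_ = np.linalg.lstsq(Xc, y, rcond=None)

print(f"{b01[0]:.2f}")  # 0.40: female baseline
print(f"{bc[0]:.2f}")   # 0.45: average across genders
```

The slope is identical under both codings; only the reference point of the intercept moves.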

Creating a Deliberately Biased Sample¶

To demonstrate MRP's corrective power, we construct a deliberately biased sample. Real-world bias is rarely this extreme, but it makes the problem visible and the correction measurable. We weight the sampling probability to favour older, male, white respondents from Republican-leaning states:

In [17]:
cces_df = cces_all_df.merge(
    statelevel_predictors_df, left_on="state", right_on="state", how="left"
)
cces_df["weight"] = (
    5 * cces_df["repvote"]
    + (cces_df["age"] == "18-29") * 0.5
    + (cces_df["age"] == "30-39") * 1
    + (cces_df["age"] == "40-49") * 2
    + (cces_df["age"] == "50-59") * 4
    + (cces_df["age"] == "60-69") * 6
    + (cces_df["age"] == "70+") * 8
    + (cces_df["male"] == 0.5) * 20
    + (cces_df["eth"] == "White") * 1.05
)

cces_df = cces_df.sample(5000, weights="weight", random_state=1000)
print(f"Biased sample: {len(cces_df):,} respondents")
Biased sample: 5,000 respondents

Visualising the Bias¶

The following plot compares the proportion supporting the policy within each demographic category, as reported in the biased sample versus the full dataset (our stand-in for the census). The vertical lines connecting the two points make the direction and magnitude of the bias visible:

In [18]:
mosaic = """
    ABCD
    EEEE
    """

fig = plt.figure(layout="constrained", figsize=(18, 10))
ax_dict = fig.subplot_mosaic(mosaic)


def plot_var(var, ax):
    a = (
        cces_df.groupby(var, observed=False)[["abortion"]]
        .mean()
        .rename({"abortion": "share"}, axis=1)
        .reset_index()
    )
    b = (
        cces_all_df.groupby(var, observed=False)[["abortion"]]
        .mean()
        .rename({"abortion": "share_census"}, axis=1)
        .reset_index()
    )
    a = a.merge(b).sort_values("share")
    ax_dict[ax].vlines(a[var], a.share, a.share_census, color="gray", linestyle="--")
    ax_dict[ax].scatter(a[var], a.share, color="#C62828", s=50, zorder=3, label="Biased Sample")
    ax_dict[ax].scatter(
        a[var], a.share_census, color="#2E7D32", s=50, zorder=3, label="Full Data"
    )
    ax_dict[ax].set_ylabel("Proportion Supporting")
    ax_dict[ax].tick_params(axis='x', rotation=45)


plot_var("age", "A")
plot_var("edu", "B")
plot_var("male", "C")
plot_var("eth", "D")
plot_var("state", "E")

ax_dict["E"].legend(fontsize=12)
ax_dict["C"].set_xlabel("Female (-0.5) / Male (0.5)")
plt.suptitle(
    "Comparison of Proportions: Biased Survey Sample vs. Full Dataset", fontsize=18
);

The bias is systematic. The biased sample consistently under-represents certain demographic profiles, leading to aggregate distortions.

Quantifying the Aggregate Bias¶

In [19]:
def get_se_bernoulli(p, n):
    return np.sqrt(p * (1 - p) / n)


sample_biased = {
    "data": "Biased Sample",
    "mean": np.mean(cces_df["abortion"].astype(float)),
    "se": get_se_bernoulli(np.mean(cces_df["abortion"].astype(float)), len(cces_df)),
    "n": len(cces_df),
}

sample_full = {
    "data": "Full Dataset",
    "mean": np.mean(cces_all_df["abortion"].astype(float)),
    "se": get_se_bernoulli(np.mean(cces_all_df["abortion"].astype(float)), len(cces_all_df)),
    "n": len(cces_all_df),
}

pd.DataFrame([sample_full, sample_biased]).set_index("data")
Out[19]:
mean se n
data
Full Dataset 0.434051 0.002113 55035
Biased Sample 0.467400 0.007056 5000

::: {.callout-important}

A 3% Gap Matters¶

A 3 percentage point difference in a national survey is a substantial error when the difference is due to preventable bias. In a close election, this margin decides outcomes. In market research, it redirects millions in advertising spend. MRP offers a principled correction when we know the population structure. :::

Fitting the Regression Model¶

Aggregating to Demographic Cells¶

To fit a binomial regression efficiently, we first aggregate the individual-level survey responses into demographic cells. Each cell is a unique combination of (state, ethnicity, gender, age, education), and we count the number of respondents and the number supporting the policy within each cell.

In [20]:
model_df = (
    cces_df.groupby(["state", "eth", "male", "age", "edu"], observed=False)
    .agg({"caseid": "nunique", "abortion": "sum"})
    .reset_index()
    .sort_values("abortion", ascending=False)
    .rename({"caseid": "n"}, axis=1)
    .merge(statelevel_predictors_df, left_on="state", right_on="state", how="left")
)
model_df["abortion"] = model_df["abortion"].astype(int)
model_df["n"] = model_df["n"].astype(int)

print(f"Aggregated data: {len(model_df):,} demographic cells")
print(f"Total respondents: {model_df['n'].sum():,}")
print(f"Cells with 0 respondents: {(model_df['n'] == 0).sum():,}")
model_df.head(10)
Aggregated data: 11,040 demographic cells
Total respondents: 5,000
Cells with 0 respondents: 8,952
Out[20]:
state eth male age edu n abortion repvote region
0 ID White 0.5 70+ 4-Year College 24 17 0.683102 West
1 TN White 0.5 60-69 HS 21 13 0.636243 South
2 CO White 0.5 70+ Post-grad 21 12 0.473167 West
3 CO White 0.5 60-69 Some college 22 11 0.473167 West
4 WV White 0.5 60-69 Some college 15 11 0.721611 South
5 WV White 0.5 50-59 HS 13 10 0.721611 South
6 CO White 0.5 70+ Some college 18 10 0.473167 West
7 ID White 0.5 70+ Some college 19 10 0.683102 West
8 WV White 0.5 70+ Some college 15 10 0.721611 South
9 ID White 0.5 50-59 Some college 11 9 0.683102 West

::: {.callout-tip}

Why Aggregate?¶

Aggregation transforms the data from individual Bernoulli trials (one row per respondent) to binomial counts (one row per demographic cell). The statistical information is identical, but the binomial representation is far more compact: every respondent who shares a demographic profile collapses into a single row, and the computational saving grows with the size of the survey.

:::
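The collapse can be sketched with a toy individual-level table (hypothetical rows, mirroring the `groupby`/`agg` pattern used above):

```python
import pandas as pd

# Hypothetical individual-level survey: one row per respondent
individual = pd.DataFrame({
    "state": ["CA", "CA", "CA", "NY", "NY"],
    "edu":   ["HS", "HS", "BA", "HS", "BA"],
    "y":     [1, 0, 1, 1, 1],
})

# Collapse to binomial cells: trials n and successes per (state, edu) cell
cells = (
    individual.groupby(["state", "edu"])
    .agg(n=("y", "size"), successes=("y", "sum"))
    .reset_index()
)
print(cells)
```

Each cell's likelihood, $\text{Binomial}(\text{successes} \mid n, p)$, carries the same information about $p$ as the corresponding individual Bernoulli likelihoods, so no inferential content is lost in the aggregation.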

Exploratory Model: Fixed Effects¶

Before fitting the hierarchical model, we fit a simpler fixed-effects model to explore the data structure and assess basic patterns of variation:

In [21]:
formula_base = "p(abortion, n) ~ C(state) + C(eth) + C(edu) + male + repvote"

base_model = bmb.Model(formula_base, model_df, family="binomial")
result_base = base_model.fit(
    random_seed=100,
    target_accept=0.95,
    inference_method="numpyro",
    num_chains=4,
)
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
In [22]:
mosaic = """
AABB
CCCC
"""

fig = plt.figure(layout="constrained", figsize=(18, 7))
axs = fig.subplot_mosaic(mosaic)

bmb.interpret.plot_predictions(base_model, result_base, "eth", ax=axs["A"])
bmb.interpret.plot_predictions(base_model, result_base, "edu", ax=axs["B"])
bmb.interpret.plot_predictions(base_model, result_base, "state", ax=axs["C"])
plt.suptitle("Predicted Support by Demographic Group (Fixed Effects Model)", fontsize=18);
Default computed for conditional variable: eth
Default computed for unspecified variable: edu, male, repvote, state
Default computed for conditional variable: edu
Default computed for unspecified variable: eth, male, repvote, state
Default computed for conditional variable: state
Default computed for unspecified variable: edu, eth, male, repvote

The plot_comparisons function lets us examine how the difference between ethnic groups varies across age and education levels. This reveals whether demographic effects are additive or interact:

In [23]:
fig, ax = bmb.interpret.plot_comparisons(
    model=base_model,
    idata=result_base,
    contrast={"eth": ["Black", "White"]},
    conditional=["age", "edu"],
    comparison_type="diff",
    subplot_kwargs={"main": "age", "group": "edu"},
    fig_kwargs={"figsize": (12, 5), "sharey": True},
    legend=True,
)
ax[0].set_title(
    "Comparison of Black vs. White Support\nWithin Age and Education Strata"
);
Default computed for conditional variable: age, edu
Default computed for unspecified variable: male, repvote, state
In [24]:
bmb.interpret.comparisons(
    model=base_model,
    idata=result_base,
    contrast={"edu": ["Post-grad", "No HS"]},
    conditional={"eth": ["Black", "White"], "state": ["NY", "CA", "ID", "VA"]},
    comparison_type="diff",
)
Default computed for unspecified variable: male, repvote
Out[24]:
term estimate_type value eth state male repvote estimate lower_3.0% upper_97.0%
0 edu diff (Post-grad, No HS) Black CA 0.0 0.530191 0.076576 0.001871 0.159316
1 edu diff (Post-grad, No HS) Black ID 0.0 0.530191 0.065617 -0.002249 0.138209
2 edu diff (Post-grad, No HS) Black NY 0.0 0.530191 0.064058 0.001890 0.142652
3 edu diff (Post-grad, No HS) Black VA 0.0 0.530191 0.050642 -0.003664 0.112990
4 edu diff (Post-grad, No HS) White CA 0.0 0.530191 0.076451 0.003433 0.158124
5 edu diff (Post-grad, No HS) White ID 0.0 0.530191 0.080469 0.004153 0.166141
6 edu diff (Post-grad, No HS) White NY 0.0 0.530191 0.079241 0.002499 0.164317
7 edu diff (Post-grad, No HS) White VA 0.0 0.530191 0.071352 0.001576 0.154288

These exploratory results reveal meaningful variation across demographic strata. The differences between education levels are moderated by state and ethnicity --- suggesting that a model with interactions will better capture the data-generating process.

The Hierarchical Model¶

The exploratory analysis motivates a multilevel model with random intercepts for each demographic grouping and key interactions. The model specification is:

$$ \Pr(y_i = 1) = \text{logit}^{-1}\bigl( \beta_0 + \alpha_{s[i]}^{\text{state}} + \alpha_{r[i]}^{\text{eth}} + \alpha_{e[i]}^{\text{edu}} + \beta^{\text{male}} \cdot \text{Male}_i + \beta^{\text{repvote}} \cdot \text{RepVote}_{s[i]} + \alpha_{g[i],r[i]}^{\text{male.eth}} + \alpha_{e[i],a[i]}^{\text{edu.age}} + \alpha_{e[i],r[i]}^{\text{edu.eth}} \bigr) $$

where $\beta_0$ is the global intercept.

Each $\alpha$ term is a random intercept drawn from a shared distribution (normal with estimated variance). This produces partial pooling: groups with little data are shrunk toward the overall mean, while groups with abundant data retain their empirical estimates.

In [25]:
formula_hier = (
    "p(abortion, n) ~ male + repvote"
    " + (1 | state) + (1 | eth) + (1 | edu)"
    " + (1 | male:eth) + (1 | edu:age) + (1 | edu:eth)"
)

model_hierarchical = bmb.Model(formula_hier, model_df, family="binomial")
result = model_hierarchical.fit(
    draws=2000,
    random_seed=110,
    target_accept=0.95,
    inference_method="numpyro",
)
There were 54 divergences after tuning. Increase `target_accept` or reparameterize.
In [26]:
az.summary(
    result, var_names=["Intercept", "male", "1|edu", "1|eth", "repvote"]
)
Out[26]:
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
Intercept 0.152 0.446 -0.686 0.994 0.009 0.006 2589.0 2934.0 1.0
male 0.425 0.308 -0.164 1.019 0.007 0.007 2528.0 2702.0 1.0
1|edu[4-Year College] -0.038 0.149 -0.337 0.231 0.002 0.004 6190.0 5402.0 1.0
1|edu[HS] 0.001 0.143 -0.276 0.278 0.002 0.004 5784.0 4817.0 1.0
1|edu[No HS] 0.046 0.163 -0.233 0.381 0.002 0.005 6851.0 5888.0 1.0
1|edu[Post-grad] -0.053 0.154 -0.408 0.188 0.002 0.004 6136.0 5726.0 1.0
1|edu[Some college] 0.045 0.150 -0.206 0.356 0.002 0.004 5754.0 5895.0 1.0
1|eth[Black] -0.214 0.346 -0.940 0.360 0.006 0.007 3701.0 3538.0 1.0
1|eth[Hispanic] -0.021 0.325 -0.663 0.639 0.006 0.008 4245.0 2812.0 1.0
1|eth[Other] 0.010 0.338 -0.678 0.633 0.006 0.013 4315.0 3498.0 1.0
1|eth[White] 0.174 0.328 -0.377 0.883 0.005 0.008 4091.0 3770.0 1.0
repvote -1.013 0.555 -2.009 0.074 0.012 0.007 2290.0 4453.0 1.0

::: {.callout-note}

Understanding the Formula¶

| Term | Role |
|---|---|
| `p(abortion, n)` | Binomial outcome: `abortion` successes out of `n` trials per cell |
| `male` | Fixed effect for gender (centered at 0) |
| `repvote` | Fixed effect for state-level Republican vote share |
| `(1 \| state)` | Random intercept per state --- captures state-level variation beyond `repvote` |
| `(1 \| eth)` | Random intercept per ethnicity group |
| `(1 \| edu)` | Random intercept per education level |
| `(1 \| male:eth)` | Random intercept for gender-ethnicity interaction |
| `(1 \| edu:age)` | Random intercept for education-age interaction |
| `(1 \| edu:eth)` | Random intercept for education-ethnicity interaction |

The random intercepts are drawn from normal distributions with estimated variance. Small groups are pulled toward the grand mean; large groups retain their observed values. This is partial pooling in action.

:::
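The shrinkage mechanics can be sketched outside the model: the hierarchical estimate for a group is approximately a precision-weighted compromise between that group's raw mean and the grand mean, so sparse groups shrink more. A minimal sketch with illustrative counts (the between-group variance `tau2` is an assumed value, which the hierarchical model would instead estimate):

```python
import numpy as np

# Illustrative groups as (successes, trials): one large, one tiny
successes = np.array([60, 1])
trials = np.array([100, 2])
raw = successes / trials                 # raw group means: 0.6 and 0.5
grand = successes.sum() / trials.sum()   # fully pooled estimate

# Approximate partial pooling: precision-weighted average of raw mean
# and grand mean, with assumed between-group variance tau2
tau2 = 0.01
sigma2 = raw * (1 - raw) / trials        # within-group sampling variance
weight = tau2 / (tau2 + sigma2)          # how much to trust the raw mean
pooled = weight * raw + (1 - weight) * grand

print(np.round(pooled, 3))  # large group stays near 0.6; tiny group shrinks
```

The 100-observation group keeps essentially its raw estimate; the 2-observation group is pulled most of the way toward the grand mean. This is why the model can produce sensible predictions even for cells with zero respondents.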

We can inspect the model structure visually:

In [27]:
model_hierarchical.graph()
Out[27]:
(Graphviz model diagram: Normal priors on `Intercept`, `male`, and `repvote`; Halfnormal `sigma` hyperpriors with non-centered offsets for each random-intercept group, `state` (46), `eth` (4), `edu` (5), `male:eth` (8), `edu:age` (30), `edu:eth` (20); all feeding the deterministic probability `p` and the Binomial likelihood over the 11,040 cells.)

Model Diagnostics¶

Before using the model for post-stratification, we verify that it fits the observed data adequately. The trace plots should show the hallmarks of healthy sampling: well-mixed, "fuzzy caterpillar" traces with no trends, drift, or stuck chains.

In [28]:
model_hierarchical.predict(result, kind="response")
ax = az.plot_trace(
    result, var_names=["Intercept", "male", "1|edu", "1|eth", "repvote"]
)
plt.suptitle("Trace Plot Diagnostics");
plt.tight_layout();

The posterior predictive check compares the empirical distribution of the outcome with distributions simulated from the posterior:

In [29]:
az.plot_ppc(result, num_pp_samples=500);

Post-Stratification¶

We now turn to the second ingredient of the unifying identity: the cell weights $P(\text{cell}_j)$. The regression model has given us estimates of $E[Y \mid \text{cell}_j]$; the census will supply the correct $P(\text{cell}_j)$ to replace the sample's implicit proportions.
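The mechanics of the identity are worth seeing in miniature. A toy sketch (all numbers invented) with two demographic cells shows how swapping the sample's implicit weights for census weights changes the aggregate, even though the cell-level estimates stay fixed:

```python
import numpy as np

# Toy illustration of the post-stratification identity:
# population estimate = sum_j P(cell_j) * E[Y | cell_j]
cell_support = np.array([0.30, 0.70])    # E[Y | cell_j], from the model

sample_weights = np.array([0.80, 0.20])  # cell shares in a biased sample
census_weights = np.array([0.50, 0.50])  # cell shares in the population

naive = (sample_weights * cell_support).sum()  # what regression implicitly reports
mrp = (census_weights * cell_support).sum()    # post-stratified estimate

print(f"Sample-weighted estimate: {naive:.2f}")  # 0.38
print(f"Census-weighted estimate: {mrp:.2f}")    # 0.50
```

The model's job is the `cell_support` vector; the census's job is the weights. Post-stratification is nothing more than this weighted sum, applied per posterior draw.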

Loading Census Population Data¶

The post-stratification frame is a census-derived dataset giving the population count for every demographic cell. Each cell is a unique combination of (state, ethnicity, gender, age, education):

In [30]:
poststrat_df = pd.read_csv("../../data/mr_p_poststrat_df.csv")
print(f"Post-stratification cells: {len(poststrat_df):,}")
print(f"Total population represented: {poststrat_df['n'].sum():,}")
poststrat_df.head()
Post-stratification cells: 12,000
Total population represented: 228,443,347
Out[30]:
state eth male age educ n
0 AL White -0.5 18-29 No HS 23948
1 AL White -0.5 18-29 HS 59378
2 AL White -0.5 18-29 Some college 104855
3 AL White -0.5 18-29 4-Year College 37066
4 AL White -0.5 18-29 Post-grad 9378

Preparing the Census Data for Prediction¶

We merge the census population counts with the state-level predictors (Republican vote share) and compute the state-level population share for each demographic cell. Crucially, the census frame drives this merge: we keep all census cells, including those with zero survey respondents. The hierarchical model can predict into unobserved cells via partial pooling --- this is one of MRP's key advantages over classical raking or direct estimation.

In [31]:
new_data = poststrat_df.merge(
    statelevel_predictors_df, left_on="state", right_on="state", how="left"
)
new_data.rename({"educ": "edu"}, axis=1, inplace=True)

# Census-driven merge: keep ALL census cells, not just those in the biased sample.
# The hierarchical model can predict into cells with zero survey respondents
# via partial pooling — this is a key advantage of MRP.
new_data = new_data.merge(
    model_df[["state", "eth", "male", "age", "edu"]].drop_duplicates(),
    how="left",
    on=["state", "eth", "male", "age", "edu"],
    indicator=True,
)
n_unobserved = (new_data["_merge"] == "left_only").sum()
print(f"Census cells with no survey respondents: {n_unobserved}")
new_data = new_data.drop(columns=["_merge"])

new_data = new_data.merge(
    new_data.groupby("state")
    .agg({"n": "sum"})
    .reset_index()
    .rename({"n": "state_total"}, axis=1)
)
new_data["state_percent"] = new_data["n"] / new_data["state_total"]
print(f"Prediction dataset: {len(new_data):,} cells")
new_data.head()
Census cells with no survey respondents: 960
Prediction dataset: 12,000 cells
Out[31]:
state eth male age edu n repvote region state_total state_percent
0 AL White -0.5 18-29 No HS 23948 0.643741 South 3671705 0.006522
1 AL White -0.5 18-29 HS 59378 0.643741 South 3671705 0.016172
2 AL White -0.5 18-29 Some college 104855 0.643741 South 3671705 0.028558
3 AL White -0.5 18-29 4-Year College 37066 0.643741 South 3671705 0.010095
4 AL White -0.5 18-29 Post-grad 9378 0.643741 South 3671705 0.002554

::: {.callout-note}

What Changed¶

The prediction dataset now includes every demographic cell in the census, not just those observed in our biased sample. The counts reflect the census population rather than the survey sample, and state_percent gives the fraction of each state's population in each cell. Cells that had no survey respondents will still receive predictions from the hierarchical model, which extrapolates via the random effects structure. This is precisely why MRP uses multilevel models: partial pooling lets us predict into sparse or empty cells without the estimates degenerating. :::
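A quick sanity check worth running on any post-stratification frame: within each state, the cell shares must sum to one. A self-contained sketch with a toy frame (column names match those above, counts invented):

```python
import numpy as np
import pandas as pd

# Toy post-stratification frame: two states, a few cells each.
toy = pd.DataFrame({
    "state": ["AL", "AL", "AL", "AK", "AK"],
    "n":     [100, 300, 600, 250, 750],
})

# Same pattern as above: attach state totals, then compute cell shares.
toy = toy.merge(
    toy.groupby("state")["n"].sum().rename("state_total").reset_index(),
    on="state",
)
toy["state_percent"] = toy["n"] / toy["state_total"]

# The shares within every state should sum to exactly 1.
per_state = toy.groupby("state")["state_percent"].sum()
assert np.allclose(per_state, 1.0)
print(per_state)
```

If this check fails on a real frame, a merge has usually duplicated or dropped rows, and every downstream MRP estimate will be silently mis-weighted.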

Reusable MRP Functions¶

Before applying MRP to our models, we define three reusable functions. Extracting these makes the post-stratification workflow modular: we can apply the same correction procedure to any fitted Bambi model, then compare how different model specifications affect the quality of the MRP correction.

In [32]:
def poststratify_by_state(
    model: bmb.Model,
    idata: az.InferenceData,
    new_data: pd.DataFrame,
    num_samples: int = 2000,
    model_type: str = "hierarchical",
) -> pd.DataFrame:
    """
    Compute state-level MRP estimates from a fitted model and census frame.

    For each state, generates posterior predictions on census-weighted
    demographic cells, then computes the weighted sum per draw. Credible
    intervals are taken from the distribution of draw-level state estimates
    (not from per-cell quantiles, which would overstate uncertainty).

    For hierarchical models (``model_type="hierarchical"``), unseen factor
    levels are handled via ``sample_new_groups=True``, which draws new group
    intercepts from the estimated population distribution. For fixed-effects
    models (``model_type="fixed_effects"``), unseen levels are filtered out
    and within-state population shares are recomputed — a warning is printed
    so the analyst sees which states are dropped.

    Parameters
    ----------
    model : bmb.Model
        A fitted Bambi binomial model.
    idata : az.InferenceData
        Posterior samples from model.fit().
    new_data : pd.DataFrame
        Census prediction frame with columns including state, state_percent,
        and the covariates used in the model formula.
    num_samples : int
        Number of posterior draws to extract.
    model_type : str
        Either ``"hierarchical"`` or ``"fixed_effects"``. Controls whether
        unseen factor levels are predicted via ``sample_new_groups=True``
        (hierarchical) or filtered out (fixed effects).

    Returns
    -------
    pd.DataFrame
        Columns: state, mrp_adjusted, mrp_lb, mrp_ub.
    """
    pred_data = new_data.copy()
    dropped = set()

    if model_type == "fixed_effects":
        # Fixed-effects model: filter out census cells whose categorical
        # levels were never seen during training.
        training_data = model.data
        for col in ["state", "eth", "edu"]:
            if col in training_data.columns and col in pred_data.columns:
                seen_levels = set(training_data[col].unique())
                unseen_mask = ~pred_data[col].isin(seen_levels)
                if unseen_mask.any():
                    unseen = set(pred_data.loc[unseen_mask, col].unique())
                    dropped.update(unseen)
                    pred_data = pred_data[~unseen_mask].copy()

        if dropped:
            print(
                f"  ⚠ Dropped {len(dropped)} unseen factor level(s) "
                f"not in training data: {sorted(dropped)}"
            )
            # Recompute state_percent within the filtered data
            state_totals = (
                pred_data.groupby("state")["n"]
                .sum()
                .reset_index()
                .rename(columns={"n": "state_total"})
            )
            pred_data = pred_data.drop(
                columns=["state_total", "state_percent"], errors="ignore"
            )
            pred_data = pred_data.merge(state_totals, on="state")
            pred_data["state_percent"] = pred_data["n"] / pred_data["state_total"]

    # For hierarchical models, sample_new_groups=True draws group
    # intercepts from the estimated population distribution, enabling
    # prediction for states/groups absent from the training sample.
    # Reset the index so the label-based row lookups below stay aligned with
    # the posterior's positional order after any fixed-effects filtering.
    pred_data = pred_data.reset_index(drop=True)

    predict_kwargs = dict(data=pred_data, inplace=False, kind="response")
    if model_type == "hierarchical":
        predict_kwargs["sample_new_groups"] = True

    result_pred = model.predict(idata, **predict_kwargs)
    posterior = az.extract(result_pred, num_samples=num_samples)["p"]

    mrp_estimates = []
    for s in pred_data["state"].unique():
        idx = pred_data.index[pred_data["state"] == s].tolist()
        weights = pred_data.iloc[idx]["state_percent"].values

        # Weighted sum per draw, then summarise
        mrp_draws = (posterior[idx].values * weights[:, None]).sum(axis=0)

        mrp_estimates.append({
            "state": s,
            "mrp_adjusted": float(np.mean(mrp_draws)),
            "mrp_lb": float(np.quantile(mrp_draws, 0.025)),
            "mrp_ub": float(np.quantile(mrp_draws, 0.975)),
        })

    return pd.DataFrame(mrp_estimates)


def plot_mrp_evaluation(
    state_predicted: pd.DataFrame,
    title_suffix: str = "",
):
    """
    Two-panel MRP evaluation plot: accuracy (top) and uncertainty (bottom).

    States absent from the biased sample (NaN in raw_biased) are still
    plotted for MRP and census, but omitted from the raw scatter/vlines.

    Parameters
    ----------
    state_predicted : pd.DataFrame
        Must have columns: state, raw_biased, mrp_adjusted, census_share,
        raw_lb, raw_ub, mrp_lb, mrp_ub.
    title_suffix : str
        Appended to plot titles (e.g., "(Fixed Effects)").
    """
    fig, axs = plt.subplots(2, 1, figsize=(17, 10))
    ax, ax1 = axs

    # Subset with raw sample data (may exclude states absent from biased sample)
    has_raw = state_predicted.dropna(subset=["raw_biased"])

    # Top panel: accuracy
    ax.scatter(has_raw["state"], has_raw["raw_biased"],
               color="#C62828", label="Raw Biased Sample", s=40, zorder=3)
    ax.scatter(state_predicted["state"], state_predicted["mrp_adjusted"],
               color="#4527A0", label="MRP Adjusted", s=40, zorder=3)
    ax.scatter(state_predicted["state"], state_predicted["census_share"],
               color="#2E7D32", label="Census Ground Truth", s=40, zorder=3)
    ax.vlines(state_predicted["state"],
              state_predicted["mrp_adjusted"], state_predicted["census_share"],
              color="black", linestyles="--", alpha=0.5)
    ax.legend(fontsize=12)
    ax.set_xlabel("State")
    ax.set_ylabel("Proportion Supporting")
    ax.set_title(f"Post-Stratified Adjustment vs. Census Ground Truth {title_suffix}", fontsize=16)

    # Bottom panel: uncertainty
    ax1.scatter(has_raw["state"], has_raw["raw_biased"],
                color="#C62828", label="Raw Biased Sample", s=40, zorder=3)
    ax1.scatter(state_predicted["state"], state_predicted["mrp_adjusted"],
                color="#4527A0", label="MRP Adjusted", s=40, zorder=3)
    ax1.vlines(has_raw["state"],
               has_raw["raw_ub"], has_raw["raw_lb"],
               color="#C62828", alpha=0.6)
    ax1.vlines(state_predicted["state"],
               state_predicted["mrp_ub"], state_predicted["mrp_lb"],
               color="#4527A0", alpha=0.6)
    ax1.legend(fontsize=12)
    ax1.set_xlabel("State")
    ax1.set_ylabel("Proportion Supporting")
    ax1.set_title(f"Uncertainty: Raw Sample vs. MRP {title_suffix}", fontsize=16)

    return fig, axs


def compute_mrp_mae(state_predicted: pd.DataFrame, label: str = "") -> dict:
    """Compute mean absolute error of raw and MRP estimates vs. census.

    Raw MAE is computed only over states present in the biased sample.
    MRP MAE is computed over *all* states the model can predict.
    """
    n_states_mrp = state_predicted["mrp_adjusted"].notna().sum()
    has_raw = state_predicted.dropna(subset=["raw_biased"])
    n_states_raw = len(has_raw)

    raw_mae = np.abs(
        has_raw["raw_biased"] - has_raw["census_share"]
    ).mean()
    mrp_mae = np.abs(
        state_predicted["mrp_adjusted"] - state_predicted["census_share"]
    ).mean()
    reduction = (1 - mrp_mae / raw_mae) * 100

    print(f"--- {label} ---")
    print(f"  States covered (MRP): {n_states_mrp},  (raw sample): {n_states_raw}")
    print(f"  MAE (raw biased sample): {raw_mae:.4f}")
    print(f"  MAE (MRP adjusted):      {mrp_mae:.4f}")
    print(f"  Reduction in error:      {reduction:.1f}%")
    return {
        "model": label, "n_states_mrp": n_states_mrp,
        "n_states_raw": n_states_raw,
        "raw_mae": raw_mae, "mrp_mae": mrp_mae, "reduction_pct": reduction,
    }

::: {.callout-note}

Why Functions?¶

Extracting post-stratification into reusable functions lets us separate two questions that MRP combines: (1) How well does the model estimate cell-level opinions? and (2) How much does census reweighting improve the aggregate? By applying the same poststratify_by_state function to different models, we can isolate the contribution of model specification from the contribution of the reweighting step. :::

Computing Shared Baselines¶

The raw biased sample proportion per state and the census ground truth are model-independent: they serve as the common reference for evaluating any MRP correction.

In [33]:
# --- Raw biased sample baseline ---
# This is what you'd report if you naively trusted the biased survey:
# just the sample proportion within each state, no model, no correction.
raw_biased_by_state = (
    cces_df.groupby("state")[["abortion"]]
    .agg(["mean", "count"])
    .droplevel(0, axis=1)
    .rename(columns={"mean": "raw_biased", "count": "n_respondents"})
    .reset_index()
)

# Standard error for binomial proportion (for approximate CIs)
raw_biased_by_state["raw_se"] = np.sqrt(
    raw_biased_by_state["raw_biased"]
    * (1 - raw_biased_by_state["raw_biased"])
    / raw_biased_by_state["n_respondents"]
)
raw_biased_by_state["raw_lb"] = raw_biased_by_state["raw_biased"] - 1.96 * raw_biased_by_state["raw_se"]
raw_biased_by_state["raw_ub"] = raw_biased_by_state["raw_biased"] + 1.96 * raw_biased_by_state["raw_se"]

# --- Census ground truth ---
census_ground_truth = (
    cces_all_df.groupby("state")[["abortion"]]
    .mean()
    .reset_index()
    .rename(columns={"abortion": "census_share"})
)

Assembling MRP Results¶

A small helper merges MRP estimates with the shared baselines:

In [34]:
def build_mrp_comparison(mrp_df, raw_biased_by_state, census_ground_truth):
    """Merge MRP estimates with raw biased baseline and census ground truth.

    Uses left joins from the MRP frame so that states predicted by the
    hierarchical model but absent from the biased sample are retained
    (with NaN for raw_biased).
    """
    return (
        mrp_df
        .merge(raw_biased_by_state[["state", "raw_biased", "raw_lb", "raw_ub"]],
               on="state", how="left")
        .merge(census_ground_truth, on="state", how="left")
        .sort_values("mrp_adjusted")
    )

Evaluating the Correction¶

We now apply MRP to both the fixed-effects model and the hierarchical model. The raw biased sample baseline is the same for both --- it's what a naive analyst would report by averaging survey responses without any modeling or correction. The two MRP corrections differ only in the model used to estimate cell-level proportions.

MRP with the Fixed-Effects Model¶

The fixed-effects model uses C(state) + C(eth) + C(edu) --- independent dummy variables with no information sharing across groups. This creates an immediate practical problem: if the biased sample is missing any states entirely, the model has no coefficient for those states and cannot generate predictions for their census cells. The function handles this by dropping unseen levels and recomputing within-state population shares, but the states themselves are lost from the MRP output.

This is already a powerful argument for hierarchical models: a random-effects specification with (1 | state) can predict into new states by drawing from the group-level distribution.
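The coverage gap can be diagnosed before fitting anything with a simple set difference. A toy sketch (the state lists here are invented for illustration, not read from the data):

```python
# Which census states have no survey respondents, and therefore no
# fixed-effect coefficient? (Toy lists for illustration.)
census_states = {"AL", "AK", "AZ", "CA", "CT", "IN", "NY", "TX"}
survey_states = {"AL", "AK", "CA", "NY"}

unseen = sorted(census_states - survey_states)
print(f"States a fixed-effects model cannot predict: {unseen}")

# A (1 | state) random intercept, by contrast, can still generate
# predictions for these states by sampling from the group-level distribution.
```

Running this kind of check up front tells you whether a fixed-effects specification is even viable for your target population.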

In [35]:
mrp_base = poststratify_by_state(base_model, result_base, new_data, model_type="fixed_effects")
state_predicted_base = build_mrp_comparison(mrp_base, raw_biased_by_state, census_ground_truth)
  ⚠ Dropped 4 unseen factor level(s) not in training data: ['AZ', 'CT', 'IN', 'TX']
In [36]:
plot_mrp_evaluation(state_predicted_base, title_suffix="(Fixed Effects)");
In [37]:
mae_base = compute_mrp_mae(state_predicted_base, label="Fixed Effects Model")
--- Fixed Effects Model ---
  States covered (MRP): 46,  (raw sample): 46
  MAE (raw biased sample): 0.0619
  MAE (MRP adjusted):      0.0485
  Reduction in error:      21.6%

MRP with the Hierarchical Model¶

The hierarchical model uses random intercepts (1 | state) + (1 | eth) + (1 | edu) plus interaction terms. Partial pooling lets it borrow strength across similar groups and predict sensibly into unobserved cells. Because we call poststratify_by_state with model_type="hierarchical", it passes sample_new_groups=True to the predict call, drawing new group intercepts from the estimated population distribution for any states absent from the biased sample. No states are dropped.

In [38]:
mrp_hier = poststratify_by_state(model_hierarchical, result, new_data, model_type="hierarchical")
state_predicted_hier = build_mrp_comparison(mrp_hier, raw_biased_by_state, census_ground_truth)
In [39]:
plot_mrp_evaluation(state_predicted_hier, title_suffix="(Hierarchical)");
In [40]:
mae_hier = compute_mrp_mae(state_predicted_hier, label="Hierarchical Model")
--- Hierarchical Model ---
  States covered (MRP): 50,  (raw sample): 46
  MAE (raw biased sample): 0.0619
  MAE (MRP adjusted):      0.0404
  Reduction in error:      34.7%

Comparing Model Specifications¶

The two models use the same census weights and the same post-stratification procedure. The only difference is the quality of the cell-level predictions feeding into the reweighting step. Note that the fixed-effects model may cover fewer states than the hierarchical model because it cannot predict into states absent from the biased sample.

In [41]:
n_states_base = len(state_predicted_base)
n_states_hier = len(state_predicted_hier)
print(f"States covered — Fixed Effects: {n_states_base}, Hierarchical: {n_states_hier}")

comparison_df = pd.DataFrame([mae_base, mae_hier])
comparison_df
States covered — Fixed Effects: 46, Hierarchical: 50
Out[41]:
model n_states_mrp n_states_raw raw_mae mrp_mae reduction_pct
0 Fixed Effects Model 46 46 0.061898 0.048537 21.584856
1 Hierarchical Model 50 46 0.061898 0.040396 34.737903
In [42]:
import numpy as np

fig, ax = plt.subplots(figsize=(17, 7))

# 1. Establish the shared horizontal order and coordinate system
state_order = state_predicted_hier["state"].values
x_coords = np.arange(len(state_order))
state_to_x = {s: i for i, s in enumerate(state_order)}

# Use hierarchical ordering as the master index
hier_indexed = state_predicted_hier.set_index("state")

# 2. Plot Census Ground Truth (Diamonds)
ax.scatter(x_coords, hier_indexed.loc[state_order, "census_share"],
           color="#2E7D32", label="Census Ground Truth", s=60, zorder=4, marker="D")

# 3. Plot Hierarchical MRP (Purple dots)
ax.scatter(x_coords, hier_indexed.loc[state_order, "mrp_adjusted"],
           color="#4527A0", label="MRP (Hierarchical)", s=40, zorder=3, alpha=0.8)

# 4. Fixed-effects: map states to their specific integer positions
base_indexed = state_predicted_base.set_index("state")
common_states = [s for s in state_order if s in base_indexed.index]
common_x = [state_to_x[s] for s in common_states]

ax.scatter(common_x, base_indexed.loc[common_states, "mrp_adjusted"],
           color="#E65100", label="MRP (Fixed Effects)", s=40, zorder=3, alpha=0.8)

# 5. Mark missing states (Orange X's)
missing_states = [s for s in state_order if s not in base_indexed.index]
if missing_states:
    missing_x = [state_to_x[s] for s in missing_states]
    missing_y = hier_indexed.loc[missing_states, "census_share"]
    ax.scatter(missing_x, missing_y,
               color="#E65100", marker="x", s=80, zorder=5, alpha=0.7,
               label=f"Missing from Fixed Effects ({len(missing_states)})")

# 6. Apply formatting: map the integer positions back to State names
ax.set_xticks(x_coords)
ax.set_xticklabels(state_order, rotation=45, ha='right', fontsize=10)

ax.legend(fontsize=11, loc='upper left')
ax.set_title("MRP Comparison: Fixed Effects vs. Hierarchical Model", fontsize=16)
ax.set_xlabel("State (Sorted by Predicted Support)")
ax.set_ylabel("Proportion Supporting")
ax.grid(axis='y', alpha=0.3)
plt.tight_layout()

::: {.callout-important}

Model Quality Matters¶

The comparison reveals that MRP effectiveness depends on both the reweighting step and the model that generates cell-level predictions. The hierarchical model outperforms for several reasons:

  1. Coverage: Fixed-effects models cannot predict into states (or other factor levels) absent from the sample. With biased sampling, some states may have zero respondents --- and those states are simply lost. The hierarchical model covers all states by drawing on the group-level distribution.
  2. Partial pooling lets the hierarchical model estimate cells with few or zero respondents by borrowing from similar cells, whereas fixed effects have no mechanism for this.
  3. Interaction terms like (1 | edu:age) capture systematic variation that an additive fixed-effects model misses entirely.
  4. Regularisation via the hierarchical priors prevents overfitting on noisy small-cell estimates.

Post-stratification is not just about reweighting --- it uses the model's cell-level predictions as inputs. Better predictions produce better corrections, regardless of how accurate the census weights are. :::

Exercises¶

::: {.callout-caution}

Exercise 1: Varying the Bias¶

Modify the sampling weights in the biased sample creation step. What happens to the MRP correction when:

  • The bias is milder (e.g., reduce the weight multipliers)?
  • The bias operates along a dimension not included in the model (e.g., bias by income, which we don't model)?

When does MRP succeed, and when does it fail? :::

::: {.callout-caution}

Exercise 2: Model Specification¶

We've compared fixed effects and the full hierarchical model. Now try intermediate specifications --- e.g., random intercepts without interaction terms, or a model with (1 | state) but fixed effects for ethnicity and education. Use poststratify_by_state and compute_mrp_mae to evaluate each, building a table of how MRP accuracy varies with model complexity. You might also use az.compare() with LOO to formally compare the models. :::

::: {.callout-caution}

Exercise 3: Subgroup Estimates¶

Instead of state-level estimates, compute MRP-adjusted estimates for:

  • Age groups within states
  • Education levels nationally
  • Ethnic groups within regions

How does the correction vary across subgroups? Which subgroups benefit most from post-stratification? :::

::: {.callout-caution}

Exercise 4: Prior Sensitivity¶

The hierarchical model uses Bambi's default priors. Experiment with:

  • Tighter priors on the random effect variances (limiting partial pooling)
  • Wider priors (allowing more between-group variation)

How sensitive are the MRP-adjusted estimates to prior specification? :::

Summary and Key Takeaways¶

What We've Learned¶

  1. Regression automatically stratifies across the covariates in the model, weighted by their prevalence in the training data. This is its power and its vulnerability.

  2. Biased samples produce biased estimates, even when the regression model is correctly specified. The model faithfully adjusts to the wrong population weights.

  3. Post-stratification corrects this by replacing sample composition with known population composition. In the language of the unifying identity, the model estimates $E[Y \mid \text{cell}_j]$ and the census supplies the correct $P(\text{cell}_j)$. The model learns what people in each demographic cell think; the census tells us how many people are in each cell.

  4. Multilevel models are essential for MRP because they provide partial pooling across the thousands of demographic cells. Without hierarchical structure, cells with zero respondents would have no estimate.

  5. Bayesian inference propagates uncertainty naturally through the MRP pipeline. Credible intervals for state-level estimates are a byproduct of the posterior, not an additional approximation.

  6. Model specification matters. Post-stratification uses cell-level predictions as inputs, so a model that estimates these poorly will produce poor corrections regardless of how accurate the census weights are. Hierarchical models with partial pooling consistently outperform fixed-effects alternatives for MRP.

When to Use MRP¶

MRP is most effective when:

  • Sample bias is driven by demographics that you can measure and model
  • Census or administrative data are available for the target population
  • You need small-area estimates (e.g., state-level from a national survey)
  • The survey sample is too small for reliable direct estimation within subgroups
  • You want principled uncertainty quantification rather than ad-hoc weighting adjustments

Cautions¶

  • MRP cannot correct for bias along unmeasured dimensions. If non-response is driven by the outcome itself (e.g., people ashamed of their views refuse to participate), demographic reweighting won't help.
  • The choice of strata is consequential. Too few dimensions may miss important sources of bias; too many may create cells that the model cannot estimate reliably.
  • Post-stratification assumes the model generalises correctly to all census cells, including those with zero survey respondents. The partial pooling of hierarchical models helps, but doesn't eliminate this assumption.
  • MRP is a correction, not a cure. It reduces bias but cannot eliminate it. Good survey design remains the first line of defence.

::: {.callout-warning}

The Ecological Fallacy: Why Direction Matters¶

MRP works because it reasons upward: estimating individual-level (or cell-level) behaviour and then aggregating to the population using known weights. The reverse direction --- inferring individual-level relationships from aggregate data --- is the ecological fallacy, and it is dangerous.

To see why, consider a finding that states with higher average income tend to vote for left-leaning candidates. It is tempting to conclude that rich individuals vote left. But the aggregate correlation can be entirely driven by composition: wealthy states may have large urban populations that vote left for reasons unrelated to personal income, while within any given state, higher-income individuals may vote right. The aggregate pattern and the individual pattern can point in opposite directions.
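The reversal is easy to reproduce numerically. In this sketch (entirely synthetic data, with the compositional structure assumed in the comments), the within-state relationship between income and voting left is negative in every state, yet the state-level aggregate relationship is strongly positive:

```python
import numpy as np

rng = np.random.default_rng(42)

# Three synthetic "states": richer states have a higher baseline propensity
# to vote left, but WITHIN each state, higher income lowers that propensity.
state_income_mean = np.array([30.0, 50.0, 70.0])  # poorer -> richer states
state_baseline = np.array([0.30, 0.50, 0.70])     # richer states lean left
within_slope = -0.01  # individual-level effect of income, within a state

incomes, votes, labels = [], [], []
for s in range(3):
    inc = rng.normal(state_income_mean[s], 5.0, 5000)
    p_left = np.clip(
        state_baseline[s] + within_slope * (inc - state_income_mean[s]), 0, 1
    )
    incomes.append(inc)
    votes.append(rng.binomial(1, p_left))
    labels.append(np.full(5000, s))

incomes, votes, labels = map(np.concatenate, (incomes, votes, labels))

# Aggregate (state-level) correlation: positive.
state_mean_income = [incomes[labels == s].mean() for s in range(3)]
state_left_share = [votes[labels == s].mean() for s in range(3)]
agg_corr = np.corrcoef(state_mean_income, state_left_share)[0, 1]

# Within-state correlations: negative in every state.
within_corrs = [
    np.corrcoef(incomes[labels == s], votes[labels == s])[0, 1] for s in range(3)
]

print(f"State-level correlation (income vs. left share): {agg_corr:+.2f}")
print("Within-state correlations:", [f"{c:+.3f}" for c in within_corrs])
```

Anyone reading the individual-level relationship off the aggregate numbers would get the sign exactly wrong, which is the ecological fallacy in miniature.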

MRP avoids this trap by construction. The multilevel model estimates $E[Y \mid \text{cell}_j]$ --- a cell-level quantity grounded in individual responses --- and the post-stratification step aggregates these estimates using population weights. At no point do we infer individual behaviour from group-level summaries. The law of total expectation runs from cells to populations, not the other way around. This asymmetry is not incidental; it is what makes the method principled.

The lesson: aggregation with known weights is safe; disaggregation by assumption is not. Whenever you are tempted to read individual-level conclusions from aggregate patterns, ask whether you have the cell-level data to support the claim --- or whether you are merely projecting the population pattern downward. :::

The Elephant is Everywhere¶

We opened with Basu's elephant parable: the statistician's scales were perfectly calibrated, but the sampling mechanism was fatally biased. MRP is the corrective --- even with a non-representative sample, knowledge of the true population structure lets us adjust our estimates.

The lesson extends far beyond elephants. Every convenience sample is a biased sample. Every online survey is a convenience sample. Every policy decision based on unweighted survey data risks serving the respondents rather than the population. It's not an exaggeration to say that the fates of entire nations can hang on decisions made from poorly understood sampling procedures.

Multilevel regression and post-stratification is an apt tool for making the adjustments required and guiding decision-makers in crucial policy choices --- but it should be used carefully, and with full awareness of what it can and cannot correct.

References¶

::: {#refs} :::

Session Info¶

In [43]:
%load_ext watermark
%watermark -n -u -v -w -p arviz,bambi,numpy,nutpie,pymc,pytensor
Last updated: Wed, 11 Feb 2026

Python implementation: CPython
Python version       : 3.13.11
IPython version      : 9.8.0

arviz   : 0.23.0
bambi   : 0.16.0
numpy   : 2.3.5
nutpie  : 0.16.4
pymc    : 5.27.0
pytensor: 2.36.1

Watermark: 2.6.0