import pymc as pm
import pandas as pd
import numpy as np
from pytensor import tensor as pt
import arviz as az
from matplotlib import pyplot as plt
import seaborn as sns
from pymc import do, observe
Measurement Model
df = pd.read_csv('sem_data.csv')
df.head()
|   | motiv | harm | stabi | ppsych | ses | verbal | read | arith | spell |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -7.907122 | -5.075312 | -3.138836 | -17.800210 | 4.766450 | -3.633360 | -3.488981 | -9.989121 | -6.567873 |
| 1 | 1.751478 | -4.155847 | 3.520752 | 7.009367 | -6.048681 | -7.693461 | -4.520552 | 8.196238 | 8.778973 |
| 2 | 14.472570 | -4.540677 | 4.070600 | 23.734260 | -16.970670 | -3.909941 | -4.818170 | 7.529984 | -5.688716 |
| 3 | -1.165421 | -5.668406 | 2.600437 | 1.493158 | 1.396363 | 21.409450 | -3.138441 | 5.730547 | -2.915676 |
| 4 | -4.222899 | -10.072150 | -6.030737 | -5.985864 | -18.376400 | -1.438816 | -2.009742 | -0.623953 | -1.024624 |
coords = {
    "obs": list(range(len(df))),
    "indicators": ["motiv", "harm", "stabi", "verbal", "ses", "ppsych", "read", "arith", "spell"],
    "indicators_1": ["motiv", "harm", "stabi"],
    "indicators_2": ["verbal", "ses", "ppsych"],
    "indicators_3": ["read", "arith", "spell"],
    "latent": ["adjust", "risk", "achieve"],
}
def make_lambda(indicators, name='lambdas1', priors=[1, 10]):
    """Takes an indicators argument which is a key in the coords dict"""
    temp_name = name + '_'
    lambdas_ = pm.Normal(temp_name, priors[0], priors[1], dims=indicators)
    # Fix the first loading to 1 to anchor the scale of the latent factor
    lambdas_1 = pm.Deterministic(
        name, pt.set_subtensor(lambdas_[0], 1), dims=indicators
    )
    return lambdas_1
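To see the identification constraint in isolation: `pt.set_subtensor` returns a copy of the loadings vector with its first entry pinned at 1, which anchors the scale of each latent factor. A minimal sketch outside any PyMC model (the input values are illustrative, not estimates):

```python
# Standalone demo of the identification trick; values are made up.
from pytensor import function
from pytensor import tensor as pt

raw = pt.vector("raw")                  # unconstrained loadings
pinned = pt.set_subtensor(raw[0], 1.0)  # first loading fixed to 1
demo = function([raw], pinned)
print(demo([0.3, 0.9, 0.7]))            # -> [1.  0.9 0.7]
```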
obs_idx = list(range(len(df)))

with pm.Model(coords=coords) as model:

    # Set up Factor Loadings
    lambdas_1 = make_lambda('indicators_1', 'lambdas1')
    lambdas_2 = make_lambda('indicators_2', 'lambdas2', priors=[1, 2])
    lambdas_3 = make_lambda('indicators_3', 'lambdas3')

    # Specify covariance structure between latent factors
    kappa = 0
    sd_dist = pm.Exponential.dist(1.0, shape=3)
    chol, _, _ = pm.LKJCholeskyCov("chol_cov", n=3, eta=2, sd_dist=sd_dist, compute_corr=True)
    ksi = pm.MvNormal("ksi", kappa, chol=chol, dims=("obs", "latent"))

    # Construct Pseudo Observation matrix based on Factor Loadings
    m1 = ksi[obs_idx, 0] * lambdas_1[0]
    m2 = ksi[obs_idx, 0] * lambdas_1[1]
    m3 = ksi[obs_idx, 0] * lambdas_1[2]
    m4 = ksi[obs_idx, 1] * lambdas_2[0]
    m5 = ksi[obs_idx, 1] * lambdas_2[1]
    m6 = ksi[obs_idx, 1] * lambdas_2[2]
    m7 = ksi[obs_idx, 2] * lambdas_3[0]
    m8 = ksi[obs_idx, 2] * lambdas_3[1]
    m9 = ksi[obs_idx, 2] * lambdas_3[2]

    mu = pm.Deterministic("mu", pm.math.stack([m1, m2, m3, m4, m5, m6, m7, m8, m9]).T)

    ## Error Terms
    Psi = pm.InverseGamma("Psi", 2, 13, dims="indicators")

    # Likelihood
    _ = pm.Normal(
        "likelihood",
        mu,
        Psi,
        observed=df[coords['indicators']].values,
    )

    idata = pm.sample(
        draws=1000,
        chains=4,
        nuts_sampler="numpyro",
        target_accept=0.95,
        idata_kwargs={"log_likelihood": True},
        tune=2000,
        random_seed=150
    )
    idata.extend(pm.sample_posterior_predictive(idata))

pm.model_to_graphviz(model)
=["lambdas1", "lambdas2", "lambdas3"]) az.summary(idata, var_names
/Users/nathanielforde/mambaforge/envs/bayesian_causal_book/lib/python3.10/site-packages/arviz/stats/diagnostics.py:596: RuntimeWarning: invalid value encountered in scalar divide
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
|   | mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| lambdas1[motiv] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000.0 | 4000.0 | NaN |
| lambdas1[harm] | 0.895 | 0.043 | 0.814 | 0.973 | 0.002 | 0.001 | 521.0 | 1401.0 | 1.01 |
| lambdas1[stabi] | 0.703 | 0.047 | 0.617 | 0.791 | 0.002 | 0.001 | 801.0 | 1571.0 | 1.01 |
| lambdas2[verbal] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000.0 | 4000.0 | NaN |
| lambdas2[ses] | 0.842 | 0.087 | 0.679 | 1.000 | 0.004 | 0.003 | 453.0 | 1294.0 | 1.01 |
| lambdas2[ppsych] | -0.801 | 0.084 | -0.969 | -0.653 | 0.004 | 0.003 | 521.0 | 1276.0 | 1.00 |
| lambdas3[read] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 4000.0 | 4000.0 | NaN |
| lambdas3[arith] | 0.841 | 0.035 | 0.773 | 0.906 | 0.001 | 0.001 | 1955.0 | 2586.0 | 1.00 |
| lambdas3[spell] | 0.981 | 0.029 | 0.927 | 1.037 | 0.001 | 0.001 | 1123.0 | 1700.0 | 1.00 |
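The NaN r_hat values (and the ArviZ divide warnings above) are expected rather than a problem: the first loading on each factor (lambdas1[motiv], lambdas2[verbal], lambdas3[read]) is fixed at 1 for identification, so those parameters have zero variance within and between chains and the r_hat ratio is undefined. The same fixed loadings trigger the "single value" KDE warnings in the trace plots below.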
az.plot_trace(idata, var_names=['lambdas1', 'lambdas2', 'lambdas3']);
plt.tight_layout()
/Users/nathanielforde/mambaforge/envs/bayesian_causal_book/lib/python3.10/site-packages/arviz/stats/density_utils.py:488: UserWarning: Your data appears to have a single value or no finite values
  warnings.warn("Your data appears to have a single value or no finite values")
def make_factor_loadings_df(idata):
    factor_loadings = pd.DataFrame(
        az.summary(
            idata, var_names=["lambdas1", "lambdas2", "lambdas3"]
        )["mean"]
    ).reset_index()
    factor_loadings["factor"] = factor_loadings["index"].str.split("[", expand=True)[0]
    factor_loadings.columns = ["factor_loading", "factor_loading_weight", "factor"]
    factor_loadings["factor_loading_weight_sq"] = factor_loadings["factor_loading_weight"] ** 2
    factor_loadings["sum_sq_loadings"] = factor_loadings.groupby("factor")[
        "factor_loading_weight_sq"
    ].transform("sum")  # pass the string "sum" to avoid the pandas FutureWarning
    factor_loadings["error_variances"] = az.summary(idata, var_names=["Psi"])["mean"].values
    factor_loadings["total_indicator_variance"] = (
        factor_loadings["factor_loading_weight_sq"] + factor_loadings["error_variances"]
    )
    factor_loadings["total_variance"] = factor_loadings["total_indicator_variance"].sum()
    factor_loadings["indicator_explained_variance"] = (
        factor_loadings["factor_loading_weight_sq"] / factor_loadings["total_variance"]
    )
    factor_loadings["factor_explained_variance"] = (
        factor_loadings["sum_sq_loadings"] / factor_loadings["total_variance"]
    )
    return factor_loadings
"display.max_colwidth", 15)
pd.set_option(= make_factor_loadings_df(idata)
factor_loadings = [c for c in factor_loadings.columns if not c in ["factor_loading", "factor"]]
num_cols format("{:.3f}", subset=num_cols).background_gradient(
factor_loadings.style.=0, subset=["indicator_explained_variance", "factor_explained_variance"]
axis )
|   | factor_loading | factor_loading_weight | factor | factor_loading_weight_sq | sum_sq_loadings | error_variances | total_indicator_variance | total_variance | indicator_explained_variance | factor_explained_variance |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | lambdas1[motiv] | 1.000 | lambdas1 | 1.000 | 2.295 | 3.676 | 4.676 | 60.931 | 0.016 | 0.038 |
| 1 | lambdas1[harm] | 0.895 | lambdas1 | 0.801 | 2.295 | 5.623 | 6.424 | 60.931 | 0.013 | 0.038 |
| 2 | lambdas1[stabi] | 0.703 | lambdas1 | 0.494 | 2.295 | 7.605 | 8.099 | 60.931 | 0.008 | 0.038 |
| 3 | lambdas2[verbal] | 1.000 | lambdas2 | 1.000 | 2.351 | 6.915 | 7.915 | 60.931 | 0.016 | 0.039 |
| 4 | lambdas2[ses] | 0.842 | lambdas2 | 0.709 | 2.351 | 8.027 | 8.736 | 60.931 | 0.012 | 0.039 |
| 5 | lambdas2[ppsych] | -0.801 | lambdas2 | 0.642 | 2.351 | 8.242 | 8.884 | 60.931 | 0.011 | 0.039 |
| 6 | lambdas3[read] | 1.000 | lambdas3 | 1.000 | 2.670 | 3.409 | 4.409 | 60.931 | 0.016 | 0.044 |
| 7 | lambdas3[arith] | 0.841 | lambdas3 | 0.707 | 2.670 | 6.168 | 6.875 | 60.931 | 0.012 | 0.044 |
| 8 | lambdas3[spell] | 0.981 | lambdas3 | 0.962 | 2.670 | 3.951 | 4.913 | 60.931 | 0.016 | 0.044 |
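The table implements a simple variance decomposition built from the posterior means: squaring each loading and adding the corresponding error variance gives the implied indicator variance. In symbols, for indicator $i$ with loading $\lambda_i$ and error variance $\psi_i$, and a factor $F$ grouping its indicators:

\[ \text{total\_indicator\_variance}_i = \lambda_i^{2} + \psi_i, \qquad \text{indicator\_explained\_variance}_i = \frac{\lambda_i^{2}}{\sum_{k}\left(\lambda_k^{2} + \psi_k\right)}, \qquad \text{factor\_explained\_variance}_F = \frac{\sum_{i \in F}\lambda_i^{2}}{\sum_{k}\left(\lambda_k^{2} + \psi_k\right)} \]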
fig, axs = plt.subplots(1, 2, figsize=(20, 9))
axs = axs.flatten()
az.plot_energy(idata, ax=axs[0])
az.plot_forest(idata, var_names=["lambdas1", "lambdas2", "lambdas3"], combined=True, ax=axs[1]);
plt.tight_layout()
drivers = coords['indicators']

def get_posterior_resids(idata, samples=100, metric="cov"):
    resids = []
    for i in range(samples):  # was hard-coded range(100); honour the samples argument
        if metric == "cov":
            model_cov = pd.DataFrame(
                az.extract(idata["posterior_predictive"])["likelihood"][:, :, i]
            ).cov()
            obs_cov = df[drivers].cov()
        else:
            model_cov = pd.DataFrame(
                az.extract(idata["posterior_predictive"])["likelihood"][:, :, i]
            ).corr()
            obs_cov = df[drivers].corr()
        model_cov.index = obs_cov.index
        model_cov.columns = obs_cov.columns
        residuals = model_cov - obs_cov
        resids.append(residuals.values.flatten())
    residuals_posterior = pd.DataFrame(pd.DataFrame(resids).mean().values.reshape(9, 9))
    residuals_posterior.index = obs_cov.index
    residuals_posterior.columns = obs_cov.index
    return residuals_posterior

residuals_posterior_cov = get_posterior_resids(idata, 1000)
residuals_posterior_corr = get_posterior_resids(idata, 1000, metric="corr")
fig, ax = plt.subplots(figsize=(20, 10))
mask = np.triu(np.ones_like(residuals_posterior_corr, dtype=bool))
ax = sns.heatmap(residuals_posterior_corr, annot=True, cmap="bwr", mask=mask)
ax.set_title("Residuals between Model Implied and Sample Correlations \n CFA", fontsize=25);
def make_ppc(
    idata,
    samples=100,
    drivers=drivers,
    dims=(3, 3),
):
    fig, axs = plt.subplots(dims[0], dims[1], figsize=(20, 10))
    axs = axs.flatten()
    for i in range(len(drivers)):
        for j in range(samples):
            temp = az.extract(idata["posterior_predictive"].sel({"likelihood_dim_3": i}))[
                "likelihood"
            ].values[:, j]
            temp = pd.DataFrame(temp, columns=["likelihood"])
            if j == 0:
                axs[i].hist(df[drivers[i]], alpha=0.3, ec="black", bins=20, label="Observed Scores")
                axs[i].hist(
                    temp["likelihood"], color="purple", alpha=0.1, bins=20, label="Predicted Scores"
                )
            else:
                axs[i].hist(df[drivers[i]], alpha=0.3, ec="black", bins=20)
                axs[i].hist(temp["likelihood"], color="purple", alpha=0.1, bins=20)
        axs[i].set_title(f"Posterior Predictive Checks {drivers[i]}")
        axs[i].legend()
    plt.tight_layout()
    plt.show()

make_ppc(idata)
SEM Model
Making the Structural Matrix
# Toy demonstration of how a structural coefficient matrix routes effects
data = np.random.multivariate_normal([0, 0, 0], [[1, 0, 0], [0, 1, 0], [0, 0, 1]], size=500)
data = np.ones((500, 3))  # overwrite with ones so the matrix product is easy to read
betas = np.zeros((3, 3))
betas[1, 0] = 0.5
betas[1, 2] = 0.5
betas[0, 2] = 0.7

B_matrix = pd.DataFrame(betas, index=['adjust_coef', 'risk_coef', 'achieve_coef'],
                        columns=['adjust_target', 'risk_target', 'achieve_target'])
data = pd.DataFrame(data, columns=['adjust', 'risk', 'achieve'])
B_matrix
|   | adjust_target | risk_target | achieve_target |
|---|---|---|---|
| adjust_coef | 0.0 | 0.0 | 0.7 |
| risk_coef | 0.5 | 0.0 | 0.5 |
| achieve_coef | 0.0 | 0.0 | 0.0 |
data.head()
|   | adjust | risk | achieve |
|---|---|---|---|
| 0 | 1.0 | 1.0 | 1.0 |
| 1 | 1.0 | 1.0 | 1.0 |
| 2 | 1.0 | 1.0 | 1.0 |
| 3 | 1.0 | 1.0 | 1.0 |
| 4 | 1.0 | 1.0 | 1.0 |
np.dot(data, B_matrix)
array([[0.5, 0. , 1.2],
[0.5, 0. , 1.2],
[0.5, 0. , 1.2],
...,
[0.5, 0. , 1.2],
[0.5, 0. , 1.2],
[0.5, 0. , 1.2]])
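Because every row of `data` is (1, 1, 1), each output row is just the column sums of the coefficient matrix: adjust_target = 0 + 0.5 + 0 = 0.5, risk_target = 0 (no path flows into risk), and achieve_target = 0.7 + 0.5 + 0 = 1.2. This dot product of latent scores with a sparse coefficient matrix is exactly how the structural regressions are injected into the SEM below.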
Fitting the Model
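The model below slots the three path coefficients into a 3×3 matrix with the same sparsity pattern as the toy B_matrix above, so that pm.math.dot(ksi, coefs_) adds each latent factor's structural contribution to its downstream targets. With rows indexed by source factor (adjust, risk, achieve) and columns by target, the matrix built via pt.set_subtensor is:

\[ B = \begin{pmatrix} 0 & 0 & \beta_{adjust \to achieve} \\ \beta_{risk \to adjust} & 0 & \beta_{risk \to achieve} \\ 0 & 0 & 0 \end{pmatrix} \]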
def make_sem():
    coords = {
        "obs": list(range(len(df))),
        "indicators": ["motiv", "harm", "stabi", "verbal", "ses", "ppsych", "read", "arith", "spell"],
        "indicators_1": ["motiv", "harm", "stabi"],
        "indicators_2": ["verbal", "ses", "ppsych"],
        "indicators_3": ["read", "arith", "spell"],
        "latent": ["adjust", "risk", "achieve"],
        "paths": ["risk->adjust", "risk->achieve", "adjust->achieve"]
    }
    obs_idx = list(range(len(df)))
    with pm.Model(coords=coords) as model:

        # Set up Factor Loadings
        lambdas_1 = make_lambda('indicators_1', 'lambdas1', priors=[1, .5])  # adjust
        lambdas_2 = make_lambda('indicators_2', 'lambdas2', priors=[1, .5])  # risk
        lambdas_3 = make_lambda('indicators_3', 'lambdas3', priors=[1, .5])  # achieve

        # Specify covariance structure between latent factors
        sd_dist = pm.Exponential.dist(1.0, shape=3)
        chol, _, _ = pm.LKJCholeskyCov("chol_cov", n=3, eta=2, sd_dist=sd_dist, compute_corr=True)
        ksi = pm.MvNormal("ksi", 0, chol=chol, dims=("obs", "latent"))

        ## Build Regression Components
        coefs = pm.Normal('betas', 0, .5, dims='paths')
        zeros = pt.zeros((3, 3))
        ## adjust ~ risk
        zeros1 = pt.set_subtensor(zeros[1, 0], coefs[0])
        ## achieve ~ risk + adjust
        zeros2 = pt.set_subtensor(zeros1[1, 2], coefs[1])
        coefs_ = pt.set_subtensor(zeros2[0, 2], coefs[2])

        structural_relations = pm.Deterministic('endogenous_structural_paths', pm.math.dot(ksi, coefs_))

        # Construct Pseudo Observation matrix based on Factor Loadings
        m1 = ksi[obs_idx, 0] * lambdas_1[0] + structural_relations[obs_idx, 0]  # adjust
        m2 = ksi[obs_idx, 0] * lambdas_1[1] + structural_relations[obs_idx, 0]  # adjust
        m3 = ksi[obs_idx, 0] * lambdas_1[2] + structural_relations[obs_idx, 0]  # adjust
        m4 = ksi[obs_idx, 1] * lambdas_2[0] + structural_relations[obs_idx, 1]  # risk
        m5 = ksi[obs_idx, 1] * lambdas_2[1] + structural_relations[obs_idx, 1]  # risk
        m6 = ksi[obs_idx, 1] * lambdas_2[2] + structural_relations[obs_idx, 1]  # risk
        m7 = ksi[obs_idx, 2] * lambdas_3[0] + structural_relations[obs_idx, 2]  # achieve
        m8 = ksi[obs_idx, 2] * lambdas_3[1] + structural_relations[obs_idx, 2]  # achieve
        m9 = ksi[obs_idx, 2] * lambdas_3[2] + structural_relations[obs_idx, 2]  # achieve

        mu = pm.Deterministic("mu", pm.math.stack([m1, m2, m3, m4, m5, m6, m7, m8, m9]).T)

        ## Error Terms
        Psi = pm.InverseGamma("Psi", 2, 13, dims="indicators")

        # Likelihood
        _ = pm.Normal(
            "likelihood",
            mu,
            Psi,
            observed=df[coords['indicators']].values,
        )

        idata = pm.sample(
            draws=2500,
            chains=4,
            # nuts_sampler="numpyro",
            target_accept=0.90,
            idata_kwargs={"log_likelihood": True},
            random_seed=150,
            tune=1000,
        )
        idata.extend(pm.sample_posterior_predictive(idata))
    return idata, model

idata_sem, model_sem = make_sem()
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [lambdas1_, lambdas2_, lambdas3_, chol_cov, ksi, betas, Psi]
/Users/nathanielforde/mambaforge/envs/bayesian_causal_book/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.
  self.pid = os.fork()
Sampling 4 chains for 1_000 tune and 2_500 draw iterations (4_000 + 10_000 draws total) took 170 seconds.
There were 1 divergences after tuning. Increase `target_accept` or reparameterize.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters. A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details
Sampling: [likelihood]
pm.model_to_graphviz(model_sem)
=["lambdas1", "lambdas2", "lambdas3", "betas"]) az.summary(idata_sem, var_names
|   | mean | sd | hdi_3% | hdi_97% | mcse_mean | mcse_sd | ess_bulk | ess_tail | r_hat |
|---|---|---|---|---|---|---|---|---|---|
| lambdas1[motiv] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 10000.0 | 10000.0 | NaN |
| lambdas1[harm] | 0.850 | 0.045 | 0.766 | 0.933 | 0.001 | 0.001 | 997.0 | 2361.0 | 1.00 |
| lambdas1[stabi] | 0.651 | 0.052 | 0.556 | 0.748 | 0.002 | 0.001 | 988.0 | 3122.0 | 1.00 |
| lambdas2[verbal] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 10000.0 | 10000.0 | NaN |
| lambdas2[ses] | 0.833 | 0.081 | 0.681 | 0.986 | 0.002 | 0.001 | 2282.0 | 4694.0 | 1.00 |
| lambdas2[ppsych] | -0.764 | 0.079 | -0.920 | -0.623 | 0.002 | 0.001 | 2399.0 | 4215.0 | 1.00 |
| lambdas3[read] | 1.000 | 0.000 | 1.000 | 1.000 | 0.000 | 0.000 | 10000.0 | 10000.0 | NaN |
| lambdas3[arith] | 0.610 | 0.057 | 0.498 | 0.712 | 0.003 | 0.002 | 381.0 | 1068.0 | 1.00 |
| lambdas3[spell] | 0.825 | 0.037 | 0.755 | 0.894 | 0.001 | 0.001 | 757.0 | 1717.0 | 1.00 |
| betas[risk->adjust] | 0.202 | 0.139 | -0.065 | 0.457 | 0.010 | 0.007 | 186.0 | 202.0 | 1.02 |
| betas[risk->achieve] | 0.207 | 0.178 | -0.129 | 0.534 | 0.012 | 0.009 | 208.0 | 487.0 | 1.01 |
| betas[adjust->achieve] | 0.747 | 0.104 | 0.556 | 0.942 | 0.006 | 0.004 | 311.0 | 810.0 | 1.01 |
make_ppc(idata_sem)
residuals_posterior_cov = get_posterior_resids(idata_sem, 1000)
residuals_posterior_corr = get_posterior_resids(idata_sem, 1000, metric="corr")
fig, ax = plt.subplots(figsize=(20, 10))
mask = np.triu(np.ones_like(residuals_posterior_corr, dtype=bool))
ax = sns.heatmap(residuals_posterior_corr, annot=True, cmap="bwr", mask=mask)
ax.set_title("Residuals between Model Implied and Sample Correlations \n SEM model", fontsize=25);
fig, axs = plt.subplots(1, 3, figsize=(15, 10))
axs = axs.flatten()
ax = axs[0]
ax1 = axs[1]
ax2 = axs[2]
az.plot_forest(idata_sem, var_names=["ksi"], combined=True, ax=ax, coords={"latent": ["adjust"]})
az.plot_forest(
    idata_sem,
    var_names=["ksi"],
    combined=True,
    ax=ax1,
    colors="forestgreen",
    coords={"latent": ["risk"]},
)
az.plot_forest(
    idata_sem,
    var_names=["ksi"],
    combined=True,
    ax=ax2,
    colors="slateblue",
    coords={"latent": ["achieve"]},
)
ax.set_yticklabels([])
ax.set_xlabel("ADJUST")
ax1.set_yticklabels([])
ax1.set_xlabel("RISK")
ax2.set_yticklabels([])
ax2.set_xlabel("ACHIEVE")
ax.axvline(-2, color="red")
ax1.axvline(-2, color="red")
ax2.axvline(-2, color="red")
ax.set_title("Individual On Latent Factor ADJUST")
ax1.set_title("Individual On Latent Factor RISK")
ax2.set_title("Individual On Latent Factor ACHIEVE")
plt.show();
compare_df = az.compare({'SEM': idata_sem, 'CFA': idata}, 'waic')
compare_df
/Users/nathanielforde/mambaforge/envs/bayesian_causal_book/lib/python3.10/site-packages/arviz/stats/stats.py:1653: UserWarning: For one or more samples the posterior variance of the log predictive densities exceeds 0.4. This could be indication of WAIC starting to fail.
See http://arxiv.org/abs/1507.04544 for details
  warnings.warn(
|   | rank | elpd_waic | p_waic | elpd_diff | weight | se | dse | warning | scale |
|---|---|---|---|---|---|---|---|---|---|
| SEM | 0 | -14525.202949 | 905.303983 | 0.000000 | 1.0 | 49.632750 | 0.000000 | True | log |
| CFA | 1 | -14748.968775 | 939.024032 | 223.765826 | 0.0 | 48.571539 | 12.080799 | True | log |
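WAIC ranks the SEM above the CFA model, but note the warning column (and the UserWarning above): the high posterior variance of the log predictive densities means these elpd estimates should be read as indicative rather than decisive.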
az.plot_compare(compare_df);
Counterfactual Analysis
model_sem
\[ \begin{array}{rcl} \text{lambdas1\_} &\sim & \operatorname{Normal}(1,~0.5)\\\text{lambdas2\_} &\sim & \operatorname{Normal}(1,~0.5)\\\text{lambdas3\_} &\sim & \operatorname{Normal}(1,~0.5)\\\text{chol\_cov} &\sim & \operatorname{\_lkjcholeskycov}(3,~2,~\operatorname{Exponential}(f()))\\\text{ksi} &\sim & \operatorname{MultivariateNormal}(f(\text{chol\_cov}),~f(\text{chol\_cov}))\\\text{betas} &\sim & \operatorname{Normal}(0,~0.5)\\\text{Psi} &\sim & \operatorname{InverseGamma}(2,~13)\\\text{lambdas1} &\sim & \operatorname{Deterministic}(f(\text{lambdas1\_}))\\\text{lambdas2} &\sim & \operatorname{Deterministic}(f(\text{lambdas2\_}))\\\text{lambdas3} &\sim & \operatorname{Deterministic}(f(\text{lambdas3\_}))\\\text{chol\_cov\_corr} &\sim & \operatorname{Deterministic}(f(\text{chol\_cov}))\\\text{chol\_cov\_stds} &\sim & \operatorname{Deterministic}(f(\text{chol\_cov}))\\\text{endogenous\_structural\_paths} &\sim & \operatorname{Deterministic}(f(\text{ksi},~\text{betas}))\\\text{mu} &\sim & \operatorname{Deterministic}(f(\text{ksi},~\text{betas},~\text{lambdas3\_},~\text{lambdas2\_},~\text{lambdas1\_}))\\\text{likelihood} &\sim & \operatorname{Normal}(\text{mu},~\text{Psi}) \end{array} \]
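PyMC's do operator implements the counterfactual step by graph surgery: the named random variable is replaced by the supplied constants, and prune_vars=True drops any nodes that no longer feed the likelihood. A minimal toy sketch of the mechanics (this two-variable model is illustrative only, not part of the analysis):

```python
# Toy illustration of pm.do; not part of the SEM analysis.
import pymc as pm

with pm.Model() as toy:
    z = pm.Normal("z")                     # upstream cause
    y = pm.Normal("y", mu=2 * z, sigma=1)  # downstream effect

# Graph surgery: z is replaced by the constant 0, so y ~ Normal(0, 1)
toy_do = pm.do(toy, {"z": 0.0})
```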
model_beta0 = do(model_sem, {"betas": [0, 0, 0]}, prune_vars=True)
model_beta1 = do(model_sem, {"betas": [.6, .3, .7]}, prune_vars=True)
model_beta0
\[ \begin{array}{rcl} \text{lambdas1\_} &\sim & \operatorname{Normal}(1,~0.5)\\\text{lambdas2\_} &\sim & \operatorname{Normal}(1,~0.5)\\\text{lambdas3\_} &\sim & \operatorname{Normal}(1,~0.5)\\\text{chol\_cov} &\sim & \operatorname{\_lkjcholeskycov}(3,~2,~\operatorname{Exponential}(f()))\\\text{ksi} &\sim & \operatorname{MultivariateNormal}(f(\text{chol\_cov}),~f(\text{chol\_cov}))\\\text{Psi} &\sim & \operatorname{InverseGamma}(2,~13)\\\text{endogenous\_structural\_paths} &\sim & \operatorname{Deterministic}(f(\text{ksi}))\\\text{lambdas3} &\sim & \operatorname{Deterministic}(f(\text{lambdas3\_}))\\\text{lambdas2} &\sim & \operatorname{Deterministic}(f(\text{lambdas2\_}))\\\text{lambdas1} &\sim & \operatorname{Deterministic}(f(\text{lambdas1\_}))\\\text{mu} &\sim & \operatorname{Deterministic}(f(\text{ksi},~\text{lambdas3\_},~\text{lambdas2\_},~\text{lambdas1\_}))\\\text{likelihood} &\sim & \operatorname{Normal}(f(\text{ksi},~\text{lambdas1\_},~\text{lambdas2\_},~\text{lambdas3\_}),~\text{Psi}) \end{array} \]
pm.model_to_graphviz(model_beta0)
# Sample the indicator data under the intervention: P(Y | do(betas = [0, 0, 0]))
idata_z0 = pm.sample_posterior_predictive(
    idata_sem,
    model=model_beta0,
    predictions=False,
    var_names=["likelihood", "betas"],
)

# Sample the indicator data under the intervention: P(Y | do(betas = [.6, .3, .7]))
idata_z1 = pm.sample_posterior_predictive(
    idata_sem,
    model=model_beta1,
    predictions=False,
    var_names=["likelihood", "betas"],
)
Sampling: [likelihood]
Sampling: [likelihood]
pd.DataFrame((idata_z0['posterior_predictive']['likelihood'] - idata_z1['posterior_predictive']['likelihood']).mean(dim=('chain', 'draw')).values)
|   | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.463197 | -0.358005 | -0.286565 | -0.114874 | -0.042991 | 0.110666 | 4.936211 | 5.107738 | 4.972490 |
| 1 | 2.377638 | 2.422167 | 2.359539 | -0.063802 | -0.151001 | -0.018147 | -0.475168 | -0.303663 | -0.511428 |
| 2 | 4.663123 | 4.648260 | 4.735318 | 0.092329 | -0.014905 | -0.087927 | -5.440901 | -5.441826 | -5.468720 |
| 3 | -2.977008 | -2.997935 | -2.989136 | -0.039031 | 0.041482 | -0.025136 | 0.108632 | 0.264840 | 0.129751 |
| 4 | 1.826385 | 1.823625 | 1.722291 | 0.008998 | -0.087842 | 0.038139 | 3.982011 | 4.043272 | 3.869997 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 495 | -2.844905 | -3.038198 | -2.995460 | -0.007683 | 0.025422 | 0.177166 | -3.240432 | -3.093730 | -3.218599 |
| 496 | -1.817311 | -1.728041 | -1.782307 | 0.086392 | -0.032205 | -0.147835 | -3.111101 | -3.096323 | -3.237516 |
| 497 | 0.055163 | 0.142330 | 0.268756 | -0.165110 | -0.045748 | 0.036947 | -1.071530 | -1.102467 | -1.087500 |
| 498 | 1.989527 | 1.991247 | 1.913922 | -0.025677 | 0.027728 | -0.097815 | -9.445563 | -9.406408 | -9.387214 |
| 499 | 6.370377 | 6.422907 | 6.339601 | -0.023407 | -0.002096 | 0.105869 | 14.494246 | 14.581555 | 14.492164 |

500 rows × 9 columns
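Reading the columns against the indicator ordering (motiv, harm, stabi, verbal, ses, ppsych, read, arith, spell), the adjust indicators (columns 0-2) and achieve indicators (columns 6-8) shift substantially between the two interventional regimes, while the risk indicators (columns 3-5) barely move. This is as expected: risk is exogenous in the path diagram and receives no incoming edge, so intervening on the path coefficients leaves its indicators untouched.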
pd.DataFrame((idata_z0['posterior_predictive']['likelihood'] - idata_z1['posterior_predictive']['likelihood']).mean(dim=('chain', 'draw')).values).median()
0 -0.105562
1 -0.085492
2 -0.135569
3 -0.006139
4 -0.004415
5 -0.002360
6 -0.148865
7 -0.163581
8 -0.169653
dtype: float64
residuals_posterior_corr0 = get_posterior_resids(idata_z0, 1000, metric="corr")
residuals_posterior_corr1 = get_posterior_resids(idata_z1, 1000, metric="corr")
residuals_posterior_corr0 - residuals_posterior_corr1
|   | motiv | harm | stabi | verbal | ses | ppsych | read | arith | spell |
|---|---|---|---|---|---|---|---|---|---|
| motiv | 0.000000 | -0.080177 | -0.131360 | -0.243116 | -0.192810 | 0.178007 | -0.899440 | -0.816758 | -0.882780 |
| harm | -0.080177 | 0.000000 | -0.153191 | -0.252675 | -0.206456 | 0.188974 | -0.799793 | -0.729367 | -0.786353 |
| stabi | -0.131360 | -0.153191 | 0.000000 | -0.262698 | -0.217575 | 0.197371 | -0.678641 | -0.611510 | -0.658578 |
| verbal | -0.243116 | -0.252675 | -0.262698 | 0.000000 | -0.010569 | -0.003814 | -0.241953 | -0.253102 | -0.250916 |
| ses | -0.192810 | -0.206456 | -0.217575 | -0.010569 | 0.000000 | 0.001399 | -0.196381 | -0.206370 | -0.204542 |
| ppsych | 0.178007 | 0.188974 | 0.197371 | -0.003814 | 0.001399 | 0.000000 | 0.175259 | 0.185912 | 0.186708 |
| read | -0.899440 | -0.799793 | -0.678641 | -0.241953 | -0.196381 | 0.175259 | 0.000000 | -0.178436 | -0.087744 |
| arith | -0.816758 | -0.729367 | -0.611510 | -0.253102 | -0.206370 | 0.185912 | -0.178436 | 0.000000 | -0.219602 |
| spell | -0.882780 | -0.786353 | -0.658578 | -0.250916 | -0.204542 | 0.186708 | -0.087744 | -0.219602 | 0.000000 |
fig, ax = plt.subplots(figsize=(20, 10))
mask = np.triu(np.ones_like(residuals_posterior_corr1, dtype=bool))
ax = sns.heatmap(residuals_posterior_corr1, annot=True, cmap="bwr", mask=mask, vmax=1, vmin=-1)
ax.set_title("Residuals between Model Implied and Sample Correlations \n Counterfactual Setting", fontsize=25);
fig, ax = plt.subplots(figsize=(20, 10))
mask = np.triu(np.ones_like(residuals_posterior_corr0, dtype=bool))
ax = sns.heatmap(residuals_posterior_corr0, annot=True, cmap="bwr", mask=mask, vmax=1, vmin=-1)
ax.set_title("Residuals between Model Implied and Sample Correlations \n Zeroed-out Setting", fontsize=25);