AMCE playground

Frequentist model with example data, automatic

The main R package for estimating AMCEs in a frequentist way is {cregg}

library(tidyverse)
library(broom)
library(cregg)
library(scales)
library(brms)
library(tidybayes)

data("taxes")

It calculates AMCEs and returns them in a nice tidy dataframe and provides an automatic plot function, which is nice:

taxes_model_automatic <- amce(
  taxes,
  chose_plan ~ taxrate1 + taxrate2 + taxrate3 +
    taxrate4 + taxrate5 + taxrate6 + taxrev,
  id = ~ID
)
taxes_model_automatic %>%
  as_tibble()
## # A tibble: 32 × 10
##    outcome statistic feature level estimate std.error        z         p   lower
##    <chr>   <chr>     <fct>   <fct>    <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
##  1 chose_… amce      Tax ra… <10k…  0        NA        NA      NA        NA     
##  2 chose_… amce      Tax ra… <10k… -1.40e-2   0.00837  -1.67    9.43e- 2 -0.0304
##  3 chose_… amce      Tax ra… <10k… -8.98e-2   0.00988  -9.08    1.06e-19 -0.109 
##  4 chose_… amce      Tax ra… <10k… -2.22e-1   0.0125  -17.7     2.76e-70 -0.246 
##  5 chose_… amce      Tax ra… 10-3…  0        NA        NA      NA        NA     
##  6 chose_… amce      Tax ra… 10-3… -1.62e-2   0.0100   -1.61    1.06e- 1 -0.0358
##  7 chose_… amce      Tax ra… 10-3… -8.49e-2   0.0158   -5.37    8.07e- 8 -0.116 
##  8 chose_… amce      Tax ra… 10-3… -1.87e-1   0.0211   -8.86    7.70e-19 -0.228 
##  9 chose_… amce      Tax ra… 35-8…  0        NA        NA      NA        NA     
## 10 chose_… amce      Tax ra… 35-8…  5.36e-4   0.00824   0.0650  9.48e- 1 -0.0156
## # ℹ 22 more rows
## # ℹ 1 more variable: upper <dbl>
plot(taxes_model_automatic) + 
  guides(color = "none")

But (1) we’re doing this Bayesianly, and (2) I want to understand what exactly these AMCEs are, so I need to figure out how this all works under the hood.

Frequentist model with example data, manual

survey::svyglm() coefficients

Behind the scenes, {cregg} uses survey::svyglm() to run OLS with ID-specific adjustments to standard errors:

taxes_design <- survey::svydesign(
  ids = ~ID,
  weights = ~1,
  data = taxes
)

taxes_model_manual <- survey::svyglm(
  chose_plan ~ taxrate1 + taxrate2 + taxrate3 +
    taxrate4 + taxrate5 + taxrate6 + taxrev,
  design = taxes_design
)

tidy(taxes_model_manual)
## # A tibble: 26 × 5
##    term                 estimate std.error statistic   p.value
##    <chr>                   <dbl>     <dbl>     <dbl>     <dbl>
##  1 (Intercept)          0.509      0.0182    27.9    5.19e-145
##  2 taxrate1<10k: 5%    -0.0140     0.00837   -1.67   9.45e-  2
##  3 taxrate1<10k: 15%   -0.0898     0.00988   -9.08   2.50e- 19
##  4 taxrate1<10k: 25%   -0.222      0.0125   -17.7    2.43e- 65
##  5 taxrate210-35k: 15% -0.0162     0.0100    -1.61   1.07e-  1
##  6 taxrate210-35k: 25% -0.0849     0.0158    -5.37   9.01e-  8
##  7 taxrate210-35k: 35% -0.187      0.0211    -8.86   1.68e- 18
##  8 taxrate335-85k: 15%  0.000536   0.00824    0.0650 9.48e-  1
##  9 taxrate335-85k: 25% -0.0533     0.00971   -5.49   4.52e-  8
## 10 taxrate335-85k: 35% -0.108      0.0119    -9.09   2.32e- 19
## # ℹ 16 more rows

Those are the same results that we get from cregg::amce(), both the estimates and the standard errors:

amce_estimates <- taxes_model_automatic %>% 
  as_tibble() %>% 
  drop_na(std.error) %>% 
  select(estimate, std.error)

svy_estimates <- taxes_model_manual %>% 
  tidy() %>% 
  filter(term != "(Intercept)") %>% 
  select(estimate, std.error)

amce_estimates == svy_estimates
##       estimate std.error
##  [1,]     TRUE      TRUE
##  [2,]     TRUE      TRUE
##  [3,]     TRUE      TRUE
##  [4,]     TRUE      TRUE
##  [5,]     TRUE      TRUE
##  [6,]     TRUE      TRUE
##  [7,]     TRUE      TRUE
##  [8,]     TRUE      TRUE
##  [9,]     TRUE      TRUE
## [10,]     TRUE      TRUE
## [11,]     TRUE      TRUE
## [12,]     TRUE      TRUE
## [13,]     TRUE      TRUE
## [14,]     TRUE      TRUE
## [15,]     TRUE      TRUE
## [16,]     TRUE      TRUE
## [17,]     TRUE      TRUE
## [18,]     TRUE      TRUE
## [19,]     TRUE      TRUE
## [20,]     TRUE      TRUE
## [21,]     TRUE      TRUE
## [22,]     TRUE      TRUE
## [23,]     TRUE      TRUE
## [24,]     TRUE      TRUE
## [25,]     TRUE      TRUE

Marginal effects

So the OLS coefficients here are the AMCEs. That’s super straightforward.

But also not quite because technically we’re estimating a linear probability model here. choose_plan is a binary 0/1 variable, but we’re using OLS with it. The marginal effects (slopes) are just the partial derivatives reported in the regression table.

But if we’re going to have interaction terms or if we’re going to use something that’s not OLS (like logistic regression), raw coefficients won’t work. Instead we need to calculate actual marginal effects. At one point, {cregg} supported this by using the {margins} package (which makes sense—Thomas Leeper developed both {cregg} and {margins}), but starting with {cregg} version 0.1.14, the {margins} support disappeared, leaving only support for OLS linear probability models.

We can use the newer {marginaleffects} package to calculate marginal effects/slopes for each of these effects. Here we’ll just hold all the predictors at their means (but there are a ton of different estimands we could calculate). In this case they’re the same as the raw coefficients

library(marginaleffects)

amces_manual <- taxes_model_manual %>% 
  avg_slopes(newdata = "mean")

amces_manual %>% 
  as_tibble() %>% 
  select(mfx_estimate = estimate, mfx_std.error = std.error) %>% 
  cbind(svy_estimates)
##     mfx_estimate mfx_std.error      estimate   std.error
## 1  -0.0139987267   0.008367718 -0.0139987267 0.008367718
## 2  -0.0897702241   0.009883554 -0.0897702241 0.009883554
## 3  -0.2215066470   0.012497932 -0.2215066470 0.012497932
## 4  -0.0161677383   0.010015769 -0.0161677383 0.010015769
## 5  -0.0849079259   0.015824370 -0.0849079259 0.015824370
## 6  -0.1868125806   0.021074682 -0.1868125806 0.021074682
## 7   0.0005356495   0.008242105  0.0005356495 0.008242105
## 8  -0.0533364485   0.009713809 -0.0533364485 0.009713809
## 9  -0.1083416179   0.011917151 -0.1083416179 0.011917151
## 10  0.0194226595   0.007719126  0.0194226595 0.007719126
## 11  0.0108897506   0.008078966  0.0108897506 0.008078966
## 12 -0.0015463277   0.008431674 -0.0015463277 0.008431674
## 13  0.0384042184   0.008581007  0.0384042184 0.008581007
## 14  0.0504838117   0.008867028  0.0504838117 0.008867028
## 15  0.0716090284   0.009162901  0.0716090284 0.009162901
## 16  0.0441842455   0.009601062  0.0441842455 0.009601062
## 17  0.0729290882   0.009630408  0.0729290882 0.009630408
## 18  0.1023481041   0.010525057  0.1023481041 0.010525057
## 19  0.1342706760   0.011342675  0.1342706760 0.011342675
## 20  0.1456934759   0.012358893  0.1456934759 0.012358893
## 21  0.1190155301   0.013344593  0.1190155301 0.013344593
## 22  0.0155562019   0.011532459  0.0155562019 0.011532459
## 23  0.0433356897   0.016694623  0.0433356897 0.016694623
## 24  0.0613870891   0.020195352  0.0613870891 0.020195352
## 25  0.0866358183   0.027742084  0.0866358183 0.027742084

Coefficient plot

What about that neat coefficient plot with the reference categories? tidy() can’t return empty rows for the reference levels, but we can make it work in a couple different ways: (1) automatically with broom.helpers::tidy_add_reference_rows(), or (2) manually with some data wrangling.

Automatically with {broom.helpers}

The easiest way to do this is to use broom.helpers::tidy_add_reference_rows(), which adds the reference levels automatically with estimates of 0, so we can use geom_pointrange() and make basically the same plot as {cregg}:

library(broom.helpers)
library(ggforce)

plot_data_manual <- taxes_model_manual %>% 
  tidy_and_attach() %>% 
  tidy_add_reference_rows() %>% 
  tidy_add_estimate_to_reference_rows() %>% 
  filter(term != "(Intercept)") %>% 
  mutate(term = fct_inorder(term))

ggplot(plot_data_manual, aes(x = estimate, y = fct_rev(term), color = variable)) +
  geom_vline(xintercept = 0, color = "grey75") +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  guides(color = "none") +
  facet_col(facets = "variable", scales = "free_y", space = "free") +
  theme_bw() +
  theme(panel.grid = element_blank())

Manually with dplyr wrangling magic

However, this doesn’t work with {marginaleffects} objects or multilevel models or Bayesian models, so ultimately we can’t use this :(

Instead, we need to do some fancy data wrangling:

# Extract all the right-hand variables
rhs <- all.vars(stats::update(formula(taxes_model_manual), 0 ~ .))

# Make a dataframe of all the levels of all the non-numeric things
model_variable_levels <- tibble(
  variable = rhs
) %>% 
  mutate(levels = map(variable, ~{
    x <- taxes[[.x]]
    if (is.numeric(x)) {
      ""
    } else if (is.factor(x)) {
      levels(x)
    } else {
      sort(unique(x))
    }
  })) %>% 
  unnest(levels) %>% 
  mutate(term = paste0(variable, levels))

# Extract model results
model_results <- tidy(taxes_model_manual, conf.int = TRUE)

# Combine full dataset of factor levels with model results
thing_to_plot <- model_variable_levels %>%
  left_join(model_results, by = join_by(term)) %>%
  mutate(levels = fct_inorder(levels)) %>%
  # Make these zero
  mutate(
    across(
      c(estimate, conf.low, conf.high),
      ~ ifelse(is.na(.x), 0, .x)
    )
  ) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(term = fct_inorder(term))

ggplot(thing_to_plot, aes(x = estimate, y = fct_rev(term), color = variable)) +
  geom_vline(xintercept = 0, color = "grey75") +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  guides(color = "none") +
  facet_col(facets = "variable", scales = "free_y", space = "free") +
  theme_bw() +
  theme(panel.grid = element_blank())

It’s a little different with marginal effects because the term column is for the overarching variable (e.g., taxrate1) and the individual levels are built as contrasts in a column named contrast, like "<10k: 5% - <10k: 0%". We need to clean up that contrast column for joining with model_variable_levels.

# Extract marginal effects
model_results_mfx <- taxes_model_manual %>%
  avg_slopes(newdata = "mean") %>%
  separate_wider_delim(
    contrast,
    delim = " - ", 
    names = c("variable_level", "reference_level")
  )

# Combine full dataset of factor levels with model results
thing_to_plot <- model_variable_levels %>%
  left_join(
    model_results_mfx,
    by = join_by(variable == term, levels == variable_level)
  ) %>%
  mutate(levels = fct_inorder(levels)) %>%
  # Make these zero
  mutate(
    across(
      c(estimate, conf.low, conf.high),
      ~ ifelse(is.na(.x), 0, .x)
    )
  ) %>%
  mutate(
    term = fct_inorder(term),
    levels = fct_inorder(levels)
  )

ggplot(thing_to_plot, aes(x = estimate, y = fct_rev(levels), color = variable)) +
  geom_vline(xintercept = 0, color = "grey75") +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  guides(color = "none") +
  facet_col(facets = "variable", scales = "free_y", space = "free") +
  theme_bw() +
  theme(panel.grid = element_blank())

Multilevel model with example data

Instead of using survey::svyglm(), what happens if we use a multilevel model with individual random effects?

Typical respondent - new hypothetical respondent

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:brms':
## 
##     ngrps

model_lmer <- lmer(
  chose_plan ~ taxrate1 + taxrate2 + taxrate3 +
    taxrate4 + taxrate5 + taxrate6 + taxrev + (1 | ID),
  data = taxes
)
## boundary (singular) fit: see help('isSingular')

# results are the same, great

Logistic regression

Does this work if we don’t use an LPM?

taxes_model_logit <- survey::svyglm(
  chose_plan ~ taxrate1 + taxrate2 + taxrate3 +
    taxrate4 + taxrate5 + taxrate6 + taxrev,
  design = taxes_design,
  family = binomial(link = "logit")
)

Check out those sweet sweet uninterpretable log odds:

taxes_model_logit %>% tidy()
## # A tibble: 26 × 5
##    term                estimate std.error statistic  p.value
##    <chr>                  <dbl>     <dbl>     <dbl>    <dbl>
##  1 (Intercept)          0.0308     0.0779    0.395  6.93e- 1
##  2 taxrate1<10k: 5%    -0.0594     0.0352   -1.69   9.19e- 2
##  3 taxrate1<10k: 15%   -0.375      0.0416   -9.02   4.17e-19
##  4 taxrate1<10k: 25%   -0.933      0.0541  -17.2    4.39e-62
##  5 taxrate210-35k: 15% -0.0706     0.0425   -1.66   9.65e- 2
##  6 taxrate210-35k: 25% -0.361      0.0674   -5.35   9.83e- 8
##  7 taxrate210-35k: 35% -0.792      0.0907   -8.73   5.10e-18
##  8 taxrate335-85k: 15%  0.00172    0.0350    0.0493 9.61e- 1
##  9 taxrate335-85k: 25% -0.226      0.0413   -5.48   4.84e- 8
## 10 taxrate335-85k: 35% -0.460      0.0512   -8.98   6.40e-19
## # ℹ 16 more rows

Let’s instead calculate their average marginal effects (marginal-effects-at-the-mean flavored ones):

taxes_model_logit %>% avg_slopes(newdata = "mean")
## 
##      Term                     Contrast  Estimate Std. Error        z Pr(>|z|)
##  taxrate1 <10k: 5% - <10k: 0%          -0.013079    0.00770  -1.6986  0.08939
##  taxrate1 <10k: 15% - <10k: 0%         -0.086551    0.00892  -9.7048  < 0.001
##  taxrate1 <10k: 25% - <10k: 0%         -0.224746    0.01157 -19.4185  < 0.001
##  taxrate2 10-35k: 15% - 10-35k: 5%     -0.016935    0.01004  -1.6866  0.09168
##  taxrate2 10-35k: 25% - 10-35k: 5%     -0.088342    0.01591  -5.5520  < 0.001
##  taxrate2 10-35k: 35% - 10-35k: 5%     -0.195402    0.02163  -9.0334  < 0.001
##  taxrate3 35-85k: 15% - 35-85k: 5%      0.000416    0.00844   0.0493  0.96071
##  taxrate3 35-85k: 25% - 35-85k: 5%     -0.055580    0.00990  -5.6141  < 0.001
##  taxrate3 35-85k: 35% - 35-85k: 5%     -0.113804    0.01231  -9.2462  < 0.001
##  taxrate4 85-175k: 15% - 85-175k: 5%    0.019616    0.00786   2.4945  0.01261
##  taxrate4 85-175k: 25% - 85-175k: 5%    0.010976    0.00826   1.3295  0.18370
##  taxrate4 85-175k: 35% - 85-175k: 5%   -0.001771    0.00865  -0.2048  0.83774
##  taxrate5 175-375k: 15% - 175-375k: 5%  0.039953    0.00898   4.4478  < 0.001
##  taxrate5 175-375k: 25% - 175-375k: 5%  0.052389    0.00932   5.6212  < 0.001
##  taxrate5 175-375k: 35% - 175-375k: 5%  0.073741    0.00971   7.5917  < 0.001
##  taxrate5 175-375k: 45% - 175-375k: 5%  0.045784    0.01015   4.5111  < 0.001
##  taxrate6 >375k: 15% - >375k: 5%        0.076755    0.01029   7.4564  < 0.001
##  taxrate6 >375k: 25% - >375k: 5%        0.105867    0.01131   9.3618  < 0.001
##  taxrate6 >375k: 35% - >375k: 5%        0.136613    0.01235  11.0639  < 0.001
##  taxrate6 >375k: 45% - >375k: 5%        0.147456    0.01352  10.9078  < 0.001
##  taxrate6 >375k: 55% - >375k: 5%        0.121719    0.01447   8.4145  < 0.001
##  taxrev   75-95% - <75%                 0.017001    0.01222   1.3907  0.16431
##  taxrev   95-105% - <75%                0.046306    0.01753   2.6413  0.00826
##  taxrev   105-125% - <75%               0.064905    0.02108   3.0790  0.00208
##  taxrev   >125% - <75%                  0.090218    0.02831   3.1873  0.00144
##     2.5 %   97.5 %
##  -0.02817  0.00201
##  -0.10403 -0.06907
##  -0.24743 -0.20206
##  -0.03662  0.00274
##  -0.11953 -0.05716
##  -0.23780 -0.15301
##  -0.01613  0.01696
##  -0.07498 -0.03618
##  -0.13793 -0.08968
##   0.00420  0.03503
##  -0.00521  0.02716
##  -0.01872  0.01518
##   0.02235  0.05756
##   0.03412  0.07066
##   0.05470  0.09278
##   0.02589  0.06568
##   0.05658  0.09693
##   0.08370  0.12803
##   0.11241  0.16081
##   0.12096  0.17395
##   0.09337  0.15007
##  -0.00696  0.04096
##   0.01195  0.08067
##   0.02359  0.10622
##   0.03474  0.14570
## 
## Columns: term, contrast, estimate, std.error, statistic, p.value, conf.low, conf.high

h*ck yes these percentage-point-scale estimates basically the same as what we get from the LPM model.

# Extract marginal effects
model_results_logit_mfx <- taxes_model_logit %>%
  avg_slopes(newdata = "mean") %>%
  separate_wider_delim(
    contrast,
    delim = " - ", 
    names = c("variable_level", "reference_level")
  )

# Combine full dataset of factor levels with model results
thing_to_plot <- model_variable_levels %>%
  left_join(
    model_results_logit_mfx,
    by = join_by(variable == term, levels == variable_level)
  ) %>%
  mutate(levels = fct_inorder(levels)) %>%
  # Make these zero
  mutate(
    across(
      c(estimate, conf.low, conf.high),
      ~ ifelse(is.na(.x), 0, .x)
    )
  ) %>%
  mutate(
    term = fct_inorder(term),
    levels = fct_inorder(levels)
  )

ggplot(thing_to_plot, aes(x = estimate, y = fct_rev(levels), color = variable)) +
  geom_vline(xintercept = 0, color = "grey75") +
  geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
  guides(color = "none") +
  facet_col(facets = "variable", scales = "free_y", space = "free") +
  scale_x_continuous(
    labels = label_number(scale = 100, suffix = " pp.", style_negative = "minus")
  ) +
  labs(x = "Percentage point change in probability") +
  theme_bw() +
  theme(panel.grid = element_blank())

Bayesian model with example data

Now that we know (1) what the AMCE actually is, (2) that it works with multilevel models, and (3) that it works with logistic models, we can do it all Bayesianly with {brms}

taxes_model_brms <- brm(
  bf(chose_plan ~ taxrate1 + taxrate2 + taxrate3 +
    taxrate4 + taxrate5 + taxrate6 + taxrev + (1 | ID)),
  data = taxes,
  family = bernoulli(link = "logit"),
  chains = 4, cores = 4, iter = 2000, seed = 1234,
  backend = "cmdstanr", threads = threading(2), 
  file = "taxes_model_brms"
)
posterior_mfx_nested <- taxes_model_brms %>% 
  avg_slopes(newdata = "mean") %>% 
  posteriordraws() %>% 
  separate_wider_delim(
    contrast,
    delim = " - ", 
    names = c("variable_level", "reference_level")
  ) %>% 
  group_by(term, variable_level) %>% 
  nest()

empty_data <- tibble(draw = 0, estimate = 0)

# Combine full dataset of factor levels with model results
thing_to_plot_bayes <- model_variable_levels %>%
  left_join(
    posterior_mfx_nested,
    by = join_by(variable == term, levels == variable_level)
  ) %>%
  mutate(data = map_if(data, is.null, ~ tibble(draw = 0, estimate = 0))) %>% 
  unnest(data) %>% 
  mutate(
    term = fct_inorder(term),
    levels = fct_inorder(levels)
  )

ggplot(thing_to_plot_bayes, aes(x = draw, y = fct_rev(levels), fill = variable)) +
  geom_vline(xintercept = 0, color = "grey75") +
  stat_halfeye() +
  guides(fill = "none") +
  facet_col(facets = "variable", scales = "free_y", space = "free") +
  scale_x_continuous(
    labels = label_number(scale = 100, suffix = " pp.", style_negative = "minus")
  ) +
  labs(x = "Percentage point change in probability",
    y = NULL) +
  theme_bw() +
  theme(panel.grid = element_blank())

Bayesian model with real data