library(tidyverse)
library(broom)
library(cregg)
library(scales)
library(brms)
library(tidybayes)
data("taxes")
AMCE playground
Frequentist model with example data, automatic
The main R package for estimating AMCEs in a frequentist way is {cregg}
It calculates AMCEs and returns them in a nice tidy dataframe and provides an automatic plot function, which is nice:
<- amce(
taxes_model_automatic
taxes,~ taxrate1 + taxrate2 + taxrate3 +
chose_plan + taxrate5 + taxrate6 + taxrev,
taxrate4 id = ~ID
)%>%
taxes_model_automatic as_tibble()
## # A tibble: 32 × 10
## outcome statistic feature level estimate std.error z p lower
## <chr> <chr> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 chose_… amce Tax ra… <10k… 0 NA NA NA NA
## 2 chose_… amce Tax ra… <10k… -1.40e-2 0.00837 -1.67 9.43e- 2 -0.0304
## 3 chose_… amce Tax ra… <10k… -8.98e-2 0.00988 -9.08 1.06e-19 -0.109
## 4 chose_… amce Tax ra… <10k… -2.22e-1 0.0125 -17.7 2.76e-70 -0.246
## 5 chose_… amce Tax ra… 10-3… 0 NA NA NA NA
## 6 chose_… amce Tax ra… 10-3… -1.62e-2 0.0100 -1.61 1.06e- 1 -0.0358
## 7 chose_… amce Tax ra… 10-3… -8.49e-2 0.0158 -5.37 8.07e- 8 -0.116
## 8 chose_… amce Tax ra… 10-3… -1.87e-1 0.0211 -8.86 7.70e-19 -0.228
## 9 chose_… amce Tax ra… 35-8… 0 NA NA NA NA
## 10 chose_… amce Tax ra… 35-8… 5.36e-4 0.00824 0.0650 9.48e- 1 -0.0156
## # ℹ 22 more rows
## # ℹ 1 more variable: upper <dbl>
plot(taxes_model_automatic) +
guides(color = "none")
But (1) we’re doing this Bayesianly, and (2) I want to understand what exactly these AMCEs are, so I need to figure out how this all works under the hood.
Frequentist model with example data, manual
survey::svyglm()
coefficients
Behind the scenes, {cregg} uses survey::svyglm()
to run OLS with ID-specific adjustments to standard errors:
<- survey::svydesign(
taxes_design ids = ~ID,
weights = ~1,
data = taxes
)
<- survey::svyglm(
taxes_model_manual ~ taxrate1 + taxrate2 + taxrate3 +
chose_plan + taxrate5 + taxrate6 + taxrev,
taxrate4 design = taxes_design
)
tidy(taxes_model_manual)
## # A tibble: 26 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.509 0.0182 27.9 5.19e-145
## 2 taxrate1<10k: 5% -0.0140 0.00837 -1.67 9.45e- 2
## 3 taxrate1<10k: 15% -0.0898 0.00988 -9.08 2.50e- 19
## 4 taxrate1<10k: 25% -0.222 0.0125 -17.7 2.43e- 65
## 5 taxrate210-35k: 15% -0.0162 0.0100 -1.61 1.07e- 1
## 6 taxrate210-35k: 25% -0.0849 0.0158 -5.37 9.01e- 8
## 7 taxrate210-35k: 35% -0.187 0.0211 -8.86 1.68e- 18
## 8 taxrate335-85k: 15% 0.000536 0.00824 0.0650 9.48e- 1
## 9 taxrate335-85k: 25% -0.0533 0.00971 -5.49 4.52e- 8
## 10 taxrate335-85k: 35% -0.108 0.0119 -9.09 2.32e- 19
## # ℹ 16 more rows
Those are the same results that we get from cregg::amce()
, both the estimates and the standard errors:
<- taxes_model_automatic %>%
amce_estimates as_tibble() %>%
drop_na(std.error) %>%
select(estimate, std.error)
<- taxes_model_manual %>%
svy_estimates tidy() %>%
filter(term != "(Intercept)") %>%
select(estimate, std.error)
== svy_estimates
amce_estimates ## estimate std.error
## [1,] TRUE TRUE
## [2,] TRUE TRUE
## [3,] TRUE TRUE
## [4,] TRUE TRUE
## [5,] TRUE TRUE
## [6,] TRUE TRUE
## [7,] TRUE TRUE
## [8,] TRUE TRUE
## [9,] TRUE TRUE
## [10,] TRUE TRUE
## [11,] TRUE TRUE
## [12,] TRUE TRUE
## [13,] TRUE TRUE
## [14,] TRUE TRUE
## [15,] TRUE TRUE
## [16,] TRUE TRUE
## [17,] TRUE TRUE
## [18,] TRUE TRUE
## [19,] TRUE TRUE
## [20,] TRUE TRUE
## [21,] TRUE TRUE
## [22,] TRUE TRUE
## [23,] TRUE TRUE
## [24,] TRUE TRUE
## [25,] TRUE TRUE
Marginal effects
So the OLS coefficients here are the AMCEs. That’s super straightforward.
But also not quite because technically we’re estimating a linear probability model here. choose_plan
is a binary 0/1 variable, but we’re using OLS with it. The marginal effects (slopes) are just the partial derivatives reported in the regression table.
But if we’re going to have interaction terms or if we’re going to use something that’s not OLS (like logistic regression), raw coefficients won’t work. Instead we need to calculate actual marginal effects. At one point, {cregg} supported this by using the {margins} package (which makes sense—Thomas Leeper developed both {cregg} and {margins}), but starting with {cregg} version 0.1.14, the {margins} support disappeared, leaving only support for OLS linear probability models.
We can use the newer {marginaleffects} package to calculate marginal effects/slopes for each of these effects. Here we’ll just hold all the predictors at their means (but there are a ton of different estimands we could calculate). In this case they’re the same as the raw coefficients
library(marginaleffects)
<- taxes_model_manual %>%
amces_manual avg_slopes(newdata = "mean")
%>%
amces_manual as_tibble() %>%
select(mfx_estimate = estimate, mfx_std.error = std.error) %>%
cbind(svy_estimates)
## mfx_estimate mfx_std.error estimate std.error
## 1 -0.0139987267 0.008367718 -0.0139987267 0.008367718
## 2 -0.0897702241 0.009883554 -0.0897702241 0.009883554
## 3 -0.2215066470 0.012497932 -0.2215066470 0.012497932
## 4 -0.0161677383 0.010015769 -0.0161677383 0.010015769
## 5 -0.0849079259 0.015824370 -0.0849079259 0.015824370
## 6 -0.1868125806 0.021074682 -0.1868125806 0.021074682
## 7 0.0005356495 0.008242105 0.0005356495 0.008242105
## 8 -0.0533364485 0.009713809 -0.0533364485 0.009713809
## 9 -0.1083416179 0.011917151 -0.1083416179 0.011917151
## 10 0.0194226595 0.007719126 0.0194226595 0.007719126
## 11 0.0108897506 0.008078966 0.0108897506 0.008078966
## 12 -0.0015463277 0.008431674 -0.0015463277 0.008431674
## 13 0.0384042184 0.008581007 0.0384042184 0.008581007
## 14 0.0504838117 0.008867028 0.0504838117 0.008867028
## 15 0.0716090284 0.009162901 0.0716090284 0.009162901
## 16 0.0441842455 0.009601062 0.0441842455 0.009601062
## 17 0.0729290882 0.009630408 0.0729290882 0.009630408
## 18 0.1023481041 0.010525057 0.1023481041 0.010525057
## 19 0.1342706760 0.011342675 0.1342706760 0.011342675
## 20 0.1456934759 0.012358893 0.1456934759 0.012358893
## 21 0.1190155301 0.013344593 0.1190155301 0.013344593
## 22 0.0155562019 0.011532459 0.0155562019 0.011532459
## 23 0.0433356897 0.016694623 0.0433356897 0.016694623
## 24 0.0613870891 0.020195352 0.0613870891 0.020195352
## 25 0.0866358183 0.027742084 0.0866358183 0.027742084
Coefficient plot
What about that neat coefficient plot with the reference categories? tidy()
can’t return empty rows for the reference levels, but we can make it work in a couple different ways: (1) automatically with broom.helpers::tidy_add_reference_rows()
, or (2) manually with some data wrangling.
Automatically with {broom.helpers}
The easiest way to do this is to use broom.helpers::tidy_add_reference_rows()
, which adds the reference levels automatically with estimates of 0, so we can use geom_pointrange()
and make basically the same plot as {cregg}:
library(broom.helpers)
library(ggforce)
<- taxes_model_manual %>%
plot_data_manual tidy_and_attach() %>%
tidy_add_reference_rows() %>%
tidy_add_estimate_to_reference_rows() %>%
filter(term != "(Intercept)") %>%
mutate(term = fct_inorder(term))
ggplot(plot_data_manual, aes(x = estimate, y = fct_rev(term), color = variable)) +
geom_vline(xintercept = 0, color = "grey75") +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
guides(color = "none") +
facet_col(facets = "variable", scales = "free_y", space = "free") +
theme_bw() +
theme(panel.grid = element_blank())
Manually with dplyr wrangling magic
However, this doesn’t work with {marginaleffects} objects or multilevel models or Bayesian models, so ultimately we can’t use this :(
Instead, we need to do some fancy data wrangling:
# Extract all the right-hand variables
<- all.vars(stats::update(formula(taxes_model_manual), 0 ~ .))
rhs
# Make a dataframe of all the levels of all the non-numeric things
<- tibble(
model_variable_levels variable = rhs
%>%
) mutate(levels = map(variable, ~{
<- taxes[[.x]]
x if (is.numeric(x)) {
""
else if (is.factor(x)) {
} levels(x)
else {
} sort(unique(x))
}%>%
})) unnest(levels) %>%
mutate(term = paste0(variable, levels))
# Extract model results
<- tidy(taxes_model_manual, conf.int = TRUE)
model_results
# Combine full dataset of factor levels with model results
<- model_variable_levels %>%
thing_to_plot left_join(model_results, by = join_by(term)) %>%
mutate(levels = fct_inorder(levels)) %>%
# Make these zero
mutate(
across(
c(estimate, conf.low, conf.high),
~ ifelse(is.na(.x), 0, .x)
)%>%
) filter(term != "(Intercept)") %>%
mutate(term = fct_inorder(term))
ggplot(thing_to_plot, aes(x = estimate, y = fct_rev(term), color = variable)) +
geom_vline(xintercept = 0, color = "grey75") +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
guides(color = "none") +
facet_col(facets = "variable", scales = "free_y", space = "free") +
theme_bw() +
theme(panel.grid = element_blank())
It’s a little different with marginal effects because the term
column is for the overarching variable (e.g., taxrate1
) and the individual levels are built as contrasts in a column named contrast
, like "<10k: 5% - <10k: 0%"
. We need to clean up that contrast
column for joining with model_variable_levels
.
# Extract marginal effects
<- taxes_model_manual %>%
model_results_mfx avg_slopes(newdata = "mean") %>%
separate_wider_delim(
contrast,delim = " - ",
names = c("variable_level", "reference_level")
)
# Combine full dataset of factor levels with model results
<- model_variable_levels %>%
thing_to_plot left_join(
model_results_mfx,by = join_by(variable == term, levels == variable_level)
%>%
) mutate(levels = fct_inorder(levels)) %>%
# Make these zero
mutate(
across(
c(estimate, conf.low, conf.high),
~ ifelse(is.na(.x), 0, .x)
)%>%
) mutate(
term = fct_inorder(term),
levels = fct_inorder(levels)
)
ggplot(thing_to_plot, aes(x = estimate, y = fct_rev(levels), color = variable)) +
geom_vline(xintercept = 0, color = "grey75") +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
guides(color = "none") +
facet_col(facets = "variable", scales = "free_y", space = "free") +
theme_bw() +
theme(panel.grid = element_blank())
Multilevel model with example data
Instead of using survey::svyglm()
, what happens if we use a multilevel model with individual random effects?
Typical respondent - new hypothetical respondent
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lme4'
## The following object is masked from 'package:brms':
##
## ngrps
<- lmer(
model_lmer ~ taxrate1 + taxrate2 + taxrate3 +
chose_plan + taxrate5 + taxrate6 + taxrev + (1 | ID),
taxrate4 data = taxes
)## boundary (singular) fit: see help('isSingular')
# results are the same, great
Logistic regression
Does this work if we don’t use an LPM?
<- survey::svyglm(
taxes_model_logit ~ taxrate1 + taxrate2 + taxrate3 +
chose_plan + taxrate5 + taxrate6 + taxrev,
taxrate4 design = taxes_design,
family = binomial(link = "logit")
)
Check out those sweet sweet uninterpretable log odds:
%>% tidy()
taxes_model_logit ## # A tibble: 26 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.0308 0.0779 0.395 6.93e- 1
## 2 taxrate1<10k: 5% -0.0594 0.0352 -1.69 9.19e- 2
## 3 taxrate1<10k: 15% -0.375 0.0416 -9.02 4.17e-19
## 4 taxrate1<10k: 25% -0.933 0.0541 -17.2 4.39e-62
## 5 taxrate210-35k: 15% -0.0706 0.0425 -1.66 9.65e- 2
## 6 taxrate210-35k: 25% -0.361 0.0674 -5.35 9.83e- 8
## 7 taxrate210-35k: 35% -0.792 0.0907 -8.73 5.10e-18
## 8 taxrate335-85k: 15% 0.00172 0.0350 0.0493 9.61e- 1
## 9 taxrate335-85k: 25% -0.226 0.0413 -5.48 4.84e- 8
## 10 taxrate335-85k: 35% -0.460 0.0512 -8.98 6.40e-19
## # ℹ 16 more rows
Let’s instead calculate their average marginal effects (marginal-effects-at-the-mean flavored ones):
%>% avg_slopes(newdata = "mean")
taxes_model_logit ##
## Term Contrast Estimate Std. Error z Pr(>|z|)
## taxrate1 <10k: 5% - <10k: 0% -0.013079 0.00770 -1.6986 0.08939
## taxrate1 <10k: 15% - <10k: 0% -0.086551 0.00892 -9.7048 < 0.001
## taxrate1 <10k: 25% - <10k: 0% -0.224746 0.01157 -19.4185 < 0.001
## taxrate2 10-35k: 15% - 10-35k: 5% -0.016935 0.01004 -1.6866 0.09168
## taxrate2 10-35k: 25% - 10-35k: 5% -0.088342 0.01591 -5.5520 < 0.001
## taxrate2 10-35k: 35% - 10-35k: 5% -0.195402 0.02163 -9.0334 < 0.001
## taxrate3 35-85k: 15% - 35-85k: 5% 0.000416 0.00844 0.0493 0.96071
## taxrate3 35-85k: 25% - 35-85k: 5% -0.055580 0.00990 -5.6141 < 0.001
## taxrate3 35-85k: 35% - 35-85k: 5% -0.113804 0.01231 -9.2462 < 0.001
## taxrate4 85-175k: 15% - 85-175k: 5% 0.019616 0.00786 2.4945 0.01261
## taxrate4 85-175k: 25% - 85-175k: 5% 0.010976 0.00826 1.3295 0.18370
## taxrate4 85-175k: 35% - 85-175k: 5% -0.001771 0.00865 -0.2048 0.83774
## taxrate5 175-375k: 15% - 175-375k: 5% 0.039953 0.00898 4.4478 < 0.001
## taxrate5 175-375k: 25% - 175-375k: 5% 0.052389 0.00932 5.6212 < 0.001
## taxrate5 175-375k: 35% - 175-375k: 5% 0.073741 0.00971 7.5917 < 0.001
## taxrate5 175-375k: 45% - 175-375k: 5% 0.045784 0.01015 4.5111 < 0.001
## taxrate6 >375k: 15% - >375k: 5% 0.076755 0.01029 7.4564 < 0.001
## taxrate6 >375k: 25% - >375k: 5% 0.105867 0.01131 9.3618 < 0.001
## taxrate6 >375k: 35% - >375k: 5% 0.136613 0.01235 11.0639 < 0.001
## taxrate6 >375k: 45% - >375k: 5% 0.147456 0.01352 10.9078 < 0.001
## taxrate6 >375k: 55% - >375k: 5% 0.121719 0.01447 8.4145 < 0.001
## taxrev 75-95% - <75% 0.017001 0.01222 1.3907 0.16431
## taxrev 95-105% - <75% 0.046306 0.01753 2.6413 0.00826
## taxrev 105-125% - <75% 0.064905 0.02108 3.0790 0.00208
## taxrev >125% - <75% 0.090218 0.02831 3.1873 0.00144
## 2.5 % 97.5 %
## -0.02817 0.00201
## -0.10403 -0.06907
## -0.24743 -0.20206
## -0.03662 0.00274
## -0.11953 -0.05716
## -0.23780 -0.15301
## -0.01613 0.01696
## -0.07498 -0.03618
## -0.13793 -0.08968
## 0.00420 0.03503
## -0.00521 0.02716
## -0.01872 0.01518
## 0.02235 0.05756
## 0.03412 0.07066
## 0.05470 0.09278
## 0.02589 0.06568
## 0.05658 0.09693
## 0.08370 0.12803
## 0.11241 0.16081
## 0.12096 0.17395
## 0.09337 0.15007
## -0.00696 0.04096
## 0.01195 0.08067
## 0.02359 0.10622
## 0.03474 0.14570
##
## Columns: term, contrast, estimate, std.error, statistic, p.value, conf.low, conf.high
h*ck yes these percentage-point-scale estimates basically the same as what we get from the LPM model.
# Extract marginal effects
<- taxes_model_logit %>%
model_results_logit_mfx avg_slopes(newdata = "mean") %>%
separate_wider_delim(
contrast,delim = " - ",
names = c("variable_level", "reference_level")
)
# Combine full dataset of factor levels with model results
<- model_variable_levels %>%
thing_to_plot left_join(
model_results_logit_mfx,by = join_by(variable == term, levels == variable_level)
%>%
) mutate(levels = fct_inorder(levels)) %>%
# Make these zero
mutate(
across(
c(estimate, conf.low, conf.high),
~ ifelse(is.na(.x), 0, .x)
)%>%
) mutate(
term = fct_inorder(term),
levels = fct_inorder(levels)
)
ggplot(thing_to_plot, aes(x = estimate, y = fct_rev(levels), color = variable)) +
geom_vline(xintercept = 0, color = "grey75") +
geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
guides(color = "none") +
facet_col(facets = "variable", scales = "free_y", space = "free") +
scale_x_continuous(
labels = label_number(scale = 100, suffix = " pp.", style_negative = "minus")
+
) labs(x = "Percentage point change in probability") +
theme_bw() +
theme(panel.grid = element_blank())
Bayesian model with example data
Now that we know (1) what the AMCE actually is, (2) that it works with multilevel models, and (3) that it works with logistic models, we can do it all Bayesianly with {brms}
<- brm(
taxes_model_brms bf(chose_plan ~ taxrate1 + taxrate2 + taxrate3 +
+ taxrate5 + taxrate6 + taxrev + (1 | ID)),
taxrate4 data = taxes,
family = bernoulli(link = "logit"),
chains = 4, cores = 4, iter = 2000, seed = 1234,
backend = "cmdstanr", threads = threading(2),
file = "taxes_model_brms"
)
<- taxes_model_brms %>%
posterior_mfx_nested avg_slopes(newdata = "mean") %>%
posteriordraws() %>%
separate_wider_delim(
contrast,delim = " - ",
names = c("variable_level", "reference_level")
%>%
) group_by(term, variable_level) %>%
nest()
<- tibble(draw = 0, estimate = 0)
empty_data
# Combine full dataset of factor levels with model results
<- model_variable_levels %>%
thing_to_plot_bayes left_join(
posterior_mfx_nested,by = join_by(variable == term, levels == variable_level)
%>%
) mutate(data = map_if(data, is.null, ~ tibble(draw = 0, estimate = 0))) %>%
unnest(data) %>%
mutate(
term = fct_inorder(term),
levels = fct_inorder(levels)
)
ggplot(thing_to_plot_bayes, aes(x = draw, y = fct_rev(levels), fill = variable)) +
geom_vline(xintercept = 0, color = "grey75") +
stat_halfeye() +
guides(fill = "none") +
facet_col(facets = "variable", scales = "free_y", space = "free") +
scale_x_continuous(
labels = label_number(scale = 100, suffix = " pp.", style_negative = "minus")
+
) labs(x = "Percentage point change in probability",
y = NULL) +
theme_bw() +
theme(panel.grid = element_blank())