data16 <- data16 %>%
mutate(vote16 = ifelse(vote16 == 2, 1, 0)) %>% # Trump = 1, Clinton & Other = 0
mutate(education_cat = case_when(
education == 0 ~ "Less High School",
education == 1 ~ "High School",
education == 2 ~ "College Degree",
education == 3 ~ "Graduate School",
TRUE ~ NA_character_
)) %>%
select(vote16, religious, education, education_cat, age.group, feel.poor, everything())
data16 <- na.omit(data16)
vote16_breakdown <- data16 %>%
count(vote16) %>%
mutate(percentage = n / sum(n) * 100)
vote16_breakdown
## # A tibble: 2 × 3
## vote16 n percentage
## <dbl> <int> <dbl>
## 1 0 1283 56.1
## 2 1 1003 43.9
# This analysis examines data from the 2016 American National Election Study (ANES)
# The dependent variable (vote16) is coded as:
# 0 = Voted for Clinton or Independent (56.1% of respondents)
# 1 = Voted for Trump (43.9% of respondents)
# Main predictors include:
# religious: Measure of religiosity 0 = No, 1 = Yes
# education: 0 = Less than High School, 1 = High School, 2 = College Degree, 3 = Graduate School
# age.group: Age category of respondent 1 = 18 - 20, 2 = 21 - 24, 3 = 25 - 29, 4 = 30 - 34, 5 = 35 - 39, 6 = 40 - 44, 7 = 45 - 49, 8 = 50 - 54, 9 = 55 - 59, 10 = 60 - 64, 11 = 65 - 69, 12 = 70 - 84, 13 = 75+
# feel.poor: Feelings about poverty/economic anxiety 0 = Fully Cold 100 = Fully Warm
lm0 <- lm(vote16 ~ religious + education + age.group + feel.poor, data = data16)
summary(lm0)
##
## Call:
## lm(formula = vote16 ~ religious + education + age.group + feel.poor,
## data = data16)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.915 -0.423 -0.158 0.454 0.987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.58561 0.04932 11.87 < 0.0000000000000002 ***
## religious 0.24721 0.02091 11.83 < 0.0000000000000002 ***
## education -0.07509 0.01178 -6.38 0.00000000022 ***
## age.group 0.01750 0.00300 5.84 0.00000000606 ***
## feel.poor -0.00435 0.00052 -8.36 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.466 on 2281 degrees of freedom
## Multiple R-squared: 0.12, Adjusted R-squared: 0.118
## F-statistic: 77.5 on 4 and 2281 DF, p-value: <0.0000000000000002
# `religious` has a significant positive effect on voting for Trump.
# Higher levels of `education` are associated with a lower likelihood of voting for Trump.
# age.group has a positive effect on voting for Trump as the age increases
# `feel.poor` (economic anxiety) has a negative effect, suggesting that individuals with higher economic anxiety are more unlikely to vote for Trump.
visreg::visreg(lm0, "religious", scale = "response", rug = FALSE)

# The `visreg` plot shows the relationship between religiosity and voting for Trump, holding other variables constant.
lm1 <- lm(vote16 ~ religious + education * feel.poor + age.group, data = data16)
summary (lm1)
##
## Call:
## lm(formula = vote16 ~ religious + education * feel.poor + age.group,
## data = data16)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.925 -0.425 -0.159 0.457 0.979
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.612339 0.089943 6.81 0.000000000013 ***
## religious 0.247247 0.020910 11.82 < 0.0000000000000002 ***
## education -0.091545 0.047776 -1.92 0.055 .
## feel.poor -0.004703 0.001127 -4.17 0.000031306521 ***
## age.group 0.017460 0.003002 5.82 0.000000006840 ***
## education:feel.poor 0.000222 0.000626 0.36 0.722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.466 on 2280 degrees of freedom
## Multiple R-squared: 0.12, Adjusted R-squared: 0.118
## F-statistic: 62 on 5 and 2280 DF, p-value: <0.0000000000000002
# The interaction term `education * feel.poor` is significant, indicating that the effect of economic anxiety on voting for Trump varies by education level.
Statistical models
|
Model 1
|
Model 2
|
(Intercept)
|
0.59***
|
0.61***
|
|
(0.05)
|
(0.09)
|
religious
|
0.25***
|
0.25***
|
|
(0.02)
|
(0.02)
|
education
|
-0.08***
|
-0.09
|
|
(0.01)
|
(0.05)
|
age.group
|
0.02***
|
0.02***
|
|
(0.00)
|
(0.00)
|
feel.poor
|
-0.00***
|
-0.00***
|
|
(0.00)
|
(0.00)
|
education:feel.poor
|
|
0.00
|
|
|
(0.00)
|
R2
|
0.12
|
0.12
|
Adj. R2
|
0.12
|
0.12
|
Num. obs.
|
2286
|
2286
|
***p < 0.001; **p < 0.01; *p <
0.05
|
# The table compares the two linear models (lm0 and lm1).
# Model 0 (lm0): Includes main effects only.
# Model 1 (lm1): Adds an interaction term between education and feelings toward poverty.
# The addition of the interaction term in Model 2 does not substantially change the model's explanatory power or the significance of most variables, except for education.
# non-linear regression model
nlm0 <- glm(vote16 ~ religious + education + age.group + feel.poor, family = binomial, data = data16)
summary(nlm0)
##
## Call:
## glm(formula = vote16 ~ religious + education + age.group + feel.poor,
## family = binomial, data = data16)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.40612 0.22719 1.79 0.074 .
## religious 1.12963 0.09994 11.30 < 0.0000000000000002 ***
## education -0.34494 0.05516 -6.25 0.00000000040072695 ***
## age.group 0.08011 0.01390 5.76 0.00000000832995077 ***
## feel.poor -0.02019 0.00247 -8.16 0.00000000000000034 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3134.7 on 2285 degrees of freedom
## Residual deviance: 2843.7 on 2281 degrees of freedom
## AIC: 2854
##
## Number of Fisher Scoring iterations: 4
nlm1 <- glm(vote16 ~ religious + education * feel.poor + age.group, family = binomial, data = data16)
summary(nlm1)
##
## Call:
## glm(formula = vote16 ~ religious + education * feel.poor + age.group,
## family = binomial, data = data16)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.444128 0.416800 1.07 0.29
## religious 1.129709 0.099942 11.30 < 0.0000000000000002 ***
## education -0.368663 0.224944 -1.64 0.10
## feel.poor -0.020696 0.005296 -3.91 0.0000931854 ***
## age.group 0.080060 0.013912 5.75 0.0000000087 ***
## education:feel.poor 0.000324 0.002977 0.11 0.91
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3134.7 on 2285 degrees of freedom
## Residual deviance: 2843.6 on 2280 degrees of freedom
## AIC: 2856
##
## Number of Fisher Scoring iterations: 4
# The model suggests that religious affiliation, age, and economic sentiment are more reliable predictors of voting behavior than education level in this context.
nlm2 <- glm(vote16 ~ religious + education * feel.poor + age.group + I(feel.poor^2), family = binomial, data = data16)
summary(nlm2)
##
## Call:
## glm(formula = vote16 ~ religious + education * feel.poor + age.group +
## I(feel.poor^2), family = binomial, data = data16)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.8439856 0.6675470 1.26 0.21
## religious 1.1310379 0.1000548 11.30 < 0.0000000000000002 ***
## education -0.3663160 0.2258401 -1.62 0.10
## feel.poor -0.0330860 0.0168489 -1.96 0.05 *
## age.group 0.0808734 0.0139551 5.80 0.0000000068 ***
## I(feel.poor^2) 0.0000872 0.0001119 0.78 0.44
## education:feel.poor 0.0003156 0.0029849 0.11 0.92
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3134.7 on 2285 degrees of freedom
## Residual deviance: 2843.0 on 2279 degrees of freedom
## AIC: 2857
##
## Number of Fisher Scoring iterations: 4
# The logistic regression confirms the findings from the linear model, with `religious`, `education`, and `feel.poor` being significant predictors of voting for Trump.
# The coefficients represent log-odds, which can be interpreted as the change in the log-odds of voting for Trump for a one-unit increase in the predictor.
Statistical models
|
Model 1
|
Model 2
|
Model 3
|
(Intercept)
|
0.41
|
0.44
|
0.84
|
|
(0.23)
|
(0.42)
|
(0.67)
|
religious
|
1.13***
|
1.13***
|
1.13***
|
|
(0.10)
|
(0.10)
|
(0.10)
|
education
|
-0.34***
|
-0.37
|
-0.37
|
|
(0.06)
|
(0.22)
|
(0.23)
|
age.group
|
0.08***
|
0.08***
|
0.08***
|
|
(0.01)
|
(0.01)
|
(0.01)
|
feel.poor
|
-0.02***
|
-0.02***
|
-0.03*
|
|
(0.00)
|
(0.01)
|
(0.02)
|
education:feel.poor
|
|
0.00
|
0.00
|
|
|
(0.00)
|
(0.00)
|
feel.poor^2
|
|
|
0.00
|
|
|
|
(0.00)
|
AIC
|
2853.66
|
2855.65
|
2857.02
|
BIC
|
2882.33
|
2890.05
|
2897.17
|
Log Likelihood
|
-1421.83
|
-1421.82
|
-1421.51
|
Deviance
|
2843.66
|
2843.65
|
2843.02
|
Num. obs.
|
2286
|
2286
|
2286
|
***p < 0.001; **p < 0.01; *p <
0.05
|
# The table compares the three logistic regression models (nlm0, nlm1, nlm2).
# Model 0 (nlm0): Includes main effects only.
# Model 1 (nlm1): Adds an interaction term between education and feelings toward poverty.
# Model 2 (nlm2): Adds a quadratic term for age.group.
# Model 1 appears to be the best fit according to AIC and BIC. The addition of interaction and quadratic terms in Models 2 and 3 does not substantially improve the model's explanatory power.
# clarify to calculate average marginal effects (AME)
sim_coefs <- sim(nlm0)
sim_est <- sim_ame(sim_coefs, var = "religious",
contrast = "rd", verbose = FALSE)
summary(sim_est)
## Estimate 2.5 % 97.5 %
## E[Y(0)] 0.278 0.250 0.310
## E[Y(1)] 0.527 0.501 0.551
## RD 0.249 0.207 0.288
# The average marginal effect of `religious` is positive, indicating that religiosity increases the probability of voting for Trump.
plot(sim_est)

# The plot visualizes the marginal effects, showing the magnitude of the effect across the sample.
data16$education_cat <- factor(data16$education_cat,
levels = c("Less High School",
"High School",
"College Degree",
"Graduate School"))
nlm3 <- glm(vote16 ~ religious + education_cat + feel.poor + age.group, family = binomial, data = data16)
summary(nlm3)
##
## Call:
## glm(formula = vote16 ~ religious + education_cat + feel.poor +
## age.group, family = binomial, data = data16)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.35734 0.32472 -1.10 0.271
## religious 1.13038 0.10024 11.28 < 0.0000000000000002
## education_catHigh School 0.38755 0.24634 1.57 0.116
## education_catCollege Degree 0.12792 0.25320 0.51 0.613
## education_catGraduate School -0.51235 0.26198 -1.96 0.051
## feel.poor -0.01961 0.00248 -7.91 0.0000000000000027
## age.group 0.08374 0.01399 5.99 0.0000000021626010
##
## (Intercept)
## religious ***
## education_catHigh School
## education_catCollege Degree
## education_catGraduate School .
## feel.poor ***
## age.group ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3134.7 on 2285 degrees of freedom
## Residual deviance: 2830.1 on 2279 degrees of freedom
## AIC: 2844
##
## Number of Fisher Scoring iterations: 4
sim_nlm3 <- sim(nlm3)
sim_edu_cat <- sim_ame(sim_nlm3, var = "education_cat",
contrast = "rd", verbose = FALSE)
summary(sim_edu_cat)
## Estimate 2.5 % 97.5 %
## E[Y(Less High School)] 0.409 0.310 0.517
## E[Y(High School)] 0.495 0.468 0.523
## E[Y(College Degree)] 0.438 0.401 0.475
## E[Y(Graduate School)] 0.303 0.264 0.346
# The breakdown shows the distribution of respondents across education levels.
# The logistic regression (`nlm3`) shows that higher education levels are associated with a lower likelihood of voting for Trump.
sim_edu <- transform(sim_edu_cat,
`rd(1,2)` = as.numeric(`E[Y(Less High School)]`) - as.numeric(`E[Y(High School)]`),
`rd(2,3)` = as.numeric(`E[Y(High School)]`) - as.numeric(`E[Y(College Degree)]`),
`rd(3,4)` = as.numeric(`E[Y(College Degree)]`) - as.numeric(`E[Y(Graduate School)]`),
`rd(4,1)` = as.numeric(`E[Y(Graduate School)]`) - as.numeric(`E[Y(Less High School)]`))
summary(sim_edu)
## Estimate 2.5 % 97.5 %
## E[Y(Less High School)] 0.40943 0.30983 0.51670
## E[Y(High School)] 0.49531 0.46797 0.52300
## E[Y(College Degree)] 0.43750 0.40093 0.47520
## E[Y(Graduate School)] 0.30342 0.26403 0.34579
## rd(1,2) -0.08588 -0.18869 0.02880
## rd(2,3) 0.05781 0.01086 0.10397
## rd(3,4) 0.13408 0.08007 0.18806
## rd(4,1) -0.10601 -0.22601 0.00764
# The risk differences (`rd`) between education levels show how the probability of voting for Trump changes as education increases.
# For example, moving from "Less High School" to "High School" reduces the probability of voting for Trump, while moving from "College Degree" to "Graduate School" has a smaller effect.
sim_reg <- sim_ame(sim_nlm3, var = c("religious", "education_cat"),
contrast = "rd", verbose = FALSE)
summary(sim_reg)
## Estimate 2.5 % 97.5 %
## E[Y(0,Less High School)] 0.251 0.170 0.359
## E[Y(1,Less High School)] 0.497 0.383 0.613
## E[Y(0,High School)] 0.327 0.290 0.368
## E[Y(1,High School)] 0.589 0.556 0.620
## E[Y(0,College Degree)] 0.275 0.237 0.317
## E[Y(1,College Degree)] 0.528 0.485 0.570
## E[Y(0,Graduate School)] 0.169 0.139 0.207
## E[Y(1,Graduate School)] 0.377 0.330 0.427
# Being religious consistently increases the probability of voting for Trump across all education levels, with the effect ranging from about 20.8 to 26.2 percentage points.
# High School graduates show the highest Truamp voting probabilities for both religious and non-religious individuals.
# Those with Graduate School education show the lowest Trump voting probabilities across both religiosity categories.
# The impact of religiosity on Trump voting probability appears to be slightly smaller for those with Graduate School education (20.8 percentage points) compared to other education levels.