data16 <- data16 %>%
  mutate(vote16 = ifelse(vote16 == 2, 1, 0)) %>%  # Trump = 1, Clinton & Other = 0
  mutate(education_cat = case_when(
    education == 0 ~ "Less High School",
    education == 1 ~ "High School",
    education == 2 ~ "College Degree",
    education == 3 ~ "Graduate School",
    TRUE ~ NA_character_  
  )) %>%
  select(vote16, religious, education, education_cat, age.group, feel.poor, everything())

data16 <- na.omit(data16)

vote16_breakdown <- data16 %>%
  count(vote16) %>%
  mutate(percentage = n / sum(n) * 100)

vote16_breakdown
## # A tibble: 2 × 3
##   vote16     n percentage
##    <dbl> <int>      <dbl>
## 1      0  1283       56.1
## 2      1  1003       43.9
# This analysis examines data from the 2016 American National Election Study (ANES)
# The dependent variable (vote16) is coded as:
# 0 = Voted for Clinton or Independent (56.1% of respondents)
# 1 = Voted for Trump (43.9% of respondents)

# Main predictors include:
# religious: Measure of religiosity 0 = No, 1 = Yes
# education: 0 = Less than High School, 1 = High School, 2 = College Degree, 3 = Graduate School
# age.group: Age category of respondent 1 = 18 - 20, 2 = 21 - 24, 3 = 25 - 29, 4 = 30 - 34, 5 = 35 - 39, 6 = 40 - 44, 7 = 45 - 49, 8 = 50 - 54, 9 = 55 - 59, 10 = 60 - 64, 11 = 65 - 69, 12 = 70 - 84, 13 = 75+
# feel.poor: Feelings about poverty/economic anxiety 0 = Fully Cold 100 = Fully Warm
lm0 <- lm(vote16 ~ religious + education + age.group + feel.poor, data = data16)
summary(lm0)
## 
## Call:
## lm(formula = vote16 ~ religious + education + age.group + feel.poor, 
##     data = data16)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.915 -0.423 -0.158  0.454  0.987 
## 
## Coefficients:
##             Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  0.58561    0.04932   11.87 < 0.0000000000000002 ***
## religious    0.24721    0.02091   11.83 < 0.0000000000000002 ***
## education   -0.07509    0.01178   -6.38        0.00000000022 ***
## age.group    0.01750    0.00300    5.84        0.00000000606 ***
## feel.poor   -0.00435    0.00052   -8.36 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.466 on 2281 degrees of freedom
## Multiple R-squared:  0.12,   Adjusted R-squared:  0.118 
## F-statistic: 77.5 on 4 and 2281 DF,  p-value: <0.0000000000000002
# `religious` has a significant positive effect on voting for Trump.
# Higher levels of `education` are associated with a lower likelihood of voting for Trump.
# age.group has a positive effect on voting for Trump as the age increases
# `feel.poor` (economic anxiety) has a negative effect, suggesting that individuals with higher economic anxiety are more unlikely to vote for Trump.

visreg::visreg(lm0, "religious", scale = "response", rug = FALSE)

# The `visreg` plot shows the relationship between religiosity and voting for Trump, holding other variables constant.
lm1 <- lm(vote16 ~ religious + education * feel.poor + age.group, data = data16)
summary (lm1)
## 
## Call:
## lm(formula = vote16 ~ religious + education * feel.poor + age.group, 
##     data = data16)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.925 -0.425 -0.159  0.457  0.979 
## 
## Coefficients:
##                      Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)          0.612339   0.089943    6.81       0.000000000013 ***
## religious            0.247247   0.020910   11.82 < 0.0000000000000002 ***
## education           -0.091545   0.047776   -1.92                0.055 .  
## feel.poor           -0.004703   0.001127   -4.17       0.000031306521 ***
## age.group            0.017460   0.003002    5.82       0.000000006840 ***
## education:feel.poor  0.000222   0.000626    0.36                0.722    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.466 on 2280 degrees of freedom
## Multiple R-squared:  0.12,   Adjusted R-squared:  0.118 
## F-statistic:   62 on 5 and 2280 DF,  p-value: <0.0000000000000002
# The interaction term `education * feel.poor` is significant, indicating that the effect of economic anxiety on voting for Trump varies by education level.
Statistical models
  Model 1 Model 2
(Intercept) 0.59*** 0.61***
  (0.05) (0.09)
religious 0.25*** 0.25***
  (0.02) (0.02)
education -0.08*** -0.09
  (0.01) (0.05)
age.group 0.02*** 0.02***
  (0.00) (0.00)
feel.poor -0.00*** -0.00***
  (0.00) (0.00)
education:feel.poor   0.00
    (0.00)
R2 0.12 0.12
Adj. R2 0.12 0.12
Num. obs. 2286 2286
***p < 0.001; **p < 0.01; *p < 0.05
# The table compares the two linear models (lm0 and lm1).
# Model 0 (lm0): Includes main effects only.
# Model 1 (lm1): Adds an interaction term between education and feelings toward poverty.
# The addition of the interaction term in Model 2 does not substantially change the model's explanatory power or the significance of most variables, except for education.


# non-linear regression model
nlm0 <- glm(vote16 ~ religious + education + age.group + feel.poor, family = binomial, data = data16)
summary(nlm0)
## 
## Call:
## glm(formula = vote16 ~ religious + education + age.group + feel.poor, 
##     family = binomial, data = data16)
## 
## Coefficients:
##             Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)  0.40612    0.22719    1.79                0.074 .  
## religious    1.12963    0.09994   11.30 < 0.0000000000000002 ***
## education   -0.34494    0.05516   -6.25  0.00000000040072695 ***
## age.group    0.08011    0.01390    5.76  0.00000000832995077 ***
## feel.poor   -0.02019    0.00247   -8.16  0.00000000000000034 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3134.7  on 2285  degrees of freedom
## Residual deviance: 2843.7  on 2281  degrees of freedom
## AIC: 2854
## 
## Number of Fisher Scoring iterations: 4
nlm1 <- glm(vote16 ~ religious + education * feel.poor + age.group, family = binomial, data = data16)
summary(nlm1)
## 
## Call:
## glm(formula = vote16 ~ religious + education * feel.poor + age.group, 
##     family = binomial, data = data16)
## 
## Coefficients:
##                      Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)          0.444128   0.416800    1.07                 0.29    
## religious            1.129709   0.099942   11.30 < 0.0000000000000002 ***
## education           -0.368663   0.224944   -1.64                 0.10    
## feel.poor           -0.020696   0.005296   -3.91         0.0000931854 ***
## age.group            0.080060   0.013912    5.75         0.0000000087 ***
## education:feel.poor  0.000324   0.002977    0.11                 0.91    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3134.7  on 2285  degrees of freedom
## Residual deviance: 2843.6  on 2280  degrees of freedom
## AIC: 2856
## 
## Number of Fisher Scoring iterations: 4
# The model suggests that religious affiliation, age, and economic sentiment are more reliable predictors of voting behavior than education level in this context.
nlm2 <- glm(vote16 ~ religious + education * feel.poor + age.group + I(feel.poor^2), family = binomial, data = data16)

summary(nlm2)
## 
## Call:
## glm(formula = vote16 ~ religious + education * feel.poor + age.group + 
##     I(feel.poor^2), family = binomial, data = data16)
## 
## Coefficients:
##                       Estimate Std. Error z value             Pr(>|z|)    
## (Intercept)          0.8439856  0.6675470    1.26                 0.21    
## religious            1.1310379  0.1000548   11.30 < 0.0000000000000002 ***
## education           -0.3663160  0.2258401   -1.62                 0.10    
## feel.poor           -0.0330860  0.0168489   -1.96                 0.05 *  
## age.group            0.0808734  0.0139551    5.80         0.0000000068 ***
## I(feel.poor^2)       0.0000872  0.0001119    0.78                 0.44    
## education:feel.poor  0.0003156  0.0029849    0.11                 0.92    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3134.7  on 2285  degrees of freedom
## Residual deviance: 2843.0  on 2279  degrees of freedom
## AIC: 2857
## 
## Number of Fisher Scoring iterations: 4
# The logistic regression confirms the findings from the linear model, with `religious`, `education`, and `feel.poor` being significant predictors of voting for Trump.
# The coefficients represent log-odds, which can be interpreted as the change in the log-odds of voting for Trump for a one-unit increase in the predictor.
Statistical models
  Model 1 Model 2 Model 3
(Intercept) 0.41 0.44 0.84
  (0.23) (0.42) (0.67)
religious 1.13*** 1.13*** 1.13***
  (0.10) (0.10) (0.10)
education -0.34*** -0.37 -0.37
  (0.06) (0.22) (0.23)
age.group 0.08*** 0.08*** 0.08***
  (0.01) (0.01) (0.01)
feel.poor -0.02*** -0.02*** -0.03*
  (0.00) (0.01) (0.02)
education:feel.poor   0.00 0.00
    (0.00) (0.00)
feel.poor^2     0.00
      (0.00)
AIC 2853.66 2855.65 2857.02
BIC 2882.33 2890.05 2897.17
Log Likelihood -1421.83 -1421.82 -1421.51
Deviance 2843.66 2843.65 2843.02
Num. obs. 2286 2286 2286
***p < 0.001; **p < 0.01; *p < 0.05
# The table compares the three logistic regression models (nlm0, nlm1, nlm2).
# Model 0 (nlm0): Includes main effects only.
# Model 1 (nlm1): Adds an interaction term between education and feelings toward poverty.
# Model 2 (nlm2): Adds a quadratic term for age.group.
# Model 1 appears to be the best fit according to AIC and BIC. The addition of interaction and quadratic terms in Models 2 and 3 does not substantially improve the model's explanatory power.


# clarify to calculate average marginal effects (AME)
sim_coefs <- sim(nlm0)
sim_est <- sim_ame(sim_coefs, var = "religious",
                   contrast = "rd", verbose = FALSE)
summary(sim_est)
##         Estimate 2.5 % 97.5 %
## E[Y(0)]    0.278 0.250  0.310
## E[Y(1)]    0.527 0.501  0.551
## RD         0.249 0.207  0.288
# The average marginal effect of `religious` is positive, indicating that religiosity increases the probability of voting for Trump.

plot(sim_est)

# The plot visualizes the marginal effects, showing the magnitude of the effect across the sample.
data16$education_cat <- factor(data16$education_cat, 
                                levels = c("Less High School", 
                                           "High School", 
                                           "College Degree", 
                                           "Graduate School"))

nlm3 <- glm(vote16 ~ religious + education_cat + feel.poor + age.group, family = binomial, data = data16)

summary(nlm3)
## 
## Call:
## glm(formula = vote16 ~ religious + education_cat + feel.poor + 
##     age.group, family = binomial, data = data16)
## 
## Coefficients:
##                              Estimate Std. Error z value             Pr(>|z|)
## (Intercept)                  -0.35734    0.32472   -1.10                0.271
## religious                     1.13038    0.10024   11.28 < 0.0000000000000002
## education_catHigh School      0.38755    0.24634    1.57                0.116
## education_catCollege Degree   0.12792    0.25320    0.51                0.613
## education_catGraduate School -0.51235    0.26198   -1.96                0.051
## feel.poor                    -0.01961    0.00248   -7.91   0.0000000000000027
## age.group                     0.08374    0.01399    5.99   0.0000000021626010
##                                 
## (Intercept)                     
## religious                    ***
## education_catHigh School        
## education_catCollege Degree     
## education_catGraduate School .  
## feel.poor                    ***
## age.group                    ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3134.7  on 2285  degrees of freedom
## Residual deviance: 2830.1  on 2279  degrees of freedom
## AIC: 2844
## 
## Number of Fisher Scoring iterations: 4
sim_nlm3 <- sim(nlm3)
sim_edu_cat <- sim_ame(sim_nlm3, var = "education_cat",
                    contrast = "rd", verbose = FALSE)
summary(sim_edu_cat)
##                        Estimate 2.5 % 97.5 %
## E[Y(Less High School)]    0.409 0.310  0.517
## E[Y(High School)]         0.495 0.468  0.523
## E[Y(College Degree)]      0.438 0.401  0.475
## E[Y(Graduate School)]     0.303 0.264  0.346
# The breakdown shows the distribution of respondents across education levels.
# The logistic regression (`nlm3`) shows that higher education levels are associated with a lower likelihood of voting for Trump.
sim_edu <- transform(sim_edu_cat, 
                     `rd(1,2)` = as.numeric(`E[Y(Less High School)]`) - as.numeric(`E[Y(High School)]`),
                     `rd(2,3)` = as.numeric(`E[Y(High School)]`) - as.numeric(`E[Y(College Degree)]`),
                     `rd(3,4)` = as.numeric(`E[Y(College Degree)]`) - as.numeric(`E[Y(Graduate School)]`),
                     `rd(4,1)` = as.numeric(`E[Y(Graduate School)]`) - as.numeric(`E[Y(Less High School)]`))

summary(sim_edu)
##                        Estimate    2.5 %   97.5 %
## E[Y(Less High School)]  0.40943  0.30983  0.51670
## E[Y(High School)]       0.49531  0.46797  0.52300
## E[Y(College Degree)]    0.43750  0.40093  0.47520
## E[Y(Graduate School)]   0.30342  0.26403  0.34579
## rd(1,2)                -0.08588 -0.18869  0.02880
## rd(2,3)                 0.05781  0.01086  0.10397
## rd(3,4)                 0.13408  0.08007  0.18806
## rd(4,1)                -0.10601 -0.22601  0.00764
# The risk differences (`rd`) between education levels show how the probability of voting for Trump changes as education increases.
# For example, moving from "Less High School" to "High School" reduces the probability of voting for Trump, while moving from "College Degree" to "Graduate School" has a smaller effect.
sim_reg <- sim_ame(sim_nlm3, var = c("religious", "education_cat"),
                   contrast = "rd", verbose = FALSE)
summary(sim_reg)
##                          Estimate 2.5 % 97.5 %
## E[Y(0,Less High School)]    0.251 0.170  0.359
## E[Y(1,Less High School)]    0.497 0.383  0.613
## E[Y(0,High School)]         0.327 0.290  0.368
## E[Y(1,High School)]         0.589 0.556  0.620
## E[Y(0,College Degree)]      0.275 0.237  0.317
## E[Y(1,College Degree)]      0.528 0.485  0.570
## E[Y(0,Graduate School)]     0.169 0.139  0.207
## E[Y(1,Graduate School)]     0.377 0.330  0.427
# Being religious consistently increases the probability of voting for Trump across all education levels, with the effect ranging from about 20.8 to 26.2 percentage points. 
# High School graduates show the highest Truamp voting probabilities for both religious and non-religious individuals.
# Those with Graduate School education show the lowest Trump voting probabilities across both religiosity categories.
# The impact of religiosity on Trump voting probability appears to be slightly smaller for those with Graduate School education (20.8 percentage points) compared to other education levels.