How has millennial’s perception of their vote having an influence on politics effected their voter turnout in the United Kingdom
# Installing and loading Various Packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom", "remotes")
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'remotes' was built under R version 4.3.2
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "broom" "modelsummary" "fst" "infer" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[6]]
## [1] "remotes" "broom" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
#Loading ESS Dataset
setwd("C:/Users/2expl/Desktop/UToronto/Year 2/SOC202")
ess <- read_fst("All-ESS-Data.fst")
GB_subset <- ess %>%
filter(cntry == "GB") %>%
mutate(
psppipla = case_when(
psppipla %in% c(7, 8, 9) ~ NA_character_,
psppipla == 1 ~ "Not At All",
psppipla == 2 ~ "Very Little",
psppipla == 3 ~ "Some",
psppipla == 4 ~ "A Lot",
psppipla == 5 ~ "A Great Deal",
TRUE ~ as.character(psppipla)
),
vote = case_when(
vote %in% c(3, 7, 8, 9) ~ NA_character_,
vote == 1 ~ "Yes",
vote == 2 ~ "No",
TRUE ~ as.character(vote)
),
age = ifelse(agea == 999, NA, agea),
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(cohort)
),
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
) %>%
# Selecting only chosen variables
select(psppipla, gen, vote) %>%
# Filtering NAs
filter(!is.na(psppipla)) %>%
filter(!is.na(vote)) %>%
filter(!is.na(gen)) %>%
# Filtering for only Millenials
filter(gen == "Millennials")
# Replicating the GB_subset dataset but keeping all values as numeric in order to meet the confines of the datasummary_skim output.
GB_df <- ess %>%
filter(cntry == "GB") %>%
mutate(
psppipla = case_when(
psppipla %in% c(7, 8, 9) ~ NA_real_,
TRUE ~ as.numeric(psppipla)
),
vote = case_when(
vote %in% c(3, 7, 8, 9) ~ NA_real_,
TRUE ~ vote
),
# Cohorting and mutating respondent age to isolate selected age group.
age = ifelse(agea == 999, NA, agea),
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
gen = case_when(
yrbrn %in% 1900:1945 ~ 1,
yrbrn %in% 1946:1964 ~ 2,
yrbrn %in% 1965:1979 ~ 3,
yrbrn %in% 1980:2000 ~ 4,
TRUE ~ as.numeric(cohort)
)
) %>%
# Filtering NAs
filter(!is.na(psppipla)) %>%
filter(!is.na(vote)) %>%
filter(!is.na(gen)) %>%
filter(gen == 4)
# Creating the flextable
GB_table <- datasummary_skim(GB_df %>% dplyr::select(psppipla, vote, gen), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
GB_table
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
psppipla | 5 | 0 | 2.3 | 0.8 | 1.0 | 2.0 | 5.0 |
vote | 2 | 0 | 1.4 | 0.5 | 1.0 | 1.0 | 2.0 |
gen | 1 | 0 | 4.0 | 0.0 | 4.0 | 4.0 | 4.0 |
# renaming values to aid in readability
GB_df_named <- GB_df %>%
rename(
`Political System Allows Influence` = psppipla,
`Generation (Millennials)` = gen,
`Did Respondant Vote?` = vote,
)
GB_tableV2 <- datasummary_skim(GB_df_named %>% dplyr::select(`Political System Allows Influence`, `Generation (Millennials)`, `Did Respondant Vote?`), title = "Table 1. Descriptive statistics of main variables", output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
GB_tableV2
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
Political System Allows Influence | 5 | 0 | 2.3 | 0.8 | 1.0 | 2.0 | 5.0 |
Generation (Millennials) | 1 | 0 | 4.0 | 0.0 | 4.0 | 4.0 | 4.0 |
Did Respondant Vote? | 2 | 0 | 1.4 | 0.5 | 1.0 | 1.0 | 2.0 |
# converting psppipla to a factor
GB_subset$psppipla <- factor(GB_subset$psppipla)
# Test Stat
test_stat <- GB_subset %>%
specify(explanatory = psppipla,
response = vote) %>%
hypothesize(null = "independence") %>%
calculate(stat = "Chisq")
print(test_stat$stat)
## X-squared
## 29.37997
# Null Distribution
null_dist <- GB_subset %>%
specify(vote ~ psppipla) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "Chisq")
p_val <- null_dist %>% #
get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
# Simulating Null Distribution
conf_int <- null_dist%>%
get_confidence_interval(level = 0.95, type = "percentile")
null_dist %>%
visualize(data, bins = 10, method = "simulation", dens_color = "black") +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int)
## Warning in min(diff(unique_loc)): no non-missing arguments to min; returning
## Inf
# filtering additional pridictors to use in analysis
GB_df <- GB_df %>%
mutate(
# Recoding gender
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
# More Predictors...
GB_df <- GB_df %>%
# Clean Left-right scale
mutate(lrscale = ifelse(lrscale %in% c(77, 88, 99), NA, lrscale)) %>%
# Recode Political Interest
mutate(polintr = case_when(
polintr %in% c(1, 2) ~ "Interested",
polintr %in% c(3, 4) ~ "Not Interested",
polintr %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(polintr)
)) %>%
# Recode work for party
mutate(wrkprty = case_when(
wrkprty == 1 ~ "Yes",
wrkprty == 2 ~ "No",
wrkprty %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(wrkprty)
)) %>%
# Recode worked for civil org/association
mutate(workcivil = case_when(
wrkorg == 1 ~ "Yes",
wrkorg == 2 ~ "No",
wrkorg %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(wrkorg)
)) %>%
# Recode participation in protest
mutate(protest = case_when(
pbldmn == 1 ~ "Yes",
pbldmn == 2 ~ "No",
pbldmn %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(pbldmn)
))
)
GB_df$weight <- GB_df$dweight * GB_df$pweight
model1 <- lm(vote ~ psppipla, data = GB_df, weights = weight)
model2 <- lm(vote ~ psppipla + polintr, data = GB_df, weights = weight)
model3 <- lm(vote ~ psppipla + polintr * pbldmn, data = GB_df, weights = weight)
# Displaying Models
modelsummary(
list(model1, model2, model3),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| (1) | (2) | (3) | |
|---|---|---|---|
| psppipla | −0.1 (0.0)*** | −0.1 (0.0)*** | −0.1 (0.0)*** |
| polintrNot Interested | 0.3 (0.0)*** | 1.3 (0.3)*** | |
| pbldmn | 0.1 (0.1)+ | ||
| polintrNot Interested × pbldmn | −0.5 (0.2)** | ||
| Num.Obs. | 1173 | 1173 | 929 |
| R2 | 0.025 | 0.145 | 0.154 |
| R2 Adj. | 0.024 | 0.143 | 0.150 |
| AIC | 1734.9 | 1583.5 | 1271.8 |
| BIC | 1750.1 | 1603.7 | 1300.9 |
| Log.Lik. | −864.432 | −787.735 | −629.924 |
| RMSE | 0.47 | 0.45 | 0.45 |
equatiomatic::extract_eq(model1, use_coefs = TRUE)
\[ \operatorname{\widehat{vote}} = 1.56 - 0.09(\operatorname{psppipla}) \]
equatiomatic::extract_eq(model2, use_coefs = TRUE)
\[ \operatorname{\widehat{vote}} = 1.32 - 0.06(\operatorname{psppipla}) + 0.33(\operatorname{polintr}_{\operatorname{Not\ Interested}}) \]
equatiomatic::extract_eq(model3, use_coefs = TRUE)
\[ \operatorname{\widehat{vote}} = 1.15 - 0.06(\operatorname{psppipla}) + 1.32(\operatorname{polintr}_{\operatorname{Not\ Interested}}) + 0.11(\operatorname{pbldmn}) - 0.5(\operatorname{polintr}_{\operatorname{Not\ Interested}} \times \operatorname{pbldmn}) \]
ggplot(GB_subset, aes(x = factor(psppipla), fill = vote, group = vote)) +
geom_bar(position = "dodge", stat = "count") +
labs(
title = "Millenial's Perception of Influence on Politics and How That Affects Voting Patterns",
x = "Political System Allows People to Have an Influence on Politics",
y = "Count",
fill = "Voted in Last Election (Yes/No)"
) +
scale_fill_manual(values = c("Yes" = "#0dc813", "No" = "#c82e0d"))