Introduction

My Research Question

How has millennial’s perception of their vote having an influence on politics effected their voter turnout in the United Kingdom

Prelininary Analysis of Data

Loading ESS Dataset and Various Packages

# Installing and loading Various Packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom", "remotes") 

new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'remotes' was built under R version 4.3.2
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[6]]
##  [1] "remotes"      "broom"        "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"

Loading Data

#Loading ESS Dataset
setwd("C:/Users/2expl/Desktop/UToronto/Year 2/SOC202")
ess <- read_fst("All-ESS-Data.fst")

Subsetting and Cleaning Required Data

GB_subset <- ess %>% 
  filter(cntry == "GB") %>%
  mutate(
    psppipla = case_when(
      psppipla %in% c(7, 8, 9) ~ NA_character_,
      psppipla == 1 ~ "Not At All",
      psppipla == 2 ~ "Very Little",
      psppipla == 3 ~ "Some",
      psppipla == 4 ~ "A Lot",
      psppipla == 5 ~ "A Great Deal",
      TRUE ~ as.character(psppipla)
    ),
    
    vote = case_when(
      vote %in% c(3, 7, 8, 9) ~ NA_character_,
      vote == 1 ~ "Yes",
      vote == 2 ~ "No",
      
      TRUE ~ as.character(vote)
      ),
    
    age = ifelse(agea == 999, NA, agea),
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    gen = case_when(
      yrbrn %in% 1900:1945 ~ "1",
      yrbrn %in% 1946:1964 ~ "2",
      yrbrn %in% 1965:1979 ~ "3",
      yrbrn %in% 1980:1996 ~ "4",
      TRUE ~ as.character(cohort)  
    ),
    gen = factor(gen,
                 levels = c("1", "2", "3", "4"),
                 labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
  ) %>%
  
  # Selecting only chosen variables 
  select(psppipla, gen, vote) %>%

  # Filtering NAs
  filter(!is.na(psppipla)) %>%
  filter(!is.na(vote)) %>%
  filter(!is.na(gen)) %>%
  
  # Filtering for only Millenials
  filter(gen == "Millennials")

Creating a Data Table

# Replicating the GB_subset dataset but keeping all values as numeric in order to meet the confines of the datasummary_skim output.

GB_df <- ess %>% 
  filter(cntry == "GB") %>%
  mutate(
    psppipla = case_when(
      psppipla %in% c(7, 8, 9) ~ NA_real_,
      TRUE ~ as.numeric(psppipla)
    ),
    
    vote = case_when(
      vote %in% c(3, 7, 8, 9) ~ NA_real_,
      TRUE ~ vote
    ),
    
    # Cohorting and mutating respondent age to isolate selected age group.  
    age = ifelse(agea == 999, NA, agea),
    cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
    gen = case_when(
      yrbrn %in% 1900:1945 ~ 1,
      yrbrn %in% 1946:1964 ~ 2,
      yrbrn %in% 1965:1979 ~ 3,
      yrbrn %in% 1980:2000 ~ 4,
      TRUE ~ as.numeric(cohort) 
    )
  ) %>%

  # Filtering NAs
  filter(!is.na(psppipla)) %>%
  filter(!is.na(vote)) %>%
  filter(!is.na(gen)) %>%
  
  filter(gen == 4)

# Creating the flextable
GB_table <- datasummary_skim(GB_df  %>% dplyr::select(psppipla, vote, gen), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.
GB_table

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

psppipla

5

0

2.3

0.8

1.0

2.0

5.0

vote

2

0

1.4

0.5

1.0

1.0

2.0

gen

1

0

4.0

0.0

4.0

4.0

4.0

# renaming values to aid in readability
GB_df_named <- GB_df %>%
  rename(
    `Political System Allows Influence` = psppipla,
    `Generation (Millennials)` = gen,
    `Did Respondant Vote?` = vote,
  )
  
  GB_tableV2 <- datasummary_skim(GB_df_named %>% dplyr::select(`Political System Allows Influence`, `Generation (Millennials)`, `Did Respondant Vote?`), title = "Table 1. Descriptive statistics of main variables", output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.
GB_tableV2
Table 1. Descriptive statistics of main variables

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

Political System Allows Influence

5

0

2.3

0.8

1.0

2.0

5.0

Generation (Millennials)

1

0

4.0

0.0

4.0

4.0

4.0

Did Respondant Vote?

2

0

1.4

0.5

1.0

1.0

2.0

Hypothisis Testing

Getting Null Distruibution

# converting psppipla to a factor 
GB_subset$psppipla <- factor(GB_subset$psppipla)

# Test Stat
test_stat <- GB_subset %>%
  specify(explanatory = psppipla, 
          response = vote) %>%
  hypothesize(null = "independence") %>%
  calculate(stat = "Chisq") 
print(test_stat$stat)
## X-squared 
##  29.37997
# Null Distribution
null_dist <- GB_subset %>%
   specify(vote ~ psppipla) %>%
   hypothesize(null = "independence") %>%
   generate(reps = 1000, type = "permute") %>%
   calculate(stat = "Chisq")

p_val <- null_dist %>% # 
  get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
# Simulating Null Distribution
conf_int <- null_dist%>%
  get_confidence_interval(level = 0.95, type = "percentile")


null_dist %>%
  visualize(data, bins = 10, method = "simulation", dens_color = "black") +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int)
## Warning in min(diff(unique_loc)): no non-missing arguments to min; returning
## Inf

Generating Regression

# filtering additional pridictors to use in analysis
  GB_df <- GB_df %>%
  mutate(
    # Recoding gender
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
    ),
    
# More Predictors... 
GB_df <- GB_df %>%
  # Clean Left-right scale
  mutate(lrscale = ifelse(lrscale %in% c(77, 88, 99), NA, lrscale)) %>%
  # Recode Political Interest
  mutate(polintr = case_when(
    polintr %in% c(1, 2) ~ "Interested",
    polintr %in% c(3, 4) ~ "Not Interested",
    polintr %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(polintr)
  )) %>%
  # Recode work for party
  mutate(wrkprty = case_when(
    wrkprty == 1 ~ "Yes",
    wrkprty == 2 ~ "No",
    wrkprty %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(wrkprty)
  )) %>%
  # Recode worked for civil org/association
  mutate(workcivil = case_when(
    wrkorg == 1 ~ "Yes",
    wrkorg == 2 ~ "No",
    wrkorg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(wrkorg)
  )) %>%
  # Recode participation in protest
  mutate(protest = case_when(
    pbldmn == 1 ~ "Yes",
    pbldmn == 2 ~ "No",
    pbldmn %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(pbldmn)
  ))
)

GB_df$weight <- GB_df$dweight * GB_df$pweight
model1 <- lm(vote ~ psppipla, data = GB_df, weights = weight)
model2 <- lm(vote ~ psppipla + polintr, data = GB_df, weights = weight)
model3 <- lm(vote ~ psppipla + polintr * pbldmn, data = GB_df, weights = weight)

# Displaying Models 
modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  coef_omit = "Intercept")
 (1)   (2)   (3)
psppipla −0.1 (0.0)*** −0.1 (0.0)*** −0.1 (0.0)***
polintrNot Interested 0.3 (0.0)*** 1.3 (0.3)***
pbldmn 0.1 (0.1)+
polintrNot Interested × pbldmn −0.5 (0.2)**
Num.Obs. 1173 1173 929
R2 0.025 0.145 0.154
R2 Adj. 0.024 0.143 0.150
AIC 1734.9 1583.5 1271.8
BIC 1750.1 1603.7 1300.9
Log.Lik. −864.432 −787.735 −629.924
RMSE 0.47 0.45 0.45

Regression Equations

equatiomatic::extract_eq(model1, use_coefs = TRUE)

\[ \operatorname{\widehat{vote}} = 1.56 - 0.09(\operatorname{psppipla}) \]

equatiomatic::extract_eq(model2, use_coefs = TRUE)

\[ \operatorname{\widehat{vote}} = 1.32 - 0.06(\operatorname{psppipla}) + 0.33(\operatorname{polintr}_{\operatorname{Not\ Interested}}) \]

equatiomatic::extract_eq(model3, use_coefs = TRUE)

\[ \operatorname{\widehat{vote}} = 1.15 - 0.06(\operatorname{psppipla}) + 1.32(\operatorname{polintr}_{\operatorname{Not\ Interested}}) + 0.11(\operatorname{pbldmn}) - 0.5(\operatorname{polintr}_{\operatorname{Not\ Interested}} \times \operatorname{pbldmn}) \]

Modeling

ggplot(GB_subset, aes(x = factor(psppipla), fill = vote, group = vote)) +
  geom_bar(position = "dodge", stat = "count") +
  labs(
    title = "Millenial's Perception of Influence on Politics and How That Affects Voting Patterns",
    x = "Political System Allows People to Have an Influence on Politics",
    y = "Count",
    fill = "Voted in Last Election (Yes/No)"
  ) +
  scale_fill_manual(values = c("Yes" = "#0dc813", "No" = "#c82e0d"))