# Packages this analysis attaches. Each name appears exactly once so that no
# package is passed to `library()` twice.
packages <- c(
  "tidyr", "tidyverse", "ggplot2", "dplyr", "modelsummary", "forcats",
  "RColorBrewer", "kableExtra", "fst", "viridis", "knitr", "rmarkdown",
  "ggridges", "questionr", "flextable"
)

# Install only the packages that are missing from the local library.
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach everything. `invisible()` stops `lapply()` from auto-printing the
# search path returned by each `library()` call.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.4     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Loading required package: viridisLite
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## [[1]]
## [1] "tidyr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "tidyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "tidyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "tidyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [11] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [11] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "tidyr"        "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "tidyr"        "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "fst"          "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "tidyr"        "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[10]]
##  [1] "viridis"      "viridisLite"  "fst"          "kableExtra"   "RColorBrewer"
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "kableExtra"  
##  [6] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "tidyr"        "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[12]]
##  [1] "rmarkdown"    "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "tidyr"        "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[13]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "tidyr"        "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[14]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "tidyr"        "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[15]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "kableExtra"   "RColorBrewer" "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tibble"       "ggplot2"      "tidyverse"    "tidyr"       
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[16]]
##  [1] "flextable"    "questionr"    "ggridges"     "rmarkdown"    "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "kableExtra"   "RColorBrewer"
## [11] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"
# Read the ESS data set from its .fst file into `ess`.
ess <- read_fst(path = "/Users/jocelyn/Desktop/SOC252/Tutorial 1/All-ESS-Data.fst")
# Number of rows per value of `essround`.
table(ess[["essround"]])
## 
##     1     2     3     4     5     6     7     8     9    10 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Year assigned to each ESS round: round `i` receives `replacements[i]`.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
# Vectorised lookup instead of a loop over hard-coded indices. `match()`
# returns NA for rounds that are missing or outside 1..length(replacements),
# so those rows get year NA.
ess$year <- replacements[match(ess$essround, seq_along(replacements))]
table(ess$year)
## 
##  2002  2004  2006  2008  2010  2012  2014  2016  2018  2020 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Rows of `ess` whose country code is "GB".
uk_data <- ess[ess$cntry == "GB", ]

# Analysis sample: keep the variables of interest, turn the listed special
# codes into NA, recode `vote` to 0/1, and keep ages strictly between 18 and 90.
uk_data_clean <- uk_data %>%
  select(
    agea, hinctnta, polintr, fairelcc, clsprty, gndr, vote, essround, year
  ) %>%
  mutate(
    # Single-digit items: 7, 8 and 9 become NA.
    across(c(polintr, clsprty), ~ ifelse(.x %in% c(7, 8, 9), NA, .x)),
    # Two-digit items: 77, 88 and 99 become NA.
    across(c(fairelcc, hinctnta), ~ ifelse(.x %in% c(77, 88, 99), NA, .x)),
    gndr = ifelse(gndr %in% 9, NA, gndr),
    # 3, 7, 8, 9 -> NA; 2 -> 0; every other value is kept as is.
    vote = ifelse(vote %in% c(3, 7, 8, 9), NA, ifelse(vote == 2, 0, vote))
  ) %>%
  filter(agea > 18, agea < 90)
  
# Distribution of the original `vote` codes in the UK rows, before recoding.
table(uk_data$vote)
## 
##     1     2     3     7     8 
## 14576  5281  1058     9    55
# Distribution of `vote` in the cleaned sample, where it only takes 0 and 1.
table(uk_data_clean$vote)
## 
##     0     1 
##  5088 14330
# Mean of `vote` within each `year` of the cleaned UK sample, ignoring NAs.
vote_by_year <- summarise(
  group_by(uk_data_clean, year),
  mean_vote = mean(vote, na.rm = TRUE)
)
vote_by_year
## # A tibble: 10 × 2
##     year mean_vote
##    <dbl>     <dbl>
##  1  2002     0.731
##  2  2004     0.695
##  3  2006     0.730
##  4  2008     0.715
##  5  2010     0.723
##  6  2012     0.722
##  7  2014     0.714
##  8  2016     0.774
##  9  2018     0.802
## 10  2020     0.809
# Trend of the yearly mean of `vote` in the UK sample.
ggplot(vote_by_year, aes(x = year, y = mean_vote)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(color = "blue", linewidth = 1) +
  # Points highlight each year's value.
  geom_point(color = "red", size = 3) +
  labs(
    title = "Likelihood to Vote in the UK (2002-2020)",
    x = "Year",
    y = "Mean Vote (No-Yes scale)"
  ) +
  # Fix the axis to the full 0-1 range of the vote indicator.
  ylim(0, 1) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Blank out the vote codes 3, 7, 8 and 9 in the full data set (modifies `ess`).
ess$vote <- replace(ess$vote, ess$vote %in% c(3, 7, 8, 9), NA)

# Reduced copy of the full data set with `vote` recoded so that 2 becomes 0.
# NOTE(review): unlike `uk_data_clean`, no age filter is applied here --
# confirm that the UK vs. all-countries comparison is meant to differ this way.
ess_data_clean <- mutate(
  select(ess, vote, essround, year),
  vote = ifelse(vote == 2, 0, ifelse(vote %in% c(3, 7, 8, 9), NA, vote))
)

# Yearly mean of `vote` for the UK sample.
avg_vote_uk <- summarise(
  group_by(uk_data_clean, year),
  mean_vote_uk = mean(vote, na.rm = TRUE)
)
# Yearly mean of `vote` across every row of the ESS data.
avg_vote_all_countries <- summarise(
  group_by(ess_data_clean, year),
  mean_vote_all = mean(vote, na.rm = TRUE)
)
avg_vote_all_countries
## # A tibble: 10 × 2
##     year mean_vote_all
##    <dbl>         <dbl>
##  1  2002         0.802
##  2  2004         0.770
##  3  2006         0.779
##  4  2008         0.776
##  5  2010         0.763
##  6  2012         0.760
##  7  2014         0.766
##  8  2016         0.766
##  9  2018         0.785
## 10  2020         0.817
# One row per year with both the UK mean and the all-countries mean.
combined_data <- left_join(avg_vote_uk, avg_vote_all_countries, by = "year")

# Long format: one row per (year, series) so the series can map to colour.
long_data <- combined_data %>%
  pivot_longer(
    cols = starts_with("mean_vote"),
    names_to = "Vote_Type",
    values_to = "Mean_Vote"
  )

# Aesthetics are declared once in `ggplot()` and inherited by both layers.
ggplot(long_data, aes(x = year, y = Mean_Vote, color = Vote_Type)) +
  # `linewidth` replaces `size` for lines (deprecated since ggplot2 3.4.0).
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    title = "Likelihood to Vote in the UK vs Europe (2002-2020)",
    x = "Year",
    y = "Mean Vote (No-Yes scale)"
  ) +
  ylim(0, 1) +
  # `breaks` pins the order, so "blue"/"UK" go with mean_vote_uk and
  # "red"/"All ESS" go with mean_vote_all.
  scale_color_manual(
    values = c("blue", "red"),
    name = "Dataset",
    labels = c("UK", "All ESS"),
    breaks = c("mean_vote_uk", "mean_vote_all")
  ) +
  theme_minimal()

# Summary statistics for every column of the cleaned UK sample.
datasummary_skim(uk_data_clean)
Unique (#) Missing (%) Mean SD Min Median Max
agea 71 0 51.5 17.8 19.0 51.0 89.0
hinctnta 11 41 5.1 3.0 1.0 5.0 10.0
polintr 5 0 2.5 0.9 1.0 2.0 4.0
fairelcc 12 84 8.2 2.1 0.0 9.0 10.0
clsprty 3 1 1.5 0.5 1.0 1.0 2.0
gndr 2 0 1.6 0.5 1.0 2.0 2.0
vote 3 3 0.7 0.4 0.0 1.0 1.0
essround 10 0 5.3 2.7 1.0 5.0 10.0
year 10 0 2010.6 5.4 2002.0 2010.0 2020.0
# Column percentages: distribution of clsprty within each value of vote
cprop(table(uk_data_clean$clsprty, uk_data_clean$vote))
##        
##         0     1     All  
##   1      28.3  60.3  51.9
##   2      71.7  39.7  48.1
##   Total 100.0 100.0 100.0
# Column percentages: distribution of polintr within each value of vote
cprop(table(uk_data_clean$polintr, uk_data_clean$vote))
##        
##         0     1     All  
##   1       5.7  16.6  13.7
##   2      25.8  47.8  42.0
##   3      33.2  24.3  26.6
##   4      35.3  11.3  17.6
##   Total 100.0 100.0 100.0
# Column percentages: distribution of gndr within each value of vote
cprop(table(uk_data_clean$gndr, uk_data_clean$vote))
##        
##         0     1     All  
##   1      44.3  45.0  44.8
##   2      55.7  55.0  55.2
##   Total 100.0 100.0 100.0
# Column percentages: distribution of hinctnta within each value of vote
cprop(table(uk_data_clean$hinctnta, uk_data_clean$vote))
##        
##         0     1     All  
##   1      20.8  11.6  13.9
##   2      14.3  11.9  12.5
##   3      12.6  10.2  10.8
##   4       8.5   8.9   8.8
##   5       8.5   8.7   8.6
##   6       7.5   9.2   8.8
##   7       8.1   9.9   9.5
##   8       7.1   9.7   9.1
##   9       6.2   8.8   8.2
##   10      6.4  11.0   9.8
##   Total 100.0 100.0 100.0
# Column percentages: distribution of fairelcc within each value of vote
cprop(table(uk_data_clean$fairelcc, uk_data_clean$vote))
##        
##         0     1     All  
##   0       1.9   0.6   0.9
##   1       0.3   0.2   0.2
##   2       1.8   0.7   1.0
##   3       1.6   1.1   1.2
##   4       3.2   1.8   2.2
##   5      14.9   5.6   7.8
##   6       6.9   4.8   5.3
##   7      10.8   9.0   9.4
##   8      19.2  18.8  18.9
##   9      13.1  16.2  15.4
##   10     26.4  41.2  37.7
##   Total 100.0 100.0 100.0
# Column percentages: distribution of agea within each value of vote
cprop(table(uk_data_clean$agea, uk_data_clean$vote))
##        
##         0     1     All  
##   19      1.4   0.2   0.5
##   20      1.9   0.4   0.8
##   21      2.1   0.5   0.9
##   22      2.0   0.5   0.9
##   23      2.0   0.6   1.0
##   24      2.5   0.8   1.2
##   25      2.3   0.7   1.1
##   26      2.5   0.9   1.3
##   27      2.5   0.7   1.2
##   28      2.4   0.9   1.3
##   29      2.3   1.1   1.4
##   30      2.8   1.3   1.7
##   31      2.7   1.2   1.6
##   32      2.4   1.4   1.6
##   33      2.6   1.3   1.7
##   34      2.1   1.4   1.6
##   35      2.5   1.6   1.8
##   36      2.7   1.7   2.0
##   37      2.2   1.6   1.8
##   38      2.4   1.5   1.7
##   39      2.3   1.5   1.7
##   40      1.8   1.9   1.9
##   41      2.2   1.8   1.9
##   42      1.7   1.7   1.7
##   43      1.5   1.6   1.6
##   44      1.7   1.8   1.8
##   45      1.7   1.7   1.7
##   46      1.5   1.6   1.6
##   47      1.7   1.7   1.7
##   48      1.8   1.8   1.8
##   49      1.7   1.8   1.8
##   50      1.7   1.6   1.7
##   51      1.4   1.8   1.7
##   52      1.5   1.7   1.7
##   53      1.5   1.6   1.6
##   54      1.7   1.8   1.8
##   55      1.5   1.7   1.6
##   56      1.3   2.0   1.8
##   57      1.4   1.9   1.8
##   58      0.9   1.9   1.6
##   59      1.1   2.0   1.8
##   60      1.1   2.0   1.8
##   61      1.3   2.0   1.8
##   62      1.1   1.9   1.7
##   63      1.0   1.8   1.6
##   64      1.0   2.1   1.8
##   65      0.9   1.9   1.6
##   66      0.9   2.0   1.7
##   67      1.1   2.0   1.8
##   68      1.1   1.8   1.6
##   69      0.8   2.0   1.7
##   70      0.7   2.0   1.6
##   71      0.8   1.7   1.5
##   72      0.7   1.8   1.5
##   73      0.6   1.6   1.4
##   74      0.6   1.6   1.3
##   75      0.6   1.6   1.4
##   76      0.6   1.4   1.1
##   77      0.7   1.4   1.3
##   78      0.6   1.2   1.1
##   79      0.3   1.1   0.9
##   80      0.5   1.1   0.9
##   81      0.5   1.1   1.0
##   82      0.5   1.1   0.9
##   83      0.4   1.0   0.9
##   84      0.3   0.7   0.6
##   85      0.3   0.6   0.5
##   86      0.3   0.6   0.5
##   87      0.3   0.5   0.4
##   88      0.2   0.4   0.3
##   89      0.1   0.4   0.3
##   Total 100.0 100.0 100.0
# Column percentages of fairelcc within each value of vote.
model1 <- table(uk_data_clean$fairelcc, uk_data_clean$vote) %>%
  cprop()

# Formatted table. The spanning header labels the row-name column and the
# 0 / 1 / All columns. `TRUE` is spelled out because `T` can be reassigned.
kbl(
  model1,
  caption = "Conditional Probabilities of Voting and Perception of Fair Elections",
  booktabs = TRUE
) %>%
  add_header_above(c("How Fair Elections Are (0=not fair, 10=very fair)", "Did not Vote" = 1, "Voted" = 1, " " = 1)) %>%
  kable_styling(latex_options = c("repeat_header"))
Conditional Probabilities of Voting and Perception of Fair Elections
How Fair Elections Are (0=not fair, 10=very fair)
Did not Vote
Voted
0 1 All
0 1.8918919 0.6284039 0.9274065
1 0.2702703 0.1675744 0.1918772
2 1.7567568 0.7121910 0.9593860
3 1.6216216 1.1311269 1.2472018
4 3.2432432 1.8433180 2.1746083
5 14.8648649 5.5718475 7.7710265
6 6.8918919 4.8177629 5.3086025
7 10.8108108 9.0071219 9.4339623
8 19.1891892 18.7683284 18.8679245
9 13.1081081 16.1709258 15.4461145
10 26.3513514 41.1813992 37.6718900
Total 100.0000000 100.0000000 100.0000000
# Column percentages of polintr within each value of vote.
model2 <- table(uk_data_clean$polintr, uk_data_clean$vote) %>%
  cprop()

# Formatted table. The spanning header labels the row-name column and the
# 0 / 1 / All columns. `TRUE` is spelled out because `T` can be reassigned.
kbl(
  model2,
  caption = "Conditional Probabilities of Voting and Political Interest",
  booktabs = TRUE
) %>%
  add_header_above(c("Political Interest (1=very interested, 4=not interested)", "Did not Vote" = 1, "Voted" = 1, " " = 1)) %>%
  kable_styling(latex_options = c("repeat_header"))
Conditional Probabilities of Voting and Political Interest
Political Interest (1=very interested, 4=not interested)
Did not Vote
Voted
0 1 All
1 5.724966 16.58291 13.73963
2 25.791855 47.81547 42.04832
3 33.169388 24.30905 26.62923
4 35.313791 11.29257 17.58281
Total 100.000000 100.00000 100.00000
# Column percentages of hinctnta within each value of vote.
model3 <- table(uk_data_clean$hinctnta, uk_data_clean$vote) %>%
  cprop()

# Formatted table. The spanning header labels the row-name column and the
# 0 / 1 / All columns. `TRUE` is spelled out because `T` can be reassigned.
kbl(
  model3,
  caption = "Conditional Probabilities of Voting and Total Household Income",
  booktabs = TRUE
) %>%
  add_header_above(c("Total Household Income (1=lowest decile, 10=highest decile)", "Did not Vote" = 1, "Voted" = 1, " " = 1)) %>%
  kable_styling(latex_options = c("repeat_header"))
Conditional Probabilities of Voting and Total Household Income
Total Household Income (1=lowest decile, 10=highest decile)
Did not Vote
Voted
0 1 All
1 20.771930 11.595042 13.872681
2 14.315789 11.861462 12.470609
3 12.596491 10.216611 10.807280
4 8.526316 8.930847 8.830445
5 8.491228 8.699178 8.647566
6 7.508772 9.232017 8.804319
7 8.105263 9.938608 9.483584
8 7.122807 9.730105 9.082992
9 6.210526 8.838179 8.186014
10 6.350877 10.957952 9.814508
Total 100.000000 100.000000 100.000000
# Column percentages of gndr within each value of vote.
model4 <- table(uk_data_clean$gndr, uk_data_clean$vote) %>%
  cprop()

# Formatted table. The spanning header labels the row-name column and the
# 0 / 1 / All columns. `TRUE` is spelled out because `T` can be reassigned.
kbl(
  model4,
  caption = "Conditional Probabilities of Voting and Gender",
  booktabs = TRUE
) %>%
  add_header_above(c("Gender (1=male, 2=female)", "Did not Vote" = 1, "Voted" = 1, " " = 1)) %>%
  kable_styling(latex_options = c("repeat_header"))
Conditional Probabilities of Voting and Gender
Gender (1=male, 2=female)
Did not Vote
Voted
0 1 All
1 44.28066 45.0314 44.83469
2 55.71934 54.9686 55.16531
Total 100.00000 100.0000 100.00000
# Column percentages of agea within each value of vote.
model5 <- table(uk_data_clean$agea, uk_data_clean$vote) %>%
  cprop()

# Formatted table. The spanning header labels the row-name column and the
# 0 / 1 / All columns. `TRUE` is spelled out because `T` can be reassigned.
kbl(
  model5,
  caption = "Conditional Probabilities of Voting and Age",
  booktabs = TRUE
) %>%
  add_header_above(c("Age (Years)", "Did not Vote" = 1, "Voted" = 1, " " = 1)) %>%
  kable_styling(latex_options = c("repeat_header"))
Conditional Probabilities of Voting and Age
Age (Years)
Did not Vote
Voted
0 1 All
19 1.3954403 0.2233077 0.5304357
20 1.9261006 0.4117237 0.8085282
21 2.0636792 0.5163992 0.9218251
22 2.0440252 0.5163992 0.9166752
23 1.9654088 0.6489881 0.9939232
24 2.4960692 0.7745987 1.2256669
25 2.2798742 0.7466853 1.1484190
26 2.5157233 0.8862526 1.3132145
27 2.5157233 0.7327285 1.1999176
28 2.3781447 0.9490579 1.3235143
29 2.3388365 1.1304955 1.4471109
30 2.7712264 1.2700628 1.6634051
31 2.6533019 1.1723657 1.5604079
32 2.3977987 1.3607816 1.6325059
33 2.5550314 1.3398465 1.6582552
34 2.1422956 1.3538032 1.5604079
35 2.4960692 1.5701326 1.8127511
36 2.6533019 1.7166783 1.9620970
37 2.2405660 1.6259595 1.7870018
38 2.4371069 1.4584787 1.7149037
39 2.2995283 1.5491975 1.7458029
40 1.8474843 1.9469644 1.9208981
41 2.1619497 1.7655269 1.8693995
42 1.7492138 1.7236567 1.7303533
43 1.5133648 1.6050244 1.5810073
44 1.7099057 1.8492673 1.8127511
45 1.7295597 1.7027216 1.7097538
46 1.5133648 1.6468946 1.6119065
47 1.7492138 1.6678297 1.6891544
48 1.7688679 1.7864620 1.7818519
49 1.7099057 1.7864620 1.7664023
50 1.7492138 1.6259595 1.6582552
51 1.3757862 1.8213538 1.7046040
52 1.5133648 1.7445918 1.6840045
53 1.4544025 1.6120028 1.5707076
54 1.7099057 1.7864620 1.7664023
55 1.4544025 1.7027216 1.6376558
56 1.2971698 1.9888346 1.8076012
57 1.3954403 1.9469644 1.8024513
58 0.9433962 1.8562456 1.6170563
59 1.1202830 1.9748779 1.7509527
60 1.0809748 2.0097697 1.7664023
61 1.3364780 2.0097697 1.8333505
62 1.1202830 1.9260293 1.7149037
63 1.0416667 1.8353105 1.6273561
64 1.0416667 2.0865318 1.8127511
65 0.8647799 1.8771807 1.6119065
66 0.8844340 2.0376832 1.7355031
67 1.0613208 2.0237264 1.7715522
68 1.1006289 1.7585485 1.5861572
69 0.7861635 2.0097697 1.6891544
70 0.7468553 1.9678995 1.6479555
71 0.8451258 1.7096999 1.4831600
72 0.6682390 1.7515701 1.4677104
73 0.5699686 1.6399163 1.3595633
74 0.6485849 1.5910677 1.3441137
75 0.6485849 1.6189812 1.3647132
76 0.5699686 1.3538032 1.1484190
77 0.7272013 1.4375436 1.2514162
78 0.5699686 1.2421493 1.0660212
79 0.3144654 1.1235171 0.9115254
80 0.4716981 1.1165387 0.9475744
81 0.5306604 1.1025820 0.9527243
82 0.5306604 1.0746685 0.9321248
83 0.4323899 1.0258200 0.8703265
84 0.2948113 0.7327285 0.6179833
85 0.2555031 0.6140963 0.5201360
86 0.2555031 0.5722261 0.4892368
87 0.2948113 0.4954641 0.4428880
88 0.1572327 0.3838102 0.3244412
89 0.1179245 0.3768318 0.3089917
Total 100.0000000 100.0000000 100.0000000
# Column percentages of clsprty within each value of vote.
model6 <- table(uk_data_clean$clsprty, uk_data_clean$vote) %>%
  cprop()

# Formatted table. The spanning header labels the row-name column and the
# 0 / 1 / All columns. `TRUE` is spelled out because `T` can be reassigned.
kbl(
  model6,
  caption = "Conditional Probabilities of Voting and Closeness to a Political Party",
  booktabs = TRUE
) %>%
  add_header_above(c("Closeness to Party (1=close, 2=not close)", "Did not Vote" = 1, "Voted" = 1, " " = 1)) %>%
  kable_styling(latex_options = c("repeat_header"))
Conditional Probabilities of Voting and Closeness to a Political Party
Closeness to Party (1=close, 2=not close)
Did not Vote
Voted
0 1 All
1 28.28882 60.34422 51.94401
2 71.71118 39.65578 48.05599
Total 100.00000 100.00000 100.00000
# Mean and standard deviation of vote for each value of gndr (NAs in vote ignored)
summary_gender_vote <- summarise(
  group_by(uk_data_clean, gndr),
  mean = mean(vote, na.rm = TRUE),
  standard_deviation = sd(vote, na.rm = TRUE)
)
summary_gender_vote
## # A tibble: 2 × 3
##    gndr  mean standard_deviation
##   <dbl> <dbl>              <dbl>
## 1     1 0.741              0.438
## 2     2 0.735              0.441
# Mean and standard deviation of vote for each value of hinctnta
# (NAs in vote ignored; NA in hinctnta forms its own group)
summary_income_vote <- summarise(
  group_by(uk_data_clean, hinctnta),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_income_vote
## # A tibble: 11 × 3
##    hinctnta  mean std_dev
##       <dbl> <dbl>   <dbl>
##  1        1 0.628   0.483
##  2        2 0.715   0.452
##  3        3 0.711   0.454
##  4        4 0.760   0.427
##  5        5 0.756   0.430
##  6        6 0.788   0.409
##  7        7 0.788   0.409
##  8        8 0.805   0.396
##  9        9 0.812   0.391
## 10       10 0.839   0.367
## 11       NA 0.718   0.450
# Mean and standard deviation of vote for each value of fairelcc
# (NAs in vote ignored; NA in fairelcc forms its own group)
summary_fair_election_vote <- summarise(
  group_by(uk_data_clean, fairelcc),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_fair_election_vote
## # A tibble: 12 × 3
##    fairelcc  mean std_dev
##       <dbl> <dbl>   <dbl>
##  1        0 0.517   0.509
##  2        1 0.667   0.516
##  3        2 0.567   0.504
##  4        3 0.692   0.468
##  5        4 0.647   0.481
##  6        5 0.547   0.499
##  7        6 0.693   0.463
##  8        7 0.729   0.445
##  9        8 0.759   0.428
## 10        9 0.799   0.401
## 11       10 0.834   0.372
## 12       NA 0.733   0.442
# Mean and standard deviation of vote for each value of agea (NAs in vote ignored)
summary_age_vote <- summarise(
  group_by(uk_data_clean, agea),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_age_vote
## # A tibble: 71 × 3
##     agea  mean std_dev
##    <dbl> <dbl>   <dbl>
##  1    19 0.311   0.465
##  2    20 0.376   0.486
##  3    21 0.413   0.494
##  4    22 0.416   0.494
##  5    23 0.482   0.501
##  6    24 0.466   0.500
##  7    25 0.480   0.501
##  8    26 0.498   0.501
##  9    27 0.451   0.499
## 10    28 0.529   0.500
## # ℹ 61 more rows
# Mean and standard deviation of vote for each value of polintr
# (NAs in vote ignored; NA in polintr forms its own group)
summary_political_interest_vote <- summarise(
  group_by(uk_data_clean, polintr),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_political_interest_vote
## # A tibble: 5 × 3
##   polintr  mean std_dev
##     <dbl> <dbl>   <dbl>
## 1       1 0.891   0.312
## 2       2 0.839   0.367
## 3       3 0.674   0.469
## 4       4 0.474   0.499
## 5      NA 0.286   0.488
# Mean and standard deviation of vote for each value of clsprty
# (NAs in vote ignored; NA in clsprty forms its own group)
summary_close_to_party_vote <- summarise(
  group_by(uk_data_clean, clsprty),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_close_to_party_vote
## # A tibble: 3 × 3
##   clsprty  mean std_dev
##     <dbl> <dbl>   <dbl>
## 1       1 0.857   0.350
## 2       2 0.609   0.488
## 3      NA 0.742   0.439
# Mean of `vote` at each age in the cleaned UK sample, ignoring NAs.
vote_by_age <- summarise(
  group_by(uk_data_clean, agea),
  mean_vote = mean(vote, na.rm = TRUE)
)
vote_by_age
## # A tibble: 71 × 2
##     agea mean_vote
##    <dbl>     <dbl>
##  1    19     0.311
##  2    20     0.376
##  3    21     0.413
##  4    22     0.416
##  5    23     0.482
##  6    24     0.466
##  7    25     0.480
##  8    26     0.498
##  9    27     0.451
## 10    28     0.529
## # ℹ 61 more rows
# Mean of `vote` by age: one point per age plus a smoothed trend.
# The colour is set as a fixed layer parameter. Inside `aes()` the string
# "red" would be mapped as a one-level variable, which adds a legend and uses
# the default palette colour instead of red.
ggplot(data = vote_by_age, mapping = aes(x = agea, y = mean_vote)) +
  geom_smooth(color = "red") +
  labs(
    title = "Likelihood to Vote by Age",
    x = "Age",
    y = "Likelihood to Vote"
  ) +
  ylim(0, 1) +
  geom_point(color = "red")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'