Stepping Stone Two

# Packages used by this analysis (wrangling, plotting, tables, survey helpers).
# Each package is listed once; attach order follows the order given here.
packages <- c(
  "tidyr", "tidyverse", "ggplot2", "dplyr", "modelsummary", "forcats",
  "RColorBrewer", "kableExtra", "fst", "viridis", "knitr", "rmarkdown",
  "ggridges", "questionr", "flextable"
)

# Install only the packages that are not yet present in the library.
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. invisible() keeps the list returned by lapply()
# (one search-path vector per package) out of the rendered report.
invisible(lapply(packages, library, character.only = TRUE))

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.4     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## Loading required package: viridisLite
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose

## [[1]]
## [1] "tidyr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "tidyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "tidyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tibble"    "ggplot2"   "tidyverse" "tidyr"     "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [11] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [11] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "tidyr"        "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "tidyr"        "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "fst"          "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "tidyr"        "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[10]]
##  [1] "viridis"      "viridisLite"  "fst"          "kableExtra"   "RColorBrewer"
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "kableExtra"  
##  [6] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "tidyr"        "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[12]]
##  [1] "rmarkdown"    "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "tidyr"        "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[13]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "tidyr"        "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[14]]
##  [1] "ggridges"     "rmarkdown"    "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "kableExtra"   "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "tidyr"        "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[15]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "kableExtra"   "RColorBrewer" "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tibble"       "ggplot2"      "tidyverse"    "tidyr"       
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[16]]
##  [1] "flextable"    "questionr"    "ggridges"     "rmarkdown"    "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "kableExtra"   "RColorBrewer"
## [11] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "tidyr"        "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"

# Read the ESS data set from its local fst file.
ess <- fst::read_fst(
  path = "/Users/jocelyn/Desktop/SOC252/Tutorial 1/All-ESS-Data.fst"
)

# Number of respondents in each ESS round.
table(ess[["essround"]])

## 
##     1     2     3     4     5     6     7     8     9    10 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685

# Derive a year column from the round number: round i gets replacements[i].
# match() does the lookup for all rows at once and yields NA for any round
# value that has no entry in replacements.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
ess$year <- replacements[match(ess$essround, seq_along(replacements))]

# Number of respondents per derived year.
table(ess$year)

## 
##  2002  2004  2006  2008  2010  2012  2014  2016  2018  2020 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685

# Rows of the ESS data whose country code is "GB".
uk_data <- ess[ess$cntry == "GB", ]

# Keep the analysis variables, turn the listed answer codes into NA, recode
# vote to 0/1 and keep respondents with age strictly between 18 and 90.
uk_data_clean <- uk_data %>%
  select(
    agea, hinctnta, polintr, fairelcc, clsprty, gndr, vote, essround, year
  ) %>%
  mutate(
    # Codes 7, 8 and 9 become NA.
    across(c(polintr, clsprty), ~ replace(.x, .x %in% c(7, 8, 9), NA)),
    # Codes 77, 88 and 99 become NA.
    across(c(fairelcc, hinctnta), ~ replace(.x, .x %in% c(77, 88, 99), NA)),
    # Code 9 becomes NA.
    gndr = replace(gndr, gndr %in% 9, NA),
    # vote: 2 -> 0, codes 3/7/8/9 -> NA, everything else unchanged.
    vote = case_when(
      vote == 2 ~ 0,
      vote %in% c(3, 7, 8, 9) ~ NA_real_,
      TRUE ~ vote
    )
  ) %>%
  filter(agea > 18, agea < 90)

# Raw vote codes in the GB subset, before any recoding.
table(uk_data[["vote"]])

## 
##     1     2     3     7     8 
## 14576  5281  1058     9    55

# Tabulate the raw vote codes in uk_data (before recoding).
table(uk_data[["vote"]])

## 
##     1     2     3     7     8 
## 14576  5281  1058     9    55

# Tabulate the recoded 0/1 vote variable in the cleaned GB data.
table(uk_data_clean[["vote"]])

## 
##     0     1 
##  5088 14330

# NOTE(review): the select() that builds uk_data_clean lists no vote_binary
# column and none is created afterwards, so this tabulates NULL and prints an
# empty table. Looks like a leftover exploratory call; confirm before removing.
table(uk_data_clean$vote_binary)

## < table of extent 0 >

# NOTE(review): the select() that builds uk_data_clean lists no vote_yes
# column and none is created afterwards, so this tabulates NULL and prints an
# empty table. Looks like a leftover exploratory call; confirm before removing.
table(uk_data_clean$vote_yes)

## < table of extent 0 >

# Mean of the 0/1 vote variable per year, ignoring missing answers.
vote_by_year <- summarise(
  group_by(uk_data_clean, year),
  mean_vote = mean(vote, na.rm = TRUE)
)
vote_by_year

## # A tibble: 10 × 2
##     year mean_vote
##    <dbl>     <dbl>
##  1  2002     0.731
##  2  2004     0.695
##  3  2006     0.730
##  4  2008     0.715
##  5  2010     0.723
##  6  2012     0.722
##  7  2014     0.714
##  8  2016     0.774
##  9  2018     0.802
## 10  2020     0.809

# Line-and-point chart of the yearly mean of vote, with the y axis fixed
# to the 0-1 range.
ggplot(vote_by_year, aes(x = year, y = mean_vote)) +
  # Line to show the trend. Line thickness is set with `linewidth`; using
  # `size` for lines is deprecated since ggplot2 3.4.0.
  geom_line(color = "blue", linewidth = 1) +
  # Points to highlight each year's value.
  geom_point(color = "red", size = 3) +
  labs(
    title = "Likelihood to Vote in the UK (2002-2020)",
    x = "Year",
    y = "Mean Vote (No-Yes scale)"
  ) +
  ylim(0, 1) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Mark vote codes 3, 7, 8 and 9 as missing in the full ESS data (in place).
is.na(ess$vote) <- ess$vote %in% c(3, 7, 8, 9)

# Slim copy for the all-country comparison; vote code 2 is recoded to 0.
# The codes 3/7/8/9 were already set to NA just above.
ess_data_clean <- ess %>%
  select(vote, essround, year) %>%
  mutate(vote = replace(vote, vote %in% 2, 0))

# Yearly mean of vote in the cleaned GB data.
avg_vote_uk <- summarise(
  group_by(uk_data_clean, year),
  mean_vote_uk = mean(vote, na.rm = TRUE)
)

# Yearly mean of vote across all rows of the ESS data.
avg_vote_all_countries <- summarise(
  group_by(ess_data_clean, year),
  mean_vote_all = mean(vote, na.rm = TRUE)
)
avg_vote_all_countries

## # A tibble: 10 × 2
##     year mean_vote_all
##    <dbl>         <dbl>
##  1  2002         0.802
##  2  2004         0.770
##  3  2006         0.779
##  4  2008         0.776
##  5  2010         0.763
##  6  2012         0.760
##  7  2014         0.766
##  8  2016         0.766
##  9  2018         0.785
## 10  2020         0.817

# Put the two yearly series side by side, then reshape to long format so
# that each row is one (year, series) pair for plotting.
combined_data <- left_join(avg_vote_uk, avg_vote_all_countries, by = "year")
long_data <- combined_data %>%
  pivot_longer(
    cols = starts_with("mean_vote"),
    names_to = "Vote_Type",
    values_to = "Mean_Vote"
  )

# One coloured line per series, with the y axis fixed to the 0-1 range.
ggplot(long_data) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0.
  geom_line(aes(x = year, y = Mean_Vote, color = Vote_Type), linewidth = 1) +
  geom_point(aes(x = year, y = Mean_Vote, color = Vote_Type), size = 3) +
  labs(
    title = "Likelihood to Vote in the UK vs Europe (2002-2020)",
    x = "Year",
    y = "Mean Vote (No-Yes scale)"
  ) +
  ylim(0, 1) +
  # breaks fixes the legend order so each label lines up with its series.
  scale_color_manual(
    values = c("blue", "red"),
    name = "Dataset",
    labels = c("UK", "All ESS"),
    breaks = c("mean_vote_uk", "mean_vote_all")
  ) +
  theme_minimal()

# Overview table of every variable in the cleaned GB data.
datasummary_skim(data = uk_data_clean)

	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
agea	71	0	51.5	17.8	19.0	51.0	89.0
hinctnta	11	41	5.1	3.0	1.0	5.0	10.0
polintr	5	0	2.5	0.9	1.0	2.0	4.0
fairelcc	12	84	8.2	2.1	0.0	9.0	10.0
clsprty	3	1	1.5	0.5	1.0	1.0	2.0
gndr	2	0	1.6	0.5	1.0	2.0	2.0
vote	3	3	0.7	0.4	0.0	1.0	1.0
essround	10	0	5.3	2.7	1.0	5.0	10.0
year	10	0	2010.6	5.4	2002.0	2010.0	2020.0

# Column percentages: clsprty (rows) within each vote outcome (columns)
cprop(table(uk_data_clean$clsprty, uk_data_clean$vote))

##        
##         0     1     All  
##   1      28.3  60.3  51.9
##   2      71.7  39.7  48.1
##   Total 100.0 100.0 100.0

# Column percentages: polintr (rows) within each vote outcome (columns)
cprop(table(uk_data_clean$polintr, uk_data_clean$vote))

##        
##         0     1     All  
##   1       5.7  16.6  13.7
##   2      25.8  47.8  42.0
##   3      33.2  24.3  26.6
##   4      35.3  11.3  17.6
##   Total 100.0 100.0 100.0

# Column percentages: gndr (rows) within each vote outcome (columns)
cprop(table(uk_data_clean$gndr, uk_data_clean$vote))

##        
##         0     1     All  
##   1      44.3  45.0  44.8
##   2      55.7  55.0  55.2
##   Total 100.0 100.0 100.0

# Column percentages: hinctnta (rows) within each vote outcome (columns)
cprop(table(uk_data_clean$hinctnta, uk_data_clean$vote))

##        
##         0     1     All  
##   1      20.8  11.6  13.9
##   2      14.3  11.9  12.5
##   3      12.6  10.2  10.8
##   4       8.5   8.9   8.8
##   5       8.5   8.7   8.6
##   6       7.5   9.2   8.8
##   7       8.1   9.9   9.5
##   8       7.1   9.7   9.1
##   9       6.2   8.8   8.2
##   10      6.4  11.0   9.8
##   Total 100.0 100.0 100.0

# Column percentages: fairelcc (rows) within each vote outcome (columns)
cprop(table(uk_data_clean$fairelcc, uk_data_clean$vote))

##        
##         0     1     All  
##   0       1.9   0.6   0.9
##   1       0.3   0.2   0.2
##   2       1.8   0.7   1.0
##   3       1.6   1.1   1.2
##   4       3.2   1.8   2.2
##   5      14.9   5.6   7.8
##   6       6.9   4.8   5.3
##   7      10.8   9.0   9.4
##   8      19.2  18.8  18.9
##   9      13.1  16.2  15.4
##   10     26.4  41.2  37.7
##   Total 100.0 100.0 100.0

# Column percentages: agea (one row per age value) within each vote outcome
cprop(table(uk_data_clean$agea, uk_data_clean$vote))

##        
##         0     1     All  
##   19      1.4   0.2   0.5
##   20      1.9   0.4   0.8
##   21      2.1   0.5   0.9
##   22      2.0   0.5   0.9
##   23      2.0   0.6   1.0
##   24      2.5   0.8   1.2
##   25      2.3   0.7   1.1
##   26      2.5   0.9   1.3
##   27      2.5   0.7   1.2
##   28      2.4   0.9   1.3
##   29      2.3   1.1   1.4
##   30      2.8   1.3   1.7
##   31      2.7   1.2   1.6
##   32      2.4   1.4   1.6
##   33      2.6   1.3   1.7
##   34      2.1   1.4   1.6
##   35      2.5   1.6   1.8
##   36      2.7   1.7   2.0
##   37      2.2   1.6   1.8
##   38      2.4   1.5   1.7
##   39      2.3   1.5   1.7
##   40      1.8   1.9   1.9
##   41      2.2   1.8   1.9
##   42      1.7   1.7   1.7
##   43      1.5   1.6   1.6
##   44      1.7   1.8   1.8
##   45      1.7   1.7   1.7
##   46      1.5   1.6   1.6
##   47      1.7   1.7   1.7
##   48      1.8   1.8   1.8
##   49      1.7   1.8   1.8
##   50      1.7   1.6   1.7
##   51      1.4   1.8   1.7
##   52      1.5   1.7   1.7
##   53      1.5   1.6   1.6
##   54      1.7   1.8   1.8
##   55      1.5   1.7   1.6
##   56      1.3   2.0   1.8
##   57      1.4   1.9   1.8
##   58      0.9   1.9   1.6
##   59      1.1   2.0   1.8
##   60      1.1   2.0   1.8
##   61      1.3   2.0   1.8
##   62      1.1   1.9   1.7
##   63      1.0   1.8   1.6
##   64      1.0   2.1   1.8
##   65      0.9   1.9   1.6
##   66      0.9   2.0   1.7
##   67      1.1   2.0   1.8
##   68      1.1   1.8   1.6
##   69      0.8   2.0   1.7
##   70      0.7   2.0   1.6
##   71      0.8   1.7   1.5
##   72      0.7   1.8   1.5
##   73      0.6   1.6   1.4
##   74      0.6   1.6   1.3
##   75      0.6   1.6   1.4
##   76      0.6   1.4   1.1
##   77      0.7   1.4   1.3
##   78      0.6   1.2   1.1
##   79      0.3   1.1   0.9
##   80      0.5   1.1   0.9
##   81      0.5   1.1   1.0
##   82      0.5   1.1   0.9
##   83      0.4   1.0   0.9
##   84      0.3   0.7   0.6
##   85      0.3   0.6   0.5
##   86      0.3   0.6   0.5
##   87      0.3   0.5   0.4
##   88      0.2   0.4   0.3
##   89      0.1   0.4   0.3
##   Total 100.0 100.0 100.0

# Column percentages of fairelcc (rows) within each vote outcome (columns),
# rendered as a captioned table with descriptive column headers.
model1 <- table(uk_data_clean$fairelcc, uk_data_clean$vote) %>%
  cprop()

kbl(
  model1,
  caption = "Conditional Probabilities of Voting and Perception of Fair Elections",
  booktabs = TRUE  # spelled out: T is an ordinary variable and can be reassigned
) %>%
  add_header_above(c(
    "How Fair Elections Are (0=not fair, 10=very fair)",
    "Did not Vote" = 1,
    "Voted" = 1,
    " " = 1
  )) %>%
  kable_styling(latex_options = c("repeat_header"))

Conditional Probabilities of Voting and Perception of Fair Elections
How Fair Elections Are (0=not fair, 10=very fair)	Did not Vote	Voted
	0	1	All
0	1.8918919	0.6284039	0.9274065
1	0.2702703	0.1675744	0.1918772
2	1.7567568	0.7121910	0.9593860
3	1.6216216	1.1311269	1.2472018
4	3.2432432	1.8433180	2.1746083
5	14.8648649	5.5718475	7.7710265
6	6.8918919	4.8177629	5.3086025
7	10.8108108	9.0071219	9.4339623
8	19.1891892	18.7683284	18.8679245
9	13.1081081	16.1709258	15.4461145
10	26.3513514	41.1813992	37.6718900
Total	100.0000000	100.0000000	100.0000000

# Column percentages of polintr (rows) within each vote outcome (columns),
# rendered as a captioned table with descriptive column headers.
model2 <- table(uk_data_clean$polintr, uk_data_clean$vote) %>%
  cprop()

kbl(
  model2,
  caption = "Conditional Probabilities of Voting and Political Interest",
  booktabs = TRUE  # spelled out: T is an ordinary variable and can be reassigned
) %>%
  add_header_above(c(
    "Political Interest (1=very interested, 4=not interested)",
    "Did not Vote" = 1,
    "Voted" = 1,
    " " = 1
  )) %>%
  kable_styling(latex_options = c("repeat_header"))

Conditional Probabilities of Voting and Political Interest
Political Interest (1=very interested, 4=not interested)	Did not Vote	Voted
	0	1	All
1	5.724966	16.58291	13.73963
2	25.791855	47.81547	42.04832
3	33.169388	24.30905	26.62923
4	35.313791	11.29257	17.58281
Total	100.000000	100.00000	100.00000

# Column percentages of hinctnta (rows) within each vote outcome (columns),
# rendered as a captioned table with descriptive column headers.
model3 <- table(uk_data_clean$hinctnta, uk_data_clean$vote) %>%
  cprop()

kbl(
  model3,
  caption = "Conditional Probabilities of Voting and Total Household Income",
  booktabs = TRUE  # spelled out: T is an ordinary variable and can be reassigned
) %>%
  add_header_above(c(
    "Total Household Income (1=lowest decile, 10=highest decile)",
    "Did not Vote" = 1,
    "Voted" = 1,
    " " = 1
  )) %>%
  kable_styling(latex_options = c("repeat_header"))

Conditional Probabilities of Voting and Total Household Income
Total Household Income (1=lowest decile, 10=highest decile)	Did not Vote	Voted
	0	1	All
1	20.771930	11.595042	13.872681
2	14.315789	11.861462	12.470609
3	12.596491	10.216611	10.807280
4	8.526316	8.930847	8.830445
5	8.491228	8.699178	8.647566
6	7.508772	9.232017	8.804319
7	8.105263	9.938608	9.483584
8	7.122807	9.730105	9.082992
9	6.210526	8.838179	8.186014
10	6.350877	10.957952	9.814508
Total	100.000000	100.000000	100.000000

# Column percentages of gndr (rows) within each vote outcome (columns),
# rendered as a captioned table with descriptive column headers.
model4 <- table(uk_data_clean$gndr, uk_data_clean$vote) %>%
  cprop()

kbl(
  model4,
  caption = "Conditional Probabilities of Voting and Gender",
  booktabs = TRUE  # spelled out: T is an ordinary variable and can be reassigned
) %>%
  add_header_above(c(
    "Gender (1=male, 2=female)",
    "Did not Vote" = 1,
    "Voted" = 1,
    " " = 1
  )) %>%
  kable_styling(latex_options = c("repeat_header"))

Conditional Probabilities of Voting and Gender
Gender (1=male, 2=female)	Did not Vote	Voted
	0	1	All
1	44.28066	45.0314	44.83469
2	55.71934	54.9686	55.16531
Total	100.00000	100.0000	100.00000

# Column percentages of agea (one row per age value) within each vote outcome
# (columns), rendered as a captioned table with descriptive column headers.
model5 <- table(uk_data_clean$agea, uk_data_clean$vote) %>%
  cprop()

kbl(
  model5,
  caption = "Conditional Probabilities of Voting and Age",
  booktabs = TRUE  # spelled out: T is an ordinary variable and can be reassigned
) %>%
  add_header_above(c(
    "Age (Years)",
    "Did not Vote" = 1,
    "Voted" = 1,
    " " = 1
  )) %>%
  kable_styling(latex_options = c("repeat_header"))

Conditional Probabilities of Voting and Age
Age (Years)	Did not Vote	Voted
	0	1	All
19	1.3954403	0.2233077	0.5304357
20	1.9261006	0.4117237	0.8085282
21	2.0636792	0.5163992	0.9218251
22	2.0440252	0.5163992	0.9166752
23	1.9654088	0.6489881	0.9939232
24	2.4960692	0.7745987	1.2256669
25	2.2798742	0.7466853	1.1484190
26	2.5157233	0.8862526	1.3132145
27	2.5157233	0.7327285	1.1999176
28	2.3781447	0.9490579	1.3235143
29	2.3388365	1.1304955	1.4471109
30	2.7712264	1.2700628	1.6634051
31	2.6533019	1.1723657	1.5604079
32	2.3977987	1.3607816	1.6325059
33	2.5550314	1.3398465	1.6582552
34	2.1422956	1.3538032	1.5604079
35	2.4960692	1.5701326	1.8127511
36	2.6533019	1.7166783	1.9620970
37	2.2405660	1.6259595	1.7870018
38	2.4371069	1.4584787	1.7149037
39	2.2995283	1.5491975	1.7458029
40	1.8474843	1.9469644	1.9208981
41	2.1619497	1.7655269	1.8693995
42	1.7492138	1.7236567	1.7303533
43	1.5133648	1.6050244	1.5810073
44	1.7099057	1.8492673	1.8127511
45	1.7295597	1.7027216	1.7097538
46	1.5133648	1.6468946	1.6119065
47	1.7492138	1.6678297	1.6891544
48	1.7688679	1.7864620	1.7818519
49	1.7099057	1.7864620	1.7664023
50	1.7492138	1.6259595	1.6582552
51	1.3757862	1.8213538	1.7046040
52	1.5133648	1.7445918	1.6840045
53	1.4544025	1.6120028	1.5707076
54	1.7099057	1.7864620	1.7664023
55	1.4544025	1.7027216	1.6376558
56	1.2971698	1.9888346	1.8076012
57	1.3954403	1.9469644	1.8024513
58	0.9433962	1.8562456	1.6170563
59	1.1202830	1.9748779	1.7509527
60	1.0809748	2.0097697	1.7664023
61	1.3364780	2.0097697	1.8333505
62	1.1202830	1.9260293	1.7149037
63	1.0416667	1.8353105	1.6273561
64	1.0416667	2.0865318	1.8127511
65	0.8647799	1.8771807	1.6119065
66	0.8844340	2.0376832	1.7355031
67	1.0613208	2.0237264	1.7715522
68	1.1006289	1.7585485	1.5861572
69	0.7861635	2.0097697	1.6891544
70	0.7468553	1.9678995	1.6479555
71	0.8451258	1.7096999	1.4831600
72	0.6682390	1.7515701	1.4677104
73	0.5699686	1.6399163	1.3595633
74	0.6485849	1.5910677	1.3441137
75	0.6485849	1.6189812	1.3647132
76	0.5699686	1.3538032	1.1484190
77	0.7272013	1.4375436	1.2514162
78	0.5699686	1.2421493	1.0660212
79	0.3144654	1.1235171	0.9115254
80	0.4716981	1.1165387	0.9475744
81	0.5306604	1.1025820	0.9527243
82	0.5306604	1.0746685	0.9321248
83	0.4323899	1.0258200	0.8703265
84	0.2948113	0.7327285	0.6179833
85	0.2555031	0.6140963	0.5201360
86	0.2555031	0.5722261	0.4892368
87	0.2948113	0.4954641	0.4428880
88	0.1572327	0.3838102	0.3244412
89	0.1179245	0.3768318	0.3089917
Total	100.0000000	100.0000000	100.0000000

# Column percentages of clsprty (rows) within each vote outcome (columns),
# rendered as a captioned table with descriptive column headers.
model6 <- table(uk_data_clean$clsprty, uk_data_clean$vote) %>%
  cprop()

kbl(
  model6,
  caption = "Conditional Probabilities of Voting and Closeness to a Political Party",
  booktabs = TRUE  # spelled out: T is an ordinary variable and can be reassigned
) %>%
  add_header_above(c(
    "Closeness to Party (1=close, 2=not close)",
    "Did not Vote" = 1,
    "Voted" = 1,
    " " = 1
  )) %>%
  kable_styling(latex_options = c("repeat_header"))

Conditional Probabilities of Voting and Closeness to a Political Party
Closeness to Party (1=close, 2=not close)	Did not Vote	Voted
	0	1	All
1	28.28882	60.34422	51.94401
2	71.71118	39.65578	48.05599
Total	100.00000	100.00000	100.00000

# Mean and standard deviation of vote within each gndr value
summary_gender_vote <- summarise(
  group_by(uk_data_clean, gndr),
  mean = mean(vote, na.rm = TRUE),
  standard_deviation = sd(vote, na.rm = TRUE)
)
summary_gender_vote

## # A tibble: 2 × 3
##    gndr  mean standard_deviation
##   <dbl> <dbl>              <dbl>
## 1     1 0.741              0.438
## 2     2 0.735              0.441

# Mean and standard deviation of vote within each hinctnta value
# (missing hinctnta forms its own NA group)
summary_income_vote <- summarise(
  group_by(uk_data_clean, hinctnta),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_income_vote

## # A tibble: 11 × 3
##    hinctnta  mean std_dev
##       <dbl> <dbl>   <dbl>
##  1        1 0.628   0.483
##  2        2 0.715   0.452
##  3        3 0.711   0.454
##  4        4 0.760   0.427
##  5        5 0.756   0.430
##  6        6 0.788   0.409
##  7        7 0.788   0.409
##  8        8 0.805   0.396
##  9        9 0.812   0.391
## 10       10 0.839   0.367
## 11       NA 0.718   0.450

# Mean and standard deviation of vote within each fairelcc value
# (missing fairelcc forms its own NA group)
summary_fair_election_vote <- summarise(
  group_by(uk_data_clean, fairelcc),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_fair_election_vote

## # A tibble: 12 × 3
##    fairelcc  mean std_dev
##       <dbl> <dbl>   <dbl>
##  1        0 0.517   0.509
##  2        1 0.667   0.516
##  3        2 0.567   0.504
##  4        3 0.692   0.468
##  5        4 0.647   0.481
##  6        5 0.547   0.499
##  7        6 0.693   0.463
##  8        7 0.729   0.445
##  9        8 0.759   0.428
## 10        9 0.799   0.401
## 11       10 0.834   0.372
## 12       NA 0.733   0.442

# Mean and standard deviation of vote at each agea value
summary_age_vote <- summarise(
  group_by(uk_data_clean, agea),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_age_vote

## # A tibble: 71 × 3
##     agea  mean std_dev
##    <dbl> <dbl>   <dbl>
##  1    19 0.311   0.465
##  2    20 0.376   0.486
##  3    21 0.413   0.494
##  4    22 0.416   0.494
##  5    23 0.482   0.501
##  6    24 0.466   0.500
##  7    25 0.480   0.501
##  8    26 0.498   0.501
##  9    27 0.451   0.499
## 10    28 0.529   0.500
## # ℹ 61 more rows

# Mean and standard deviation of vote within each polintr value
# (missing polintr forms its own NA group)
summary_political_interest_vote <- summarise(
  group_by(uk_data_clean, polintr),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_political_interest_vote

## # A tibble: 5 × 3
##   polintr  mean std_dev
##     <dbl> <dbl>   <dbl>
## 1       1 0.891   0.312
## 2       2 0.839   0.367
## 3       3 0.674   0.469
## 4       4 0.474   0.499
## 5      NA 0.286   0.488

# Mean and standard deviation of vote within each clsprty value
# (missing clsprty forms its own NA group)
summary_close_to_party_vote <- summarise(
  group_by(uk_data_clean, clsprty),
  mean = mean(vote, na.rm = TRUE),
  std_dev = sd(vote, na.rm = TRUE)
)
summary_close_to_party_vote

## # A tibble: 3 × 3
##   clsprty  mean std_dev
##     <dbl> <dbl>   <dbl>
## 1       1 0.857   0.350
## 2       2 0.609   0.488
## 3      NA 0.742   0.439

# Mean of the 0/1 vote variable at each age, ignoring missing answers.
vote_by_age <- summarise(
  group_by(uk_data_clean, agea),
  mean_vote = mean(vote, na.rm = TRUE)
)
vote_by_age

## # A tibble: 71 × 2
##     agea mean_vote
##    <dbl>     <dbl>
##  1    19     0.311
##  2    20     0.376
##  3    21     0.413
##  4    22     0.416
##  5    23     0.482
##  6    24     0.466
##  7    25     0.480
##  8    26     0.498
##  9    27     0.451
## 10    28     0.529
## # ℹ 61 more rows

# Mean vote by age: smoothed trend with the per-age means drawn on top,
# y axis fixed to the 0-1 range.
# The colour is set on each layer rather than inside aes(): inside aes(),
# "red" is treated as a data value, which draws the default palette colour
# and adds a legend with the single entry "red".
ggplot(data = vote_by_age, mapping = aes(x = agea, y = mean_vote)) +
  geom_smooth(color = "red") +
  labs(
    title = "Likelihood to Vote by Age",
    x = "Age",
    y = "Likelihood to Vote"
  ) +
  ylim(0, 1) +
  geom_point(color = "red")

## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Stepping Stone Two

Jocelyn Mattka and Abha Roy Simpson

2023-10-30