# Packages this analysis depends on. Each name is listed once: a repeated
# entry only makes `library()` run a second time for the same package.
packages <- c("tidyverse", "modelsummary", "forcats", "RColorBrewer",
              "fst", "viridis", "knitr", "kableExtra", "rmarkdown",
              "ggridges", "questionr")

# Install only the packages that are missing from this machine's library.
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. `library()` returns the list of attached packages, so
# `lapply()` would otherwise print one copy of that list per package;
# `invisible()` keeps that noise out of the knitted document.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[3]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[7]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "RColorBrewer"
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "rmarkdown"    "kableExtra"   "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "ggridges"     "rmarkdown"    "kableExtra"   "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[11]]
##  [1] "ggridges"     "rmarkdown"    "kableExtra"   "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[12]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "kableExtra"   "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"
# Point the session at the tutorial folder so the relative data path below
# resolves.
# NOTE(review): this path is specific to one machine; anyone else has to edit
# it (or switch to a project-relative path) before the script will run.
setwd("~/SOC202 Documents/tutorial")
getwd()  # echo the resolved working directory as a sanity check
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"
# Read the data file into `ess`, the data frame every later step starts from.
# (`read_fst` is presumably fst::read_fst, attached by the package setup.)
ess <- read_fst("All-ESS-Data.fst")

Homework 5 (2.5%) due Oct. 16

Important: Must post link to your markdown on the discussion board (file and “knit” html), along with post detailing your general takeaways from exploring variables of interest, comparing to other countries, and looking into socio-demographics. You must also attach your R markdown file.

In your post, please link to the ESS website for the three variables you considered, as well as highlight the country you wish to focus on and for what main reason. Finally, please attach (or provide a screenshot) the visual that you produced while doing the tasks that is most interesting to you and explain why (just one visual not all).

Important note

You cannot use the same three variables we considered in the tutorial. You can use one of the three, but not all three.

Task 1

Do a data summary table of three variables of interest. Discuss briefly what you note (i.e., add text in your markdown after the Task 1 code).

# Respondent counts per ESS round (rounds are coded 1-10).
table(ess$essround)
## 
##     1     2     3     4     5     6     7     8     9    10 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Survey year for each round, in round order: element i is the year of round i.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
# Vectorized lookup instead of a loop over a hard-coded 1:10: `match()` finds
# each respondent's round among the known rounds and returns NA for a missing
# or unknown round, so those rows keep `year = NA` exactly as before.
ess$year <- replacements[match(ess$essround, seq_along(replacements))]
# Build the Finland subset and clean the three variables of interest.
# Expressions inside one `mutate()` run top to bottom, so the labelling steps
# at the end see the numeric codes that were cleaned just above them.
finland_data <- ess %>%
  filter(cntry == "FI") %>%
  mutate(
    # vote: code 2 becomes 0, codes 3/7/8/9 become missing, the rest is kept.
    vote = ifelse(vote == 2, 0, ifelse(vote %in% c(3, 7, 8, 9), NA, vote)),
    # rlgdnafi and stfdem: blank out the listed special codes.
    rlgdnafi = ifelse(rlgdnafi %in% c(6666, 7777, 9999), NA, rlgdnafi),
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
    # Turn the cleaned codes into labels; any code without a label falls
    # through unchanged, as text.
    vote = case_match(
      vote,
      1 ~ "Yes",
      0 ~ "No",
      .default = as.character(vote)
    ),
    rlgdnafi = case_match(
      rlgdnafi,
      1 ~ "Evangelical Lutheran",
      2 ~ "Eastern Orthodox",
      3 ~ "Roman Catholic",
      4 ~ "Pentecostalism",
      5 ~ "Free church",
      6 ~ "Advent church",
      7 ~ "Jehovah's Witness",
      8 ~ "Mormon",
      9 ~ "Jewish",
      10 ~ "Islam",
      11 ~ "Other Protestant denomination",
      12 ~ "Other Christian denomination",
      13 ~ "Eastern religions",
      14 ~ "Other Non-Christian religions",
      .default = as.character(rlgdnafi)
    )
  )
# Frequency tables for the three cleaned variables.
table(finland_data$vote)
## 
##    No   Yes 
##  2995 14727
table(finland_data$rlgdnafi)
## 
##                 Advent church              Eastern Orthodox 
##                            12                           114 
##             Eastern religions          Evangelical Lutheran 
##                            19                          5596 
##                   Free church                         Islam 
##                            26                            60 
##             Jehovah's Witness                        Jewish 
##                            58                             4 
##                        Mormon  Other Christian denomination 
##                             3                            53 
## Other Non-Christian religions Other Protestant denomination 
##                            26                            10 
##                Pentecostalism                Roman Catholic 
##                           104                            22
table(finland_data$stfdem)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
##  160  202  420  853 1264 2228 2498 4512 4677 1866  347
## The majority of Finland's population is fairly satisfied with the state of democracy, voted in the most recent election, and is overwhelmingly of the Evangelical Lutheran faith.

Task 2

Choose one of the three variables you just summarized in the table. This will be your current main outcome of interest.

Produce a visual that showcases the mean (average) for your outcome of interest by survey year (can be, e.g., point + line plot or ridge plot, depending on your variable). Discuss briefly what you note (i.e., add text in your markdown after the Task 2 code).

# Respondent counts per survey year, across all countries.
table(ess$year)
## 
##  2002  2004  2006  2008  2010  2012  2014  2016  2018  2020 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Mean satisfaction with democracy in Finland for each survey year; missing
# answers are ignored when averaging.
fistfdem <- finland_data %>%
  group_by(year) %>%
  summarize(mean_stfdem = mean(stfdem, na.rm = TRUE))
fistfdem
## # A tibble: 10 × 2
##     year mean_stfdem
##    <dbl>       <dbl>
##  1  2002        6.35
##  2  2004        6.70
##  3  2006        6.76
##  4  2008        6.52
##  5  2010        6.26
##  6  2012        6.85
##  7  2014        5.91
##  8  2016        6.24
##  9  2018        6.41
## 10  2020        7.33
# Trend of the yearly means. Line thickness is set with `linewidth`: using
# `size` for lines has been deprecated since ggplot2 3.4.0 and raises a warning.
ggplot(fistfdem, aes(x = year, y = mean_stfdem)) +
  geom_line(color = "blue", linewidth = 1) +  # Line to show the trend
  geom_point(color = "red", size = 3) +  # Points to highlight each year's value
  labs(title = "Satisfaction with Democracy (2002-2020)", 
       x = "Survey Year", 
       y = "Satisfaction (0-10)") +
  ylim(0, 10) +  # Show the full 0-10 answer scale on the y-axis
  theme_minimal()

## Satisfaction with Finnish democracy was fairly steady over the 18 years from 2002 to 2020, with a slight increase nearing 2020.

Task 3

Provide a comparison visual of your outcome of interest with two other countries. You can choose the geom() you prefer. Discuss briefly what you note (i.e., add text in your markdown after the Task 3 code).

# Keep the three countries being compared and blank out the special codes
# (77/88/99) of the satisfaction-with-democracy item.
ess_selected <- ess %>%
  filter(cntry %in% c("FI", "AL", "GR")) %>%
  mutate(stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem))

# One box per country, ordered from highest to lowest median satisfaction.
# `na.rm = TRUE` is forwarded by `reorder()` to `median()`: stfdem contains
# missing values after the cleaning above, and without it the median of any
# country with a missing answer is NA, so the intended ordering is lost.
task3plot <- ggplot(
  ess_selected,
  aes(
    x = reorder(cntry, -stfdem, FUN = median, na.rm = TRUE),
    y = stfdem,
    fill = cntry
  )
) +
  geom_boxplot() +
  theme_minimal() +
  theme(legend.position = "none") +  # the x-axis already names each country
  labs(title = "Satisfaction with Democracy (Finland, Albania, Greece)", 
       x = "Country", 
       y = "Scale (0-10)")

task3plot
## Warning: Removed 700 rows containing non-finite values (`stat_boxplot()`).

## Finnish democracy has a higher median satisfaction among the general population than Greek democracy, which came in second place, and Albanian democracy, which came last. Furthermore, Finnish responses are more tightly concentrated (the box is shorter) than those of the other two nations.

## This could be attributed to standard of living and other institutional advancements in Finland that may be absent in Greece or Albania. This is all speculative.

Task 4

Produce a cross-tab between your outcome of interest and a socio-demographic variable (use datasummary_crosstab). Then, calculate column percentages using cprop(), making sure to pick a second socio-demographic variable. Discuss briefly what you note (i.e., add text in your markdown after the Task 4 code).

# Collapse the place-of-residence codes in `domicil` into three groups.
# Codes 7/8/9 become missing; any other unlisted code is kept as text.
finland_data <- finland_data %>%
  mutate(
    geo = case_match(
      as.character(domicil),
      "1" ~ "Urban",
      "2" ~ "Peri-Urban",
      c("3", "4", "5") ~ "Rural",
      c("7", "8", "9") ~ NA_character_,
      .default = as.character(domicil)
    )
  )

# Compare the grouped variable with the raw codes to check the recode.
table(finland_data$geo)
## 
## Peri-Urban      Rural      Urban 
##       2388      13387       3741
table(finland_data$domicil)
## 
##    1    2    3    4    5    8    9 
## 3741 2388 5811 3709 3867   15    1
# Cross-tabulate satisfaction with democracy against place of residence.
stfdemgeo <- datasummary_crosstab(stfdem ~ geo, data = finland_data)

stfdemgeo
stfdem Peri-Urban Rural Urban All
0 N 17 125 18 160
% row 10.6 78.1 11.2 100.0
1 N 23 153 26 202
% row 11.4 75.7 12.9 100.0
2 N 59 296 65 420
% row 14.0 70.5 15.5 100.0
3 N 92 624 137 853
% row 10.8 73.2 16.1 100.0
4 N 143 940 179 1264
% row 11.3 74.4 14.2 100.0
5 N 271 1637 318 2228
% row 12.2 73.5 14.3 100.0
6 N 283 1746 467 2498
% row 11.3 69.9 18.7 100.0
7 N 516 3104 891 4512
% row 11.4 68.8 19.7 100.0
8 N 620 3048 1007 4677
% row 13.3 65.2 21.5 100.0
9 N 280 1096 489 1866
% row 15.0 58.7 26.2 100.0
10 N 41 210 94 347
% row 11.8 60.5 27.1 100.0
All N 2388 13387 3741 19532
% row 12.2 68.5 19.2 100.0
## The majority of the rural and peri-urban population are favourable of Finnish democracy (7)
## The majority of the urban population are more favourable of Finnish democracy than the population of other areas (8)
# Label the 0/1 paid-work indicator; any other value becomes missing.
finland_data <- finland_data %>%
  mutate(pdwrk_recode = case_match(pdwrk, 1 ~ "yes", 0 ~ "no"))

# The labelled counts should mirror the raw 0/1 counts.
table(finland_data$pdwrk_recode)
## 
##    no   yes 
##  9355 10177
table(finland_data$pdwrk)
## 
##     0     1 
##  9355 10177
# Cross-tabulate satisfaction with democracy against the paid-work indicator.
stfdempdwrk <- datasummary_crosstab(stfdem ~ pdwrk_recode, data = finland_data)

stfdempdwrk
stfdem no yes All
0 N 106 54 160
% row 66.2 33.8 100.0
1 N 124 78 202
% row 61.4 38.6 100.0
2 N 224 196 420
% row 53.3 46.7 100.0
3 N 474 379 853
% row 55.6 44.4 100.0
4 N 640 624 1264
% row 50.6 49.4 100.0
5 N 1138 1090 2228
% row 51.1 48.9 100.0
6 N 1151 1347 2498
% row 46.1 53.9 100.0
7 N 2034 2478 4512
% row 45.1 54.9 100.0
8 N 2046 2631 4677
% row 43.7 56.3 100.0
9 N 806 1060 1866
% row 43.2 56.8 100.0
10 N 194 153 347
% row 55.9 44.1 100.0
All N 9355 10177 19532
% row 47.9 52.1 100.0
# Column percentages: the distribution of satisfaction scores within each
# paid-work group.
cprop(table(finland_data$stfdem, finland_data$pdwrk_recode))
##        
##         no    yes   All  
##   0       1.2   0.5   0.8
##   1       1.4   0.8   1.1
##   2       2.5   1.9   2.2
##   3       5.3   3.8   4.5
##   4       7.2   6.2   6.6
##   5      12.7  10.8  11.7
##   6      12.9  13.3  13.1
##   7      22.8  24.6  23.7
##   8      22.9  26.1  24.6
##   9       9.0  10.5   9.8
##   10      2.2   1.5   1.8
##   Total 100.0 100.0 100.0
## The category of satisfaction for Finland democracy where there is the highest concentration of people who have been paid in the past 7 days is (8). Similarly, this is the same category with the highest concentration of people for those who haven't been paid in the past 7 days. 

## People who are less satisfied with Finnish democracy, from (0) to (5), are more often people who haven't been paid in the past 7 days. The same is true of the biggest supporters of Finnish democracy, those with the highest satisfaction at (10).

Task 5

Choose one of the two socio-demographic variables you just worked with. Visualize the conditional probability (or column percentages) of your outcome given your selected socio-dem variable. Discuss briefly what you note (i.e., add text in your markdown after the Task 5 code).

# Rows with both a place of residence and a satisfaction score. The filter is
# computed once; `df` is kept as a second name for the same data so that any
# code still referring to `df` keeps working.
finland_clean <- finland_data %>%
  filter(!is.na(geo) & !is.na(stfdem))
df <- finland_clean

table(finland_clean$geo)
## 
## Peri-Urban      Rural      Urban 
##       2345      12979       3691
# Share of each satisfaction score within each place-of-residence group, i.e.
# the column percentages of the stfdem-by-geo table expressed as proportions.
# `ungroup()` drops the grouping afterwards so it cannot leak into later steps.
finland_probs <- finland_clean %>%
  count(stfdem, geo) %>%
  group_by(geo) %>%
  mutate(prob = n / sum(n)) %>%
  ungroup()

# One line per place-of-residence group across the 0-10 satisfaction scale.
ggplot(finland_probs, aes(x = as.factor(stfdem), y = prob, color = geo)) +
  geom_point() +
  geom_line(aes(group = geo)) +
  labs(title = "Conditional Probabilities of Satisfaction for Democracy in Finland",
       subtitle = "by Place of Residence",
       x = "Satisfaction (0-10)", 
       y = "Probability") +
  theme_minimal()

## There is a clear left skew in the visualization. Furthermore, there is a large drop off for all 3 area categories with regards to stfdem after (8).