Homework_5_Adrien

# Packages this analysis depends on (each listed exactly once).
packages <- c(
  "tidyverse", "modelsummary", "forcats", "RColorBrewer", "fst", "viridis",
  "knitr", "kableExtra", "rmarkdown", "ggridges", "questionr"
)

# Install only the packages that are not already present on this machine.
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. invisible() suppresses the list of search paths that
# lapply() would otherwise print once per package.
invisible(lapply(packages, library, character.only = TRUE))

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[3]]
##  [1] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
##  [6] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [11] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [16] "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[6]]
##  [1] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[7]]
##  [1] "knitr"        "viridis"      "viridisLite"  "fst"          "RColorBrewer"
##  [6] "modelsummary" "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "kableExtra"   "knitr"        "viridis"      "viridisLite"  "fst"         
##  [6] "RColorBrewer" "modelsummary" "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "rmarkdown"    "kableExtra"   "knitr"        "viridis"      "viridisLite" 
##  [6] "fst"          "RColorBrewer" "modelsummary" "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "ggridges"     "rmarkdown"    "kableExtra"   "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[11]]
##  [1] "ggridges"     "rmarkdown"    "kableExtra"   "knitr"        "viridis"     
##  [6] "viridisLite"  "fst"          "RColorBrewer" "modelsummary" "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[12]]
##  [1] "questionr"    "ggridges"     "rmarkdown"    "kableExtra"   "knitr"       
##  [6] "viridis"      "viridisLite"  "fst"          "RColorBrewer" "modelsummary"
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"

# Point the session at the tutorial folder so that relative file paths
# resolve there, then print the resulting directory as a check.
# NOTE(review): this path only exists on one machine; an RStudio project
# (or a path built relative to the document) would make this portable.
setwd("~/SOC202 Documents/tutorial")
getwd()

## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"

# Read the ESS data file (fst format) from the current working directory.
ess <- read_fst("All-ESS-Data.fst")

Homework 5 (2.5%) due Oct. 16

Important: Must post link to your markdown on the discussion board (file and “knit” html), along with post detailing your general takeaways from exploring variables of interest, comparing to other countries, and looking into socio-demographics. You must also attach your R markdown file.

In your post, please link to the ESS website for the three variables you considered, as well as highlight the country you wish to focus on and for what main reason. Finally, please attach (or provide a screenshot) the visual that you produced while doing the tasks that is most interesting to you and explain why (just one visual not all).

Important note

You cannot use the same three variables we considered in the tutorial. You can use one of the three, but not all three.

Task 1

Do a data summary table of three variables of interest. Discuss briefly what you note (i.e., add text in your markdown after the Task 1 code).

# Respondent counts per ESS round, checked before rounds are mapped to years.
table(ess$essround)

## 
##     1     2     3     4     5     6     7     8     9    10 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685

# Map each ESS round to its survey year with a vectorised lookup:
# round i corresponds to replacements[i]. match() yields NA for any round
# that is missing or outside the lookup table, so those rows keep year = NA
# (as before), and the lookup stays in sync with the length of `replacements`.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
ess$year <- replacements[match(ess$essround, seq_along(replacements))]

# Build the Finland subset and clean the three variables of interest in a
# single pass:
#   * vote     -> "Yes" / "No"; codes 3, 7, 8, 9 become missing
#   * rlgdnafi -> denomination label; codes 6666, 7777, 9999 become missing
#   * stfdem   -> 0-10 score; codes 77, 88, 99 become missing
# Any code not listed explicitly is kept as its character representation.
finland_data <- ess %>%
  filter(cntry == "FI") %>%
  mutate(
    vote = case_when(
      vote == 1 ~ "Yes",
      vote %in% c(0, 2) ~ "No",
      vote %in% c(3, 7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(vote)
    ),
    rlgdnafi = case_when(
      rlgdnafi %in% c(6666, 7777, 9999) ~ NA_character_,
      rlgdnafi == 1 ~ "Evangelical Lutheran",
      rlgdnafi == 2 ~ "Eastern Orthodox",
      rlgdnafi == 3 ~ "Roman Catholic",
      rlgdnafi == 4 ~ "Pentecostalism",
      rlgdnafi == 5 ~ "Free church",
      rlgdnafi == 6 ~ "Advent church",
      rlgdnafi == 7 ~ "Jehovah's Witness",
      rlgdnafi == 8 ~ "Mormon",
      rlgdnafi == 9 ~ "Jewish",
      rlgdnafi == 10 ~ "Islam",
      rlgdnafi == 11 ~ "Other Protestant denomination",
      rlgdnafi == 12 ~ "Other Christian denomination",
      rlgdnafi == 13 ~ "Eastern religions",
      rlgdnafi == 14 ~ "Other Non-Christian religions",
      TRUE ~ as.character(rlgdnafi)
    ),
    stfdem = replace(stfdem, stfdem %in% c(77, 88, 99), NA)
  )

# Summary 1: counts of the recoded vote variable ("Yes" / "No").
table(finland_data$vote)

## 
##    No   Yes 
##  2995 14727

# Summary 2: counts per labelled religious denomination.
table(finland_data$rlgdnafi)

## 
##                 Advent church              Eastern Orthodox 
##                            12                           114 
##             Eastern religions          Evangelical Lutheran 
##                            19                          5596 
##                   Free church                         Islam 
##                            26                            60 
##             Jehovah's Witness                        Jewish 
##                            58                             4 
##                        Mormon  Other Christian denomination 
##                             3                            53 
## Other Non-Christian religions Other Protestant denomination 
##                            26                            10 
##                Pentecostalism                Roman Catholic 
##                           104                            22

# Summary 3: counts per satisfaction-with-democracy score (0-10).
table(finland_data$stfdem)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
##  160  202  420  853 1264 2228 2498 4512 4677 1866  347

## The majority of Finnish respondents are fairly satisfied with the state of democracy and voted in the most recent election. Among those who reported a religious denomination, the overwhelming majority are Evangelical Lutheran.

Task 2

Choose one of the three variables you just summarized in the table. This will be your current main outcome of interest.

Produce a visual that showcases the mean (average) for your outcome of interest by survey year (can be, e.g., point + line plot or ridge plot, depending on your variable). Discuss briefly what you note (i.e., add text in your markdown after the Task 2 code).

# Check the round-to-year recode: respondent counts per survey year.
table(ess$year)

## 
##  2002  2004  2006  2008  2010  2012  2014  2016  2018  2020 
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685

# Mean satisfaction with democracy in Finland for each survey year; missing
# answers are left out of the mean. The result is printed for inspection.
fistfdem <- summarise(
  group_by(finland_data, year),
  mean_stfdem = mean(stfdem, na.rm = TRUE)
)
fistfdem

## # A tibble: 10 × 2
##     year mean_stfdem
##    <dbl>       <dbl>
##  1  2002        6.35
##  2  2004        6.70
##  3  2006        6.76
##  4  2008        6.52
##  5  2010        6.26
##  6  2012        6.85
##  7  2014        5.91
##  8  2016        6.24
##  9  2018        6.41
## 10  2020        7.33

# Line + point plot of Finland's mean satisfaction with democracy per survey
# year. The y-axis is fixed to the full 0-10 answer scale.
ggplot(fistfdem, aes(x = year, y = mean_stfdem)) +
  # `linewidth` sets line thickness; `size` for lines is deprecated since
  # ggplot2 3.4.0.
  geom_line(color = "blue", linewidth = 1) +  # Line to show the trend
  geom_point(color = "red", size = 3) +  # Points to highlight each year's value
  labs(title = "Satisfaction with Democracy (2002-2020)", 
       x = "Survey Year", 
       y = "Satisfaction (0-10)") +
  ylim(0, 10) +  # Setting the y-axis limits from 0 to 10
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Satisfaction with Finnish democracy stayed fairly steady between 2002 and 2018 (yearly means between roughly 5.9 and 6.9, with a dip in 2014), followed by a rise to 7.33 in 2020.

Task 3

Provide a comparison visual of your outcome of interest with two other countries. You can choose the geom() you prefer. Discuss briefly what you note (i.e., add text in your markdown after the Task 3 code).

# Comparison sample: respondents from Finland (FI), Albania (AL) and
# Greece (GR), with the stfdem codes 77, 88 and 99 set to missing.
ess_selected <- ess %>%
  filter(cntry == "FI" | cntry == "AL" | cntry == "GR") %>%
  mutate(stfdem = replace(stfdem, stfdem %in% c(77, 88, 99), NA))


# Box plot of satisfaction with democracy for the three comparison countries,
# with countries ordered from highest to lowest median satisfaction.
# stfdem contains missing values, so na.rm = TRUE is forwarded to median();
# without it every country's median is NA and the ordering has no effect.
task3plot <- ggplot(
  ess_selected,
  aes(
    x = reorder(cntry, -stfdem, FUN = median, na.rm = TRUE),
    y = stfdem,
    fill = cntry
  )
) +
  geom_boxplot() +
  theme_minimal() + 
  theme(legend.position = "none") +  # the x-axis already names each country
  labs(title = "Satisfaction with Democracy (Finland, Albania, Greece)", 
       x = "Country", 
       y = "Scale (0-10)")

task3plot

## Warning: Removed 700 rows containing non-finite values (`stat_boxplot()`).

## Finland has the highest median satisfaction with democracy, followed by Greece in second place and Albania in last place. Finnish responses are also more tightly concentrated: Finland's box is shorter than those of the other two countries.

## This could be attributed to standard of living and other institutional advancements in Finland that may be absent in Greece or Albania. This is all speculative.

Task 4

Produce a cross-tab between your outcome of interest and a socio-demographic variable (use datasummary_crosstab). Then, calculate column percentages using cprop(), making sure to pick a second socio-demographic variable. Discuss briefly what you note (i.e., add text in your markdown after the Task 4 code).

# Collapse the domicil codes into three residence categories:
# 1 -> "Urban", 2 -> "Peri-Urban", 3-5 -> "Rural"; codes 7, 8 and 9 become
# missing. Any other code is kept as its character representation.
finland_data <- finland_data %>%
  mutate(
    geo = case_when(
      domicil == 1 ~ "Urban",
      domicil == 2 ~ "Peri-Urban",
      domicil %in% c(3, 4, 5) ~ "Rural",
      domicil %in% c(7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(domicil)
    )
  )

# Counts per recoded residence category.
table(finland_data$geo)

## 
## Peri-Urban      Rural      Urban 
##       2388      13387       3741

# Counts per original domicil code, for comparison with the recoded totals.
table(finland_data$domicil)

## 
##    1    2    3    4    5    8    9 
## 3741 2388 5811 3709 3867   15    1

# Cross-tabulate satisfaction with democracy (rows) against place of
# residence (columns) and display the resulting table.
stfdemgeo <- datasummary_crosstab(stfdem ~ geo, data = finland_data)

stfdemgeo

stfdem		Peri-Urban	Rural	Urban	All
0	N	17	125	18	160
	% row	10.6	78.1	11.2	100.0
1	N	23	153	26	202
	% row	11.4	75.7	12.9	100.0
2	N	59	296	65	420
	% row	14.0	70.5	15.5	100.0
3	N	92	624	137	853
	% row	10.8	73.2	16.1	100.0
4	N	143	940	179	1264
	% row	11.3	74.4	14.2	100.0
5	N	271	1637	318	2228
	% row	12.2	73.5	14.3	100.0
6	N	283	1746	467	2498
	% row	11.3	69.9	18.7	100.0
7	N	516	3104	891	4512
	% row	11.4	68.8	19.7	100.0
8	N	620	3048	1007	4677
	% row	13.3	65.2	21.5	100.0
9	N	280	1096	489	1866
	% row	15.0	58.7	26.2	100.0
10	N	41	210	94	347
	% row	11.8	60.5	27.1	100.0
All	N	2388	13387	3741	19532
	% row	12.2	68.5	19.2	100.0

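## The most common satisfaction score is 7 among rural respondents and 8 among peri-urban and urban respondents.
## Urban respondents make up a growing share of each row as satisfaction rises (from 11.2% of those answering 0 to 27.1% of those answering 10), which suggests that the urban population is more favourable towards Finnish democracy than the population of other areas.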

# Label the pdwrk indicator: 1 -> "yes", 0 -> "no"; any other value
# (including missing) becomes NA.
finland_data <- finland_data %>%
  mutate(pdwrk_recode = c("no", "yes")[match(pdwrk, c(0, 1))])

# Counts per recoded label.
table(finland_data$pdwrk_recode)

## 
##    no   yes 
##  9355 10177

# Counts per original pdwrk code, for comparison with the recoded labels.
table(finland_data$pdwrk)

## 
##     0     1 
##  9355 10177

# Cross-tabulate satisfaction with democracy (rows) against the recoded
# pdwrk indicator (columns) and display the resulting table.
stfdempdwrk <- datasummary_crosstab(stfdem ~ pdwrk_recode, data = finland_data)

stfdempdwrk

stfdem		no	yes	All
0	N	106	54	160
	% row	66.2	33.8	100.0
1	N	124	78	202
	% row	61.4	38.6	100.0
2	N	224	196	420
	% row	53.3	46.7	100.0
3	N	474	379	853
	% row	55.6	44.4	100.0
4	N	640	624	1264
	% row	50.6	49.4	100.0
5	N	1138	1090	2228
	% row	51.1	48.9	100.0
6	N	1151	1347	2498
	% row	46.1	53.9	100.0
7	N	2034	2478	4512
	% row	45.1	54.9	100.0
8	N	2046	2631	4677
	% row	43.7	56.3	100.0
9	N	806	1060	1866
	% row	43.2	56.8	100.0
10	N	194	153	347
	% row	55.9	44.1	100.0
All	N	9355	10177	19532
	% row	47.9	52.1	100.0

# Column percentages: the distribution of satisfaction scores within each
# pdwrk_recode group (each column sums to 100).
table(finland_data$stfdem, finland_data$pdwrk_recode) %>%
  cprop()

##        
##         no    yes   All  
##   0       1.2   0.5   0.8
##   1       1.4   0.8   1.1
##   2       2.5   1.9   2.2
##   3       5.3   3.8   4.5
##   4       7.2   6.2   6.6
##   5      12.7  10.8  11.7
##   6      12.9  13.3  13.1
##   7      22.8  24.6  23.7
##   8      22.9  26.1  24.6
##   9       9.0  10.5   9.8
##   10      2.2   1.5   1.8
##   Total 100.0 100.0 100.0

## Among respondents who did paid work in the past 7 days, the most common satisfaction score for Finnish democracy is (8), at 26.1%. The same score is also the most common among those who did not do paid work in the past 7 days, at 22.9%.

## Respondents who did not do paid work in the past 7 days are over-represented at the lower satisfaction scores, from (0) to (5). The same is true at the highest score (10), i.e. among the strongest supporters of Finnish democracy.

Task 5

Choose one of the two socio-demographic variables you just worked with. Visualize the conditional probability (or column percentages) of your outcome given your selected socio-dem variable. Discuss briefly what you note (i.e., add text in your markdown after the Task 5 code).

# Keep only Finnish respondents that have both a residence category and a
# satisfaction score, then count how many remain per residence category.
df <- finland_data %>%
  filter(!is.na(geo), !is.na(stfdem))

table(df[["geo"]])

## 
## Peri-Urban      Rural      Urban 
##       2345      12979       3691

# Rows with both a residence category and a satisfaction score.
finland_clean <- finland_data %>%
  filter(!is.na(geo), !is.na(stfdem))

# Conditional distribution of satisfaction given place of residence:
# prob = (respondents with this score in this geo) / (all respondents in
# this geo), so probabilities sum to 1 within each geo category.
# ungroup() drops the geo grouping so later steps do not silently operate
# per group.
finland_probs <- finland_clean %>%
  count(stfdem, geo) %>%
  group_by(geo) %>%
  mutate(prob = n / sum(n)) %>%
  ungroup()

# One line per residence category showing the share of respondents at each
# satisfaction score (stfdem is treated as a discrete axis).
ggplot(
  data = finland_probs,
  mapping = aes(x = as.factor(stfdem), y = prob, color = geo)
) +
  geom_point() +
  geom_line(mapping = aes(group = geo)) +
  ggtitle(
    "Conditional Probabilities of Satisfaction for Democracy in Finland",
    subtitle = "by Place of Residence"
  ) +
  xlab("Satisfaction (0-10)") +
  ylab("Probability") +
  theme_minimal()

## There is a clear left skew in the visualization. Furthermore, there is a large drop off for all 3 area categories with regards to stfdem after (8).

Homework_5_Adrien_Lin

2023-10-05