# Packages this analysis depends on. Each name appears exactly once
# (the original vector listed "viridis" twice).
packages <- c(
  "tidyverse", "modelsummary", "forcats", "RColorBrewer",
  "fst", "viridis", "knitr", "kableExtra", "rmarkdown", "ggridges",
  "questionr"
)

# Install only the packages that are missing from the local library.
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) install.packages(new_packages)

# Attach every package. lapply() returns the search path after each
# library() call; invisible() keeps that list out of the knitted output
# while the usual startup messages are still shown.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Switch to the tutorial folder and echo the resolved path as a check.
setwd(dir = "~/SOC202 Documents/tutorial")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"
# Read the ESS data file from the current working directory.
ess <- read_fst(path = "All-ESS-Data.fst")
Important: You must post a link to your markdown on the discussion board (file and “knit” html), along with a post detailing your general takeaways from exploring variables of interest, comparing to other countries, and looking into socio-demographics. You must also attach your R markdown file.
In your post, please link to the ESS website for the three variables you considered, as well as highlight the country you wish to focus on and for what main reason. Finally, please attach (or provide a screenshot) the visual that you produced while doing the tasks that is most interesting to you and explain why (just one visual not all).
You cannot use the same three variables we considered in the tutorial. You can use one of the three, but not all three.
Do a data summary table of three variables of interest. Discuss briefly what you note (i.e., add text in your markdown after the Task 1 code).
# Number of respondents in each ESS round.
table(ess$essround)
##
## 1 2 3 4 5 6 7 8 9 10
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Survey year for each round: position i holds the year assigned to round i.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
# Vectorised lookup instead of looping over rounds: match() gives each row's
# position in 1..10, and rows whose round is missing or outside that range
# get NA, exactly as the loop left them.
ess$year <- replacements[match(ess$essround, seq_along(replacements))]
# Finland-only subset, with the three variables of interest cleaned and
# labelled in a single mutate() pass.
finland_data <- ess %>%
  filter(cntry == "FI") %>%
  mutate(
    # vote: 1 -> "Yes"; 2 (or an existing 0) -> "No"; codes 3, 7, 8, 9 are
    # set to missing; anything else is kept as its character form.
    vote = case_when(
      vote == 1 ~ "Yes",
      vote %in% c(0, 2) ~ "No",
      vote %in% c(3, 7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(vote)
    ),
    # rlgdnafi: codes 6666, 7777, 9999 are set to missing; codes 1-14 get
    # a denomination label; anything else is kept as its character form.
    rlgdnafi = case_when(
      rlgdnafi %in% c(6666, 7777, 9999) ~ NA_character_,
      rlgdnafi == 1 ~ "Evangelical Lutheran",
      rlgdnafi == 2 ~ "Eastern Orthodox",
      rlgdnafi == 3 ~ "Roman Catholic",
      rlgdnafi == 4 ~ "Pentecostalism",
      rlgdnafi == 5 ~ "Free church",
      rlgdnafi == 6 ~ "Advent church",
      rlgdnafi == 7 ~ "Jehovah's Witness",
      rlgdnafi == 8 ~ "Mormon",
      rlgdnafi == 9 ~ "Jewish",
      rlgdnafi == 10 ~ "Islam",
      rlgdnafi == 11 ~ "Other Protestant denomination",
      rlgdnafi == 12 ~ "Other Christian denomination",
      rlgdnafi == 13 ~ "Eastern religions",
      rlgdnafi == 14 ~ "Other Non-Christian religions",
      TRUE ~ as.character(rlgdnafi)
    ),
    # stfdem: codes 77, 88, 99 are set to missing; 0-10 stay numeric.
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
  )
# Frequency of each turnout answer in the Finnish sample.
table(finland_data[["vote"]])
##
## No Yes
## 2995 14727
# Frequency of each religious denomination.
table(finland_data[["rlgdnafi"]])
##
## Advent church Eastern Orthodox
## 12 114
## Eastern religions Evangelical Lutheran
## 19 5596
## Free church Islam
## 26 60
## Jehovah's Witness Jewish
## 58 4
## Mormon Other Christian denomination
## 3 53
## Other Non-Christian religions Other Protestant denomination
## 26 10
## Pentecostalism Roman Catholic
## 104 22
# Frequency of each satisfaction-with-democracy score (0-10).
table(finland_data[["stfdem"]])
##
## 0 1 2 3 4 5 6 7 8 9 10
## 160 202 420 853 1264 2228 2498 4512 4677 1866 347
## The majority of Finland's population are fairly satisfied with the state of democracy, voted in the most recent election, and, among those who report a denomination, are overwhelmingly of the Evangelical Lutheran faith.
Choose one of the three variables you just summarized in the table. This will be your current main outcome of interest.
Produce a visual that showcases the mean (average) for your outcome of interest by survey year (can be, e.g., point + line plot or ridge plot, depending on your variable). Discuss briefly what you note (i.e., add text in your markdown after the Task 2 code).
# Check the derived year column: respondents per survey year in the full file.
table(ess[["year"]])
##
## 2002 2004 2006 2008 2010 2012 2014 2016 2018 2020
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Mean satisfaction with democracy in Finland for each survey year,
# ignoring missing answers.
fistfdem <- summarise(
  group_by(finland_data, year),
  mean_stfdem = mean(stfdem, na.rm = TRUE)
)
fistfdem
## # A tibble: 10 × 2
## year mean_stfdem
## <dbl> <dbl>
## 1 2002 6.35
## 2 2004 6.70
## 3 2006 6.76
## 4 2008 6.52
## 5 2010 6.26
## 6 2012 6.85
## 7 2014 5.91
## 8 2016 6.24
## 9 2018 6.41
## 10 2020 7.33
# Point + line plot of the yearly mean satisfaction score.
ggplot(fistfdem, aes(x = year, y = mean_stfdem)) +
  # Line shows the trend; `linewidth` replaces `size`, which ggplot2
  # deprecated for lines in 3.4.0.
  geom_line(color = "blue", linewidth = 1) +
  # Points highlight each survey year's value.
  geom_point(color = "red", size = 3) +
  labs(
    title = "Satisfaction with Democracy (2002-2020)",
    x = "Survey Year",
    y = "Satisfaction (0-10)"
  ) +
  # Show the full 0-10 answer scale on the y-axis.
  ylim(0, 10) +
  theme_minimal()
## Opinions of Finnish democracy have been steady for the past 18 years with a slight increase nearing 2020.
Provide a comparison visual of your outcome of interest with two other countries. You can choose the geom() you prefer. Discuss briefly what you note (i.e., add text in your markdown after the Task 3 code).
# Keep the three comparison countries and set the stfdem codes
# 77, 88 and 99 to missing.
ess_selected <- ess %>%
  filter(cntry %in% c("FI", "AL", "GR")) %>%
  mutate(stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem))

# Boxplot of satisfaction by country, ordered from highest to lowest median.
# na.rm = TRUE is forwarded to median(); without it any country with a
# missing answer gets an NA median and the reordering has no effect.
task3plot <- ggplot(
  ess_selected,
  aes(
    x = reorder(cntry, -stfdem, FUN = median, na.rm = TRUE),
    y = stfdem,
    fill = cntry
  )
) +
  geom_boxplot() +
  theme_minimal() +
  # The x-axis already names the countries, so the fill legend is dropped.
  theme(legend.position = "none") +
  labs(
    title = "Satisfaction with Democracy (Finland, Albania, Greece)",
    x = "Country",
    y = "Scale (0-10)"
  )
task3plot
## Warning: Removed 700 rows containing non-finite values (`stat_boxplot()`).
## Finnish democracy has a higher median satisfaction among the general population than Greek democracy, which came in second place, and Albanian democracy, which came last. Furthermore, Finnish responses are more tightly concentrated (a shorter box) than those of the other two nations.
## This could be attributed to standard of living and other institutional advancements in Finland that may be absent in Greece or Albania. This is all speculative.
Produce a cross-tab between your outcome of interest and a socio-demographic variable (use datasummary_crosstab). Then, calculate column percentages using cprop(), making sure to pick a second socio-demographic variable. Discuss briefly what you note (i.e., add text in your markdown after the Task 4 code).
# Collapse domicil into three residence types: 1 -> Urban, 2 -> Peri-Urban,
# 3-5 -> Rural; codes 7, 8, 9 are set to missing and any other value is
# kept as its character form.
finland_data <- finland_data %>%
  mutate(
    geo = case_when(
      domicil == 1 ~ "Urban",
      domicil == 2 ~ "Peri-Urban",
      domicil %in% c(3, 4, 5) ~ "Rural",
      domicil %in% c(7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(domicil)
    )
  )
# Counts for the new grouping, followed by the raw codes for comparison.
table(finland_data[["geo"]])
##
## Peri-Urban Rural Urban
## 2388 13387 3741
table(finland_data[["domicil"]])
##
## 1 2 3 4 5 8 9
## 3741 2388 5811 3709 3867 15 1
# Cross-tabulate satisfaction with democracy against place of residence.
stfdemgeo <- datasummary_crosstab(
  stfdem ~ geo,
  data = finland_data
)
stfdemgeo
| stfdem | Peri-Urban | Rural | Urban | All | |
|---|---|---|---|---|---|
| 0 | N | 17 | 125 | 18 | 160 |
| % row | 10.6 | 78.1 | 11.2 | 100.0 | |
| 1 | N | 23 | 153 | 26 | 202 |
| % row | 11.4 | 75.7 | 12.9 | 100.0 | |
| 2 | N | 59 | 296 | 65 | 420 |
| % row | 14.0 | 70.5 | 15.5 | 100.0 | |
| 3 | N | 92 | 624 | 137 | 853 |
| % row | 10.8 | 73.2 | 16.1 | 100.0 | |
| 4 | N | 143 | 940 | 179 | 1264 |
| % row | 11.3 | 74.4 | 14.2 | 100.0 | |
| 5 | N | 271 | 1637 | 318 | 2228 |
| % row | 12.2 | 73.5 | 14.3 | 100.0 | |
| 6 | N | 283 | 1746 | 467 | 2498 |
| % row | 11.3 | 69.9 | 18.7 | 100.0 | |
| 7 | N | 516 | 3104 | 891 | 4512 |
| % row | 11.4 | 68.8 | 19.7 | 100.0 | |
| 8 | N | 620 | 3048 | 1007 | 4677 |
| % row | 13.3 | 65.2 | 21.5 | 100.0 | |
| 9 | N | 280 | 1096 | 489 | 1866 |
| % row | 15.0 | 58.7 | 26.2 | 100.0 | |
| 10 | N | 41 | 210 | 94 | 347 |
| % row | 11.8 | 60.5 | 27.1 | 100.0 | |
| All | N | 2388 | 13387 | 3741 | 19532 |
| % row | 12.2 | 68.5 | 19.2 | 100.0 |
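## The most common satisfaction score among rural respondents is (7), while for peri-urban respondents it is (8).
## Urban respondents also most often answer (8), and their share of each row generally grows as satisfaction rises (from 11.2% at 0 to 27.1% at 10), so the urban population is more favourable of Finnish democracy than the population of other areas.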
# Label the pdwrk indicator: 0 -> "no", 1 -> "yes"; anything else is NA.
finland_data <- finland_data %>%
  mutate(
    pdwrk_recode = case_when(
      pdwrk == 0 ~ "no",
      pdwrk == 1 ~ "yes"
    )
  )
# Counts for the labelled column, followed by the raw codes for comparison.
table(finland_data[["pdwrk_recode"]])
##
## no yes
## 9355 10177
table(finland_data[["pdwrk"]])
##
## 0 1
## 9355 10177
# Cross-tabulate satisfaction with democracy against the labelled indicator.
stfdempdwrk <- datasummary_crosstab(
  stfdem ~ pdwrk_recode,
  data = finland_data
)
stfdempdwrk
| stfdem | no | yes | All | |
|---|---|---|---|---|
| 0 | N | 106 | 54 | 160 |
| % row | 66.2 | 33.8 | 100.0 | |
| 1 | N | 124 | 78 | 202 |
| % row | 61.4 | 38.6 | 100.0 | |
| 2 | N | 224 | 196 | 420 |
| % row | 53.3 | 46.7 | 100.0 | |
| 3 | N | 474 | 379 | 853 |
| % row | 55.6 | 44.4 | 100.0 | |
| 4 | N | 640 | 624 | 1264 |
| % row | 50.6 | 49.4 | 100.0 | |
| 5 | N | 1138 | 1090 | 2228 |
| % row | 51.1 | 48.9 | 100.0 | |
| 6 | N | 1151 | 1347 | 2498 |
| % row | 46.1 | 53.9 | 100.0 | |
| 7 | N | 2034 | 2478 | 4512 |
| % row | 45.1 | 54.9 | 100.0 | |
| 8 | N | 2046 | 2631 | 4677 |
| % row | 43.7 | 56.3 | 100.0 | |
| 9 | N | 806 | 1060 | 1866 |
| % row | 43.2 | 56.8 | 100.0 | |
| 10 | N | 194 | 153 | 347 |
| % row | 55.9 | 44.1 | 100.0 | |
| All | N | 9355 | 10177 | 19532 |
| % row | 47.9 | 52.1 | 100.0 |
# Column percentages of satisfaction score within each pdwrk_recode group.
cprop(table(finland_data$stfdem, finland_data$pdwrk_recode))
##
## no yes All
## 0 1.2 0.5 0.8
## 1 1.4 0.8 1.1
## 2 2.5 1.9 2.2
## 3 5.3 3.8 4.5
## 4 7.2 6.2 6.6
## 5 12.7 10.8 11.7
## 6 12.9 13.3 13.1
## 7 22.8 24.6 23.7
## 8 22.9 26.1 24.6
## 9 9.0 10.5 9.8
## 10 2.2 1.5 1.8
## Total 100.0 100.0 100.0
## The satisfaction category with the highest concentration of people who have been paid in the past 7 days is (8). This is also the category with the highest concentration of people who haven't been paid in the past 7 days.
## At the lower satisfaction scores, (0)-(5), the share is higher among people who haven't been paid in the past 7 days than among those who have. The same is true at the very top of the scale, (10), among the biggest supporters of Finnish democracy.
Choose one of the two socio-demographic variables you just worked with. Visualize the conditional probability (or column percentages) of your outcome given your selected socio-dem variable. Discuss briefly what you note (i.e., add text in your markdown after the Task 5 code).
# Rows with both a residence type and a satisfaction score.
df <- finland_data %>%
  filter(!is.na(geo), !is.na(stfdem))
table(df$geo)
##
## Peri-Urban Rural Urban
## 2345 12979 3691
# Same rows under the name used below; reusing df avoids running the
# identical filter a second time.
finland_clean <- df

# P(stfdem | geo): count each score within a residence type and divide by
# that type's total. ungroup() stops the grouping leaking into later steps.
finland_probs <- finland_clean %>%
  count(stfdem, geo) %>%
  group_by(geo) %>%
  mutate(prob = n / sum(n)) %>%
  ungroup()

# One line per residence type across the 0-10 satisfaction scale.
ggplot(finland_probs, aes(x = as.factor(stfdem), y = prob, color = geo)) +
  geom_point() +
  # group = geo is needed because the x-axis is a factor.
  geom_line(aes(group = geo)) +
  labs(
    title = "Conditional Probabilities of Satisfaction for Democracy in Finland",
    subtitle = "by Place of Residence",
    x = "Satisfaction (0-10)",
    y = "Probability"
  ) +
  theme_minimal()
## There is a clear left skew in the visualization. Furthermore, there is a large drop off for all 3 area categories with regards to stfdem after (8).