##Set Up
# Packages used throughout the analysis (each one listed exactly once).
packages <- c(
  "tidyverse", "modelsummary", "forcats", "RColorBrewer", "fst", "viridis",
  "knitr", "kableExtra", "rmarkdown", "ggridges", "questionr"
)

# Attach everything up front. invisible() stops lapply() from printing the
# full search path once per package.
invisible(lapply(packages, library, character.only = TRUE))

# NOTE(review): absolute, machine-specific path; the script only runs where
# this folder exists. Consider a project-relative path instead.
setwd("C:\\Users\\helen\\OneDrive\\Desktop\\soc202")

# Load the survey file from the working directory (read_fst() comes from fst).
ess <- read_fst("All-ESS-Data.fst")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "fstcore" "fst"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[2]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "fstcore" "fst" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "fstcore" "fst" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[6]]
## [1] "viridis" "viridisLite" "RColorBrewer" "modelsummary" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "fstcore"
## [16] "fst" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "knitr" "viridis" "viridisLite" "RColorBrewer" "modelsummary"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "fstcore" "fst" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "kableExtra" "knitr" "viridis" "viridisLite" "RColorBrewer"
## [6] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "fstcore" "fst" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[9]]
## [1] "rmarkdown" "kableExtra" "knitr" "viridis" "viridisLite"
## [6] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[10]]
## [1] "ggridges" "rmarkdown" "kableExtra" "knitr" "viridis"
## [6] "viridisLite" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "fstcore" "fst"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[11]]
## [1] "ggridges" "rmarkdown" "kableExtra" "knitr" "viridis"
## [6] "viridisLite" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "fstcore" "fst"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[12]]
## [1] "questionr" "ggridges" "rmarkdown" "kableExtra" "knitr"
## [6] "viridis" "viridisLite" "RColorBrewer" "modelsummary" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "fstcore"
## [21] "fst" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
#TASK 1
# Number of respondents in each ESS round (essround).
table(ess$essround)
##
## 1 2 3 4 5 6 7 8 9 10
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
# Survey year for each ESS round; position i holds the year of round i.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)

# Vectorised lookup: match() turns each round number into its position in
# `replacements`. A missing or unexpected round number yields NA, so those
# rows get year = NA.
ess$year <- replacements[match(ess$essround, seq_along(replacements))]
# Raw frequency tables for the three outcome variables, run before any
# recoding so that every code present in the data is visible.
# happy: values 0-10 plus the codes 77, 88 and 99.
table(ess$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 3933 3540 7343 13759 17016 52117 43707 87869 130802 76229 51006
## 77 88 99
## 330 2163 741
# health: values 1-5 plus the codes 7, 8 and 9.
table(ess$health)
##
## 1 2 3 4 5 7 8 9
## 112145 203129 131573 35374 7435 119 421 359
# impsafe: values 1-6 plus the codes 7, 8 and 9.
table(ess$impsafe)
##
## 1 2 3 4 5 6 7 8 9
## 128667 163675 85309 43811 25787 5323 1352 4044 4522
# Belgium-only subset with the non-substantive answer codes blanked out.
# health and impsafe share the codes 7/8/9, so they are handled together;
# happy uses 77/88/99.
belgium_data <- ess %>%
  filter(cntry == "BE") %>%
  mutate(
    across(
      c(health, impsafe),
      function(v) ifelse(v %in% c(7, 8, 9), NA, v)
    ),
    happy = ifelse(happy %in% c(77, 88, 99), NA, happy)
  )
# Check the recode on the Belgium subset: table() drops NA by default, so the
# codes set to NA above should no longer appear.
# happy: only 0-10 remain.
table(belgium_data$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 50 27 104 194 234 830 999 3503 6521 3402 1565
# health: only 1-5 remain.
table(belgium_data$health)
##
## 1 2 3 4 5
## 4033 9262 3338 674 137
# impsafe: only 1-6 remain.
table(belgium_data$impsafe)
##
## 1 2 3 4 5 6
## 3205 7074 4025 1849 987 232
# Summary statistics for the three recoded Belgium variables.
belgium_data %>%
  select(happy, health, impsafe) %>%
  datasummary_skim()
| | Unique (#) | Missing (%) | Mean | SD | Min | Median | Max | |
|---|---|---|---|---|---|---|---|---|
| happy | 12 | 0 | 7.7 | 1.5 | 0.0 | 8.0 | 10.0 | |
| health | 6 | 0 | 2.1 | 0.8 | 1.0 | 2.0 | 5.0 | |
| impsafe | 7 | 0 | 2.5 | 1.2 | 1.0 | 2.0 | 6.0 | |
In Belgium, reported happiness is generally high: the mean is 7.7 on the 0-10 scale, and the maximum is 10. Self-rated health is also good, with a mean of about 2, which corresponds to generally good health. The variable impsafe measures how important respondents think it is to live in safe surroundings. Most people answered "like me" or "somewhat like me" (mean of 2.5), meaning that while there is some concern about living in a safe place, it is not the biggest priority.
# Mean of impsafe per survey year in Belgium, ignoring missing answers.
impsafe_by_year <- summarise(
  group_by(belgium_data, year),
  impsafe_mean = mean(impsafe, na.rm = TRUE)
)
impsafe_by_year
## # A tibble: 10 × 2
## year impsafe_mean
## <dbl> <dbl>
## 1 2002 2.43
## 2 2004 2.37
## 3 2006 2.53
## 4 2008 2.51
## 5 2010 2.52
## 6 2012 2.39
## 7 2014 2.47
## 8 2016 2.54
## 9 2018 2.55
## 10 2020 2.56
# Line chart of the yearly mean of impsafe in Belgium.
ggplot(impsafe_by_year, aes(x = year, y = impsafe_mean)) +
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0).
  geom_line(color = "blue", linewidth = 1) + # line to show the trend
  geom_point(color = "red", size = 3) + # points to highlight each year's value
  labs(
    title = "Important to live in secure and safe surroundings (2002-2020)",
    x = "Survey Year",
    # impsafe is answered on a 1-6 scale, not 0-10.
    y = "importance (1-6)"
  ) +
  ylim(0, 6) + # fix the y-axis from 0 to 6 so the full answer scale is shown
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Over the years there was little change in the importance of living in a safe place: the line shows only slight increases and decreases across the graph.
# Subset to Belgium (BE), the United Kingdom (GB) and France (FR) and blank
# out the non-substantive impsafe answers. impsafe uses the single-digit codes
# 7, 8 and 9 for these (see the raw frequency table of ess$impsafe), not
# 77/88/99, so those are the values that must become NA.
ess_selected <- ess %>%
  filter(cntry %in% c("BE", "GB", "FR")) %>%
  mutate(impsafe = ifelse(impsafe %in% c(7, 8, 9), NA, impsafe))
# Boxplots of impsafe per country, ordered by the country median.
task3plot <- ggplot(
  ess_selected,
  aes(
    # na.rm = TRUE is forwarded to median(); without it a single NA in
    # impsafe makes every country's median NA and the ordering meaningless.
    x = reorder(cntry, -impsafe, FUN = median, na.rm = TRUE),
    y = impsafe,
    fill = cntry
  )
) +
  geom_boxplot() +
  theme_minimal() +
  theme(legend.position = "none") + # fill only repeats the x-axis labels
  labs(title = "Boxplot comparison for importance of living in a safe enviroment
(Belgium, United Kingdom, France)",
    x = "Country",
    # impsafe is answered on a 1-6 scale.
    y = "Scale (1-6)"
  )
task3plot
## Warning: Removed 381 rows containing non-finite values (`stat_boxplot()`).
All three countries have the same median, meaning that, on average, they value living in a safe place equally. However, Belgium has more outliers towards 6 (not like me at all). France's maximum also extends much further than that of the other two countries, meaning that its responses cover a wider range. For Belgium, there is more variety in responses leaning towards 1 (very much like me).
# Collapse domicil into three settlement types. Code 2 is kept as its own
# "Peri-Urban" group (it could instead be merged into "Urban"; either choice
# needs to be justified). Codes 7, 8 and 9 become missing; any other value is
# passed through unchanged as text.
belgium_data <- belgium_data %>%
  mutate(
    geo = case_match(
      as.character(domicil),
      "1" ~ "Urban",
      "2" ~ "Peri-Urban",
      c("3", "4", "5") ~ "Rural",
      c("7", "8", "9") ~ NA_character_,
      .default = as.character(domicil)
    )
  )
# Check the recode: the geo counts should add up from the raw domicil codes
# (Urban = code 1, Peri-Urban = code 2, Rural = codes 3 + 4 + 5).
table(belgium_data$geo)
##
## Peri-Urban Rural Urban
## 1797 13293 2322
# Raw domicil codes for comparison with the geo counts.
table(belgium_data$domicil)
##
## 1 2 3 4 5 7 8 9
## 2322 1797 4197 8185 911 1 5 33
# Label brncntr as "Yes" (code 1) or "No" (code 2). Codes 7, 8 and 9, and any
# value not listed, become missing.
belgium_data <- belgium_data %>%
  mutate(
    born_in_country = case_match(
      brncntr,
      1 ~ "Yes",
      2 ~ "No",
      c(7, 8, 9) ~ NA_character_
    )
  )
# Check the recode: number of respondents per born_in_country label.
table(belgium_data$born_in_country)
##
## No Yes
## 2077 15370
# Column percentages of settlement type (rows) within each born_in_country
# group (columns).
cprop(table(belgium_data$geo, belgium_data$born_in_country))
##
## No Yes All
## Peri-Urban 10.3 10.3 10.3
## Rural 56.1 79.1 76.4
## Urban 33.5 10.6 13.3
## Total 100.0 100.0 100.0
Most respondents who were born in Belgium live in rural areas (79.1%), compared with 56.1% of those born abroad. Urban areas show the opposite pattern: only 10.6% of people born in the country live there, versus 33.5% of people born outside it. One possible inference is that urban areas are more popular among people who have immigrated to the country. The share living in peri-urban areas is the same (10.3%) for those born in and outside of the country.
# Dodged bar chart of the impsafe distribution within each settlement type.
# cprop() gives column percentages; its "Total" row and "All" column are
# margins and are dropped before plotting.
cprop(table(belgium_data$impsafe, belgium_data$geo)) %>%
  as.data.frame() %>%
  filter(!(Var1 == "Total" | Var2 == "All")) %>%
  ggplot(aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "dodge") +
  labs(
    title = "Importance of living in a safe enviroment",
    x = "Belief Scale (1 = Very much like me - 6 = Not like me at all)",
    y = "Conditional Percentage",
    fill = ""
  )
The majority of responses fall in category 2 (like me), and the largest share of respondents in this category came from rural areas of the country. Interestingly, the only categories in which urban respondents had the highest share were 1 (very much like me), 5 (not like me), and 6 (not like me at all). While overall only a low percentage of people in urban areas responded with 6, there is a small minority that values living in a safe environment less than the rest of the population.