packages <- c("tidyverse", "modelsummary", "forcats", "RColorBrewer",
"fst", "viridis", "knitr", "kableExtra", "rmarkdown", "ggridges", "viridis", "questionr")
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [16] "datasets" "methods" "base"
##
## [[4]]
## [1] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "fst" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "viridis" "viridisLite" "fst" "RColorBrewer" "modelsummary"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[7]]
## [1] "knitr" "viridis" "viridisLite" "fst" "RColorBrewer"
## [6] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[8]]
## [1] "kableExtra" "knitr" "viridis" "viridisLite" "fst"
## [6] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[9]]
## [1] "rmarkdown" "kableExtra" "knitr" "viridis" "viridisLite"
## [6] "fst" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "ggridges" "rmarkdown" "kableExtra" "knitr" "viridis"
## [6] "viridisLite" "fst" "RColorBrewer" "modelsummary" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[11]]
## [1] "ggridges" "rmarkdown" "kableExtra" "knitr" "viridis"
## [6] "viridisLite" "fst" "RColorBrewer" "modelsummary" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[12]]
## [1] "questionr" "ggridges" "rmarkdown" "kableExtra" "knitr"
## [6] "viridis" "viridisLite" "fst" "RColorBrewer" "modelsummary"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
setwd("~/Desktop/SOC_202_YAY/")
getwd()
## [1] "/Users/apple/Desktop/SOC_202_YAY"
ess <- read_fst("All-ESS-Data.fst")
table(ess$essround)
##
## 1 2 3 4 5 6 7 8 9 10
## 42359 47537 43000 56752 52458 54673 40185 44387 49519 59685
ess$year <- NA
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
for(i in 1:10){
ess$year[ess$essround == i] <- replacements[i]
}
table(ess$eisced)
##
## 0 1 2 3 4 5 6 7 55 77 88 99
## 73306 38823 71917 74258 87348 49268 42651 49558 1052 483 497 1394
table(ess$stfeco)
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 39574 25671 45330 59142 55182 75639 57084 58459 41634 13226 8012 424 10562
## 99
## 616
table(ess$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 3933 3540 7343 13759 17016 52117 43707 87869 130802 76229 51006
## 77 88 99
## 330 2163 741
uk_data <- ess %>%
filter(cntry == "GB") %>%
mutate(
stfeco = ifelse(stfeco %in% c( 77, 88, 99), NA, stfeco),
eisced = ifelse(eisced %in% c(55,77, 88, 99), NA, eisced),
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
)
datasummary_skim(uk_data %>% select(stfeco, eisced, happy))
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max | ||
|---|---|---|---|---|---|---|---|---|
| stfeco | 12 | 2 | 4.4 | 2.3 | 0.0 | 5.0 | 10.0 | |
| eisced | 9 | 2 | 2.2 | 2.4 | 0.0 | 1.0 | 7.0 | |
| happy | 12 | 0 | 7.5 | 1.9 | 0.0 | 8.0 | 10.0 |
As can be seen from this chart, the average happiness index of British people is high. The British public is halfway between satisfied with the current state of the British economy, not very satisfied and not very dissatisfied overall. It can also be seen from the table that the highest average level of education in Britain is 2.2, which is the secondary level, which is somewhat unexpected.
happy_by_year <- uk_data %>%
group_by(year) %>%
summarize(mean_trust = mean(happy, na.rm = TRUE))
happy_by_year
## # A tibble: 10 × 2
## year mean_trust
## <dbl> <dbl>
## 1 2002 7.54
## 2 2004 7.37
## 3 2006 7.43
## 4 2008 7.44
## 5 2010 7.41
## 6 2012 7.50
## 7 2014 7.47
## 8 2016 7.64
## 9 2018 7.57
## 10 2020 7.29
ggplot(happy_by_year, aes(x = year, y = mean_trust)) +
geom_line(color = "blue", size = 1) +
geom_point(color = "red", size = 3) +
labs(title = "Happy Level in UK (2002-2020)",
x = "Survey Year",
y = "Happy (0-10)") +
ylim(0, 10) +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
It can be seen that the average happiness of the British people is high, and has remained stable at around 7.5 from 2002 to 2020. The past ten years have been stable, without major fluctuations and changes. But you can see a slight decline in average happiness from around 2017 through 2020.
ess_selected <- ess %>%
filter(cntry %in% c("GB", "DE", "FR")) %>%
mutate(happy = ifelse(happy %in% c(77, 88, 99), NA, happy))
task3plot <- ggplot(ess_selected, aes(x = reorder(cntry, -happy, FUN=median), y = happy, fill = cntry)) +
geom_boxplot() +
theme_minimal() +
theme(legend.position = "none") +
labs(title = "Boxplot comparison about happiness in politicians (UK, Germany, France)",
x = "Country",
y = "Scale (0-10)")
task3plot
## Warning: Removed 253 rows containing non-finite values (`stat_boxplot()`).
The chart directly illustrates the differences in various happiness values between German, French and British people. The median happiness score in these countries is similar, at around 7.7. Germany has the lowest minimum value of happiness of the three countries. It is worth noting that the French data are negatively skewed.
uk_data <- uk_data %>%
mutate(
edulvla = case_when(
essround < 5 & edulvla == 55 ~ NA_real_,
TRUE ~ edulvla
),
edulvlb = case_when(
essround >= 5 & edulvlb == 5555 ~ NA_real_,
TRUE ~ edulvlb
),
educ_level = case_when(
essround < 5 & edulvla == 5 ~ "BA",
essround >= 5 & edulvlb > 600 ~ "BA",
TRUE ~ "No BA"
)
)
table(uk_data$educ_level)
##
## BA No BA
## 6116 14863
happyedu <- datasummary_crosstab(happy ~ educ_level, data = uk_data)
happyedu
| happy | BA | No BA | All | |
|---|---|---|---|---|
| 0 | N | 14 | 99 | 113 |
| % row | 12.4 | 87.6 | 100.0 | |
| 1 | N | 23 | 89 | 112 |
| % row | 20.5 | 79.5 | 100.0 | |
| 2 | N | 33 | 207 | 240 |
| % row | 13.8 | 86.2 | 100.0 | |
| 3 | N | 113 | 389 | 502 |
| % row | 22.5 | 77.5 | 100.0 | |
| 4 | N | 153 | 517 | 670 |
| % row | 22.8 | 77.2 | 100.0 | |
| 5 | N | 322 | 1246 | 1568 |
| % row | 20.5 | 79.5 | 100.0 | |
| 6 | N | 448 | 1093 | 1541 |
| % row | 29.1 | 70.9 | 100.0 | |
| 7 | N | 1252 | 2552 | 3804 |
| % row | 32.9 | 67.1 | 100.0 | |
| 8 | N | 1913 | 4186 | 6099 |
| % row | 31.4 | 68.6 | 100.0 | |
| 9 | N | 1282 | 2458 | 3740 |
| % row | 34.3 | 65.7 | 100.0 | |
| 10 | N | 552 | 2005 | 2557 |
| % row | 21.6 | 78.4 | 100.0 | |
| All | N | 6116 | 14863 | 20979 |
| % row | 29.2 | 70.8 | 100.0 |
table(uk_data$happy, uk_data$educ_level) %>%
cprop()
##
## BA No BA All
## 0 0.2 0.7 0.5
## 1 0.4 0.6 0.5
## 2 0.5 1.4 1.1
## 3 1.9 2.6 2.4
## 4 2.5 3.5 3.2
## 5 5.3 8.4 7.5
## 6 7.3 7.4 7.4
## 7 20.5 17.2 18.2
## 8 31.3 28.2 29.1
## 9 21.0 16.6 17.9
## 10 9.0 13.5 12.2
## Total 100.0 100.0 100.0
What you can see here is that the majority of people in the UK are happy with their lives, regardless of whether they have a degree or not. Since there are more than twice as many non-BA holders as there are ba holders, it’s hard to see whether education has any effect on a person’s happiness.
uk_data <- uk_data %>%
mutate(pdwrk_recode = case_when(
pdwrk == 1 ~ 'yes',
pdwrk == 0 ~ 'no',
))
table(uk_data$pdwrk_recode)
##
## no yes
## 10164 10815
table(uk_data$pdwrk)
##
## 0 1
## 10164 10815
happypdwrk <- datasummary_crosstab(happy ~ pdwrk_recode, data = uk_data)
happypdwrk
| happy | no | yes | All | |
|---|---|---|---|---|
| 0 | N | 89 | 24 | 113 |
| % row | 78.8 | 21.2 | 100.0 | |
| 1 | N | 75 | 37 | 112 |
| % row | 67.0 | 33.0 | 100.0 | |
| 2 | N | 149 | 91 | 240 |
| % row | 62.1 | 37.9 | 100.0 | |
| 3 | N | 288 | 214 | 502 |
| % row | 57.4 | 42.6 | 100.0 | |
| 4 | N | 370 | 300 | 670 |
| % row | 55.2 | 44.8 | 100.0 | |
| 5 | N | 866 | 702 | 1568 |
| % row | 55.2 | 44.8 | 100.0 | |
| 6 | N | 733 | 808 | 1541 |
| % row | 47.6 | 52.4 | 100.0 | |
| 7 | N | 1589 | 2215 | 3804 |
| % row | 41.8 | 58.2 | 100.0 | |
| 8 | N | 2749 | 3350 | 6099 |
| % row | 45.1 | 54.9 | 100.0 | |
| 9 | N | 1708 | 2032 | 3740 |
| % row | 45.7 | 54.3 | 100.0 | |
| 10 | N | 1523 | 1034 | 2557 |
| % row | 59.6 | 40.4 | 100.0 | |
| All | N | 10164 | 10815 | 20979 |
| % row | 48.4 | 51.6 | 100.0 |
table(uk_data$happy, uk_data$pdwrk) %>%
cprop()
##
## 0 1 All
## 0 0.9 0.2 0.5
## 1 0.7 0.3 0.5
## 2 1.5 0.8 1.1
## 3 2.8 2.0 2.4
## 4 3.6 2.8 3.2
## 5 8.5 6.5 7.5
## 6 7.2 7.5 7.4
## 7 15.7 20.5 18.2
## 8 27.1 31.0 29.1
## 9 16.8 18.8 17.9
## 10 15.0 9.6 12.2
## Total 100.0 100.0 100.0
Here you can see how having a gainful job affects happiness. More people with unpaid jobs say they are unhappy with their lives than those with paid jobs. In Britain, though, overall happiness is high. But it can be seen that more people with jobs and wages rate their own happiness as higher.
df <- uk_data %>%
filter(!is.na(happy) & !is.na(pdwrk))
df <- df %>%
mutate(pdwrk = case_when(
pdwrk == 1 ~ "Yes",
pdwrk == 0 ~ "No",
TRUE ~ as.character(pdwrk) # or "Other", or NA_character_, depending on your needs
))
table(df$pdwrk)
##
## No Yes
## 10139 10807
uk_clean <- uk_data %>%
filter(!is.na(pdwrk) & !is.na(happy))
uk_probs <- uk_clean %>%
count(happy, pdwrk) %>%
group_by(pdwrk) %>%
mutate(prob = n / sum(n))
ggplot(uk_probs, aes(x = as.factor(happy), y = prob, color = pdwrk)) +
geom_point() +
geom_line(aes(group = pdwrk)) +
labs(title = "Conditional Probabilities of happy level",
subtitle = "with the paid job",
x = "Happiness Scale",
y = "Probability") +
theme_minimal()
After visualization, the happiness levels of the two groups become clearer and easier to compare. Light blue is those with paid work, dark blue is those without paid work. It can be seen that in the happiness index range from 0 to 4, that is, feeling very unhappy or somewhat unhappy, the proportion of people who do not pay work is higher. And the people who have paid work are 6 to 10 points happier than the other group, that is, they are happier. So whether or not you have a job can affect how happy people are. Both groups peak at 8 at the same time. The trend of the two data points is very similar. Finally, it’s worth noting that more people who don’t have paid work are extremely happy than those who do.