# Packages this analysis depends on (extend the vector as needed)
packages <- c("tidyverse", "fst", "modelsummary")
# Determine which of them are absent from the local library; install only those
new_packages <- setdiff(packages, rownames(installed.packages()))
if (length(new_packages) > 0) {
  install.packages(new_packages)
}
# Attach every package by name
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
##Task 1 Provide code and answer.
Prompt and question: calculate the average for the variable ‘health’ for the countries of Serbia and Belgium. On average, based on the ESS data, who reports higher levels of subjective health: Serbia or Belgium?
#serbia_data <- read.fst("serbia_data.fst")
#belgium_data <- read.fst("belgium_data.fst")
##Health Data for Serbia
serbia_data <- read.fst("serbia_data.fst")
serbia_health <- serbia_data %>%
  select(health)
# Recode non-substantive answers to NA. The ESS `health` item is single-digit
# (1 = very good ... 5 = very bad; 7/8/9 = refusal / don't know / no answer),
# so the two-digit codes 77/88/99 never match it. Keeping only the valid
# range 1-5 removes every missing-value code regardless of how it is written.
serbia_health$health[!(serbia_health$health %in% 1:5)] <- NA
##Calculate Mean for Serbia
# Mean over valid answers only (a lower mean means better self-rated health).
# NOTE: re-knit so the printed value reflects the corrected recode.
mean_serbia <- mean(serbia_health$health, na.rm = TRUE)
cat("Mean of 'health' (Serbia) is:", mean_serbia, "\n")
## Mean of 'y' is: 2.575254
##Health Data for Belgium
belgium_data <- read.fst("belgium_data.fst")
belgium_health <- belgium_data %>%
  select(health)
# Recode non-substantive answers to NA. The ESS `health` item is single-digit
# (1 = very good ... 5 = very bad; 7/8/9 = refusal / don't know / no answer),
# so the two-digit codes 77/88/99 never match it. Keeping only the valid
# range 1-5 removes every missing-value code regardless of how it is written.
belgium_health$health[!(belgium_health$health %in% 1:5)] <- NA
##Calculate Mean for Belgium
# Mean over valid answers only (a lower mean means better self-rated health).
# NOTE: re-knit so the printed value reflects the corrected recode.
mean_belgium <- mean(belgium_health$health, na.rm = TRUE)
cat("Mean of 'health' (Belgium) is:", mean_belgium, "\n")
## Mean of 'y' is: 2.063377
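##Compare Belgium = 2.06, Serbia = 2.58. The ESS health item is coded from 1 (very good) to 5 (very bad), so a lower mean indicates better health; therefore Belgium, not Serbia, reports higher levels of subjective health on average.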
# Display both country means for a side-by-side comparison
print(mean_belgium)
## [1] 2.063377
print(mean_serbia)
## [1] 2.575254
##Task 2 Provide code and answer.
Prompt and question: what is the most common category selected, for Norwegian respondents, for how often they socially meet with friends or relatives. The variable of interest is: sclmeet.
norway_data <- read.fst("norway_data.fst")
# Keep only Norwegian respondents and the social-contact item. An earlier
# unfiltered select() was removed because its result was overwritten at once.
norway_social <- norway_data %>%
  filter(cntry == "NO") %>%
  select(sclmeet)
# Raw frequencies, still including the non-substantive codes
table(norway_social$sclmeet)
##
## 1 2 3 4 5 6 7 77 88
## 44 356 746 2569 2119 5764 4458 2 7
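ESS value labels for sclmeet (* = non-substantive answer, treated as missing):
1 Never
2 Less than once a month
3 Once a month
4 Several times a month
5 Once a week
6 Several times a week
7 Every day
77 Refusal*
88 Don’t know*
99 No answer*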
# Set the ESS non-substantive codes (77 refusal, 88 don't know, 99 no answer)
# to NA. Listing them explicitly also covers 99, which the range 77:88 missed.
norway_social$sclmeet[norway_social$sclmeet %in% c(77, 88, 99)] <- NA
table(norway_social$sclmeet)
##
## 1 2 3 4 5 6 7
## 44 356 746 2569 2119 5764 4458
# Attach the ESS value labels for sclmeet (1 = Never ... 7 = Every day).
# `levels` is given explicitly so each label stays tied to its numeric code
# even if a code happens to be absent from the data.
# NOTE: re-knit so the printed table and mode show these labels.
norway_social$sclmeet_category <- factor(
  norway_social$sclmeet,
  levels = 1:7,
  labels = c(
    "Never", "Less than once a month", "Once a month",
    "Several times a month", "Once a week", "Several times a week",
    "Every day"
  )
)
table(norway_social$sclmeet_category)
##
## Daily or almost daily Weekly Monthly
## 44 356 746
## Less than monthly Never Not Applicable
## 2569 2119 5764
## Don't Know
## 4458
# Return the most frequent value(s) of `v`.
#
# v: an atomic vector or factor. NA entries are ignored (table() drops them).
# Returns a character vector holding the name of every value tied for the
# highest count, or character(0) when `v` has no non-missing values.
get_mode <- function(v) {
  tbl <- table(v)
  # Guard: max() on an empty table would warn and return -Inf
  if (length(tbl) == 0) {
    return(character(0))
  }
  # names() of a table are already character, so no conversion is needed
  names(tbl)[tbl == max(tbl)]
}
# Most frequent labelled category among the Norwegian respondents
mode_norway_social <- get_mode(v = norway_social$sclmeet_category)
print(mode_norway_social)
## [1] "Not Applicable"
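The most common response among Norwegian respondents is category 6 (5,764 respondents), which the ESS codebook labels “Several times a week” (codes 1–7 run from “Never” to “Every day”). So Norwegians most commonly meet socially with friends or relatives several times a week.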
##Task 3 Provide code and answer.
Prompt and question: when you use the summary() function for the variable wrclmch (i.e., how worried about climate change) for both the countries of Estonia and Portugal – what do you notice? What stands out as different when you compare the two countries (note: look up the variable information on the ESS website to help with interpretation)? Explain while referring to the output generated.
# Load the ESS extracts for the two countries
portugal_data <- read.fst("portugal_data.fst")
estonia_data <- read.fst("estonia_data.fst")
# Climate-change worry item, Portuguese respondents only
portugal_wrclmch <- select(filter(portugal_data, cntry == "PT"), wrclmch)
# Same item, Estonian respondents only
estonia_wrclmch <- select(filter(estonia_data, cntry == "EE"), wrclmch)
# Quartiles, mean and NA count of wrclmch for Portugal
print(summary(portugal_wrclmch))
## wrclmch
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.583
## 3rd Qu.:4.000
## Max. :8.000
## NA's :14773
# Same summary for Estonia
print(summary(estonia_wrclmch))
## wrclmch
## Min. :1.000
## 1st Qu.:2.000
## Median :3.000
## Mean :2.855
## 3rd Qu.:3.000
## Max. :8.000
## NA's :13295
# Frequency of each recorded wrclmch code, Portugal then Estonia
table(portugal_wrclmch[["wrclmch"]])
##
## 1 2 3 4 5 6 7 8
## 55 251 1171 1203 380 13 4 31
table(estonia_wrclmch[["wrclmch"]])
##
## 1 2 3 4 5 6 7 8
## 343 852 1612 565 125 56 1 7
# Summaries shown again next to the written comparison (Portugal first)
print(summary(portugal_wrclmch))
## wrclmch
## Min. :1.000
## 1st Qu.:3.000
## Median :4.000
## Mean :3.583
## 3rd Qu.:4.000
## Max. :8.000
## NA's :14773
# Estonia
print(summary(estonia_wrclmch))
## wrclmch
## Min. :1.000
## 1st Qu.:2.000
## Median :3.000
## Mean :2.855
## 3rd Qu.:3.000
## Max. :8.000
## NA's :13295
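What stands out is the first quartile, the median and the mean.
The 1st quartile for Portugal is 3, while for Estonia it is 2, so the middle 50% of responses for Estonia starts at 2. The mean for Portugal is 3.58 while the mean for Estonia is 2.86. The median for Portugal is 4 while it is only 3 for Estonia.
They also both have the same minimum (1) and maximum (8) values. Note that, per the ESS codebook, the substantive scale runs only from 1 (“Not at all worried”) to 5 (“Extremely worried”); codes 6–8 are non-substantive answers (not applicable, refusal, don’t know), so the maximum of 8 is not a real level of worry and these codes should be set to NA before the means are interpreted.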
##Task 4 Provide code and answer.
Prompt and question: using the variables stfhlth and gndr, answer the following: on average, who is more satisfied with health services in Italy, men or women? Explain while referring to the output generated.
# Load the Italian ESS extract and inspect the raw gender codes
italy_data <- read.fst("italy_data.fst")
table(italy_data[["gndr"]])
##
## 1 2 9
## 4836 5329 13
# Gender code 9 is treated as missing
italy_data$gndr <- replace(italy_data$gndr, italy_data$gndr %in% 9, NA)
# Raw distribution of satisfaction with health services
table(italy_data[["stfhlth"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88 99
## 455 279 553 778 958 1470 1718 1869 1363 411 192 24 105 3
# Any stfhlth code from 77 to 99 is treated as missing
italy_data$stfhlth <- replace(
  italy_data$stfhlth,
  italy_data$stfhlth %in% 77:99,
  NA
)
# Label the gender codes, then keep only rows where both variables are present
italy_data <- italy_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ as.character(gndr)
    )
  ) %>%
  filter(!is.na(gndr), !is.na(stfhlth))
table(italy_data[["gndr"]])
##
## Female Male
## 5260 4774
# Distribution of the cleaned satisfaction scores
table(italy_data[["stfhlth"]])
##
## 0 1 2 3 4 5 6 7 8 9 10
## 455 278 552 775 957 1466 1716 1869 1363 411 192
# Average satisfaction with health services, split by gender
italy_data %>%
  group_by(gndr) %>%
  summarise(meanstfhlth = mean(stfhlth, na.rm = TRUE))
## # A tibble: 2 × 2
## gndr meanstfhlth
## <chr> <dbl>
## 1 Female 5.38
## 2 Male 5.54
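On average, men are slightly more satisfied with health services in Italy: the mean stfhlth score is 5.54 for men compared with 5.38 for women on the 0–10 scale, where higher scores indicate greater satisfaction.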
##Task 5 Create boxplots, in the same graph, for stfdem and stfgov for the country of France. Provide an appropriate title and labels. Further, tell us: according to ESS data, would we say that the median French person is more satisfied with the democracy or the national government? Explain.
france_data <- read.fst("france_data.fst")
# Side-by-side boxplots of satisfaction with democracy (stfdem) and with the
# national government (stfgov). The recoding happens only inside this pipeline;
# france_data itself is left unchanged.
france_data %>%
  # Set the non-substantive codes to NA so they do not distort the boxes
  mutate(
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
    stfgov = ifelse(stfgov %in% c(77, 88, 99), NA, stfgov)
  ) %>%
  # Reshape to long format: one row per (item, score) pair.
  # pivot_longer() replaces the superseded gather().
  select(stfdem, stfgov) %>%
  pivot_longer(
    cols = c(stfdem, stfgov),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Draw the boxplots with readable labels instead of raw variable names
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot() +
  scale_x_discrete(
    labels = c(stfdem = "Democracy", stfgov = "National government")
  ) +
  labs(
    y = "Satisfaction score (0-10)",
    x = "Satisfaction with ...",
    title = "Satisfaction with democracy vs. national government in France"
  ) +
  theme_minimal()
## Warning: Removed 557 rows containing non-finite values (`stat_boxplot()`).
# Medians over valid answers only. france_data still contains the raw
# 77/88/99 codes (the plotting pipeline never assigns its result back), so
# they are set to NA here and dropped with na.rm = TRUE.
# NOTE: re-knit to confirm the printed medians.
median(
  replace(france_data$stfdem, france_data$stfdem %in% c(77, 88, 99), NA),
  na.rm = TRUE
)
## [1] 5
median(
  replace(france_data$stfgov, france_data$stfgov %in% c(77, 88, 99), NA),
  na.rm = TRUE
)
## [1] 4
The median French person is more satisfied with the democracy as the median for national government is only 4 but the median for democracy is 5.