# Packages this script depends on (extend the vector as needed)
packages <- c("tidyverse", "fst", "modelsummary")
# Work out which of them are missing from the local library and install those
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}
# Attach every package; the value returned by each library() call is printed
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'fst' was built under R version 4.3.2
## Warning: package 'modelsummary' was built under R version 4.3.2
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
# Read the ESS data file from the working directory
ess <- read_fst(path = "All-ESS-Data.fst")
## Warning: package 'fstcore' was built under R version 4.3.2
Provide code and answer.
Prompt and question: calculate the average for the variable ‘happy’ for the country of Norway. On average, based on the ESS data, who reports higher levels of happiness: Norway or Belgium?
Note: we already did it for Belgium. You just need to compare to Norway’s average, making sure to provide the code for both.
# List the distinct country codes present in the data
unique(ess[["cntry"]])
## [1] "AT" "BE" "CH" "CZ" "DE" "DK" "ES" "FI" "FR" "GB" "GR" "HU" "IE" "IL" "IT"
## [16] "LU" "NL" "NO" "PL" "PT" "SE" "SI" "EE" "IS" "SK" "TR" "UA" "BG" "CY" "RU"
## [31] "HR" "LV" "RO" "LT" "AL" "XK" "ME" "RS" "MK"
# Keep only Belgian respondents and their 'happy' answer;
# y is a working copy of 'happy' that can be recoded without touching the original
belgium_happy <- ess %>%
  filter(cntry == "BE") %>%
  transmute(happy, y = happy)
# Frequency of every value before any cleaning
table(belgium_happy[["y"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88 99
## 50 27 104 194 234 830 999 3503 6521 3402 1565 3 16 3
# 77, 88 and 99 must go or they will distort the results.
# See the data portal for what they represent (e.g. DK, Refusal, etc.)
# Replace every value in the 77-99 range with NA
belgium_happy$y <- replace(belgium_happy$y, belgium_happy$y %in% 77:99, NA)
# Tabulate again to confirm the recode
table(belgium_happy[["y"]])
##
## 0 1 2 3 4 5 6 7 8 9 10
## 50 27 104 194 234 830 999 3503 6521 3402 1565
# Average of the cleaned Belgian answers (NAs are dropped)
mean_b <- belgium_happy %>%
  pull(y) %>%
  mean(na.rm = TRUE)
cat("Mean of 'Belgium' is:", mean_b, "\n")
## Mean of 'Belgium' is: 7.737334
# Keep only Norwegian respondents and their 'happy' answer,
# plus a working copy y that can be recoded
norway_happy <- ess %>%
  filter(cntry == "NO") %>%
  select(happy) %>%
  mutate(y = happy)
# Frequency of every value before any cleaning
table(norway_happy[["y"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 15 29 59 163 238 730 817 2617 5235 3796 2344 12 10
# Recode values 77 through 99 to NA for Norway. The recode has to target
# norway_happy (not belgium_happy); otherwise the 77 and 88 entries shown in
# the table above stay in the data and inflate Norway's mean.
norway_happy$y[norway_happy$y %in% 77:99] <- NA
# checking again: only the 0-10 answers should remain
table(norway_happy$y)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 15 29 59 163 238 730 817 2617 5235 3796 2344
# Average of the cleaned Norwegian answers (NAs are dropped)
mean_n <- mean(norway_happy$y, na.rm = TRUE)
cat("Mean of 'Norway' is:", mean_n, "\n")
## Mean of 'Norway' is: 7.975005
Ans: On average, Norway reports higher levels of happiness than Belgium.
Provide code and answer.
Prompt and question: what is the most common category selected, for Irish respondents, for frequency of binge drinking? The variable of interest is: alcbnge.
More info here: https://ess-search.nsd.no/en/variable/0c65116e-7481-4ca6-b1d9-f237db99a694.
Hint: need to convert numeric value entries to categories as specified in the variable information link. We did similar steps for Estonia and the climate change attitude variable.
# Keep only Irish respondents and their 'alcbnge' answer,
# plus a working copy y that can be recoded
ireland_alcbnge <- ess %>%
  filter(cntry == "IE") %>%
  select(alcbnge) %>%
  mutate(y = alcbnge)
# Frequency of every code before any cleaning
table(ireland_alcbnge[["y"]])
##
## 1 2 3 4 5 6 7 8
## 65 650 346 417 239 641 26 6
# Codes 6 through 8 are set to NA (they get no category label below)
ireland_alcbnge$y <- replace(ireland_alcbnge$y, ireland_alcbnge$y %in% 6:8, NA)
# Attach a text label to each remaining code so the mode reads as a category
# rather than a number
df <- ireland_alcbnge %>%
  mutate(
    y_category = case_when(
      y == 1 ~ "Daily or almost daily",
      y == 2 ~ "Weekly",
      y == 3 ~ "Monthly",
      y == 4 ~ "Less than monthly",
      y == 5 ~ "Never",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    # List the categories in the order they should appear;
    # otherwise the factor levels are sorted alphabetically
    y_category = y_category %>%
      factor() %>%
      fct_relevel(
        "Daily or almost daily",
        "Weekly",
        "Monthly",
        "Less than monthly",
        "Never"
      )
  )
# Frequency of each labelled category, to confirm the conversion
table(df[["y_category"]])
##
## Daily or almost daily Weekly Monthly
## 65 650 346
## Less than monthly Never
## 417 239
# Return the most frequent value(s) of a vector as character strings.
#
# v: a vector or factor; its values are counted with table().
# Returns a character vector with every value tied for the highest count,
# or character(0) when there is nothing to count.
get_mode <- function(v) {
  tbl <- table(v)
  # Guard: max() on an empty table would warn and return -Inf
  if (length(tbl) == 0L) {
    return(character(0))
  }
  # Keep every name whose count equals the maximum, so ties are all reported
  names(tbl)[tbl == max(tbl)]
}
# Most frequent category (or categories, if tied) among Irish respondents
mode_values <- get_mode(df[["y_category"]])
# toString() joins multiple modes with ", "
cat("Mode of y category:", toString(mode_values), "\n")
## Mode of y category: Weekly
Ans: The most common category selected by Irish respondents is ‘weekly’.
Provide code and answer.
Prompt and question: when you use the summary() function for the variable plnftr (about planning for the future versus taking each day as it comes, on a 0-10 scale) for both Portugal and Serbia, what do you notice? What stands out as different when you compare the two countries (note: look up the variable information on the ESS website to help with interpretation)? Explain while referring to the output generated.
# Step 1: Filter for the countries of interest
portugal_plnftr <- ess %>%
  filter(cntry == "PT") %>%
  select(plnftr)
serbia_plnftr <- ess %>%
  filter(cntry == "RS") %>%
  select(plnftr)
# Step 2: Recode the missing-value codes (77, 88, 99) to NA so they do not
# distort the mean and quartiles of the 0-10 scale
portugal_plnftr$plnftr[portugal_plnftr$plnftr %in% c(77, 88, 99)] <- NA
serbia_plnftr$plnftr[serbia_plnftr$plnftr %in% c(77, 88, 99)] <- NA
# Step 3: Summary
# NOTE: the output recorded below was produced before the recode in Step 2
# (its Max. is 88); re-run this section to refresh it before interpreting.
summary(portugal_plnftr)
## plnftr
## Min. : 0.000
## 1st Qu.: 3.000
## Median : 5.000
## Mean : 6.426
## 3rd Qu.: 8.000
## Max. :88.000
## NA's :14604
# Same summary for Serbia, for comparison with Portugal
serbia_plnftr %>%
  summary()
## plnftr
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 4.000
## Mean : 4.983
## 3rd Qu.: 8.000
## Max. :88.000
## NA's :1505
Ans: People in Portugal are more likely to take each day as it comes instead of planning ahead, as their mean is closer to the "take each day as it comes" end of the 0-10 scale.
Provide code and answer.
Prompt and question: using the variables stfdem and gndr, answer the following: on average, who is more dissatisfied with democracy in Italy, men or women? Explain while referring to the output generated.
Info on variable here: https://ess.sikt.no/en/variable/query/stfdem/page/1
# Restrict the data to Italian respondents
italy_data <- ess %>%
  filter(cntry == "IT")
# Label gender and blank out the missing-value codes of stfdem
italy_data <- italy_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_ # code 9 (and any other value) is treated as missing
    ),
    # 77, 88 and 99 are not answers on the 0-10 scale
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
  )
# Compute the average of stfdem by gender. The variable averaged must be
# stfdem (satisfaction with democracy), not lrscale.
# NOTE: the tibble recorded below came from the earlier lrscale version;
# re-run this section to refresh it and re-check the answer.
means_by_gender <- italy_data %>%
  group_by(gndr) %>%
  summarize(stfdem = mean(stfdem, na.rm = TRUE))
print(means_by_gender)
## # A tibble: 3 × 2
## gndr lrscale
## <chr> <dbl>
## 1 Female 28.3
## 2 Male 24.3
## 3 <NA> 22.2
Ans: On average, women are more dissatisfied with democracy in Italy.
Provide code and answer.
Prompt: Interpret the boxplot graph of stfedu and stfhlth that we generated already: according to ESS data, would we say that the median French person is more satisfied with the education system or health services? Explain.
Change the boxplot graph: provide the code to change some of the key labels: (1) Change the title to: Boxplot of satisfaction with the state of education vs. health services; (2) Remove the x-axis label; (3) Change the y-axis label to: Satisfaction (0-10).
Hint: copy the boxplot code above and just replace or cut what is asked.
# Restrict the data to French respondents
france_data <- ess %>%
  filter(cntry == "FR")
france_data %>%
  # Setting the missing-value codes (77, 88, 99) to NA
  mutate(
    stfedu = ifelse(stfedu %in% c(77, 88, 99), NA, stfedu),
    stfhlth = ifelse(stfhlth %in% c(77, 88, 99), NA, stfhlth)
  ) %>%
  # Reshaping to long format: one row per (variable, value) pair
  select(stfedu, stfhlth) %>%
  pivot_longer(
    cols = c(stfedu, stfhlth),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Creating the boxplot, one box per variable
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of satisfaction with the state of education vs. health services",
    x = NULL, # (2) remove the x-axis label
    y = "Satisfaction (0-10)"
  ) +
  theme_minimal()
## Warning: Removed 364 rows containing non-finite values (`stat_boxplot()`).
Ans: The median French person is more satisfied with health services as the level of satisfaction on a scale of 0-10 is higher for health services than the education system.