# Packages this script relies on (extend the vector as required)
packages <- c("tidyverse", "fst", "modelsummary")
# Anything from that list not found in the local library gets installed first
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}
# Attach each package in turn; the list returned by lapply() is printed
lapply(packages, function(pkg) library(pkg, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "fst" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "fst" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [16] "utils" "datasets" "methods" "base"
# Read the ESS file, keep the 'happy' item for Belgian respondents (cntry "BE")
# and duplicate it into a working column 'y'
ess <- read_fst("All-ESS-Data.fst")
belgium_happy <- ess %>%
  filter(cntry == "BE") %>%
  select(happy) %>%
  mutate(y = happy)
table(belgium_happy$y)
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88 99
## 50 27 104 194 234 830 999 3503 6521 3402 1565 3 16 3
# Any code from 77 up to 99 is replaced by NA
belgium_happy$y <- replace(belgium_happy$y, belgium_happy$y %in% 77:99, NA)
# Tabulate once more to confirm the recode
table(belgium_happy$y)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 50 27 104 194 234 830 999 3503 6521 3402 1565
# Average of the cleaned column, skipping NA
mean_y <- mean(belgium_happy[["y"]], na.rm = TRUE)
cat("Mean of 'y' is:", mean_y, "\n")
## Mean of 'y' is: 7.737334
# Same steps for Norway (cntry "NO"): keep 'happy' and copy it into 'y'
norway_happy <- ess %>%
  filter(cntry == "NO") %>%
  select(happy) %>%
  mutate(y = happy)
table(norway_happy$y)
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 15 29 59 163 238 730 817 2617 5235 3796 2344 12 10
# Any code from 77 up to 88 is replaced by NA
norway_happy$y <- replace(norway_happy$y, norway_happy$y %in% 77:88, NA)
# Tabulate once more to confirm the recode
table(norway_happy$y)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 15 29 59 163 238 730 817 2617 5235 3796 2344
# Average of the cleaned column, skipping NA
mean_y <- mean(norway_happy[["y"]], na.rm = TRUE)
cat("Mean of 'y' is:", mean_y, "\n")
## Mean of 'y' is: 7.975005
Prompt and question: calculate the average for the variable ‘happy’ for the country of Norway. On average, based on the ESS data, who reports higher levels of happiness: Norway or Belgium?
ANSWER: On average, Norway reports a higher level of happiness, with a mean of 7.975005, while Belgium reports a lower level, with a mean of 7.737334.
# Irish respondents (cntry "IE"): keep 'alcbnge' and copy it into 'y'
ireland_ccdrinking <- ess %>%
  filter(cntry == "IE") %>%
  select(alcbnge) %>%
  mutate(y = alcbnge)
table(ireland_ccdrinking$y)
##
## 1 2 3 4 5 6 7 8
## 65 650 346 417 239 641 26 6
# Codes 6, 7 and 8 are replaced by NA
ireland_ccdrinking$y <- replace(
  ireland_ccdrinking$y,
  ireland_ccdrinking$y %in% 6:8,
  NA
)
# Turn the numeric codes into text labels, then into a factor whose levels
# follow the order of the codes (1 to 5)
df <- ireland_ccdrinking %>%
  mutate(
    y_category = case_when(
      y == 1 ~ "Daily or almost daily",
      y == 2 ~ "Weekly",
      y == 3 ~ "Monthly",
      y == 4 ~ "Less than monthly",
      y == 5 ~ "Never",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    y_category = fct_relevel(
      factor(y_category),
      "Daily or almost daily",
      "Weekly",
      "Monthly",
      "Less than monthly",
      "Never"
    )
  )
# Frequency table of the labelled variable, to confirm the conversion
table(df$y_category)
##
## Daily or almost daily Weekly Monthly
## 65 650 346
## Less than monthly Never
## 417 239
# DETERMINING THE MODE OF OUR NEW CATEGORY:
# Return the most frequent value(s) of a vector or factor.
#
# Arguments:
#   v  A vector or factor. NA entries are ignored, because table() drops
#      them by default.
# Returns:
#   A character vector holding the name(s) of the value(s) with the highest
#   count; ties yield several elements. When `v` has no non-missing
#   observations (empty input, all NA, or a factor whose levels all have a
#   count of zero) the result is character(0).
get_mode <- function(v) {
  tbl <- table(v)
  counts <- as.vector(tbl)
  # Guard: with nothing to count, max() would warn and return -Inf, and a
  # factor with only unused levels would report every level as a "mode".
  if (length(counts) == 0L || sum(counts) == 0L) {
    return(character(0))
  }
  names(tbl)[counts == max(counts)]
}
# Most frequent label(s) of the recoded variable; ties are joined with ", "
mode_values <- get_mode(df[["y_category"]])
cat("Mode of y category:", toString(mode_values), "\n")
## Mode of y category: Weekly
Prompt and question: what is the most common category selected, for Irish respondents, for frequency of binge drinking? The variable of interest is: alcbnge.
ANSWER: The most common category selected by Irish respondents for frequency of binge drinking is "Weekly".
# Portuguese respondents (cntry "PT"): keep 'plnftr' and copy it into 'c'
portugal_plnftr <- ess %>%
  filter(cntry == "PT") %>%
  select(plnftr) %>%
  mutate(c = plnftr)
table(portugal_plnftr$c)
##
## 0 1 2 3 4 5 6 7 8 9 10 88
## 114 184 313 356 264 481 262 382 345 166 370 40
# Code 88 is replaced by NA
portugal_plnftr$c <- replace(portugal_plnftr$c, portugal_plnftr$c %in% 88, NA)
# Tabulate once more to confirm the recode
table(portugal_plnftr$c)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 114 184 313 356 264 481 262 382 345 166 370
# Serbian respondents (cntry "RS"): keep 'plnftr' and copy it into 'b'
serbia_plnftr <- ess %>%
  filter(cntry == "RS") %>%
  select(plnftr) %>%
  mutate(b = plnftr)
table(serbia_plnftr$b)
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 587 133 152 138 95 246 70 87 103 47 364 4 17
# Any code from 77 up to 88 is replaced by NA
serbia_plnftr$b <- replace(serbia_plnftr$b, serbia_plnftr$b %in% 77:88, NA)
# Tabulate once more to confirm the recode
table(serbia_plnftr$b)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 587 133 152 138 95 246 70 87 103 47 364
# Summary statistics for Portugal: raw column (plnftr) next to the
# recoded column (c)
print(summary(portugal_plnftr))
## plnftr c
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.: 3.000
## Median : 5.000 Median : 5.000
## Mean : 6.426 Mean : 5.418
## 3rd Qu.: 8.000 3rd Qu.: 8.000
## Max. :88.000 Max. :10.000
## NA's :14604 NA's :14644
# Summary statistics for Serbia: raw column (plnftr) next to the
# recoded column (b)
print(summary(serbia_plnftr))
## plnftr b
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 4.000 Median : 4.000
## Mean : 4.983 Mean : 4.143
## 3rd Qu.: 8.000 3rd Qu.: 8.000
## Max. :88.000 Max. :10.000
## NA's :1505 NA's :1526
Prompt and question: when you use the summary() function for the variable plnftr (about planning for the future versus taking each day as it comes, on a 0-10 scale) for both Portugal and Serbia, what do you notice? What stands out as different when you compare the two countries (note: look up the variable information on the ESS website to help with interpretation)? Explain while referring to the output generated.
ANSWER: Comparing the summary statistics for plnftr after recoding the missing-value codes to NA, the mean for Portugal (5.418) is higher than the mean for Serbia (4.143). Because the scale runs from 0 (planning for the future as much as possible) to 10 (taking each day as it comes), Serbia's lower mean suggests that Serbian respondents report a higher level of future planning on average. The quartiles point in the same direction: Serbia has a 1st quartile of 0 while Portugal has a 1st quartile of 3, so at least a quarter of Serbian respondents chose the strongest "plan for the future" answer. This suggests that individuals from Serbia are more inclined to plan for the future as much as possible.
# Italian respondents (cntry "IT") with a labelled gender column and a cleaned
# stfdem column.
italy_data <- ess %>%
  filter(cntry == "IT") %>%
  mutate(
    # Replace the numeric gender codes with text labels; any other code is
    # kept as its character representation.
    # NOTE(review): the ESS codebook appears to label gndr == 9 as
    # "No answer" rather than a refusal; label text left unchanged so the
    # printed results keep their group names. Confirm against the codebook.
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ "Refusal to Answer",
      TRUE ~ as.character(gndr)
    ),
    # Codes 77, 88 and 99 are set to NA, the same set this script removes
    # from stfedu and stfhlth. Leaving 99 in would pull the mean upwards if
    # any such value is present.
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
  )
# Mean stfdem among respondents labelled "Male"
mean_male_stfdem <- summarise(
  filter(italy_data, gndr == "Male"),
  mean_stfdem_men = mean(stfdem, na.rm = TRUE)
)
print(mean_male_stfdem)
## mean_stfdem_men
## 1 4.782646
# Mean stfdem for every gender label
means_by_gender <- summarise(
  group_by(italy_data, gndr),
  stfdem = mean(stfdem, na.rm = TRUE)
)
print(means_by_gender)
## # A tibble: 3 × 2
## gndr stfdem
## <chr> <dbl>
## 1 Female 4.69
## 2 Male 4.78
## 3 Refusal to Answer 3.25
Prompt and question: using the variables stfdem and gndr, answer the following: on average, who is more dissatisfied with democracy in Italy, men or women? Explain while referring to the output generated.
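ANSWER: Based on the output generated, women have an average stfdem score of 4.69 while men have an average of 4.78. Since lower values on the 0-10 scale mean greater dissatisfaction, this indicates that women are, on average, slightly more dissatisfied with democracy in Italy than men, although the gap (about 0.09 points) is small.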
# French respondents (cntry "FR")
france_data <- ess %>%
  filter(cntry == "FR")
# Side-by-side boxplots of stfedu and stfhlth for the French sample
france_data %>%
  # Codes 77, 88 and 99 are set to NA so they do not distort the 0-10 scale
  mutate(
    stfedu = ifelse(stfedu %in% c(77, 88, 99), NA, stfedu),
    stfhlth = ifelse(stfhlth %in% c(77, 88, 99), NA, stfhlth)
  ) %>%
  select(stfedu, stfhlth) %>%
  # Reshape to long format: one row per (variable, value) pair.
  # pivot_longer() replaces the superseded gather(); NA values are kept, so
  # ggplot2 still reports them as removed rows when drawing the boxes.
  pivot_longer(
    cols = c(stfedu, stfhlth),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # One box per variable
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot() +
  labs(y = "Satisfaction (0-10)", title = "Boxplot of satisfaction with the state of education vs. health services") +
  theme_minimal()
## Warning: Removed 364 rows containing non-finite values (`stat_boxplot()`).
Prompt: Interpret the boxplot graph of stfedu and stfhlth that we generated already: according to ESS data, would we say that the median French person is more satisfied with the education system or health services? Explain.
ANSWER: The median French person is more satisfied with health services (stfhlth) than with the state of education (stfedu). In the boxplot, the median for stfhlth is approximately 7, while the median for stfedu is approximately 5. Because higher values on the 0-10 scale indicate greater satisfaction, the higher median for stfhlth reflects a higher level of satisfaction. Furthermore, the 1st quartile and 3rd quartile are lower for stfedu than for stfhlth.