# Packages this analysis depends on (append further names here as needed)
packages <- c("tidyverse", "fst", "modelsummary")

# Install only the packages that are missing from the local library
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package by name. library() returns the attached-package vector,
# so lapply() prints one such vector per package when run at top level.
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"
# Show the working directory; the relative file path below is resolved against it
getwd()
## [1] "/Users/hayaouri/Desktop/SOC222 R"
# NOTE(review): the original comment here read "vector data not working";
# it is unclear what it refers to -- confirm with the author or remove.
# Load the full ESS file into `ess`
ess <- read_fst(path = "All-ESS-Data.fst")

Task 1

Provide code and answer.

Prompt and question: calculate the average for the variable ‘happy’ for the country of Norway. On average, based on the ESS data, who reports higher levels of happiness: Norway or Belgium?

Note: we already did it for Belgium. You just need to compare to Norway’s average, making sure to provide the code for both.

# Country file for Norway. It is loaded here but not used below: the subset
# is taken from the full `ess` data instead.
norway_data <- read.fst("norway_data.fst")

# Norwegian respondents only, keeping the happiness item plus a working copy `y`
norway_happy <- ess %>%
  filter(cntry == "NO") %>%
  select(happy) %>%
  mutate(y = happy)

# Distribution before the out-of-scale codes are handled
table(norway_happy$y)
## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88 
##   15   29   59  163  238  730  817 2617 5235 3796 2344   12   10
# Values 77-99 are set to NA so they cannot enter the mean
norway_happy <- norway_happy %>%
  mutate(y = replace(y, y %in% 77:99, NA))

# Distribution after recoding: only 0-10 remain
table(norway_happy$y)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
##   15   29   59  163  238  730  817 2617 5235 3796 2344
# Mean happiness for Norway, ignoring the NAs created above
mean_y <- mean(norway_happy[["y"]], na.rm = TRUE)
cat("Mean of 'y' is:", mean_y, "\n")
## Mean of 'y' is: 7.975005

Mean of ‘y’ is: 7.975005

# Country file for Belgium. It is loaded here but not used below: the subset
# is taken from the full `ess` data instead.
belgium_data <- read.fst("belgium_data.fst")

# Belgian respondents only, keeping the happiness item plus a working copy `y`
belgium_happy <- ess %>%
  filter(cntry == "BE") %>%
  select(happy) %>%
  mutate(y = happy)

# Distribution before the out-of-scale codes are handled
table(belgium_happy$y)
## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88   99 
##   50   27  104  194  234  830  999 3503 6521 3402 1565    3   16    3
# Values 77-99 are set to NA so they cannot enter the mean
belgium_happy <- belgium_happy %>%
  mutate(y = replace(y, y %in% 77:99, NA))

# Distribution after recoding: only 0-10 remain
table(belgium_happy$y)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
##   50   27  104  194  234  830  999 3503 6521 3402 1565
# Mean happiness for Belgium (note: this overwrites the earlier `mean_y`)
mean_y <- mean(belgium_happy[["y"]], na.rm = TRUE)
cat("Mean of 'y' is:", mean_y, "\n")
## Mean of 'y' is: 7.737334

Mean of ‘y’ is: 7.737334

On average, Norway reports higher levels of happiness than Belgium: the mean of `happy` is 7.975 for Norway versus 7.737 for Belgium.

Task 2

Provide code and answer.

Prompt and question: what is the most common category selected, for Irish respondents, for frequency of binge drinking? The variable of interest is: alcbnge.

More info here: https://ess-search.nsd.no/en/variable/0c65116e-7481-4ca6-b1d9-f237db99a694.

Hint: need to convert numeric value entries to categories as specified in the variable information link. We did similar steps for Estonia and the climate change attitude variable.

# Country file for Ireland. It is loaded here but not used below: the subset
# is taken from the full `ess` data instead.
ireland_data <- read.fst("ireland_data.fst")

# Irish respondents only, keeping the binge-drinking item plus a working copy `y`
ireland_binge <- ess %>%
  filter(cntry == "IE") %>%
  select(alcbnge) %>%
  mutate(y = alcbnge)

# Raw distribution of the numeric codes
table(ireland_binge$y)
## 
##   1   2   3   4   5   6   7   8 
##  65 650 346 417 239 641  26   6
# Codes 6-8 are set to NA, leaving the five frequency categories 1-5
ireland_binge <- ireland_binge %>%
  mutate(y = replace(y, y %in% 6:8, NA))

# Attach a text label to each remaining code, then order the factor levels
# from most to least frequent drinking instead of alphabetically
df <- ireland_binge %>%
  mutate(
    y_category = case_when(
      y == 1 ~ "daily",
      y == 2 ~ "weekly",
      y == 3 ~ "monthly",
      y == 4 ~ "less than monthly",
      y == 5 ~ "never",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    y_category = fct_relevel(
      factor(y_category),
      "daily", "weekly", "monthly", "less than monthly", "never"
    )
  )

table(df$y_category)
## 
##             daily            weekly           monthly less than monthly 
##                65               650               346               417 
##             never 
##               239
# Return the most frequent value(s) of a vector as character strings.
#
# v: a vector or factor. Counting is done with table(), so NA entries are
#    not counted.
# Returns a character vector holding every value tied for the highest count,
# in the order table() lists them. Returns character(0) when table(v) has no
# entries (for example when `v` is empty or contains only NA).
get_mode <- function(v) {
  tbl <- table(v)
  # Guard the empty case: max() on a zero-length table warns and yields -Inf
  if (length(tbl) == 0) {
    return(character(0))
  }
  names(tbl)[as.vector(tbl) == max(tbl)]
}

# Most frequent binge-drinking category; ties, if any, are printed comma-separated
mode_values <- get_mode(df[["y_category"]])
cat("Mode of y category:", toString(mode_values), "\n")
## Mode of y category: weekly

Mode of y category: weekly.

"Weekly" is the most frequently selected category among Irish respondents (650 responses), although this result depends on how the numeric codes are assigned to categories and on the order in which those categories are arranged.

Task 3

Provide code and answer.

Prompt and question: when you use the summary() function for the variable plnftr (about planning for the future or taking each day as it comes, from 0-10) for both the countries of Portugal and Serbia, what do you notice? What stands out as different when you compare the two countries (note: look up the variable information on the ESS website to help with interpretation)? Explain while referring to the output generated.

# Country file for Portugal. It is loaded here but not used below: the subset
# is taken from the full `ess` data instead.
portugal_data <- read.fst("portugal_data.fst")

# Portuguese respondents only, keeping the planning item
portugal_future <- ess %>%
  filter(cntry == "PT") %>%
  select(plnftr)

# Summary of the raw values; no recoding has been applied at this point
portugal_future %>% summary()
##      plnftr      
##  Min.   : 0.000  
##  1st Qu.: 3.000  
##  Median : 5.000  
##  Mean   : 6.426  
##  3rd Qu.: 8.000  
##  Max.   :88.000  
##  NA's   :14604
# Country file for Serbia. It is loaded here but not used below: the subset
# is taken from the full `ess` data instead.
serbia_data <- read.fst("serbia_data.fst")

# Serbian respondents only, keeping the planning item
serbia_future <- ess %>%
  filter(cntry == "RS") %>%
  select(plnftr)

# Summary of the raw values; no recoding has been applied at this point
serbia_future %>% summary()
##      plnftr      
##  Min.   : 0.000  
##  1st Qu.: 0.000  
##  Median : 4.000  
##  Mean   : 4.983  
##  3rd Qu.: 8.000  
##  Max.   :88.000  
##  NA's   :1505

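Portugal has a mean of 6.426 and a median of 5.000, while Serbia has a mean of 4.983 and a median of 4.000. Because 0 means planning for the future and 10 means taking each day as it comes, respondents in Serbia lean more toward planning for the future, whereas respondents in Portugal more often take each day as it comes. Portugal's mean sits further above its median than Serbia's does, which suggests that Portugal's distribution is more skewed; the closer mean and median in Serbia point to a more symmetric distribution with fewer extreme values. Note, however, that the maximum of 88 in both summaries is outside the 0-10 scale: it is a missing-value code that has not been recoded to NA, so both means are inflated and the medians are the more reliable basis for comparison. The number of NAs also differs greatly between the two countries (14,604 for Portugal versus 1,505 for Serbia).

## Task 4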

Provide code and answer.

Prompt and question: using the variables stfdem and gndr, answer the following: on average, who is more dissatisfied with democracy in Italy, men or women? Explain while referring to the output generated.

Info on variable here: https://ess.sikt.no/en/variable/query/stfdem/page/1

# Subset the full ESS data to Italian respondents. The earlier
# read.fst("italy_data.fst") call was dropped: its result was overwritten by
# this assignment before it was ever used.
italy_data <- ess %>%
  filter(cntry == "IT")

# Label gender and blank out missing-value codes.
# - gndr: any code other than 1 or 2 becomes NA instead of being kept as a
#   string. Previously code 9 showed up as a third "gender" group in the
#   results (presumably the ESS "No answer" code -- verify against the codebook).
# - stfdem: 99 is now treated as missing alongside 77 and 88, matching the
#   77/88/99 handling used for the other satisfaction items in this file.
italy_data <- italy_data %>%
  mutate(
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      TRUE ~ NA_character_
    ),
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
  )

# Mean satisfaction with democracy (0-10) for men and women only. Respondents
# with missing gender are excluded here rather than removed from italy_data.
# Re-run this chunk to refresh any previously printed table.
means_by_gender <- italy_data %>%
  filter(!is.na(gndr)) %>%
  group_by(gndr) %>%
  summarize(stfdem = mean(stfdem, na.rm = TRUE))

print(means_by_gender)
## # A tibble: 3 × 2
##   gndr   stfdem
##   <chr>   <dbl>
## 1 9        3.25
## 2 Female   4.69
## 3 Male     4.78

The average satisfaction with democracy was 4.78 for Italian men and 4.69 for Italian women. Because 0 represents extreme dissatisfaction and 10 represents extreme satisfaction, the lower mean for women indicates that, on average, women are slightly more dissatisfied with democracy in Italy than men, who are slightly more content.

## Task 5

Provide code and answer.

Prompt: Interpret the boxplot graph of stfedu and stfhlth that we generated already: according to ESS data, would we say that the median French person is more satisfied with the education system or health services? Explain.

Change the boxplot graph: provide the code to change some of the key labels: (1) Change the title to: Boxplot of satisfaction with the state of education vs. health services; (2) Remove the x-axis label; (3) Change the y-axis label to: Satisfaction (0-10).

Hint: copy the boxplot code above and just replace or cut what is asked.

# Load the France country file
france_data <- read.fst("france_data.fst")

# Boxplot comparing satisfaction with education (stfedu) and with health
# services (stfhlth).
france_data %>%
  # Set codes 77/88/99 to NA in both items so they are not plotted
  mutate(
    across(c(stfedu, stfhlth), ~ replace(.x, .x %in% c(77, 88, 99), NA))
  ) %>%
  select(stfedu, stfhlth) %>%
  # Long format: one row per response, with the item name in `variable`
  # (pivot_longer() replaces the superseded gather())
  pivot_longer(
    c(stfedu, stfhlth),
    names_to = "variable",
    values_to = "value"
  ) %>%
  ggplot(aes(x = variable, y = value)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of satisfaction with the state of education vs. health services",
    x = NULL, # NULL removes the x-axis label; "" would leave an empty slot
    y = "Satisfaction (0-10)"
  ) +
  theme_minimal()
## Warning: Removed 364 rows containing non-finite values (`stat_boxplot()`).

According to the ESS data, the median French respondent is more satisfied with health services than with the education system: the median satisfaction is approximately 7 for health services, but approximately 5 for education.