Set Up

# Packages this analysis depends on
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom", "remotes") # add any you need here

# Install whichever packages are missing. requireNamespace() checks one
# package at a time, which avoids scanning the entire library with
# installed.packages().
is_installed <- vapply(packages, requireNamespace, logical(1), quietly = TRUE)
new_packages <- packages[!is_installed]
if (length(new_packages) > 0) install.packages(new_packages)

# Attach the packages. invisible() stops lapply() from printing the list of
# attached-package vectors that each library() call returns.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'remotes' was built under R version 4.3.2
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[6]]
##  [1] "remotes"      "broom"        "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"
# Load the ESS data. The folder is kept in a variable and joined with
# file.path() so the session's working directory is never changed.
ess_dir <- "C:/Users/2expl/Desktop/UToronto/Year 2/SOC202"
ess <- read_fst(file.path(ess_dir, "All-ESS-Data.fst"))

Task 1

# Subset to Belgium and recode the two variables used in the model
belgium_data <- ess %>%
  filter(cntry == "BE") %>%
  mutate(
    # trstep is a 0-10 scale, so 7, 8 and 9 are valid answers and must be
    # kept; only 77, 88 and 99 are missing-value codes. The variable stays
    # numeric so lm() receives a numeric response.
    # NOTE: results produced while 7-9 were dropped need to be re-run.
    trstep = if_else(trstep %in% c(77, 88, 99), NA_real_, as.numeric(trstep)),

    # wrkprty: 1 = "Yes", 2 = "No"; 7, 8 and 9 are treated as missing
    wrkprty = case_when(
      wrkprty == 1 ~ "Yes",
      wrkprty == 2 ~ "No",
      wrkprty %in% c(7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(wrkprty)
    )
  ) %>%
  select(wrkprty, trstep) %>%
  # Keep only respondents with both variables observed
  filter(!is.na(trstep), !is.na(wrkprty))

# Fit a linear regression of trstep on wrkprty for the Belgian sample
model_BE <- lm(
  formula = trstep ~ wrkprty,
  data = belgium_data
)

# Pull out the fitted coefficients and display them
coefficientsBE <- coef(model_BE)
print(coefficientsBE)
## (Intercept)  wrkprtyYes 
##   4.0336688   0.2538055

Analysis

From the model, when the explanatory variable is at its reference category (wrkprty = “No”), the predicted value of the outcome variable (trstep) is approximately 4.03. When wrkprty = “Yes”, the predicted value is 0.25 points higher. Therefore, on average, people who have worked for a political organization over the last 12 months report trust in the European Parliament that is 0.25 points higher than those who haven’t.

Reference category (wrkprty = “No”): trstep = 4.03. Non-reference category (wrkprty = “Yes”): trstep =

# Predicted trstep for the non-reference category (wrkprty = "Yes"):
# intercept plus the wrkprtyYes coefficient, read from the fitted model so
# the value cannot drift from the model output.
coefficientsBE[["(Intercept)"]] + coefficientsBE[["wrkprtyYes"]]
## [1] 4.287474

Task 2

# Subset to Bulgaria and recode the two variables used in the model
bulg_data <- ess %>%
  filter(cntry == "BG") %>%
  mutate(
    # stfdem is a 0-10 scale, so 7, 8 and 9 are valid answers and must be
    # kept; only 77, 88 and 99 are missing-value codes. The variable stays
    # numeric so lm() receives a numeric response.
    # NOTE: results produced while 7-9 were dropped need to be re-run.
    stfdem = if_else(stfdem %in% c(77, 88, 99), NA_real_, as.numeric(stfdem)),

    # Born in the country: 1 = "Yes", 2 = "No"; any other code (7, 8, 9)
    # is treated as missing
    native = case_when(
      brncntr == 1 ~ "Yes",
      brncntr == 2 ~ "No",
      TRUE ~ NA_character_
    )
  ) %>%
  select(native, stfdem) %>%
  # Keep only respondents with both variables observed
  filter(!is.na(native), !is.na(stfdem))


# Fit a linear regression of stfdem on native for the Bulgarian sample
bulg_model1 <- lm(
  formula = stfdem ~ native,
  data = bulg_data
)

# Show the full model summary (coefficients, standard errors, fit statistics)
summary(bulg_model1)
## 
## Call:
## lm(formula = stfdem ~ native, data = bulg_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6486 -1.6486  0.3514  1.3514  7.5055 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.4945     0.2069  12.058   <2e-16 ***
## nativeYes     0.1541     0.2077   0.742    0.458    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.974 on 11642 degrees of freedom
## Multiple R-squared:  4.726e-05,  Adjusted R-squared:  -3.863e-05 
## F-statistic: 0.5503 on 1 and 11642 DF,  p-value: 0.4582

Analysis

From the output of the summary() function, the predicted value of stfdem when native = “No” is 2.4945 (the intercept, because “No” is the reference category).

Task 3

# Subset to Great Britain and recode the two variables used in the model
GB_data <- ess %>%
  filter(cntry == "GB") %>%
  mutate(
    # trstlgl: 77, 88 and 99 are missing-value codes. A single recode is
    # enough, and the variable stays numeric so lm() receives a numeric
    # response rather than character strings.
    trstlgl = if_else(trstlgl %in% c(77, 88, 99), NA_real_, as.numeric(trstlgl)),

    # gndr: 1 = "Male", 2 = "Female"; 9 is treated as missing
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
    )
  ) %>%
  select(gndr, trstlgl) %>%
  # Keep only respondents with both variables observed
  filter(!is.na(gndr), !is.na(trstlgl))

# Fit a linear regression of trstlgl on gndr for the British sample
GB_model1 <- lm(
  formula = trstlgl ~ gndr,
  data = GB_data
)

# Pull out the fitted coefficients and display them
GB_coefficients <- coef(GB_model1)
print(GB_coefficients)
## (Intercept)    gndrMale 
##   5.2960824   0.1314312
# Render the fitted model as a LaTeX equation, showing the estimated
# coefficient values rather than Greek-letter placeholders

equatiomatic::extract_eq(
  model = GB_model1,
  use_coefs = TRUE
)

\[ \operatorname{\widehat{trstlgl}} = 5.3 + 0.13(\operatorname{gndr}_{\operatorname{Male}}) \]

Analysis

From the output equation, we can determine that the predicted trstlgl for gndr = “Female” (the reference category) is 5.3.

We can also see that the predicted trstlgl for gndr = “Male” is:

# Predicted trstlgl for gndr = "Male": intercept plus the gndrMale
# coefficient (the fitted equation is 5.3 + 0.13 * Male, so the coefficient
# is added, not subtracted)
GB_coefficients[["(Intercept)"]] + GB_coefficients[["gndrMale"]]
## [1] 5.427514