Huynh_MinhTam_Project202

Setting up environment

# Packages this analysis depends on (add any others you need here).
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom")

# Install only the packages that are missing. requireNamespace() tests one
# named package at a time, which avoids scanning the whole library with
# installed.packages() just to look up a handful of names.
new_packages <- packages[
  !vapply(packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach the packages. invisible() stops lapply()'s return value (the list of
# attached packages after each library() call) from being printed.
invisible(lapply(packages, library, character.only = TRUE))

## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr     1.1.2     v readr     2.1.4
## v forcats   1.0.0     v stringr   1.5.0
## v ggplot2   3.4.3     v tibble    3.2.1
## v lubridate 1.9.2     v tidyr     1.3.0
## v purrr     1.0.1     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"

# Read the data set stored in the local .fst file into `ess`.
ess <- fst::read_fst("All-ESS-Data.fst")

Task 1:

Filtering and recoding:

# Keep the rows with cntry == "BE" (Belgium), then set the codes 77, 88 and 99
# of trstep ('Trust in the European Parliament') to NA.
belgium_data <- ess %>%
  filter(cntry == "BE") %>%
  mutate(trstep = replace(trstep, trstep %in% c(77, 88, 99), NA))

# Show the distinct trstep values that remain after the recode.
unique(belgium_data[["trstep"]])

##  [1]  0  7  8  5  6 NA  4  9  3  1 10  2

# Drop the rows whose trstep is NA.
belgium_data <- filter(belgium_data, !is.na(trstep))

# Relabel wrkprty in place: 1 -> "Yes", 2 -> "No", codes 7, 8 and 9 -> NA.
# Any other value is kept, converted to text.
belgium_data <- belgium_data %>%
  mutate(
    wrkprty = case_when(
      wrkprty %in% c(7, 8, 9) ~ NA_character_,
      wrkprty == 2 ~ "No",
      wrkprty == 1 ~ "Yes",
      TRUE ~ as.character(wrkprty)
    )
  )

# Count respondents in each wrkprty category (NA values are not tabulated).
table(belgium_data[["wrkprty"]])

## 
##    No   Yes 
## 14871   727

Producing a linear regression model:

# Fit a linear model with trstep as the outcome and the recoded wrkprty as the
# only predictor, using the Belgian data; the fit is stored in model_m1.
model_m1 <- lm(trstep ~ wrkprty, data = belgium_data)

Extracting coefficients and intercept:

# Pull the fitted intercept and slope out of model_m1 and display them.
coefficients <- model_m1 %>% coef()
print(coefficients)

## (Intercept)  wrkprtyYes 
##   4.9405554   0.4308338

Interpretation:

An intercept of 4.94 indicates that when looking at respondents who have NOT worked in a political party, the expected value of their trust in the European Parliament is approximately 4.94.

A coefficient of 0.43 indicates that when looking at respondents who HAVE worked in a political party, there is an increase of approximately 0.43 in the predicted value of their trust in the European Parliament, compared to respondents who have not worked in a political party.

Task 2:

Filtering and recoding:

# Keep the rows with cntry == "BG" (Bulgaria), then set the codes 77, 88 and 99
# of stfdem ('Satisfaction with Democracy') to NA.
bulgaria_data <- ess %>%
  filter(cntry == "BG") %>%
  mutate(stfdem = replace(stfdem, stfdem %in% c(77, 88, 99), NA))

# Show the distinct stfdem values that remain after the recode.
unique(bulgaria_data[["stfdem"]])

##  [1]  0  1 NA  2  3  5  4  6  8  7 10  9

# Drop the rows whose stfdem is NA.
bulgaria_data <- filter(bulgaria_data, !is.na(stfdem))

# Derive `native` from brncntr: 1 -> "Yes", 2 -> "No".
# case_when() matches the recoding style used for the other countries in this
# file and replaces dplyr::recode(), which is superseded as of dplyr 1.1.0.
# Codes 7, 8 and 9 are mapped to NA explicitly; any other unlisted value also
# falls through to NA, which is what recode() did for unmatched values here.
bulgaria_data <- bulgaria_data %>%
  mutate(
    native = case_when(
      brncntr == 1 ~ "Yes",
      brncntr == 2 ~ "No",
      brncntr %in% c(7, 8, 9) ~ NA_character_
    )
  )

# Count respondents in each `native` category (NA values are not tabulated).
table(bulgaria_data[["native"]])

## 
##    No   Yes 
##    97 12239

Producing a linear regression model:

# Fit a linear model with stfdem as the outcome and `native` as the only
# predictor, using the Bulgarian data; the fit is stored in model_m2.
model_m2 <- lm(stfdem ~ native, data = bulgaria_data)

Extracting coefficient and intercept:

# Pull the fitted intercept and slope out of model_m2 and display them.
coefficients <- model_m2 %>% coef()
print(coefficients)

## (Intercept)   nativeYes 
##   2.8041237   0.1189909

Producing a summary model output:

# One row per model term: estimate, standard error, test statistic, p-value.
model_m2 %>% tidy()

## # A tibble: 2 x 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)    2.80      0.226    12.4   5.30e-35
## 2 nativeYes      0.119     0.227     0.523 6.01e- 1

Interpretation:

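The expected average satisfaction with democracy for respondents who were NOT born in Bulgaria is 2.8, as indicated by an intercept value of 2.8. The coefficient of 0.12 indicates that respondents who WERE born in Bulgaria are predicted to score about 0.12 points higher; however, with a p-value of 0.60 this difference is not statistically significant, so the model provides no evidence that satisfaction with democracy differs between the two groups.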

Task 3:

Filtering and recoding:

# Keep the rows with cntry == "DE" (Germany), then set the codes 77, 88 and 99
# of trstep ('Trust in the European Parliament', the outcome variable) to NA.
germany_data <- ess %>%
  filter(cntry == "DE") %>%
  mutate(trstep = replace(trstep, trstep %in% c(77, 88, 99), NA))

# Show the distinct trstep values that remain after the recode.
unique(germany_data[["trstep"]])

##  [1]  7 NA  5 10  6  8  3  2  9  4  0  1

# Drop the rows whose trstep is NA.
germany_data <- filter(germany_data, !is.na(trstep))

# Build `minority` from blgetmg: 1 -> "Ethnic Minority",
# 2 -> "Not Ethnic Minority", codes 7, 8 and 9 -> NA.
# Any other value is kept, converted to text.
germany_data <- germany_data %>%
  mutate(
    minority = case_when(
      blgetmg == 1 ~ "Ethnic Minority",
      blgetmg == 2 ~ "Not Ethnic Minority",
      blgetmg %in% c(7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(blgetmg)
    )
  )

# Count respondents in each `minority` category (NA values are not tabulated).
table(germany_data[["minority"]])

## 
##     Ethnic Minority Not Ethnic Minority 
##                1095               22820

Producing a linear regression model:

# Fit a linear model with trstep as the outcome and `minority` as the only
# predictor, using the German data; the fit is stored in model_m3.
model_m3 <- lm(trstep ~ minority, data = germany_data)

Extracting coefficients and intercept:

# Pull the fitted intercept and slope out of model_m3 and display them.
coefficients <- model_m3 %>% coef()
print(coefficients)

##                 (Intercept) minorityNot Ethnic Minority 
##                   4.8173516                  -0.6333025

Producing an equation output with equatiomatic:

# equatiomatic is only needed for the extract_eq() call below. Install it from
# GitHub only when it is not already available, so re-running the document
# does not contact GitHub every time.
if (!requireNamespace("equatiomatic", quietly = TRUE)) {
  remotes::install_github("datalorax/equatiomatic")
}

## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation

# Render model_m3 as an equation, with the fitted coefficient values filled in.
model_m3 %>% equatiomatic::extract_eq(use_coefs = TRUE)

\[ \operatorname{\widehat{trstep}} = 4.82 - 0.63(\operatorname{minority}_{\operatorname{Not\ Ethnic\ Minority}}) \]

Interpretation:

This regression equation indicates that the approximate predicted value of respondents’ trust in the European Parliament, given that they are NOT an ethnic minority, is about 4.82 - 0.63 = 4.19 using the rounded coefficients (4.18 when computed from the unrounded estimates, 4.8174 - 0.6333). Looking at respondents who ARE an ethnic minority, the average predicted value of trstep is 4.82.

Huynh_MinhTam_Project202_8

2023-11-20

Setting up environment

Task 1:

Task 2:

Task 3: