Homework 8 _ Lyra Dong

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"

# Read the ESS data file from the working directory (fst format).
ess <- fst::read_fst("All-ESS-Data.fst")

Task 1:

# Keep the Belgian respondents and turn the 77/88/99 codes of trstep into NA.
belgium_data <- mutate(
  filter(ess, cntry == "BE"),
  trstep = ifelse(trstep %in% c(77, 88, 99), NA, trstep)
)

# Distinct values left in trstep after the recode.
unique(belgium_data[["trstep"]])

##  [1]  0  7  8  5  6 NA  4  9  3  1 10  2

# Discard respondents whose trstep is missing after the recode.
belgium_data <- filter(belgium_data, !is.na(trstep))

# Turn the 7/8/9 codes of wrkprty into NA.
#
# This step continues from the trstep-cleaned belgium_data instead of
# rebuilding it from `ess`: starting again from `ess` here would discard the
# trstep recode and NA filter applied above and leave the 77/88/99 codes in
# the outcome variable of the later regression.
# Every output computed from belgium_data further down must be regenerated.
belgium_data <- belgium_data %>%
  mutate(wrkprty = ifelse(wrkprty %in% c(7, 8, 9), NA, wrkprty))

# Distinct values left in wrkprty after the recode.
unique(belgium_data$wrkprty)

## [1]  2  1 NA

# Discard respondents whose wrkprty is missing after the recode.
belgium_data <- filter(belgium_data, !is.na(wrkprty))

# Recode the Belgian analysis variables in a single mutate() call:
#   lrscale   - 77/88/99 become NA
#   polintr   - 1/2 "Interested", 3/4 "Not Interested", 7/8/9 NA
#   wrkprty   - 1 "Yes", 2 "No", 7/8/9 NA
#   workcivil - same Yes/No recode, read from wrkorg
#   protest   - same Yes/No recode, read from pbldmn
# Any value not listed above is carried over as text.
belgium_data <- mutate(
  belgium_data,
  lrscale = ifelse(lrscale %in% c(77, 88, 99), NA, lrscale),
  polintr = case_when(
    polintr %in% 1:2 ~ "Interested",
    polintr %in% 3:4 ~ "Not Interested",
    polintr %in% 7:9 ~ NA_character_,
    TRUE ~ as.character(polintr)
  ),
  wrkprty = case_when(
    wrkprty == 1 ~ "Yes",
    wrkprty == 2 ~ "No",
    wrkprty %in% 7:9 ~ NA_character_,
    TRUE ~ as.character(wrkprty)
  ),
  workcivil = case_when(
    wrkorg == 1 ~ "Yes",
    wrkorg == 2 ~ "No",
    wrkorg %in% 7:9 ~ NA_character_,
    TRUE ~ as.character(wrkorg)
  ),
  protest = case_when(
    pbldmn == 1 ~ "Yes",
    pbldmn == 2 ~ "No",
    pbldmn %in% 7:9 ~ NA_character_,
    TRUE ~ as.character(pbldmn)
  )
)

# Frequency of each wrkprty category in the analysis sample.
table(belgium_data[["wrkprty"]])

## 
##    No   Yes 
## 15354   738

# Regress trstep on the wrkprty Yes/No indicator and print the estimates.
model1 <- lm(formula = trstep ~ wrkprty, data = belgium_data)

coefficients <- stats::coef(model1)
print(coefficients)

## (Intercept)  wrkprtyYes 
##   7.5491077  -0.9610318

# Install equatiomatic from GitHub only when it is not already available, so
# the script does not need network access (or the remotes package) on every
# run.
if (!requireNamespace("equatiomatic", quietly = TRUE)) {
  remotes::install_github("datalorax/equatiomatic")
}

## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation

# Render model1 as a LaTeX equation with the estimated coefficient values
# written in place of the symbols (use_coefs = TRUE).
equatiomatic::extract_eq(model1, use_coefs = TRUE)

\[ \operatorname{\widehat{trstep}} = 7.55 - 0.96(\operatorname{wrkprty}_{\operatorname{Yes}}) \]

Interpretation: The intercept is the predicted value of the outcome when the explanatory variable is zero or, for a categorical variable, at its reference level. Here the outcome is trust in the European Parliament (trstep, measured on a scale from 0 to 10) and the reference level of wrkprty is “No”. In other words, for individuals who have not worked in a political party, the predicted value of trstep is approximately 7.5491077 (rounded to 7.55).

wrkprtyYes (-0.9610318): This coefficient is the difference in predicted trust in the European Parliament between individuals who have worked in a political party (“Yes” category) and those who have not (“No” category). Specifically, being in the “Yes” category (i.e., having worked in a political party) is associated with a predicted trust value that is about 0.9610318 (rounded to 0.96) lower than for the “No” category (i.e., not having worked in a political party). Therefore, the predicted value for people who have worked in a political party is 7.55 - 0.96 = 6.59.

Regression equation: \[ \operatorname{\widehat{trstep}} = 7.5491077 - 0.9610318(\operatorname{wrkprty}_{\operatorname{Yes}}) \]

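General form of the fitted equation: \[ \hat{y} = \hat{\beta}_0 + \hat{\beta}_1 x_1 \] where \(\hat{\beta}_0\) is the estimated intercept and \(\hat{\beta}_1\) is the estimated coefficient of \(x_1\). The error term \(\epsilon\) belongs to the model for the observed outcome, \(y = \beta_0 + \beta_1 x_1 + \epsilon\), not to the fitted value \(\hat{y}\).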

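For an observed respondent: \[ \operatorname{trstep} = 7.5491077 - 0.9610318(\operatorname{wrkprty}_{\operatorname{Yes}}) + e \] where \(e\) is the residual (the estimated error term).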

Task 2:

# Keep the Bulgarian respondents and turn the 77/88/99 codes of stfdem into NA.
bulgaria_data <- mutate(
  filter(ess, cntry == "BG"),
  stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
)

# Distinct values left in stfdem after the recode.
unique(bulgaria_data[["stfdem"]])

##  [1]  0  1 NA  2  3  5  4  6  8  7 10  9

# Discard respondents whose stfdem is missing after the recode.
bulgaria_data <- filter(bulgaria_data, !is.na(stfdem))

# Derive two labelled indicators for the Bulgarian sample:
#   geo    - from domicil: 1 "Urban", 2 "Peri-Urban", 3/4/5 "Rural",
#            7/8/9 NA, any other value carried over as text
#   native - from brncntr: 1 "Yes", 2 "No", every other value NA
bulgaria_data <- bulgaria_data %>%
  mutate(
    geo = case_when(
      domicil == 1 ~ "Urban",
      domicil == 2 ~ "Peri-Urban",
      domicil %in% 3:5 ~ "Rural",
      domicil %in% 7:9 ~ NA_character_,
      TRUE ~ as.character(domicil)
    ),
    native = case_when(
      brncntr == 1 ~ "Yes",
      brncntr == 2 ~ "No"
    )
  )

# Frequency of each native category in the analysis sample.
table(bulgaria_data[["native"]])

## 
##    No   Yes 
##    97 12239

# Regress stfdem on the native Yes/No indicator and print the estimates.
model2 <- lm(formula = stfdem ~ native, data = bulgaria_data)

coefficients <- stats::coef(model2)
print(coefficients)

## (Intercept)   nativeYes 
##   2.8041237   0.1189909

# Full fit report for model2: estimates, standard errors, p-values, R-squared.
print(summary(model2))

## 
## Call:
## lm(formula = stfdem ~ native, data = bulgaria_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9231 -1.9231  0.0769  2.0769  7.1959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.8041     0.2265  12.382   <2e-16 ***
## nativeYes     0.1190     0.2274   0.523    0.601    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.231 on 12334 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  2.22e-05,   Adjusted R-squared:  -5.887e-05 
## F-statistic: 0.2739 on 1 and 12334 DF,  p-value: 0.6007

The expected average satisfaction with democracy for respondents who were not born in Bulgaria (the reference category, native = “No”) is the intercept, 2.8041 ≈ 2.80.

Task 3:

# Keep the German respondents and turn the 77/88/99 codes of trstlgl into NA.
germany_data <- mutate(
  filter(ess, cntry == "DE"),
  trstlgl = ifelse(trstlgl %in% c(77, 88, 99), NA, trstlgl)
)

# Distinct values left in trstlgl after the recode.
unique(germany_data[["trstlgl"]])

##  [1]  6  7  9  8  4 10  5  3 NA  2  0  1

# Discard respondents whose trstlgl is missing after the recode.
germany_data <- filter(germany_data, !is.na(trstlgl))

# Recode the German education variables (respondent, father, mother).
#
# Rows with essround < 5 are classified from the edulvl*a variables, rows with
# essround >= 5 from the edulvl*b variables.
#
# Fix: previously only the 55 / 5555 codes were blanked, so
#   * 7777 / 8888 / 9999 on edulvl*b satisfied `> 600` and were labelled "BA";
#   * rows with unknown education fell through to the "No BA" default.
# Now the missing codes are set to NA on both scales, and each educ_level*
# variable is NA whenever the source variable used for that round is NA.
# NOTE(review): 77/88/99 and 7777/8888/9999 are assumed to be the ESS
# refusal / don't know / no answer codes - confirm against the ESS codebook.
# Every output computed from educ_level further down must be regenerated.
germany_data <- germany_data %>%
  mutate(
    # Respondent
    edulvla = case_when(
      essround < 5 & edulvla %in% c(55, 77, 88, 99) ~ NA_real_,
      TRUE ~ edulvla
    ),
    edulvlb = case_when(
      essround >= 5 & edulvlb %in% c(5555, 7777, 8888, 9999) ~ NA_real_,
      TRUE ~ edulvlb
    ),

    # Father
    edulvlfa = case_when(
      essround < 5 & edulvlfa %in% c(55, 77, 88, 99) ~ NA_real_,
      TRUE ~ edulvlfa
    ),
    edulvlfb = case_when(
      essround >= 5 & edulvlfb %in% c(5555, 7777, 8888, 9999) ~ NA_real_,
      TRUE ~ edulvlfb
    ),

    # Mother
    edulvlma = case_when(
      essround < 5 & edulvlma %in% c(55, 77, 88, 99) ~ NA_real_,
      TRUE ~ edulvlma
    ),
    edulvlmb = case_when(
      essround >= 5 & edulvlmb %in% c(5555, 7777, 8888, 9999) ~ NA_real_,
      TRUE ~ edulvlmb
    ),

    # BA indicator: "BA" when edulvl*a == 5 (essround < 5) or edulvl*b > 600
    # (essround >= 5); NA when the relevant source is missing; otherwise
    # "No BA". The is.na() branches must come before the default.
    educ_level = case_when(
      essround < 5 & edulvla == 5 ~ "BA",
      essround >= 5 & edulvlb > 600 ~ "BA",
      essround < 5 & is.na(edulvla) ~ NA_character_,
      essround >= 5 & is.na(edulvlb) ~ NA_character_,
      TRUE ~ "No BA"
    ),

    educ_level_father = case_when(
      essround < 5 & edulvlfa == 5 ~ "BA",
      essround >= 5 & edulvlfb > 600 ~ "BA",
      essround < 5 & is.na(edulvlfa) ~ NA_character_,
      essround >= 5 & is.na(edulvlfb) ~ NA_character_,
      TRUE ~ "No BA"
    ),

    educ_level_mother = case_when(
      essround < 5 & edulvlma == 5 ~ "BA",
      essround >= 5 & edulvlmb > 600 ~ "BA",
      essround < 5 & is.na(edulvlma) ~ NA_character_,
      essround >= 5 & is.na(edulvlmb) ~ NA_character_,
      TRUE ~ "No BA"
    ),

    # eisced: 0 and anything above 7 become NA; 1-5 "No BA", 6 "BA", 7 "MA".
    eisced = case_when(
      eisced == 0 | eisced > 7 ~ NA_character_,
      eisced %in% 1:5 ~ "No BA",
      eisced == 6 ~ "BA",
      eisced == 7 ~ "MA",
      TRUE ~ as.character(eisced)
    )
  )

# Frequency of each educ_level category (NA rows are not counted by table()).
table(germany_data$educ_level)

## 
##    BA No BA 
##  8728 25239

# Regress trstlgl on the BA / No BA indicator and print the estimates.
model3 <- lm(formula = trstlgl ~ educ_level, data = germany_data)

coefficients <- stats::coef(model3)
print(coefficients)

##     (Intercept) educ_levelNo BA 
##        6.415330       -0.813761

# Install equatiomatic from GitHub only when it is not already available, so
# the script does not need network access (or the remotes package) on every
# run.
if (!requireNamespace("equatiomatic", quietly = TRUE)) {
  remotes::install_github("datalorax/equatiomatic")
}

## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation

# Render model3 as a LaTeX equation with the estimated coefficient values
# written in place of the symbols (use_coefs = TRUE).
equatiomatic::extract_eq(model3, use_coefs = TRUE)

\[ \operatorname{\widehat{trstlgl}} = 6.42 - 0.81(\operatorname{educ\_level}_{\operatorname{No\ BA}}) \]

Regression equation: \[ \operatorname{\widehat{trstlgl}} = 6.42 - 0.81(\operatorname{educ\_level}_{\operatorname{No\ BA}}) \]

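General form of the fitted equation: \[ \hat{y} = \hat{\beta}_0 + \hat{\beta}_1 x_1 \] where \(\hat{\beta}_0\) is the estimated intercept and \(\hat{\beta}_1\) is the estimated coefficient of \(x_1\). The error term \(\epsilon\) belongs to the model for the observed outcome, \(y = \beta_0 + \beta_1 x_1 + \epsilon\), not to the fitted value \(\hat{y}\).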

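For an observed respondent: \[ \operatorname{trstlgl} = 6.42 - 0.81(\operatorname{educ\_level}_{\operatorname{No\ BA}}) + e \] where \(e\) is the residual (the estimated error term).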

Interpretation:

The intercept is the predicted value of the outcome when the explanatory variable is zero or, for a categorical variable, at its reference level. Here the outcome is trust in the legal system in Germany (trstlgl, measured on a scale from 0 to 10) and the reference level of educ_level is “BA”. In other words, for individuals in the “BA” category (those who hold a BA degree), the predicted value of trstlgl is approximately 6.415330 (rounded to 6.42).

educ_levelNo BA (-0.813761): This coefficient is the difference in predicted trust in the legal system between individuals who have not attained a BA degree (“No BA” category) and those who have (“BA” category). Specifically, being in the “No BA” category is associated with a predicted trust value that is about 0.813761 (rounded to 0.81) lower than for the “BA” category. Therefore, the predicted value of trust in the legal system for people who do not have a BA degree is 6.42 - 0.81 = 5.61.

Homework 8 _ Lyra Dong

Lyra Dong

2023-11-15