HW8 SOC202

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"

ess <- read_fst("All-ESS-Data.fst")

Task 1

belgium_data <- ess %>%
  filter(cntry == "BE") %>%
  mutate(
    trstep = ifelse(trstep %in% c(77, 88, 99), NA, trstep), 
    wrkprty = case_when(
      wrkprty == 1 ~ "Yes",
      wrkprty == 2 ~ "No",
      wrkprty %in% c(7,8,9) ~ NA_character_,
      TRUE ~ as.character(wrkprty)
    )
  )

belgium_data <- belgium_data %>% filter(!is.na(trstep))
belgium_data <- belgium_data %>% filter(!is.na(wrkprty))
table(belgium_data$trstep)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 1034  512  917 1303 1442 3589 2498 2528 1373  280  122

table(belgium_data$wrkprty)

## 
##    No   Yes 
## 14871   727

model1 <- lm(trstep ~ wrkprty, data = belgium_data)

coefficients <- coef(model1)
print(coefficients)

## (Intercept)  wrkprtyYes 
##   4.9405554   0.4308338

(Intercept): 4.940 wrkprty Yes: 0.430

tidy(model1)

## # A tibble: 2 × 5
##   term        estimate std.error statistic     p.value
##   <chr>          <dbl>     <dbl>     <dbl>       <dbl>
## 1 (Intercept)    4.94     0.0187    264.   0          
## 2 wrkprtyYes     0.431    0.0867      4.97 0.000000683

tidy(model1) |>
knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value
(Intercept)	4.941	0.019	263.892	0
wrkprtyYes	0.431	0.087	4.968	0

summary(model1)

## 
## Call:
## lm(formula = trstep ~ wrkprty, data = belgium_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.3714 -0.9406  0.0594  2.0594  5.0594 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.94056    0.01872 263.892  < 2e-16 ***
## wrkprtyYes   0.43083    0.08672   4.968 6.83e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.283 on 15596 degrees of freedom
## Multiple R-squared:  0.00158,    Adjusted R-squared:  0.001516 
## F-statistic: 24.68 on 1 and 15596 DF,  p-value: 6.832e-07

remotes::install_github("datalorax/equatiomatic")

## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation

equatiomatic::extract_eq(model1, use_coefs = TRUE)

\[ \operatorname{\widehat{trstep}} = 4.94 + 0.43(\operatorname{wrkprty}_{\operatorname{Yes}}) \]

If a person hasn’t worked in a political party or action group in the last 12 months, their predicted average trust in the European Parliament is 4.94. If a person has worked in a political party or action group in the last 12 months, their predicted average trust in politicians is 5.37.

Task 2

bulgaria_data <- ess %>%
  filter(cntry == "BG") %>%
  mutate(
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), 
    native = recode(brncntr,
                             `1` = "Yes",
                             `2` = "No",
                             `7` = NA_character_,
                             `8` = NA_character_,
                             `9` = NA_character_)
  )

bulgaria_data <- bulgaria_data %>% filter(!is.na(stfdem))
bulgaria_data <- bulgaria_data %>% filter(!is.na(native))
table(bulgaria_data$stfdem)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 2247 1588 1778 1968 1597 1742  640  400  206   86   84

table(bulgaria_data$native)

## 
##    No   Yes 
##    97 12239

model2 <- lm(stfdem ~ native, data = bulgaria_data)

coefficients <- coef(model2)
print(coefficients)

## (Intercept)   nativeYes 
##   2.8041237   0.1189909

tidy(model2)

## # A tibble: 2 × 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)    2.80      0.226    12.4   5.30e-35
## 2 nativeYes      0.119     0.227     0.523 6.01e- 1

tidy(model2) |>
knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value
(Intercept)	2.804	0.226	12.382	0.000
nativeYes	0.119	0.227	0.523	0.601

summary(model2)

## 
## Call:
## lm(formula = stfdem ~ native, data = bulgaria_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9231 -1.9231  0.0769  2.0769  7.1959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.8041     0.2265  12.382   <2e-16 ***
## nativeYes     0.1190     0.2274   0.523    0.601    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.231 on 12334 degrees of freedom
## Multiple R-squared:  2.22e-05,   Adjusted R-squared:  -5.887e-05 
## F-statistic: 0.2739 on 1 and 12334 DF,  p-value: 0.6007

The expected average satisfaction with democracy for respondents that were not born in Bulgaria is 2.804.

Task 3

finland_data <- ess %>%
  filter(cntry == "FI") %>%
  mutate(
    happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
    edulvla = case_when(
      essround < 5 & edulvla == 55 ~ NA_real_,
      TRUE ~ edulvla
    ),
    edulvlb = case_when(
      essround >= 5 & edulvlb == 5555 ~ NA_real_,
      TRUE ~ edulvlb
    ),
    educ_level = case_when(
      essround < 5 & edulvla == 5 ~ "BA",
      essround >= 5 & edulvlb > 600 ~ "BA",
      TRUE ~ "No BA"
    )
  )

finland_data <- finland_data %>% filter(!is.na(happy))
finland_data <- finland_data %>% filter(!is.na(educ_level))

table(finland_data$happy)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
##   24   26   80  166  231  628  790 2646 6855 6128 1928

table(finland_data$educ_level)

## 
##    BA No BA 
##  5452 14050

model3 <- lm(happy ~ educ_level, data = finland_data)

coefficients <- coef(model3)
print(coefficients)

##     (Intercept) educ_levelNo BA 
##       8.2435803      -0.2479931

tidy(model3)

## # A tibble: 2 × 5
##   term            estimate std.error statistic  p.value
##   <chr>              <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)        8.24     0.0190     433.  0       
## 2 educ_levelNo BA   -0.248    0.0224     -11.1 2.39e-28

tidy(model3) |>
knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value
(Intercept)	8.244	0.019	433.135	0
educ_levelNo BA	-0.248	0.022	-11.060	0

summary(model3)

## 
## Call:
## lm(formula = happy ~ educ_level, data = finland_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2436 -0.2436  0.0044  1.0044  2.0044 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      8.24358    0.01903  433.13   <2e-16 ***
## educ_levelNo BA -0.24799    0.02242  -11.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.405 on 19500 degrees of freedom
## Multiple R-squared:  0.006234,   Adjusted R-squared:  0.006183 
## F-statistic: 122.3 on 1 and 19500 DF,  p-value: < 2.2e-16

remotes::install_github("datalorax/equatiomatic")

## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation

equatiomatic::extract_eq(model3, use_coefs = TRUE)

\[ \operatorname{\widehat{happy}} = 8.24 - 0.25(\operatorname{educ\_level}_{\operatorname{No\ BA}}) \]

If a person has a BA, their predicted average happiness is 8.24. If a person doesn’t have a BA, their predicted average average happiness is 7.99.

HW8 SOC202

Tim Liu

2023-11-15