# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "broom" "modelsummary" "fst" "infer" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
ess <- read_fst("All-ESS-Data.fst")
Task 1
belgium_data <- ess %>%
filter(cntry == "BE") %>%
mutate(
trstep = ifelse(trstep %in% c(77, 88, 99), NA, trstep),
wrkprty = case_when(
wrkprty == 1 ~ "Yes",
wrkprty == 2 ~ "No",
wrkprty %in% c(7,8,9) ~ NA_character_,
TRUE ~ as.character(wrkprty)
)
)
belgium_data <- belgium_data %>% filter(!is.na(trstep))
belgium_data <- belgium_data %>% filter(!is.na(wrkprty))
table(belgium_data$trstep)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1034 512 917 1303 1442 3589 2498 2528 1373 280 122
table(belgium_data$wrkprty)
##
## No Yes
## 14871 727
model1 <- lm(trstep ~ wrkprty, data = belgium_data)
coefficients <- coef(model1)
print(coefficients)
## (Intercept) wrkprtyYes
## 4.9405554 0.4308338
(Intercept): 4.940 wrkprty Yes: 0.430
tidy(model1)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 4.94 0.0187 264. 0
## 2 wrkprtyYes 0.431 0.0867 4.97 0.000000683
tidy(model1) |>
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 4.941 | 0.019 | 263.892 | 0 |
| wrkprtyYes | 0.431 | 0.087 | 4.968 | 0 |
summary(model1)
##
## Call:
## lm(formula = trstep ~ wrkprty, data = belgium_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3714 -0.9406 0.0594 2.0594 5.0594
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.94056 0.01872 263.892 < 2e-16 ***
## wrkprtyYes 0.43083 0.08672 4.968 6.83e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.283 on 15596 degrees of freedom
## Multiple R-squared: 0.00158, Adjusted R-squared: 0.001516
## F-statistic: 24.68 on 1 and 15596 DF, p-value: 6.832e-07
remotes::install_github("datalorax/equatiomatic")
## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
## Use `force = TRUE` to force installation
equatiomatic::extract_eq(model1, use_coefs = TRUE)
\[ \operatorname{\widehat{trstep}} = 4.94 + 0.43(\operatorname{wrkprty}_{\operatorname{Yes}}) \]
If a person hasn’t worked in a political party or action group in the last 12 months, their predicted average trust in the European Parliament is 4.94. If a person has worked in a political party or action group in the last 12 months, their predicted average trust in politicians is 5.37.
Task 2
bulgaria_data <- ess %>%
filter(cntry == "BG") %>%
mutate(
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
native = recode(brncntr,
`1` = "Yes",
`2` = "No",
`7` = NA_character_,
`8` = NA_character_,
`9` = NA_character_)
)
bulgaria_data <- bulgaria_data %>% filter(!is.na(stfdem))
bulgaria_data <- bulgaria_data %>% filter(!is.na(native))
table(bulgaria_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 2247 1588 1778 1968 1597 1742 640 400 206 86 84
table(bulgaria_data$native)
##
## No Yes
## 97 12239
model2 <- lm(stfdem ~ native, data = bulgaria_data)
coefficients <- coef(model2)
print(coefficients)
## (Intercept) nativeYes
## 2.8041237 0.1189909
tidy(model2)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 2.80 0.226 12.4 5.30e-35
## 2 nativeYes 0.119 0.227 0.523 6.01e- 1
tidy(model2) |>
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 2.804 | 0.226 | 12.382 | 0.000 |
| nativeYes | 0.119 | 0.227 | 0.523 | 0.601 |
summary(model2)
##
## Call:
## lm(formula = stfdem ~ native, data = bulgaria_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9231 -1.9231 0.0769 2.0769 7.1959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.8041 0.2265 12.382 <2e-16 ***
## nativeYes 0.1190 0.2274 0.523 0.601
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.231 on 12334 degrees of freedom
## Multiple R-squared: 2.22e-05, Adjusted R-squared: -5.887e-05
## F-statistic: 0.2739 on 1 and 12334 DF, p-value: 0.6007
The expected average satisfaction with democracy for respondents that were not born in Bulgaria is 2.804.
Task 3
finland_data <- ess %>%
filter(cntry == "FI") %>%
mutate(
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
edulvla = case_when(
essround < 5 & edulvla == 55 ~ NA_real_,
TRUE ~ edulvla
),
edulvlb = case_when(
essround >= 5 & edulvlb == 5555 ~ NA_real_,
TRUE ~ edulvlb
),
educ_level = case_when(
essround < 5 & edulvla == 5 ~ "BA",
essround >= 5 & edulvlb > 600 ~ "BA",
TRUE ~ "No BA"
)
)
finland_data <- finland_data %>% filter(!is.na(happy))
finland_data <- finland_data %>% filter(!is.na(educ_level))
table(finland_data$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 24 26 80 166 231 628 790 2646 6855 6128 1928
table(finland_data$educ_level)
##
## BA No BA
## 5452 14050
model3 <- lm(happy ~ educ_level, data = finland_data)
coefficients <- coef(model3)
print(coefficients)
## (Intercept) educ_levelNo BA
## 8.2435803 -0.2479931
tidy(model3)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 8.24 0.0190 433. 0
## 2 educ_levelNo BA -0.248 0.0224 -11.1 2.39e-28
tidy(model3) |>
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 8.244 | 0.019 | 433.135 | 0 |
| educ_levelNo BA | -0.248 | 0.022 | -11.060 | 0 |
summary(model3)
##
## Call:
## lm(formula = happy ~ educ_level, data = finland_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.2436 -0.2436 0.0044 1.0044 2.0044
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.24358 0.01903 433.13 <2e-16 ***
## educ_levelNo BA -0.24799 0.02242 -11.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.405 on 19500 degrees of freedom
## Multiple R-squared: 0.006234, Adjusted R-squared: 0.006183
## F-statistic: 122.3 on 1 and 19500 DF, p-value: < 2.2e-16
remotes::install_github("datalorax/equatiomatic")
## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
## Use `force = TRUE` to force installation
equatiomatic::extract_eq(model3, use_coefs = TRUE)
\[ \operatorname{\widehat{happy}} = 8.24 - 0.25(\operatorname{educ\_level}_{\operatorname{No\ BA}}) \]
If a person has a BA, their predicted average happiness is 8.24. If a person doesn’t have a BA, their predicted average average happiness is 7.99.