# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "broom" "modelsummary" "fst" "infer" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
ess <- read_fst("All-ESS-Data.fst")
Extract the coefficients and intercept using trust in the European Parliament (as the outcome) and worked in a political party (wrkprty, as the explanatory), for the country of Belgium. Write the regression equation and interpret.
Belgium_data <- ess %>%
filter(cntry == "BE") %>%
mutate(trstep = ifelse(trstep %in% c(77, 88, 99), NA, trstep),
)
table(Belgium_data$trstep)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1139 577 997 1419 1562 3850 2688 2759 1493 314 132
unique(Belgium_data$trstep)
## [1] 0 7 8 5 6 NA 4 9 3 1 10 2
Belgium_data <- Belgium_data %>% filter(!is.na(trstep))
Belgium_data <- Belgium_data %>%
mutate(wrkprty = case_when(
wrkprty == 1 ~ "Yes",
wrkprty == 2 ~ "No",
wrkprty %in% c(7, 8, 9) ~ NA_character_,
TRUE ~ as.character(wrkprty)
))
table(Belgium_data$wrkprty)
##
## No Yes
## 14871 727
unique(Belgium_data$wrkprty)
## [1] "No" "Yes" NA
Belgium_data <- Belgium_data %>% filter(!is.na(wrkprty))
model1 <- lm(trstep ~ wrkprty, data = Belgium_data)
coefficients <- coef(model1)
print(coefficients)
## (Intercept) wrkprtyYes
## 4.9405554 0.4308338
tidy(model1)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 4.94 0.0187 264. 0
## 2 wrkprtyYes 0.431 0.0867 4.97 0.000000683
tidy(model1) |>
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 4.941 | 0.019 | 263.892 | 0 |
| wrkprtyYes | 0.431 | 0.087 | 4.968 | 0 |
Explanation: (intercept) : 4.94 The intercept represents the predicted value of the outcome variable when the explanatory variable is zero. As can be seen from the graph, the intercept of 4.94 represents the expected value of trust in the European Parliament among people who have not worked for a political party in Belgium. At the same time, according to the realistic value of wrkprtyyes in the chart, it can be seen that the correlation value with the intercept of 4.94 is 0.43, that is, the difference between the two groups of people who have worked in political parties and those who have not worked is 0.43. So according to the graph, if a person does not work for a political party then his trust is 4.94, if he has worked for a political party then his trust is 5.37.
Produce a model summary output using either tidy() or summary() (your choice). Use stfdem (satisfaction with democracy) as the outcome and born in country as a predictor (which we recoded/renamed as ‘native’ in the tutorial), for the country of Bulgaria. What is the expected average satisfaction with democracy for respondents that were not born in Bulgaria?
Bulgaria_data <- ess %>%
filter(cntry == "BG") %>%
mutate(stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
)
table(Bulgaria_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 2247 1589 1778 1969 1597 1742 640 400 206 86 84
unique(Bulgaria_data$stfdem)
## [1] 0 1 NA 2 3 5 4 6 8 7 10 9
Bulgaria_data <- Bulgaria_data %>% filter(!is.na(stfdem))
Bulgaria_data <- Bulgaria_data %>%
mutate(
native = recode(brncntr,
`1` = "Yes",
`2` = "No",
`7` = NA_character_,
`8` = NA_character_,
`9` = NA_character_)
)
table(Bulgaria_data$native)
##
## No Yes
## 97 12239
unique(Bulgaria_data$native)
## [1] "Yes" "No" NA
Bulgaria_data <- Bulgaria_data %>% filter(!is.na(native))
model2 <- lm(stfdem ~ native, data = Bulgaria_data)
coefficients <- coef(model2)
print(coefficients)
## (Intercept) nativeYes
## 2.8041237 0.1189909
summary(model2)
##
## Call:
## lm(formula = stfdem ~ native, data = Bulgaria_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9231 -1.9231 0.0769 2.0769 7.1959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.8041 0.2265 12.382 <2e-16 ***
## nativeYes 0.1190 0.2274 0.523 0.601
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.231 on 12334 degrees of freedom
## Multiple R-squared: 2.22e-05, Adjusted R-squared: -5.887e-05
## F-statistic: 0.2739 on 1 and 12334 DF, p-value: 0.6007
As can be seen from the table, respondents who were not born in Bulgaria have an expected average satisfaction with democracy of 2.80, while those who were born in Bulgaria have a value of 2.91, and the stfdem value is intercept plus native (yes).
Based on your own outcome and country of interest, as well as one of the predictors recoded in the tutorial, produce an equatiomatic regression equation. Interpret the regression equation.
Austria_data <- ess %>%
filter(cntry == "AT") %>%
mutate(stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
)
table(Austria_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 520 285 682 1188 1265 2414 1865 2393 2448 1046 657
unique(Austria_data$stfdem)
## [1] 8 5 7 0 9 4 NA 3 2 6 10 1
Austria_data <- Austria_data %>% filter(!is.na(stfdem))
Austria_data <- Bulgaria_data %>%
mutate(gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)))
table(Austria_data$gndr)
##
## Female Male
## 6819 5517
unique(Austria_data$gndr)
## [1] "Male" "Female"
Austria_data <- Austria_data %>% filter(!is.na(gndr))
model3 <- lm(stfdem ~ gndr, data = Austria_data)
coefficients <- coef(model3)
print(coefficients)
## (Intercept) gndrMale
## 2.88898665 0.07421799
summary(model3)
##
## Call:
## lm(formula = stfdem ~ gndr, data = Austria_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9632 -1.8890 0.0368 2.0368 7.1110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.88899 0.02701 106.968 <2e-16 ***
## gndrMale 0.07422 0.04039 1.838 0.0661 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.23 on 12334 degrees of freedom
## Multiple R-squared: 0.0002737, Adjusted R-squared: 0.0001927
## F-statistic: 3.377 on 1 and 12334 DF, p-value: 0.06612
remotes::install_github("datalorax/equatiomatic")
## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
## Use `force = TRUE` to force installation
equatiomatic::extract_eq(model3, use_coefs = TRUE)
\[ \operatorname{\widehat{stfdem}} = 2.89 + 0.07(\operatorname{gndr}_{\operatorname{Male}}) \] As can be seen from the table, women’s satisfaction with the way Australian democracy works is about 2.88, while the difference between men’s satisfaction and women’s satisfaction is 0.074, so men’s satisfaction with Australian political parties is about 2.95. The expression of this equation means that Australians’ satisfaction with the way the country’s political parties are run is equal to women’s satisfaction intercept plus gndrmale satisfaction.