Task 1
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom", "remotes")
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'remotes' was built under R version 4.3.2
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "broom" "modelsummary" "fst" "infer" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[6]]
## [1] "remotes" "broom" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
setwd("C:/Users/Erika/Desktop/SOC_202_YAY"
)
library(fst)
ess <- read_fst("All-ESS-Data.fst")
belgium_data <- ess %>%
filter(cntry == "BE") %>%
mutate(
wrkprty = case_when(
wrkprty == 1 ~ "Yes",
wrkprty == 2 ~ "No",
wrkprty == 9 ~ NA_character_,
TRUE ~ as.character(wrkprty)
),
trust = recode(as.character(trstep),
'1' = "No trust",
'2' = "2",
'3' = "3",
'4' = "4",
'5' = "5",
'7' = NA_character_,
'8' = NA_character_,
'9' = NA_character_),
)
table(belgium_data$wrkprty)
##
## 7 8 No Yes
## 8 6 15354 738
table(belgium_data$trust)
##
## 0 10 2 3 4 5 6 77
## 1139 132 997 1419 1562 3850 2688 14
## 88 99 No trust
## 499 8 577
model1 <- lm(trstep ~ wrkprty, data = belgium_data)
coefficients <- coef(model1)
print(coefficients)
## (Intercept) wrkprty8 wrkprtyNo wrkprtyYes
## 4.500000 13.833333 3.049108 2.088076
Task 2
bulgaria_data <- ess %>%
filter(cntry == "BG") %>%
mutate(
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
brncntr = ifelse(brncntr %in% c(77, 88, 99), NA, brncntr),
)
table(bulgaria_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 2247 1589 1778 1969 1597 1742 640 400 206 86 84
table(bulgaria_data$brncntr)
##
## 1 2 7
## 13134 104 2
model1 <- lm(stfdem ~ brncntr, data = bulgaria_data)
coefficients <- coef(model1)
print(coefficients)
## (Intercept) brncntr
## 3.0571188 -0.1339063
tidy(model1)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 3.06 0.175 17.5 1.01e-67
## 2 brncntr -0.134 0.172 -0.778 4.36e- 1
tidy(model1) |>
knitr::kable(digits = 3)
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
3.057
|
0.175
|
17.496
|
0.000
|
|
brncntr
|
-0.134
|
0.172
|
-0.778
|
0.436
|
summary(model1)
##
## Call:
## lm(formula = stfdem ~ brncntr, data = bulgaria_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9232 -1.9232 0.0768 2.0768 7.2107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0571 0.1747 17.496 <2e-16 ***
## brncntr -0.1339 0.1721 -0.778 0.436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.23 on 12336 degrees of freedom
## (902 observations deleted due to missingness)
## Multiple R-squared: 4.91e-05, Adjusted R-squared: -3.196e-05
## F-statistic: 0.6057 on 1 and 12336 DF, p-value: 0.4364
Task 3
spain_data <- ess %>%
filter(cntry == "ES") %>%
mutate(trstplt = ifelse(trstplt %in% c(77, 88, 99), NA, trstplt),
)
unique(spain_data$trstplt)
## [1] 1 2 0 3 NA 5 4 7 6 8 10 9
spain_data <- spain_data %>% filter(!is.na(trstplt))
spain_data <- spain_data %>%
mutate(
edulvla = case_when(
essround < 5 & edulvla == 55 ~ NA_real_,
TRUE ~ edulvla
),
edulvlb = case_when(
essround >= 5 & edulvlb == 5555 ~ NA_real_,
TRUE ~ edulvlb
),
edulvlfa = case_when(
essround < 5 & edulvlfa == 55 ~ NA_real_,
TRUE ~ edulvlfa
),
edulvlfb = case_when(
essround >= 5 & edulvlfb == 5555 ~ NA_real_,
TRUE ~ edulvlfb
),
edulvlma = case_when(
essround < 5 & edulvlma == 55 ~ NA_real_,
TRUE ~ edulvlma
),
edulvlmb = case_when(
essround >= 5 & edulvlmb == 5555 ~ NA_real_,
TRUE ~ edulvlmb
),
educ_level = case_when(
essround < 5 & edulvla == 5 ~ "BA",
essround >= 5 & edulvlb > 600 ~ "BA",
TRUE ~ "No BA"
),
educ_level_father = case_when(
essround < 5 & edulvlfa == 5 ~ "BA",
essround >= 5 & edulvlfb > 600 ~ "BA",
TRUE ~ "No BA"
),
educ_level_mother = case_when(
essround < 5 & edulvlma == 5 ~ "BA",
essround >= 5 & edulvlmb > 600 ~ "BA",
TRUE ~ "No BA"
),
eisced = case_when(
eisced == 0 | eisced > 7 ~ as.character(NA),
eisced %in% 1:5 ~ "No BA",
eisced == 6 ~ "BA",
eisced == 7 ~ "MA",
TRUE ~ as.character(eisced)
)
)
table(spain_data$educ_level)
##
## BA No BA
## 4165 14874
table(spain_data$eisced)
##
## BA MA No BA
## 1726 2328 14875
table(spain_data$educ_level_father)
##
## BA No BA
## 2096 16943
table(spain_data$educ_level_mother)
##
## BA No BA
## 1263 17776
spain_data <- spain_data %>%
mutate(
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ as.character(gndr)
),
rlgblg = ifelse(rlgblg %in% c(7, 8), NA, rlgblg),
religion_binary = ifelse(rlgblg == 1, "Yes", "No"),
minority = case_when(
blgetmg %in% c(7, 8, 9) ~ NA_character_,
blgetmg == 1 ~ "Ethnic Minority",
blgetmg == 2 ~ "Not Ethnic Minority",
TRUE ~ as.character(blgetmg)
),
religious = case_when(
rlgdgr %in% c(77, 88, 99) ~ NA_real_,
TRUE ~ rlgdgr
)
)
table(spain_data$gndr)
##
## Female Male
## 9645 9392
table(spain_data$religion_binary)
##
## No Yes
## 5162 11541
table(spain_data$minority)
##
## Ethnic Minority Not Ethnic Minority
## 459 16069
table(spain_data$religious)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 3438 1098 1522 1566 1246 3405 1911 1905 1503 551 774
spain_data <- spain_data %>%
mutate(
age = ifelse(agea == 999, NA, agea),
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
gen = case_when(
yrbrn %in% 1900:1945 ~ "1",
yrbrn %in% 1946:1964 ~ "2",
yrbrn %in% 1965:1979 ~ "3",
yrbrn %in% 1980:1996 ~ "4",
TRUE ~ as.character(cohort)
),
gen = factor(gen,
levels = c("1", "2", "3", "4"),
labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
)
table(spain_data$age)
##
## 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 1 57 211 250 262 246 260 257 274 254 280 262 257 281 269 296 305 304 328 326
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## 343 345 367 348 336 353 340 374 349 369 353 368 383 354 335 337 316 354 333 327
## 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
## 328 306 292 271 286 272 292 239 271 260 236 259 270 222 225 208 233 220 195 185
## 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 202 203 179 177 114 146 145 123 115 109 75 64 69 47 40 36 63 16 9 14
## 94 95 96 97 99 101 102 103
## 4 4 1 2 1 1 1 1
table(spain_data$cohort)
##
## 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945
## 129 143 173 137 178 169 172 157 139 149 209 156 186 199 197 211
## 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961
## 219 225 244 242 256 237 265 247 263 260 285 305 278 333 363 358
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977
## 363 335 358 373 384 405 394 351 378 365 386 380 352 365 359 393
## 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993
## 357 345 358 319 308 271 268 239 286 230 227 237 186 160 183 162
## 1994 1995 1996 1997 1998 1999 2000
## 133 146 131 125 105 85 65
table(spain_data$gen)
##
## Interwar Baby Boomers Gen X Millennials
## 3470 5436 5587 3844
model1 <- lm(trstplt ~ educ_level, data = spain_data)
coefficients <- coef(model1)
print(coefficients)
## (Intercept) educ_levelNo BA
## 2.8559424 -0.1416086
tidy(model1)
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 2.86 0.0363 78.7 0
## 2 educ_levelNo BA -0.142 0.0410 -3.45 0.000560
tidy(model1) |>
knitr::kable(digits = 3)
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
2.856
|
0.036
|
78.735
|
0.000
|
|
educ_levelNo BA
|
-0.142
|
0.041
|
-3.451
|
0.001
|
summary(model1)
##
## Call:
## lm(formula = trstplt ~ educ_level, data = spain_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8559 -2.7143 0.1441 2.1441 7.2857
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.85594 0.03627 78.735 < 2e-16 ***
## educ_levelNo BA -0.14161 0.04104 -3.451 0.00056 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.341 on 19037 degrees of freedom
## Multiple R-squared: 0.0006251, Adjusted R-squared: 0.0005726
## F-statistic: 11.91 on 1 and 19037 DF, p-value: 0.0005605
remotes::install_github("datalorax/equatiomatic")
## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
## Use `force = TRUE` to force installation
equatiomatic::extract_eq(model1, use_coefs = TRUE)
\[
\operatorname{\widehat{trstplt}} = 2.86 -
0.14(\operatorname{educ\_level}_{\operatorname{No\ BA}})
\]