Task 1

packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom", "remotes")

new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]

if(length(new_packages)) install.packages(new_packages)

lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'remotes' was built under R version 4.3.2
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[6]]
##  [1] "remotes"      "broom"        "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"
setwd("C:/Users/Erika/Desktop/SOC_202_YAY"
)

library(fst) 

ess <- read_fst("All-ESS-Data.fst")
belgium_data <- ess %>%
 filter(cntry == "BE") %>%
 mutate(
 wrkprty = case_when(
     wrkprty == 1 ~ "Yes",
     wrkprty == 2 ~ "No",
     wrkprty == 9 ~ NA_character_,
     TRUE ~ as.character(wrkprty)
 ),
 trust = recode(as.character(trstep), 
            '1' = "No trust", 
            '2' = "2", 
            '3' = "3", 
            '4' = "4", 
            '5' = "5",
            '7' = NA_character_,
            '8' = NA_character_,
            '9' = NA_character_),
)
table(belgium_data$wrkprty)
## 
##     7     8    No   Yes 
##     8     6 15354   738
table(belgium_data$trust)
## 
##        0       10        2        3        4        5        6       77 
##     1139      132      997     1419     1562     3850     2688       14 
##       88       99 No trust 
##      499        8      577
model1 <- lm(trstep ~ wrkprty, data = belgium_data)

coefficients <- coef(model1)

print(coefficients)
## (Intercept)    wrkprty8   wrkprtyNo  wrkprtyYes 
##    4.500000   13.833333    3.049108    2.088076

Task 2

bulgaria_data <- ess %>% 
 filter(cntry == "BG") %>% 
 mutate(
 stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
 brncntr = ifelse(brncntr %in% c(77, 88, 99), NA, brncntr),
 )
table(bulgaria_data$stfdem)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 2247 1589 1778 1969 1597 1742  640  400  206   86   84
table(bulgaria_data$brncntr)
## 
##     1     2     7 
## 13134   104     2
model1 <- lm(stfdem ~ brncntr, data = bulgaria_data)
 coefficients <- coef(model1)
 print(coefficients)
## (Intercept)     brncntr 
##   3.0571188  -0.1339063
tidy(model1)
## # A tibble: 2 × 5
##   term        estimate std.error statistic  p.value
##   <chr>          <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)    3.06      0.175    17.5   1.01e-67
## 2 brncntr       -0.134     0.172    -0.778 4.36e- 1
tidy(model1) |>
 knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 3.057 0.175 17.496 0.000
brncntr -0.134 0.172 -0.778 0.436
summary(model1)
## 
## Call:
## lm(formula = stfdem ~ brncntr, data = bulgaria_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9232 -1.9232  0.0768  2.0768  7.2107 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.0571     0.1747  17.496   <2e-16 ***
## brncntr      -0.1339     0.1721  -0.778    0.436    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.23 on 12336 degrees of freedom
##   (902 observations deleted due to missingness)
## Multiple R-squared:  4.91e-05,   Adjusted R-squared:  -3.196e-05 
## F-statistic: 0.6057 on 1 and 12336 DF,  p-value: 0.4364

Task 3

spain_data <- ess %>%
     filter(cntry == "ES") %>%
 mutate(trstplt = ifelse(trstplt %in% c(77, 88, 99), NA, trstplt), 
 )

unique(spain_data$trstplt)
##  [1]  1  2  0  3 NA  5  4  7  6  8 10  9
spain_data <- spain_data %>% filter(!is.na(trstplt))
spain_data <- spain_data %>%
    mutate(
 edulvla = case_when(
     essround < 5 & edulvla == 55 ~ NA_real_,
     TRUE ~ edulvla
 ),
 edulvlb = case_when(
     essround >= 5 & edulvlb == 5555 ~ NA_real_,
     TRUE ~ edulvlb
 ),
 edulvlfa = case_when(
     essround < 5 & edulvlfa == 55 ~ NA_real_,
     TRUE ~ edulvlfa
 ),
 edulvlfb = case_when(
     essround >= 5 & edulvlfb == 5555 ~ NA_real_,
     TRUE ~ edulvlfb
 ),
 edulvlma = case_when(
     essround < 5 & edulvlma == 55 ~ NA_real_,
     TRUE ~ edulvlma
 ),
 edulvlmb = case_when(
     essround >= 5 & edulvlmb == 5555 ~ NA_real_,
     TRUE ~ edulvlmb
 ),
 educ_level = case_when(
     essround < 5 & edulvla == 5 ~ "BA",
     essround >= 5 & edulvlb > 600 ~ "BA",
     TRUE ~ "No BA"
 ),
 educ_level_father = case_when(
     essround < 5 & edulvlfa == 5 ~ "BA",
     essround >= 5 & edulvlfb > 600 ~ "BA",
     TRUE ~ "No BA"
 ),
 educ_level_mother = case_when(
     essround < 5 & edulvlma == 5 ~ "BA",
     essround >= 5 & edulvlmb > 600 ~ "BA",
     TRUE ~ "No BA"
 ),
 eisced = case_when(
     eisced == 0 | eisced > 7 ~ as.character(NA),
 eisced %in% 1:5 ~ "No BA",           
 eisced == 6 ~ "BA",
 eisced == 7 ~ "MA",
 TRUE ~ as.character(eisced) 
 )
 )
table(spain_data$educ_level)
## 
##    BA No BA 
##  4165 14874
table(spain_data$eisced)
## 
##    BA    MA No BA 
##  1726  2328 14875
table(spain_data$educ_level_father)
## 
##    BA No BA 
##  2096 16943
table(spain_data$educ_level_mother)
## 
##    BA No BA 
##  1263 17776
spain_data <- spain_data %>%
     mutate(
 gndr = case_when(
     gndr == 1 ~ "Male",
     gndr == 2 ~ "Female",
     gndr == 9 ~ NA_character_,
     TRUE ~ as.character(gndr)
 ),
 rlgblg = ifelse(rlgblg %in% c(7, 8), NA, rlgblg),
 religion_binary = ifelse(rlgblg == 1, "Yes", "No"),
 minority = case_when(
     blgetmg %in% c(7, 8, 9) ~ NA_character_,
     blgetmg == 1 ~ "Ethnic Minority",
     blgetmg == 2 ~ "Not Ethnic Minority",
     TRUE ~ as.character(blgetmg)
 ),
 religious = case_when(
     rlgdgr %in% c(77, 88, 99) ~ NA_real_,
     TRUE ~ rlgdgr
 )
 )
table(spain_data$gndr)
## 
## Female   Male 
##   9645   9392
table(spain_data$religion_binary)
## 
##    No   Yes 
##  5162 11541
table(spain_data$minority)
## 
##     Ethnic Minority Not Ethnic Minority 
##                 459               16069
table(spain_data$religious)
## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 3438 1098 1522 1566 1246 3405 1911 1905 1503  551  774
spain_data <- spain_data %>%
     mutate(
 age = ifelse(agea == 999, NA, agea),
 cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
 gen = case_when(
     yrbrn %in% 1900:1945 ~ "1",
     yrbrn %in% 1946:1964 ~ "2",
     yrbrn %in% 1965:1979 ~ "3",
     yrbrn %in% 1980:1996 ~ "4",
     TRUE ~ as.character(cohort)
 ),
 gen = factor(gen,
              levels = c("1", "2", "3", "4"),
              labels = c("Interwar", "Baby Boomers", "Gen X", "Millennials"))
 )
table(spain_data$age)
## 
##  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33 
##   1  57 211 250 262 246 260 257 274 254 280 262 257 281 269 296 305 304 328 326 
##  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53 
## 343 345 367 348 336 353 340 374 349 369 353 368 383 354 335 337 316 354 333 327 
##  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73 
## 328 306 292 271 286 272 292 239 271 260 236 259 270 222 225 208 233 220 195 185 
##  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93 
## 202 203 179 177 114 146 145 123 115 109  75  64  69  47  40  36  63  16   9  14 
##  94  95  96  97  99 101 102 103 
##   4   4   1   2   1   1   1   1
table(spain_data$cohort)
## 
## 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 
##  129  143  173  137  178  169  172  157  139  149  209  156  186  199  197  211 
## 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 
##  219  225  244  242  256  237  265  247  263  260  285  305  278  333  363  358 
## 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 
##  363  335  358  373  384  405  394  351  378  365  386  380  352  365  359  393 
## 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 
##  357  345  358  319  308  271  268  239  286  230  227  237  186  160  183  162 
## 1994 1995 1996 1997 1998 1999 2000 
##  133  146  131  125  105   85   65
table(spain_data$gen)
## 
##     Interwar Baby Boomers        Gen X  Millennials 
##         3470         5436         5587         3844
model1 <- lm(trstplt ~ educ_level, data = spain_data)

coefficients <- coef(model1)

print(coefficients)
##     (Intercept) educ_levelNo BA 
##       2.8559424      -0.1416086
tidy(model1)
## # A tibble: 2 × 5
##   term            estimate std.error statistic  p.value
##   <chr>              <dbl>     <dbl>     <dbl>    <dbl>
## 1 (Intercept)        2.86     0.0363     78.7  0       
## 2 educ_levelNo BA   -0.142    0.0410     -3.45 0.000560
tidy(model1) |>
  knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 2.856 0.036 78.735 0.000
educ_levelNo BA -0.142 0.041 -3.451 0.001
summary(model1)
## 
## Call:
## lm(formula = trstplt ~ educ_level, data = spain_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8559 -2.7143  0.1441  2.1441  7.2857 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.85594    0.03627  78.735  < 2e-16 ***
## educ_levelNo BA -0.14161    0.04104  -3.451  0.00056 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.341 on 19037 degrees of freedom
## Multiple R-squared:  0.0006251,  Adjusted R-squared:  0.0005726 
## F-statistic: 11.91 on 1 and 19037 DF,  p-value: 0.0005605
remotes::install_github("datalorax/equatiomatic")
## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation
equatiomatic::extract_eq(model1, use_coefs = TRUE)

\[ \operatorname{\widehat{trstplt}} = 2.86 - 0.14(\operatorname{educ\_level}_{\operatorname{No\ BA}}) \]