# Packages attached by this analysis
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom", "remotes") # add any you need here
# Install whichever of them are not yet present in the local library
new_packages <- setdiff(packages, installed.packages()[, "Package"])
if (length(new_packages) > 0) {
  install.packages(new_packages)
}
# Attach each package by name; character.only = TRUE lets library() take strings
lapply(packages, \(pkg) library(pkg, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'remotes' was built under R version 4.3.2
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "broom" "modelsummary" "fst" "infer" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[6]]
## [1] "remotes" "broom" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
# Load ESS
# The data folder is held in a variable and joined onto the file name, so the
# file is read from the same location without setwd() changing the working
# directory for the rest of the session.
ess_dir <- "C:/Users/2expl/Desktop/UToronto/Year 2/SOC202"
ess <- read_fst(file.path(ess_dir, "All-ESS-Data.fst"))
# Subsetting and filtering data: Belgian respondents, keeping only the two
# model variables (wrkprty, trstep) and dropping rows where either is missing.
# NOTE: printed output further down that was produced before this recode was
# corrected must be regenerated by re-running the document.
belgium_data <- ess %>%
  filter(cntry == "BE") %>%
  mutate(
    # trstep is a 0-10 item, so 7, 8 and 9 are real answers and are kept;
    # only 77/88/99 are set to missing. The outcome stays numeric so lm()
    # does not depend on coercing a character column.
    trstep = if_else(
      trstep %in% c(77, 88, 99),
      NA_real_,
      as.numeric(trstep)
    ),
    wrkprty = case_when(
      wrkprty == 1 ~ "Yes", # Recode 1 to "Yes"
      wrkprty == 2 ~ "No", # Recode 2 to "No"
      wrkprty %in% c(7, 8, 9) ~ NA_character_,
      TRUE ~ as.character(wrkprty)
    )
  ) %>%
  select(wrkprty, trstep) %>%
  # Remove rows with a missing outcome or predictor
  filter(!is.na(trstep), !is.na(wrkprty))
# Fit a linear regression of trstep on wrkprty for the Belgian subset
model_BE <- lm(formula = trstep ~ wrkprty, data = belgium_data)
# Extract the fitted coefficients and display them
coefficientsBE <- coef(object = model_BE)
print(coefficientsBE)
## (Intercept) wrkprtyYes
## 4.0336688 0.2538055
From the model, we can determine that when our explanatory variable (wrkprty) is at its reference category (wrkprty = “No”), our outcome variable (trstep) has a predicted value of approximately 4.03. When wrkprty = “Yes”, the predicted value is higher by about 0.25. Therefore, people who have worked for a political organization over the last 12 months report trust in the European Parliament that is, on average, 0.25 points higher than those who haven’t.
Reference category (wrkprty = “No”): trstep = 4.03. Non-reference category (wrkprty = “Yes”): trstep =
# Non-reference category (wrkprty = "Yes"), trstep =
# intercept + wrkprtyYes coefficient, read from the fitted coefficients so the
# result cannot drift from the model (hand-copied numbers can go stale).
coefficientsBE[["(Intercept)"]] + coefficientsBE[["wrkprtyYes"]]
## [1] 4.287474
# Filtering selected data: Bulgarian respondents, keeping only the two model
# variables (native, stfdem) and dropping rows where either is missing.
# NOTE: printed output further down that was produced before this recode was
# corrected must be regenerated by re-running the document.
bulg_data <- ess %>%
  filter(cntry == "BG") %>%
  mutate(
    # stfdem is a 0-10 item, so 7, 8 and 9 are real answers and are kept;
    # only 77/88/99 are set to missing. The outcome stays numeric for lm().
    stfdem = if_else(
      stfdem %in% c(77, 88, 99),
      NA_real_,
      as.numeric(stfdem)
    ),
    # Recoding if born in the country: 1 = "Yes", 2 = "No"; every other code
    # (including 7/8/9) falls through to NA
    native = case_when(
      brncntr == 1 ~ "Yes",
      brncntr == 2 ~ "No",
      TRUE ~ NA_character_
    )
  ) %>%
  # The former `geo` recode of domicil was discarded by this select() and
  # never used, so it is no longer computed.
  select(native, stfdem) %>%
  # Removing NA
  filter(!is.na(native), !is.na(stfdem))
# Modelling: linear regression of stfdem on native, then the full summary
bulg_model1 <- lm(formula = stfdem ~ native, data = bulg_data)
summary(object = bulg_model1)
##
## Call:
## lm(formula = stfdem ~ native, data = bulg_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6486 -1.6486 0.3514 1.3514 7.5055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4945 0.2069 12.058 <2e-16 ***
## nativeYes 0.1541 0.2077 0.742 0.458
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.974 on 11642 degrees of freedom
## Multiple R-squared: 4.726e-05, Adjusted R-squared: -3.863e-05
## F-statistic: 0.5503 on 1 and 11642 DF, p-value: 0.4582
From the output of the summary() function, we can see that the predicted value of stfdem when native = “No” is 2.4945 (the same as the intercept value).
# Filtering selected data: GB respondents, keeping only the two model
# variables (gndr, trstlgl) and dropping rows where either is missing.
GB_data <- ess %>%
  filter(cntry == "GB") %>%
  mutate(
    # Codes 77/88/99 are set to missing in a single pass (the former second
    # ifelse() pass could never match anything). The outcome is kept numeric
    # rather than character so lm() needs no implicit coercion.
    trstlgl = if_else(
      trstlgl %in% c(77, 88, 99),
      NA_real_,
      as.numeric(trstlgl)
    ),
    # Recoding gender
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
    )
  ) %>%
  select(gndr, trstlgl) %>%
  # Removing NA
  filter(!is.na(gndr), !is.na(trstlgl))
# Fit a linear regression of trstlgl on gndr for the GB subset
GB_model1 <- lm(formula = trstlgl ~ gndr, data = GB_data)
# Pull out the fitted coefficients and show them
GB_coefficients <- coef(object = GB_model1)
print(GB_coefficients)
## (Intercept) gndrMale
## 5.2960824 0.1314312
# Render the fitted model as an equation, with estimated coefficients filled in
GB_model1 %>%
  equatiomatic::extract_eq(use_coefs = TRUE)
\[ \operatorname{\widehat{trstlgl}} = 5.3 + 0.13(\operatorname{gndr}_{\operatorname{Male}}) \]
From the output equation, we can determine that the predicted trstlgl for gndr = “Female” (the reference category) is 5.3.
We can also see that the predicted trstlgl for gndr = “Male” is
# Predicted trstlgl for gndr = "Male": intercept PLUS the gndrMale coefficient
# (the fitted coefficient is positive), read from the fitted coefficients
# rather than typed in by hand.
GB_coefficients[["(Intercept)"]] + GB_coefficients[["gndrMale"]]
## [1] 5.427514