Homework_8_YiLin

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "broom") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "broom"        "modelsummary" "fst"          "infer"        "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"

ess <- read_fst("All-ESS-Data.fst")

Task 1

Extract the coefficients and intercept using trust in the European Parliament (as the outcome) and worked in a political party (wrkprty, as the explanatory), for the country of Belgium. Write the regression equation and interpret.

Belgium_data <- ess %>%
  filter(cntry == "BE") %>%
  mutate(trstep = ifelse(trstep %in% c(77, 88, 99), NA, trstep),
  )

table(Belgium_data$trstep)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 1139  577  997 1419 1562 3850 2688 2759 1493  314  132

unique(Belgium_data$trstep)

##  [1]  0  7  8  5  6 NA  4  9  3  1 10  2

Belgium_data <- Belgium_data %>% filter(!is.na(trstep))

Belgium_data <- Belgium_data %>%
  mutate(wrkprty = case_when(
    wrkprty == 1 ~ "Yes",
    wrkprty == 2 ~ "No",
    wrkprty %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(wrkprty)
  ))

table(Belgium_data$wrkprty)

## 
##    No   Yes 
## 14871   727

unique(Belgium_data$wrkprty)

## [1] "No"  "Yes" NA

Belgium_data <- Belgium_data %>% filter(!is.na(wrkprty))

model1 <- lm(trstep ~ wrkprty, data = Belgium_data)

coefficients <- coef(model1)
print(coefficients)

## (Intercept)  wrkprtyYes 
##   4.9405554   0.4308338

tidy(model1)

## # A tibble: 2 × 5
##   term        estimate std.error statistic     p.value
##   <chr>          <dbl>     <dbl>     <dbl>       <dbl>
## 1 (Intercept)    4.94     0.0187    264.   0          
## 2 wrkprtyYes     0.431    0.0867      4.97 0.000000683

tidy(model1) |>
knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value
(Intercept)	4.941	0.019	263.892	0
wrkprtyYes	0.431	0.087	4.968	0

Explanation: (intercept) : 4.94 The intercept represents the predicted value of the outcome variable when the explanatory variable is zero. As can be seen from the graph, the intercept of 4.94 represents the expected value of trust in the European Parliament among people who have not worked for a political party in Belgium. At the same time, according to the realistic value of wrkprtyyes in the chart, it can be seen that the correlation value with the intercept of 4.94 is 0.43, that is, the difference between the two groups of people who have worked in political parties and those who have not worked is 0.43. So according to the graph, if a person does not work for a political party then his trust is 4.94, if he has worked for a political party then his trust is 5.37.

Task 2

Produce a model summary output using either tidy() or summary() (your choice). Use stfdem (satisfaction with democracy) as the outcome and born in country as a predictor (which we recoded/renamed as ‘native’ in the tutorial), for the country of Bulgaria. What is the expected average satisfaction with democracy for respondents that were not born in Bulgaria?

Bulgaria_data <- ess %>%
filter(cntry == "BG") %>%
mutate(stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
)

table(Bulgaria_data$stfdem)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 2247 1589 1778 1969 1597 1742  640  400  206   86   84

unique(Bulgaria_data$stfdem)

##  [1]  0  1 NA  2  3  5  4  6  8  7 10  9

Bulgaria_data <- Bulgaria_data %>% filter(!is.na(stfdem))

Bulgaria_data <- Bulgaria_data %>%
mutate(
  native = recode(brncntr,
                 `1` = "Yes",
                 `2` = "No",
                 `7` = NA_character_,
                 `8` = NA_character_,
                 `9` = NA_character_)
 )

table(Bulgaria_data$native)

## 
##    No   Yes 
##    97 12239

unique(Bulgaria_data$native)

## [1] "Yes" "No"  NA

Bulgaria_data <- Bulgaria_data %>% filter(!is.na(native))

model2 <- lm(stfdem ~ native, data = Bulgaria_data)

coefficients <- coef(model2)
print(coefficients)

## (Intercept)   nativeYes 
##   2.8041237   0.1189909

summary(model2)

## 
## Call:
## lm(formula = stfdem ~ native, data = Bulgaria_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9231 -1.9231  0.0769  2.0769  7.1959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.8041     0.2265  12.382   <2e-16 ***
## nativeYes     0.1190     0.2274   0.523    0.601    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.231 on 12334 degrees of freedom
## Multiple R-squared:  2.22e-05,   Adjusted R-squared:  -5.887e-05 
## F-statistic: 0.2739 on 1 and 12334 DF,  p-value: 0.6007

As can be seen from the table, respondents who were not born in Bulgaria have an expected average satisfaction with democracy of 2.80, while those who were born in Bulgaria have a value of 2.91, and the stfdem value is intercept plus native (yes).

Task 3

Based on your own outcome and country of interest, as well as one of the predictors recoded in the tutorial, produce an equatiomatic regression equation. Interpret the regression equation.

Austria_data <- ess %>%
filter(cntry == "AT") %>%
mutate(stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
)

table(Austria_data$stfdem)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
##  520  285  682 1188 1265 2414 1865 2393 2448 1046  657

unique(Austria_data$stfdem)

##  [1]  8  5  7  0  9  4 NA  3  2  6 10  1

Austria_data <- Austria_data %>% filter(!is.na(stfdem))

Austria_data <- Bulgaria_data %>%
  
  mutate(gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)))
    
table(Austria_data$gndr)

## 
## Female   Male 
##   6819   5517

unique(Austria_data$gndr)

## [1] "Male"   "Female"

Austria_data <- Austria_data %>% filter(!is.na(gndr))

model3 <- lm(stfdem ~ gndr, data = Austria_data)

coefficients <- coef(model3)
print(coefficients)

## (Intercept)    gndrMale 
##  2.88898665  0.07421799

summary(model3)

## 
## Call:
## lm(formula = stfdem ~ gndr, data = Austria_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9632 -1.8890  0.0368  2.0368  7.1110 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.88899    0.02701 106.968   <2e-16 ***
## gndrMale     0.07422    0.04039   1.838   0.0661 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.23 on 12334 degrees of freedom
## Multiple R-squared:  0.0002737,  Adjusted R-squared:  0.0001927 
## F-statistic: 3.377 on 1 and 12334 DF,  p-value: 0.06612

remotes::install_github("datalorax/equatiomatic")

## Skipping install of 'equatiomatic' from a github remote, the SHA1 (29ff168f) has not changed since last install.
##   Use `force = TRUE` to force installation

equatiomatic::extract_eq(model3, use_coefs = TRUE)

\[ \operatorname{\widehat{stfdem}} = 2.89 + 0.07(\operatorname{gndr}_{\operatorname{Male}}) \] As can be seen from the table, women’s satisfaction with the way Australian democracy works is about 2.88, while the difference between men’s satisfaction and women’s satisfaction is 0.074, so men’s satisfaction with Australian political parties is about 2.95. The expression of this equation means that Australians’ satisfaction with the way the country’s political parties are run is equal to women’s satisfaction intercept plus gndrmale satisfaction.

Homework_8_YiLin_Zhou

2023-11-19

Task 1

Task 2

Task 3