FINAL

# List of packages
packages <- c("tidyverse", "fst", "modelsummary", "broom", "sjPlot", "ggplot2", "car", "Lock5Data", "pandoc", "mosaic", "janitor") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
## 
## 载入需要的程序包：carData
## 
## 
## 载入程序包：'car'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## 
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## 
## 载入程序包：'mosaic'
## 
## 
## The following object is masked from 'package:Matrix':
## 
##     mean
## 
## 
## The following objects are masked from 'package:car':
## 
##     deltaMethod, logit
## 
## 
## The following object is masked from 'package:modelsummary':
## 
##     msummary
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## 
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     stat
## 
## 
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## 
## 
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
## 
## 
## 
## 载入程序包：'janitor'
## 
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "fst"       "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
##  [6] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [11] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [16] "utils"        "datasets"     "methods"      "base"        
## 
## [[4]]
##  [1] "broom"        "modelsummary" "fst"          "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "sjPlot"       "broom"        "modelsummary" "fst"          "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[6]]
##  [1] "sjPlot"       "broom"        "modelsummary" "fst"          "lubridate"   
##  [6] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [11] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [16] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [21] "base"        
## 
## [[7]]
##  [1] "car"          "carData"      "sjPlot"       "broom"        "modelsummary"
##  [6] "fst"          "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "Lock5Data"    "car"          "carData"      "sjPlot"       "broom"       
##  [6] "modelsummary" "fst"          "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "pandoc"       "Lock5Data"    "car"          "carData"      "sjPlot"      
##  [6] "broom"        "modelsummary" "fst"          "lubridate"    "forcats"     
## [11] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [16] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [21] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "mosaic"       "mosaicData"   "ggformula"    "Matrix"       "lattice"     
##  [6] "pandoc"       "Lock5Data"    "car"          "carData"      "sjPlot"      
## [11] "broom"        "modelsummary" "fst"          "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "janitor"      "mosaic"       "mosaicData"   "ggformula"    "Matrix"      
##  [6] "lattice"      "pandoc"       "Lock5Data"    "car"          "carData"     
## [11] "sjPlot"       "broom"        "modelsummary" "fst"          "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"

library(dbplyr)

## 
## 载入程序包：'dbplyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql

ess <- read_fst("All-ESS-Data.fst")

switzerland_data <- ess %>% 
  filter(cntry == "CH")

switzerland_data_table_subset <- switzerland_data %>%
  mutate(
    happy = ifelse(happy == 2, 0, ifelse(happy %in% c(7, 8, 9), NA, happy)), 
    
    trstlgl = ifelse(trstlgl %in% c(77, 88, 99), NA, trstlgl) 
  )

summary_table <- datasummary_skim(switzerland_data_table_subset %>% select(happy, trstlgl), output = "flextable")

## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
##   produced by the `tinytable` backend.

## Warning: `type='all'` is only supported for the `tinytable` backend. Set the
##   `type` argument explicitly to suppress this warning.

summary_table

	Unique	Missing Pct.	Mean	SD	Min	Median	Max
happy	10	73	8.2	6.3	0.0	10.0	88.0
trstlgl	12	3	6.4	2.1	0.0	7.0	10.0

# Re-coding education
switzerland_data <- switzerland_data %>%
  mutate(
    # Set 'other' to NA for rounds 1-4
    edulvla = case_when(
      essround < 5 & edulvla == 55 ~ NA_real_,
      TRUE ~ edulvla
    ),
    # Set 'other' to NA for rounds 5 and later
    edulvlb = case_when(
      essround >= 5 & edulvlb == 5555 ~ NA_real_,
      TRUE ~ edulvlb
    ),
    # Create educ_level column
    educ_level = case_when(
      essround < 5 & edulvla == 5 ~ "BA",
      essround >= 5 & edulvlb > 600 ~ "BA",
      TRUE ~ "No BA"
    )
  )

# View the table of educ_level
table(switzerland_data$educ_level)

## 
##    BA No BA 
##  3856 13069

switzerland_data <- switzerland_data %>%
  mutate(religion = case_when(
    rlgblg == 2 ~ "No",
    rlgblg == 1 ~ "Yes",
    rlgblg %in% c(7, 8, 9) ~ NA_character_,
    TRUE ~ as.character(rlgblg)
  ),
  
  # Recoding gender
    gndr = case_when(
      gndr == 1 ~ "Male",
      gndr == 2 ~ "Female",
      gndr == 9 ~ NA_character_,
      TRUE ~ as.character(gndr)
  
    )
  )



# check
table(switzerland_data$religion)

## 
##    No   Yes 
##  6008 10855

table(switzerland_data$gndr)

## 
## Female   Male 
##   8729   8195

# compare to check (original variable name)
table(switzerland_data$rlgblg)

## 
##     1     2     7     8 
## 10855  6008     8    54

table(switzerland_data$gndr)

## 
## Female   Male 
##   8729   8195

switzerland_data <- switzerland_data %>%
  mutate(pdwrk_recode = case_when(
    pdwrk == 1 ~ 'yes',
    pdwrk == 0 ~ 'no', 
  ))

# check the recoded variable
table(switzerland_data$pdwrk_recode)

## 
##    no   yes 
##  6610 10315

# compare with the original variable
table(switzerland_data$pdwrk)

## 
##     0     1 
##  6610 10315

datasummary_skim(switzerland_data %>% select(happy, trstlgl, educ_level, rlgblg, pdwrk, gndr))

tinytable_cupdyzmiayv3d2sa3udd

	Unique	Missing Pct.	Mean	SD	Min	Median	Max
happy	13	0	8.2	3.3	0.0	8.0	88.0
trstlgl	13	0	8.5	13.0	0.0	7.0	88.0
rlgblg	4	0	1.4	0.6	1.0	1.0	8.0
pdwrk	2	0	0.6	0.5	0.0	1.0	1.0
		N	%
educ_level	BA	3856	22.8
	No BA	13069	77.2
gndr	Female	8729	51.6
	Male	8195	48.4
	NA	1	0.0

# Create a histogram of life expectancy
ggplot(switzerland_data, aes(x = trstlgl)) +
  geom_histogram(binwidth = 1, fill = "pink", color = "black", alpha = 0.3) + # Change bindwith and alpha values to see what happens
  labs(title = "Histogram of Trust in Legal System",
       
       x = "trstlgl", stat = "Count") + # Add labels and title
  theme_minimal() # Use a minimal theme for a clean look

# Create box plots for life expectancy by continent
ggplot(switzerland_data, aes(x = happy, y = trstlgl, fill = gndr)) +
  geom_boxplot() + # Create boxplot, fill colors by continent
  labs(title = "Trust in the Legal System based on happy",
       x = "happy", y = "Trust in Legal system") + # Add labels and title
  theme_minimal() # Use a minimal theme for a clean look

# double check clean
switzerland_clean <- switzerland_data %>%
  filter(!is.na(happy) & !is.na(trstlgl))

# calculate conditional probabilities
switzerland_probs <- switzerland_clean %>%
  count(trstlgl, happy) %>%
  group_by(happy) %>%
  mutate(prob = n / sum(n))
# plot
ggplot(switzerland_probs, aes(x = as.factor(trstlgl), y = prob, color = happy)) +
  geom_point() +
  geom_line(aes(group = happy)) +
  labs(title = "Conditional Probabilities of Trust in Legal System",
       subtitle = "Based on happy",
       x = "Trust Scale", 
       y = "Probability") +
  theme_minimal()

# Ensure trust_legal_system is a factor with levels 0 and 1
switzerland_data$trstlgl <- as.factor(switzerland_data$trstlgl)

# Fit a logistic regression model
model_logistic <- glm(trstlgl ~ happy, data = switzerland_data, family = binomial)

# Summarize the logistic model
summary(model_logistic)

## 
## Call:
## glm(formula = trstlgl ~ happy, family = binomial, data = switzerland_data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.30000    0.29020  11.372   <2e-16 ***
## happy        0.09626    0.03624   2.656   0.0079 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2893.1  on 16924  degrees of freedom
## Residual deviance: 2885.7  on 16923  degrees of freedom
## AIC: 2889.7
## 
## Number of Fisher Scoring iterations: 7

coef(model_logistic)

## (Intercept)       happy 
##  3.29999865  0.09626459

exp(coef(model_logistic))

## (Intercept)       happy 
##    27.11260     1.10105

modelsummary(model_logistic, 
             stars = TRUE,  # Include significance stars
             statistic = "std.error",  # Display standard errors
             output = "html")

tinytable_uqqqy9m05a5fuclx9ngw

	(1)
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
(Intercept)	3.300***
	(0.290)
happy	0.096**
	(0.036)
Num.Obs.	16925
AIC
BIC
Log.Lik.
RMSE	0.13

m.logit2 <- glm(trstlgl ~ happy + educ_level, data=switzerland_data, family=binomial(link="logit"))
summary(m.logit2)

## 
## Call:
## glm(formula = trstlgl ~ happy + educ_level, family = binomial(link = "logit"), 
##     data = switzerland_data)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      4.04245    0.33840  11.946  < 2e-16 ***
## happy            0.08737    0.03610   2.420   0.0155 *  
## educ_levelNo BA -0.80697    0.18621  -4.334 1.47e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2893.1  on 16924  degrees of freedom
## Residual deviance: 2862.8  on 16922  degrees of freedom
## AIC: 2868.8
## 
## Number of Fisher Scoring iterations: 7

# Log-odds ratios
coef(m.logit2)

##     (Intercept)           happy educ_levelNo BA 
##      4.04245393      0.08737069     -0.80697116

# Odds ratios
odds_ratios <- exp(coef(m.logit2))
knitr::kable(odds_ratios, col.names = "Odds Ratio", digits = 4)

	Odds Ratio
(Intercept)	56.9660
happy	1.0913
educ_levelNo BA	0.4462

modelsummary(m.logit2, 
             stars = TRUE,  # Include significance stars
             statistic = "std.error",  # Display standard errors
             output = "html")

tinytable_cqvqmyhckh9u6c9i0syr

	(1)
+ p < 0.1, * p < 0.05, p < 0.01, * p < 0.001
(Intercept)	4.042***
	(0.338)
happy	0.087*
	(0.036)
educ_levelNo BA	-0.807***
	(0.186)
Num.Obs.	16925
AIC
BIC
Log.Lik.
RMSE	0.13

library(effects)

## Use the command
##     lattice::trellis.par.set(effectsTheme())
##   to customize lattice options for effects plots.
## See ?effectsTheme for details.

plot(allEffects(m.logit2), multiline = TRUE)

FINAL

Lyra Dong

2024-08-10