# Remove every object from the current environment, then run the garbage
# collector; gc() prints the memory-usage table shown below.
# NOTE(review): rm(list = ls()) only deletes objects -- attached packages and
# options() are left as they were, so it is not a substitute for a fresh R
# session.
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 526276 28.2    1168937 62.5         NA   669417 35.8
## Vcells 967895  7.4    8388608 64.0      16384  1851676 14.2

## Initial set-up

# List of packages
packages <- c(
  "tidyverse", "infer", "fst", "modelsummary", "effects", "survey",
  "performance", "flextable", "broom", "scales", "ggeffects",
  "marginaleffects"
) # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Load the packages.
# lapply() returns one character vector of attached packages per call to
# library(); wrapping it in invisible() keeps that list from being printed
# while still attaching every package. Start-up messages are unaffected.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "performance"  "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "broom"        "flextable"    "performance"  "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "scales"       "broom"        "flextable"    "performance"  "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "ggeffects"    "scales"       "broom"        "flextable"    "performance" 
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "marginaleffects" "ggeffects"       "scales"          "broom"          
##  [5] "flextable"       "performance"     "survey"          "survival"       
##  [9] "Matrix"          "grid"            "effects"         "carData"        
## [13] "modelsummary"    "fst"             "infer"           "lubridate"      
## [17] "forcats"         "stringr"         "dplyr"           "purrr"          
## [21] "readr"           "tidyr"           "tibble"          "ggplot2"        
## [25] "tidyverse"       "stats"           "graphics"        "grDevices"      
## [29] "utils"           "datasets"        "methods"         "base"
# Read the two fst files used in this report (paths are relative to the
# working directory). Both calls use the read_fst() spelling; read.fst() is
# the dotted spelling of the same reader in the fst package.
# NOTE(review): `ess` is not referenced again in the code visible here --
# confirm it is needed before paying the cost of loading it.
ess <- fst::read_fst("All-ESS-Data.fst")
france_data <- fst::read_fst("france_data.fst")

Research Question: How does education relate to trust in France?

# Inspect the raw distribution of `trstplt` before any recoding.
table(france_data$trstplt)
## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88 
## 2642 1566 2738 2959 2580 3615 1460  850  362   71   49   20  126

# Values 77/88/99 lie outside the 0-10 answer range seen above and are treated
# as missing-value codes, so they are set to NA before any mean is computed.
france_data <- france_data %>%
  mutate(
    trstplt = ifelse(trstplt %in% c(77, 88, 99), NA, trstplt)
  )

# Build the two-level education indicator `educ.ba`.
#
# mutate() evaluates its arguments in order, so the missing-value codes of
# edulvla / edulvlb are recoded to NA *before* case_when() reads them.
# Previously the recode came afterwards, which meant edulvlb codes such as
# 5555, 7777 and 8888 satisfied `edulvlb > 600` and were counted as
# "BA or more".
# Respondents whose education level is missing now get NA instead of being
# silently assigned to "No BA".
# NOTE(review): 9999 is added to the edulvlb list for parity with the 99
# already handled for edulvla -- confirm against the ESS codebook.
france_data <- france_data %>%
  mutate(
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
    edulvlb = ifelse(
      edulvlb %in% c(5555, 7777, 8888, 9999), NA_integer_, edulvlb
    ),
    educ.ba = case_when(
      essround < 5 & edulvla == 5 ~ "BA or more",
      essround >= 5 & edulvlb > 600 ~ "BA or more",
      essround < 5 & is.na(edulvla) ~ NA_character_,
      essround >= 5 & is.na(edulvlb) ~ NA_character_,
      TRUE ~ "No BA"
    ),
    educ.ba = factor(educ.ba, levels = c("No BA", "BA or more"))
  )

# These two statements used to run before `educ.ba` existed: the table was
# empty ("table of extent 0") and the assignment copied NULL, i.e. did
# nothing. They now run after the column has been created.
table(france_data$educ.ba)
france_data$education <- france_data$educ.ba

# Mean trust within each education group. Rows with unknown education are
# dropped so the summary (and any plot built from it) has exactly the two
# substantive groups.
# NOTE(review): any previously knitted output for this table predates the
# missing-code fix above; re-knit to refresh the printed means.
trust_by_educ_france <- france_data %>%
  filter(!is.na(educ.ba)) %>%
  group_by(educ.ba) %>%
  summarize(mean_trust = mean(trstplt, na.rm = TRUE))
trust_by_educ_france
## # A tibble: 2 × 2
##   educ.ba    mean_trust
##   <fct>           <dbl>
## 1 No BA            3.22
## 2 BA or more       3.73
# Plot mean trust for each education group as two points joined by a line.
#
# * `group = 1` puts both categories in a single group; with a discrete x
#   axis geom_line() otherwise sees one observation per group and draws
#   nothing ("Each group consists of only one observation").
# * `linewidth` replaces `size` for lines, which is deprecated since
#   ggplot2 3.4.0.
# * The y-axis label now states the 0-10 range that the data and ylim() use
#   (it previously said 0-8).
# The warnings that the earlier version of this call produced no longer apply
# and have been removed from the recorded output.
ggplot(
  trust_by_educ_france,
  aes(x = educ.ba, y = mean_trust, group = 1)
) +
  geom_line(color = "pink", linewidth = 1) +
  geom_point(color = "black", size = 3) +
  labs(
    title = "Education in Relation to Trust",
    x = "Education (BA or more)",
    y = "Average Trust (0-10 scale)"
  ) +
  ylim(0, 10) +
  theme_minimal()

The key predictor is education (whether the respondent has attained a BA or more), and the outcome is trust (measured by `trstplt`).

library(dplyr)

# Derive age, life-stage, birth-cohort and generation variables from `agea`
# and `yrbrn`.
france_data <- france_data %>%
  mutate(
    # 999 in `agea` is recoded to NA
    age = ifelse(agea == 999, NA_real_, agea),
    # 18-30 -> "Early", 31 and over -> "Later", anything else (including
    # missing age) -> NA
    adulthood = case_when(
      age >= 18 & age <= 30 ~ "Early",
      age >= 31 ~ "Later",
      TRUE ~ NA_character_
    ),
    # Birth year kept as text only when it falls in 1930-2000
    cohort = if_else(
      yrbrn >= 1930 & yrbrn <= 2000,
      as.character(yrbrn),
      NA_character_
    ),
    # Generation label by birth year, stored as a factor in chronological
    # order; birth years outside every range become NA
    gen = factor(
      case_when(
        yrbrn >= 1900 & yrbrn <= 1945 ~ "Interwar",
        yrbrn >= 1946 & yrbrn <= 1964 ~ "Baby Boomers",
        yrbrn >= 1965 & yrbrn <= 1979 ~ "Gen X",
        yrbrn >= 1980 & yrbrn <= 1996 ~ "Millennials",
        TRUE ~ NA_character_
      ),
      levels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")
    )
  )

# Prepare the analysis variables and build the descriptive-statistics table.
#
# Fixes relative to the earlier one-line version, which could not run:
# * `trstplt = trust` was reversed -- `trust` did not exist yet. The alias is
#   now created as `trust = trstplt`.
# * `educ.ba` is a two-level factor, so comparing it with 10 and mixing it
#   with NA_real_ inside case_when() is invalid; `education` is now a plain
#   copy of `educ.ba`.
# * The inline comments swallowed the rest of the collapsed line; the code is
#   laid out one statement per line again.
# * Typographic quotes were replaced by plain double quotes, which R requires.
france_data <- france_data %>%
  mutate(
    trust = trstplt,
    education = educ.ba,
    Age = case_when(
      agea > 100 ~ NA_real_, # Set values above 100 to NA
      agea < 15 ~ NA_real_, # Set values below 15 to NA
      TRUE ~ as.numeric(agea) # Keep other values as is
    )
  )

# Analysis sample: respondents with known education and a valid age.
df <- france_data %>%
  filter(!is.na(educ.ba), !is.na(Age))

# NOTE(review): `education` is a factor. Depending on the installed
# modelsummary version, datasummary_skim() may summarise only numeric columns
# by default -- confirm the table shows it, or request a categorical summary
# via the `type` argument.
table1b <- datasummary_skim(
  df %>% dplyr::select(trust, education, age),
  title = "Table 1. Descriptive statistics for main variables",
  output = "flextable"
)

table1b

Note: the code above would not knit when placed in a code chunk. However, `table1b` is included in the written report.

Hypothesis Testing with Infer

# Permutation test (infer): is mean trust higher among respondents with a BA
# or more than among those without?
#
# Fixes relative to the earlier version, which could not run:
# * the pipeline started from `data.frame` (a function), not from the data;
# * response and explanatory variables were swapped in Step 1 -- the outcome
#   is trust (numeric), the predictor is education (two-level factor);
# * calculate(stat = "t") needs `order` to define the direction of the
#   difference;
# * the null distribution used stat = "Chisq" while the observed statistic
#   was "t"; both now use "t" so they are comparable;
# * get_pvalue() was commented out in the middle of the pipe;
# * `print(test_stat$stat)` had lost its `$` and was duplicated;
# * a stray `data` argument was passed to visualize();
# * typographic quotes were replaced by plain double quotes.

# Analysis sample built directly from `france_data`, so this section does not
# depend on objects created in other sections.
test_data <- france_data %>%
  transmute(trust = trstplt, education = educ.ba) %>%
  filter(!is.na(trust), !is.na(education))

# Step 1: observed test statistic ("BA or more" minus "No BA")
test_stat <- test_data %>%
  specify(response = trust, explanatory = education) %>%
  hypothesize(null = "independence") %>%
  calculate(stat = "t", order = c("BA or more", "No BA"))
print(test_stat$stat)

# Step 2: null distribution from 1000 permutations of the education labels
set.seed(123) # fixed seed so the permutation results are reproducible
null_dist <- test_data %>%
  specify(response = trust, explanatory = education) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "t", order = c("BA or more", "No BA"))
null_dist

# Step 3: one-sided p-value for the observed statistic
p_val <- null_dist %>%
  get_pvalue(obs_stat = test_stat, direction = "greater")
p_val

# Step 4: 95% percentile interval of the null distribution
conf_int <- null_dist %>%
  get_confidence_interval(level = 0.95, type = "percentile")

# Null distribution with the observed statistic and the interval shaded
null_dist %>%
  visualize(bins = 10, method = "simulation", dens_color = "black") +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int)

Note: the hypothesis-testing code above did not run.