# Start from a clean workspace: drop every object in the current environment,
# then run the garbage collector (its memory report is printed).
rm(list = ls())
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 526276 28.2 1168937 62.5 NA 669417 35.8
## Vcells 967895 7.4 8388608 64.0 16384 1851676 14.2
## Initial set-up
# Packages used in this analysis (add any further ones you need here).
packages <- c(
  "tidyverse", "infer", "fst", "modelsummary", "effects", "survey",
  "performance", "flextable", "broom", "scales", "ggeffects",
  "marginaleffects"
)

# Install only the packages that are not already in the local library.
new_packages <- setdiff(packages, rownames(installed.packages()))
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. lapply() returns one search-path vector per package;
# invisible() keeps that long list from being printed.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "performance" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "flextable" "performance" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "broom" "flextable" "performance" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "scales" "broom" "flextable" "performance" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "ggeffects" "scales" "broom" "flextable" "performance"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "marginaleffects" "ggeffects" "scales" "broom"
## [5] "flextable" "performance" "survey" "survival"
## [9] "Matrix" "grid" "effects" "carData"
## [13] "modelsummary" "fst" "infer" "lubridate"
## [17] "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2"
## [25] "tidyverse" "stats" "graphics" "grDevices"
## [29] "utils" "datasets" "methods" "base"
# Read the two fst files used below: the full ESS extract and the
# France-only subset.
ess <- read_fst(path = "All-ESS-Data.fst")
france_data <- read_fst(path = "france_data.fst")

# Frequency table of the raw trust item (trstplt), before the 77/88/99
# codes are recoded to NA further down.
table(france_data[["trstplt"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 2642 1566 2738 2959 2580 3615 1460 850 362 71 49 20 126
# Recode the trust and education items, then derive the BA indicator.
#
# Order matters inside mutate(): the missing-value codes on edulvla/edulvlb
# are set to NA *before* educ.ba is derived. Otherwise the edulvlb codes
# 5555/7777/8888 satisfy `edulvlb > 600` and are counted as "BA or more".
france_data <- france_data %>%
  mutate(
    # Missing-value codes on the trust item become NA.
    trstplt = ifelse(trstplt %in% c(77, 88, 99), NA, trstplt),
    # Missing-value codes on the two education items become NA.
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
    edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
    # Rounds before 5 use edulvla, rounds 5 and later use edulvlb.
    # Respondents whose relevant education item is missing stay NA rather
    # than being silently counted as "No BA".
    educ.ba = case_when(
      essround < 5 & edulvla == 5 ~ "BA or more",
      essround >= 5 & edulvlb > 600 ~ "BA or more",
      essround < 5 & is.na(edulvla) ~ NA_character_,
      essround >= 5 & is.na(edulvlb) ~ NA_character_,
      TRUE ~ "No BA"
    ),
    educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),
    # Copy under a descriptive name; this has to come after educ.ba exists.
    education = educ.ba
  )

# Check the derived indicator, including how many respondents are missing.
table(france_data$educ.ba, useNA = "ifany")

# Mean trust by education group; respondents with unknown education are
# left out so the result has one row per education level.
trust_by_educ_france <- france_data %>%
  filter(!is.na(educ.ba)) %>%
  group_by(educ.ba) %>%
  summarize(mean_trust = mean(trstplt, na.rm = TRUE))
trust_by_educ_france
## # A tibble: 2 × 2
## educ.ba mean_trust
## <fct> <dbl>
## 1 No BA 3.22
## 2 BA or more 3.73
# Plot mean trust for each education group.
# `group = 1` puts both groups on one path so geom_line() can connect them
# (with a discrete x axis each level is otherwise its own one-point group).
# `linewidth` replaces the `size` argument deprecated for lines in
# ggplot2 3.4.0.
ggplot(
  trust_by_educ_france,
  aes(x = educ.ba, y = mean_trust, group = 1)
) +
  geom_line(color = "pink", linewidth = 1) +
  geom_point(color = "black", size = 3) +
  labs(
    title = "Education in Relation to Trust",
    x = "Education (BA or more)",
    # The trust item runs from 0 to 10, matching the axis limits below.
    y = "Average Trust (0-10 scale)"
  ) +
  ylim(0, 10) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
The key predictor here is education (whether the respondent has attained a BA or more), and the outcome is “trust”.
library(dplyr)

# Derive age, life stage, birth cohort and generation in a single mutate().
france_data <- france_data %>%
  mutate(
    # 999 is the missing-value code on agea.
    age = ifelse(agea == 999, NA_real_, agea),
    # "Early" for ages 18-30, "Later" from 31; anything else is NA.
    adulthood = case_when(
      age >= 18 & age <= 30 ~ "Early",
      age >= 31 ~ "Later",
      TRUE ~ NA_character_
    ),
    # Birth year as text, kept only for 1930 through 2000.
    cohort = ifelse(
      yrbrn >= 1930 & yrbrn <= 2000,
      as.character(yrbrn),
      NA_character_
    ),
    # Generation by birth year; years outside every range become NA.
    gen = factor(
      case_when(
        yrbrn >= 1900 & yrbrn <= 1945 ~ "Interwar",
        yrbrn >= 1946 & yrbrn <= 1964 ~ "Baby Boomers",
        yrbrn >= 1965 & yrbrn <= 1979 ~ "Gen X",
        yrbrn >= 1980 & yrbrn <= 1996 ~ "Millennials",
        TRUE ~ NA_character_
      ),
      levels = c("Interwar", "Baby Boomers", "Gen X", "Millennials")
    )
  )
# Build the analysis variables.
# - trust: copy of the recoded trstplt item (the original had the assignment
#   reversed, `trstplt = trust`, which fails because `trust` does not exist).
# - education: copy of the educ.ba factor (a numeric "> 10" recode cannot be
#   applied to a factor).
# - Age: agea with values above 100 or below 15 set to NA.
france_data <- france_data %>%
  mutate(
    trust = trstplt,
    education = educ.ba,
    Age = case_when(
      agea > 100 ~ NA_real_,   # Set values above 100 to NA
      agea < 15 ~ NA_real_,    # Set values below 15 to NA
      TRUE ~ as.numeric(agea)  # Keep other values as is
    )
  )

# Analysis sample: rows with a known education group and a valid age.
df <- france_data %>%
  filter(!is.na(educ.ba)) %>%
  filter(!is.na(Age))

# Table 1: descriptive statistics for the main variables.
# `age` is the column created earlier in this script; on the rows kept in
# `df` it holds the same values as `Age`.
# NOTE(review): `education` is a factor - confirm that the installed
# modelsummary version summarises factor columns in datasummary_skim().
table1b <- datasummary_skim(
  df %>% dplyr::select(trust, education, age),
  title = "Table 1. Descriptive statistics for main variables",
  output = "flextable"
)
table1b
^ The chunk above would not knit as code, so it is shown here as text. However, table1b is included in the written report.
# Permutation test: is mean trust higher for respondents with a BA or more?
# Trust is the outcome (response) and education the predictor (explanatory).
# The test data are built directly from france_data, keeping only rows where
# both variables are observed.
infer_data <- france_data %>%
  transmute(trust = trstplt, education = educ.ba) %>%
  filter(!is.na(trust), !is.na(education))

# Step 1: observed test statistic.
# `order` fixes the direction of the difference: "BA or more" minus "No BA".
test_stat <- infer_data %>%
  specify(response = trust, explanatory = education) %>%
  hypothesize(null = "independence") %>%
  calculate(stat = "t", order = c("BA or more", "No BA"))
print(test_stat$stat)

# Step 2: null distribution from 1000 permutations.
# The statistic must match the one computed in Step 1 ("t"); otherwise the
# observed value cannot be compared with the null distribution.
null_dist <- infer_data %>%
  specify(response = trust, explanatory = education) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "t", order = c("BA or more", "No BA"))
null_dist

# Step 3: one-sided p-value for the observed statistic.
p_val <- null_dist %>%
  get_p_value(obs_stat = test_stat, direction = "greater")
p_val

# Step 4: 95% percentile interval.
# NOTE(review): this interval is taken from the permutation null
# distribution, as in the original code, so it describes the spread of the
# statistic under independence. A confidence interval for the effect itself
# would need a bootstrap distribution instead - confirm which is intended.
conf_int <- null_dist %>%
  get_confidence_interval(level = 0.95, type = "percentile")

# Null distribution with the p-value region and the interval shaded.
null_dist %>%
  visualize(bins = 10, method = "simulation", dens_color = "black") +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int)
^^ The code above was not running.