rm(list=ls()); gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 526513 28.2 1169600 62.5 NA 669445 35.8
## Vcells 968369 7.4 8388608 64.0 16384 1851710 14.2
# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales", "broom") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## Attaching package: 'aod'
##
##
## The following object is masked from 'package:survival':
##
## rats
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Registered S3 methods overwritten by 'broom':
## method from
## tidy.glht jtools
## tidy.summary.glht jtools
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "MASS" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "aod" "MASS" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "interactions" "aod" "MASS" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "kableExtra" "interactions" "aod" "MASS" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "flextable" "kableExtra" "interactions" "aod" "MASS"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "scales" "flextable" "kableExtra" "interactions" "aod"
## [6] "MASS" "survey" "survival" "Matrix" "grid"
## [11] "effects" "carData" "modelsummary" "fst" "infer"
## [16] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [26] "stats" "graphics" "grDevices" "utils" "datasets"
## [31] "methods" "base"
##
## [[13]]
## [1] "broom" "scales" "flextable" "kableExtra" "interactions"
## [6] "aod" "MASS" "survey" "survival" "Matrix"
## [11] "grid" "effects" "carData" "modelsummary" "fst"
## [16] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
france_data <- read.fst("france_data.fst")
table(france_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 1374 779 1703 2228 2357 3563 2125 2298 1669 398 267 26 251
table(france_data$edulvla)
##
## 1 2 3 4 5 77 88
## 1449 1018 2879 13 2003 2 4
table(france_data$edulvlb)
##
## 0 113 213 313 321 323 413 421 510 520 610 620 710 720 800 5555
## 64 1826 1013 1365 2827 749 54 83 173 1267 114 485 475 1047 97 17
## 7777 8888
## 10 4
table(france_data$lrscale)
##
## 0 1 2 3 4 5 6 7 8 9 10 77 88
## 945 499 1269 1996 1838 5274 1594 1749 1458 409 686 381 940
france_clean <- france_data %>%
mutate(
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), # For 'stfdem', set values 77, 88, and 99 to NA.
# Handle NAs for education levels
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
lrscale = ifelse(lrscale %in% c(77, 88, 99), NA, lrscale) # Convert lrscale values
)
table(france_clean$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 1374 779 1703 2228 2357 3563 2125 2298 1669 398 267
table(france_clean$edulvla)
##
## 1 2 3 4 5
## 1449 1018 2879 13 2003
table(france_clean$edulvlb)
##
## 0 113 213 313 321 323 413 421 510 520 610 620 710 720 800
## 64 1826 1013 1365 2827 749 54 83 173 1267 114 485 475 1047 97
table(france_clean$lrscale)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 945 499 1269 1996 1838 5274 1594 1749 1458 409 686
table_summary <- datasummary_skim(france_clean %>% dplyr::select(stfdem, edulvla, edulvlb, lrscale))
table_summary
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max | ||
|---|---|---|---|---|---|---|---|---|
| stfdem | 12 | 1 | 4.6 | 2.4 | 0.0 | 5.0 | 10.0 | |
| edulvla | 6 | 61 | 3.0 | 1.4 | 1.0 | 3.0 | 5.0 | |
| edulvlb | 16 | 39 | 373.1 | 193.8 | 0.0 | 321.0 | 800.0 | |
| lrscale | 12 | 7 | 4.9 | 2.4 | 0.0 | 5.0 | 10.0 |
table1 <- datasummary_skim(france_clean %>% dplyr::select(stfdem, edulvla, edulvlb, lrscale), title = "Table 1. Descriptive statistics of human values items", output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
table1
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
stfdem | 12 | 1 | 4.6 | 2.4 | 0.0 | 5.0 | 10.0 |
edulvla | 6 | 61 | 3.0 | 1.4 | 1.0 | 3.0 | 5.0 |
edulvlb | 16 | 39 | 373.1 | 193.8 | 0.0 | 321.0 | 800.0 |
lrscale | 12 | 7 | 4.9 | 2.4 | 0.0 | 5.0 | 10.0 |
france_clean <- france_data %>%
mutate(
# Recoding education
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA"
),
# Handle NAs for education levels
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
# Explicitly making 'No BA' the reference category
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), # For 'stfdem', set values 77, 88, and 99 to NA.
)
test_stat <- france_clean %>%
specify(explanatory = educ.ba, # change variable name for explanatory variable
response = stfdem) %>% # change variable name for outcome of interest
hypothesize(null = "independence") %>%
calculate(stat = "t") # replace in between quotation marks appropriate test statistic
## Warning: Removed 277 rows containing missing values.
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "No BA" - "BA or more", or divided in the order "No BA" / "BA or more"
## for ratio-based statistics. To specify this order yourself, supply `order =
## c("No BA", "BA or more")` to the calculate() function.
print(test_stat$stat)
## t
## -18.12345
null_distribution <- france_clean %>%
specify(explanatory = educ.ba,
response = stfdem) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>% # only line we are adding, use reps = 1000 as standard but change if too computationally demanding
calculate(stat = "t")
## Warning: Removed 277 rows containing missing values.
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "No BA" - "BA or more", or divided in the order "No BA" / "BA or more"
## for ratio-based statistics. To specify this order yourself, supply `order =
## c("No BA", "BA or more")` to the calculate() function.
p_val <- null_distribution %>% # replace name here if you assigned something other than null_distribution above
get_pvalue(obs_stat = test_stat, direction = "two-sided") # would only replace test_stat if assigned another name in Step 1
## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
conf_int <- null_distribution %>%
get_confidence_interval(level = 0.95, type = "percentile")
null_distribution %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided") +
shade_confidence_interval(endpoints = conf_int)
null_distribution
## Response: stfdem (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 -0.169
## 2 2 0.594
## 3 3 -0.446
## 4 4 -0.488
## 5 5 0.995
## 6 6 0.931
## 7 7 0.450
## 8 8 0.479
## 9 9 0.0388
## 10 10 -0.781
## # ℹ 990 more rows
model_data <- france_data %>%
mutate(
lrscale = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 7:10 ~ "Right",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% c(77, 88, 99) ~ NA_character_
),
# Recoding education
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA"
),
# Handle NAs for education levels
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
# Explicitly making 'No BA' the reference category
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), # For 'stfdem', set values 77, 88, and 99 to NA.
)
model1 <- lm(stfdem ~ educ.ba, data = model_data)
model2 <- lm(stfdem ~ lrscale, data = model_data)
model3 <- lm(stfdem ~ educ.ba * lrscale, data = model_data)
models <- list(
"Model 1" = lm(stfdem ~ educ.ba, data = model_data),
"Model 2" = lm(stfdem ~ lrscale, data = model_data),
"Model 3" = lm(stfdem ~ educ.ba * lrscale, data = model_data))
modelsummary(models, fmt = 1,
estimate = c("{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
title = 'Table 2. Regression models for the satisfaction with democracy in France')
| Model 1 | Â Model 2 | Â Model 3 | |
|---|---|---|---|
| (Intercept) | 4.4 (0.0)*** | 4.3 (0.0)*** | 4.1 (0.0)*** |
| educ.baBA or more | 0.7 (0.0)*** | 0.6 (0.1)*** | |
| lrscaleModerate | 0.4 (0.0)*** | 0.4 (0.1)*** | |
| lrscaleRight | 0.7 (0.1)*** | 0.8 (0.1)*** | |
| educ.baBA or more × lrscaleModerate | 0.3 (0.1)** | ||
| educ.baBA or more × lrscaleRight | 0.1 (0.1) | ||
| Num.Obs. | 18761 | 17564 | 17564 |
| R2 | 0.017 | 0.012 | 0.032 |
| R2 Adj. | 0.017 | 0.012 | 0.031 |
| AIC | 86113.6 | 80407.3 | 80060.4 |
| BIC | 86137.1 | 80438.4 | 80114.8 |
| Log.Lik. | −43053.781 | −40199.631 | −40023.192 |
| RMSE | 2.40 | 2.39 | 2.36 |
interaction_plot <- effect("educ.ba*lrscale", model3, na.rm=TRUE)
plot(interaction_plot,
main="Interaction effect",
xlab="Left right scale",
ylab="satisfaction with democracy")
interaction_plot
##
## educ.ba*lrscale effect
## lrscale
## educ.ba Left Moderate Right
## No BA 4.104274 4.464196 4.873706
## BA or more 4.743867 5.378216 5.621839