# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "performance", "flextable", "broom", "scales", "ggeffects","ggformula", "ggplot2", "dplyr", "marginaleffects") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
##
## Change the default backend persistently:
##
## config_modelsummary(factory_default = 'gt')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
##
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Loading required package: ggridges
##
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "performance" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "flextable" "performance" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "broom" "flextable" "performance" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "scales" "broom" "flextable" "performance" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "ggeffects" "scales" "broom" "flextable" "performance"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "ggformula" "ggridges" "ggeffects" "scales" "broom"
## [6] "flextable" "performance" "survey" "survival" "Matrix"
## [11] "grid" "effects" "carData" "modelsummary" "fst"
## [16] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
##
## [[13]]
## [1] "ggformula" "ggridges" "ggeffects" "scales" "broom"
## [6] "flextable" "performance" "survey" "survival" "Matrix"
## [11] "grid" "effects" "carData" "modelsummary" "fst"
## [16] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
##
## [[14]]
## [1] "ggformula" "ggridges" "ggeffects" "scales" "broom"
## [6] "flextable" "performance" "survey" "survival" "Matrix"
## [11] "grid" "effects" "carData" "modelsummary" "fst"
## [16] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [26] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [31] "datasets" "methods" "base"
##
## [[15]]
## [1] "marginaleffects" "ggformula" "ggridges" "ggeffects"
## [5] "scales" "broom" "flextable" "performance"
## [9] "survey" "survival" "Matrix" "grid"
## [13] "effects" "carData" "modelsummary" "fst"
## [17] "infer" "lubridate" "forcats" "stringr"
## [21] "dplyr" "purrr" "readr" "tidyr"
## [25] "tibble" "ggplot2" "tidyverse" "stats"
## [29] "graphics" "grDevices" "utils" "datasets"
## [33] "methods" "base"
france_data <- read_fst("france_data.fst")
france_clean <- france_data %>%
mutate(
bctprd = ifelse(bctprd %in% c(7, 8, 9), NA, bctprd),
sclact = ifelse(sclact %in% c(7, 8, 9), NA, sclact),
pbldmn = ifelse(pbldmn %in% c(7, 8, 9), NA, pbldmn),
)
france_clean <- france_clean %>% filter(!is.na(bctprd), !is.na(sclact), !is.na(pbldmn))
france_clean <- france_clean %>%
# Relabeling variables for clarity
mutate(
Boycott = bctprd,
Activism = sclact,
Protest = pbldmn
)
table_summary <- datasummary_skim(france_clean %>% select(Boycott, Activism, Protest))
table_summary
| Unique | Missing Pct. | Mean | SD | Min | Median | Max | Histogram | |
|---|---|---|---|---|---|---|---|---|
| Boycott | 2 | 0 | 1.7 | 0.5 | 1.0 | 2.0 | 2.0 | |
| Activism | 5 | 0 | 3.0 | 0.8 | 1.0 | 3.0 | 5.0 | |
| Protest | 2 | 0 | 1.9 | 0.4 | 1.0 | 2.0 | 2.0 |
table1 <- datasummary_skim(france_clean %>% dplyr::select(Boycott, Activism, Protest), title = "Table 1. Descriptive statistics for main variables", output = "flextable")
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
## produced by the `tinytable` backend.
## Warning: `type='all'` is only supported for the `tinytable` backend. Set the
## `type` argument explicitly to suppress this warning.
table1
| Unique | Missing Pct. | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
Boycott | 2 | 0 | 1.7 | 0.5 | 1.0 | 2.0 | 2.0 |
Activism | 5 | 0 | 3.0 | 0.8 | 1.0 | 3.0 | 5.0 |
Protest | 2 | 0 | 1.9 | 0.4 | 1.0 | 2.0 | 2.0 |
set_flextable_defaults(fonts_ignore=TRUE)
flextable::save_as_docx(table1, path = "Table1.docx", width = 7.0, height = 7.0)
france_clean <- france_clean %>%
mutate(
bctprd = case_when(
bctprd == 1 ~ "Yes",
bctprd == 2 ~ "No",
TRUE ~ NA_character_
),
sclact = case_when(
sclact == 1 ~ "Much less than most",
sclact == 2 ~ "Less than most",
sclact == 3 ~ "About the same",
sclact == 4 ~ "More than most",
sclact == 5 ~ "Much more than most",
TRUE ~ NA_character_
),
pbldmn = case_when(
pbldmn == 1 ~ "Yes",
pbldmn == 2 ~ "No",
TRUE ~ NA_character_
)
)
## Hypothesis infer
france_clean$Activism <- factor(france_clean$Activism)
france_clean$Protest <- factor(france_clean$Protest)
france_clean$Boycott <- factor(france_clean$Boycott)
test_stat <- france_clean %>%
specify(explanatory = Activism,
response = Boycott) %>%
hypothesize(null = "independence") %>%
calculate(stat = "Chisq")
print(test_stat$stat)
## X-squared
## 151.6166
null_dist <- france_clean %>%
specify(response = Activism, explanatory = Boycott) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "Chisq")
null_dist
## Response: Activism (factor)
## Explanatory: Boycott (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.910
## 2 2 1.45
## 3 3 3.19
## 4 4 1.49
## 5 5 3.37
## 6 6 3.68
## 7 7 13.1
## 8 8 1.97
## 9 9 3.70
## 10 10 2.21
## # ℹ 990 more rows
p_val <- null_dist %>% #
get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
conf_int <- null_dist%>%
get_confidence_interval(level = 0.95, type = "percentile")
null_dist %>%
visualize(data, bins = 10, method = "simulation", dens_color = "black") +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int)
test_stat <- 155
conf_int <- null_dist %>%
get_confidence_interval(level = 0.95, type = "percentile")
# Visualize the null distribution with manual x-axis limits
null_dist %>%
visualize(bins = 15, method = "simulation", dens_color = "black") +
geom_vline(aes(xintercept = test_stat), color = "red", linetype = "dashed", size = 1) +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int) +
coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
labs(title = "Simulation-Based Null Distribution",
x = "Chi-Square",
y = "Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
table(france_clean$Protest)
##
## 1 2
## 2422 14503
table(france_clean$Boycott)
##
## 1 2
## 5197 11728
table(france_clean$Activism)
##
## 1 2 3 4 5
## 744 3036 9889 2594 662
model1 <- glm(Boycott ~ Activism, family = binomial, data = france_clean, weights = round(weight))
model2 <- glm(Boycott ~ Activism + Protest, family = binomial, data = france_clean, weights = round(weight)) # two predictors
model3 <- glm(Boycott ~ Activism + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction
modelsummary(
list(model1, model2, model3),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL)
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
| (1) | (2) | (3) | |
|---|---|---|---|
| (Intercept) | 0.4 (0.0)*** | -0.3 (0.0)*** | -2.2 (0.1)*** |
| Activism2 | 0.6 (0.0)*** | 0.6 (0.0)*** | 2.5 (0.1)*** |
| Activism3 | 0.4 (0.0)*** | 0.3 (0.0)*** | 2.2 (0.1)*** |
| Activism4 | -0.4 (0.0)*** | -0.4 (0.0)*** | 1.8 (0.1)*** |
| Activism5 | -0.4 (0.0)*** | -0.4 (0.0)*** | 2.0 (0.1)*** |
| Protest2 | 0.9 (0.0)*** | 3.1 (0.1)*** | |
| Activism2 × Protest2 | -2.3 (0.1)*** | ||
| Activism3 × Protest2 | -2.3 (0.1)*** | ||
| Activism4 × Protest2 | -2.6 (0.1)*** | ||
| Activism5 × Protest2 | -2.8 (0.1)*** | ||
| Num.Obs. | 1904 | 1904 | 1904 |
| AIC | 1797753.5 | 1756989.2 | 1766529.6 |
| BIC | 1797781.3 | 1757022.5 | 1766585.1 |
| Log.Lik. | -898871.764 | -878488.577 | -883254.785 |
| RMSE | 0.48 | 0.47 | 0.47 |
#aesthetics
models <- list()
models[['SLR']]<- glm(Boycott ~ Activism, family = binomial, data = france_clean, weights = round(weight)) # single predictor
models[['MLR']]<- glm(Boycott ~ Activism + Protest, family = binomial, data = france_clean, weights = round(weight)) # two predictors
models[['Interaction']]<- glm(Boycott ~ Activism + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction
modelsummary(models,
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
exponentiate = TRUE, ## NOTE: this is to exponentiate directly, otherwise leave out or set to FALSE
coef_rename = c("Activism2" = "Much less than most", "Activism3" = "Less than most", "Activism4" = "More than most", "Activism5" = "Much more than most", "Protest2" = "No Protest"),
title = 'Table 2. Regression models predicting Boycotting')
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
| SLR | MLR | Interaction | |
|---|---|---|---|
| (Intercept) | 1.4 (0.0)*** | 0.7 (0.0)*** | 0.1 (0.0)*** |
| Much less than most | 1.9 (0.1)*** | 1.8 (0.1)*** | 12.7 (1.3)*** |
| Less than most | 1.4 (0.0)*** | 1.3 (0.0)*** | 8.8 (0.9)*** |
| More than most | 0.7 (0.0)*** | 0.7 (0.0)*** | 6.0 (0.6)*** |
| Much more than most | 0.7 (0.0)*** | 0.7 (0.0)*** | 7.3 (0.8)*** |
| No Protest | 2.4 (0.0)*** | 23.2 (2.4)*** | |
| Much less than most:No Protest | 0.1 (0.0)*** | ||
| Less than most:No Protest | 0.1 (0.0)*** | ||
| More than most:No Protest | 0.1 (0.0)*** | ||
| Much more than most:No Protest | 0.1 (0.0)*** | ||
| Num.Obs. | 1904 | 1904 | 1904 |
| AIC | 1797753.5 | 1756989.2 | 1766529.6 |
| BIC | 1797781.3 | 1757022.5 | 1766585.1 |
| Log.Lik. | -898871.764 | -878488.577 | -883254.785 |
| RMSE | 0.48 | 0.47 | 0.47 |
model4 <- glm(Boycott ~ polintr + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction
##polintr = interest in politics
mydf <- predict_response(model4, terms = c("polintr", "Protest"))
print(mydf, collapse_tables = TRUE)
## # Predicted probabilities of Boycott
##
## polintr | Protest | Predicted | 95% CI
## ------------------------------------------
## 1 | 1 | 0.09 | 0.08, 0.11
## 2 | | 0.10 | 0.09, 0.12
## 3 | | 0.12 | 0.10, 0.14
## 4 | | 0.14 | 0.11, 0.16
## 7 | | 0.20 | 0.17, 0.23
## 1 | 2 | 0.65 | 0.64, 0.67
## 2 | | 0.69 | 0.67, 0.70
## 3 | | 0.72 | 0.70, 0.73
## 4 | | 0.75 | 0.73, 0.76
## 7 | | 0.82 | 0.81, 0.83
##
## Adjusted for:
## * Activism = 1
ggplot(mydf, aes(x, predicted, colour = group)) + geom_line()