# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "performance", "flextable", "broom", "scales", "ggeffects","ggformula", "ggplot2", "dplyr", "marginaleffects") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## Loading required package: ggridges
## 
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "performance"  "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "broom"        "flextable"    "performance"  "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "scales"       "broom"        "flextable"    "performance"  "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "ggeffects"    "scales"       "broom"        "flextable"    "performance" 
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "ggformula"    "ggridges"     "ggeffects"    "scales"       "broom"       
##  [6] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[13]]
##  [1] "ggformula"    "ggridges"     "ggeffects"    "scales"       "broom"       
##  [6] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[14]]
##  [1] "ggformula"    "ggridges"     "ggeffects"    "scales"       "broom"       
##  [6] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[15]]
##  [1] "marginaleffects" "ggformula"       "ggridges"        "ggeffects"      
##  [5] "scales"          "broom"           "flextable"       "performance"    
##  [9] "survey"          "survival"        "Matrix"          "grid"           
## [13] "effects"         "carData"         "modelsummary"    "fst"            
## [17] "infer"           "lubridate"       "forcats"         "stringr"        
## [21] "dplyr"           "purrr"           "readr"           "tidyr"          
## [25] "tibble"          "ggplot2"         "tidyverse"       "stats"          
## [29] "graphics"        "grDevices"       "utils"           "datasets"       
## [33] "methods"         "base"
france_data <- read_fst("france_data.fst")
france_clean <- france_data %>% 
  mutate(
    bctprd = ifelse(bctprd %in% c(7, 8, 9), NA, bctprd), 
    sclact = ifelse(sclact %in% c(7, 8, 9), NA, sclact),
    pbldmn = ifelse(pbldmn %in% c(7, 8, 9), NA, pbldmn), 
  )

france_clean <- france_clean %>% filter(!is.na(bctprd), !is.na(sclact), !is.na(pbldmn))

france_clean <- france_clean %>%
  # Relabeling variables for clarity
  mutate(
    Boycott = bctprd,      
    Activism = sclact,      
    Protest = pbldmn
  )

table_summary <- datasummary_skim(france_clean %>% select(Boycott, Activism, Protest))
table_summary
tinytable_n700d10vo21fgf1x0n3p
Unique Missing Pct. Mean SD Min Median Max Histogram
Boycott 2 0 1.7 0.5 1.0 2.0 2.0
Activism 5 0 3.0 0.8 1.0 3.0 5.0
Protest 2 0 1.9 0.4 1.0 2.0 2.0
table1 <- datasummary_skim(france_clean %>% dplyr::select(Boycott, Activism, Protest), title = "Table 1. Descriptive statistics for main variables", output = "flextable")
## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
##   produced by the `tinytable` backend.
## Warning: `type='all'` is only supported for the `tinytable` backend. Set the
##   `type` argument explicitly to suppress this warning.
table1

Unique

Missing Pct.

Mean

SD

Min

Median

Max

Boycott

2

0

1.7

0.5

1.0

2.0

2.0

Activism

5

0

3.0

0.8

1.0

3.0

5.0

Protest

2

0

1.9

0.4

1.0

2.0

2.0

set_flextable_defaults(fonts_ignore=TRUE)

flextable::save_as_docx(table1, path = "Table1.docx", width = 7.0, height = 7.0) 
france_clean <- france_clean %>%
  mutate(
    bctprd = case_when(
    bctprd == 1 ~ "Yes",    
    bctprd == 2 ~ "No",    
    TRUE ~ NA_character_  
    ),
    
    sclact = case_when(
    sclact == 1 ~ "Much less than most",
    sclact == 2 ~ "Less than most",
    sclact == 3 ~ "About the same",
    sclact == 4 ~ "More than most",
    sclact == 5 ~ "Much more than most",
    TRUE ~ NA_character_ 
    ),
    
    pbldmn = case_when(
    pbldmn == 1 ~ "Yes",    
    pbldmn == 2 ~ "No",    
    TRUE ~ NA_character_   
    )
  )
## Hypothesis infer

france_clean$Activism <- factor(france_clean$Activism)
france_clean$Protest <- factor(france_clean$Protest)
france_clean$Boycott <- factor(france_clean$Boycott)

test_stat <- france_clean %>%
  specify(explanatory = Activism, 
          response = Boycott) %>% 
  hypothesize(null = "independence") %>%
  calculate(stat = "Chisq")

print(test_stat$stat)
## X-squared 
##  151.6166
null_dist <- france_clean %>%
  specify(response = Activism, explanatory = Boycott) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")

null_dist
## Response: Activism (factor)
## Explanatory: Boycott (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1  0.910
##  2         2  1.45 
##  3         3  3.19 
##  4         4  1.49 
##  5         5  3.37 
##  6         6  3.68 
##  7         7 13.1  
##  8         8  1.97 
##  9         9  3.70 
## 10        10  2.21 
## # ℹ 990 more rows
p_val <- null_dist %>% # 
  get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.
p_val
## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0
conf_int <- null_dist%>%
  get_confidence_interval(level = 0.95, type = "percentile")


null_dist %>%
  visualize(data, bins = 10, method = "simulation", dens_color = "black") +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int)

test_stat <- 155

conf_int <- null_dist %>%
  get_confidence_interval(level = 0.95, type = "percentile")

# Visualize the null distribution with manual x-axis limits
null_dist %>%
  visualize(bins = 15, method = "simulation", dens_color = "black") +
  geom_vline(aes(xintercept = test_stat), color = "red", linetype = "dashed", size = 1) +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int) +
  coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
  labs(title = "Simulation-Based Null Distribution",
       x = "Chi-Square",
       y = "Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

table(france_clean$Protest)
## 
##     1     2 
##  2422 14503
table(france_clean$Boycott)
## 
##     1     2 
##  5197 11728
table(france_clean$Activism)
## 
##    1    2    3    4    5 
##  744 3036 9889 2594  662
model1 <- glm(Boycott ~ Activism, family = binomial, data = france_clean, weights = round(weight))

model2 <- glm(Boycott ~ Activism + Protest, family = binomial, data = france_clean, weights = round(weight)) # two predictors 

model3 <- glm(Boycott ~ Activism + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction

modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
tinytable_zkt89fpotgbs47fsy5go
(1) (2) (3)
(Intercept) 0.4 (0.0)*** -0.3 (0.0)*** -2.2 (0.1)***
Activism2 0.6 (0.0)*** 0.6 (0.0)*** 2.5 (0.1)***
Activism3 0.4 (0.0)*** 0.3 (0.0)*** 2.2 (0.1)***
Activism4 -0.4 (0.0)*** -0.4 (0.0)*** 1.8 (0.1)***
Activism5 -0.4 (0.0)*** -0.4 (0.0)*** 2.0 (0.1)***
Protest2 0.9 (0.0)*** 3.1 (0.1)***
Activism2 × Protest2 -2.3 (0.1)***
Activism3 × Protest2 -2.3 (0.1)***
Activism4 × Protest2 -2.6 (0.1)***
Activism5 × Protest2 -2.8 (0.1)***
Num.Obs. 1904 1904 1904
AIC 1797753.5 1756989.2 1766529.6
BIC 1797781.3 1757022.5 1766585.1
Log.Lik. -898871.764 -878488.577 -883254.785
RMSE 0.48 0.47 0.47
#aesthetics

models <- list()
models[['SLR']]<- glm(Boycott ~ Activism, family = binomial, data = france_clean, weights = round(weight))  # single predictor 
models[['MLR']]<- glm(Boycott ~ Activism + Protest, family = binomial, data = france_clean, weights = round(weight)) # two predictors 
models[['Interaction']]<- glm(Boycott ~ Activism + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction

modelsummary(models,
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  exponentiate = TRUE, ## NOTE: this is to exponentiate directly, otherwise leave out or set to FALSE
  coef_rename = c("Activism2" = "Much less than most", "Activism3" = "Less than most", "Activism4" = "More than most", "Activism5" = "Much more than most", "Protest2" = "No Protest"),
  title = 'Table 2. Regression models predicting Boycotting')
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
## Warning in n * resp: longer object length is not a multiple of shorter object
## length
## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length
tinytable_ejse87jpdl2xrcxlh0k1
Table 2. Regression models predicting Boycotting
SLR MLR Interaction
(Intercept) 1.4 (0.0)*** 0.7 (0.0)*** 0.1 (0.0)***
Much less than most 1.9 (0.1)*** 1.8 (0.1)*** 12.7 (1.3)***
Less than most 1.4 (0.0)*** 1.3 (0.0)*** 8.8 (0.9)***
More than most 0.7 (0.0)*** 0.7 (0.0)*** 6.0 (0.6)***
Much more than most 0.7 (0.0)*** 0.7 (0.0)*** 7.3 (0.8)***
No Protest 2.4 (0.0)*** 23.2 (2.4)***
Much less than most:No Protest 0.1 (0.0)***
Less than most:No Protest 0.1 (0.0)***
More than most:No Protest 0.1 (0.0)***
Much more than most:No Protest 0.1 (0.0)***
Num.Obs. 1904 1904 1904
AIC 1797753.5 1756989.2 1766529.6
BIC 1797781.3 1757022.5 1766585.1
Log.Lik. -898871.764 -878488.577 -883254.785
RMSE 0.48 0.47 0.47
model4 <- glm(Boycott ~ polintr + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction

##polintr = interest in politics

mydf <- predict_response(model4, terms = c("polintr", "Protest"))
print(mydf, collapse_tables = TRUE)
## # Predicted probabilities of Boycott
## 
## polintr | Protest | Predicted |     95% CI
## ------------------------------------------
##       1 |       1 |      0.09 | 0.08, 0.11
##       2 |         |      0.10 | 0.09, 0.12
##       3 |         |      0.12 | 0.10, 0.14
##       4 |         |      0.14 | 0.11, 0.16
##       7 |         |      0.20 | 0.17, 0.23
##       1 |       2 |      0.65 | 0.64, 0.67
##       2 |         |      0.69 | 0.67, 0.70
##       3 |         |      0.72 | 0.70, 0.73
##       4 |         |      0.75 | 0.73, 0.76
##       7 |         |      0.82 | 0.81, 0.83
## 
## Adjusted for:
## * Activism = 1
ggplot(mydf, aes(x, predicted, colour = group)) + geom_line()