Jaishankar_Arvindh_Research

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "performance", "flextable", "broom", "scales", "ggeffects","ggformula", "ggplot2", "dplyr", "marginaleffects") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
## 
## Change the default backend persistently:
## 
##   config_modelsummary(factory_default = 'gt')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
## 
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## Loading required package: ggridges
## 
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "performance"  "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "broom"        "flextable"    "performance"  "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "scales"       "broom"        "flextable"    "performance"  "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "ggeffects"    "scales"       "broom"        "flextable"    "performance" 
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "ggformula"    "ggridges"     "ggeffects"    "scales"       "broom"       
##  [6] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[13]]
##  [1] "ggformula"    "ggridges"     "ggeffects"    "scales"       "broom"       
##  [6] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[14]]
##  [1] "ggformula"    "ggridges"     "ggeffects"    "scales"       "broom"       
##  [6] "flextable"    "performance"  "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"        
## 
## [[15]]
##  [1] "marginaleffects" "ggformula"       "ggridges"        "ggeffects"      
##  [5] "scales"          "broom"           "flextable"       "performance"    
##  [9] "survey"          "survival"        "Matrix"          "grid"           
## [13] "effects"         "carData"         "modelsummary"    "fst"            
## [17] "infer"           "lubridate"       "forcats"         "stringr"        
## [21] "dplyr"           "purrr"           "readr"           "tidyr"          
## [25] "tibble"          "ggplot2"         "tidyverse"       "stats"          
## [29] "graphics"        "grDevices"       "utils"           "datasets"       
## [33] "methods"         "base"

france_data <- read_fst("france_data.fst")

france_clean <- france_data %>% 
  mutate(
    bctprd = ifelse(bctprd %in% c(7, 8, 9), NA, bctprd), 
    sclact = ifelse(sclact %in% c(7, 8, 9), NA, sclact),
    pbldmn = ifelse(pbldmn %in% c(7, 8, 9), NA, pbldmn), 
  )

france_clean <- france_clean %>% filter(!is.na(bctprd), !is.na(sclact), !is.na(pbldmn))

france_clean <- france_clean %>%
  # Relabeling variables for clarity
  mutate(
    Boycott = bctprd,      
    Activism = sclact,      
    Protest = pbldmn
  )

table_summary <- datasummary_skim(france_clean %>% select(Boycott, Activism, Protest))
table_summary

tinytable_n700d10vo21fgf1x0n3p

	Unique	Mean	SD	Min	Median	Max
Boycott	2	1.7	0.5	1.0	2.0	2.0
Activism	5	3.0	0.8	1.0	3.0	5.0
Protest	2	1.9	0.4	1.0	2.0	2.0

table1 <- datasummary_skim(france_clean %>% dplyr::select(Boycott, Activism, Protest), title = "Table 1. Descriptive statistics for main variables", output = "flextable")

## Warning: Inline histograms in `datasummary_skim()` are only supported for tables
##   produced by the `tinytable` backend.

## Warning: `type='all'` is only supported for the `tinytable` backend. Set the
##   `type` argument explicitly to suppress this warning.

table1

	Unique	Mean	SD	Min	Median	Max
Boycott	2	1.7	0.5	1.0	2.0	2.0
Activism	5	3.0	0.8	1.0	3.0	5.0
Protest	2	1.9	0.4	1.0	2.0	2.0

set_flextable_defaults(fonts_ignore=TRUE)

flextable::save_as_docx(table1, path = "Table1.docx", width = 7.0, height = 7.0)

france_clean <- france_clean %>%
  mutate(
    bctprd = case_when(
    bctprd == 1 ~ "Yes",    
    bctprd == 2 ~ "No",    
    TRUE ~ NA_character_  
    ),
    
    sclact = case_when(
    sclact == 1 ~ "Much less than most",
    sclact == 2 ~ "Less than most",
    sclact == 3 ~ "About the same",
    sclact == 4 ~ "More than most",
    sclact == 5 ~ "Much more than most",
    TRUE ~ NA_character_ 
    ),
    
    pbldmn = case_when(
    pbldmn == 1 ~ "Yes",    
    pbldmn == 2 ~ "No",    
    TRUE ~ NA_character_   
    )
  )

## Hypothesis infer

france_clean$Activism <- factor(france_clean$Activism)
france_clean$Protest <- factor(france_clean$Protest)
france_clean$Boycott <- factor(france_clean$Boycott)

test_stat <- france_clean %>%
  specify(explanatory = Activism, 
          response = Boycott) %>% 
  hypothesize(null = "independence") %>%
  calculate(stat = "Chisq")

print(test_stat$stat)

## X-squared 
##  151.6166

null_dist <- france_clean %>%
  specify(response = Activism, explanatory = Boycott) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")

null_dist

## Response: Activism (factor)
## Explanatory: Boycott (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate   stat
##        <int>  <dbl>
##  1         1  0.910
##  2         2  1.45 
##  3         3  3.19 
##  4         4  1.49 
##  5         5  3.37 
##  6         6  3.68 
##  7         7 13.1  
##  8         8  1.97 
##  9         9  3.70 
## 10        10  2.21 
## # ℹ 990 more rows

p_val <- null_dist %>% # 
  get_pvalue(obs_stat = test_stat, direction = "greater")

## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.

p_val

## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

conf_int <- null_dist%>%
  get_confidence_interval(level = 0.95, type = "percentile")


null_dist %>%
  visualize(data, bins = 10, method = "simulation", dens_color = "black") +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int)

test_stat <- 155

conf_int <- null_dist %>%
  get_confidence_interval(level = 0.95, type = "percentile")

# Visualize the null distribution with manual x-axis limits
null_dist %>%
  visualize(bins = 15, method = "simulation", dens_color = "black") +
  geom_vline(aes(xintercept = test_stat), color = "red", linetype = "dashed", size = 1) +
  shade_p_value(obs_stat = test_stat, direction = "greater") +
  shade_confidence_interval(endpoints = conf_int) +
  coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
  labs(title = "Simulation-Based Null Distribution",
       x = "Chi-Square",
       y = "Count")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

table(france_clean$Protest)

## 
##     1     2 
##  2422 14503

table(france_clean$Boycott)

## 
##     1     2 
##  5197 11728

table(france_clean$Activism)

## 
##    1    2    3    4    5 
##  744 3036 9889 2594  662

model1 <- glm(Boycott ~ Activism, family = binomial, data = france_clean, weights = round(weight))

model2 <- glm(Boycott ~ Activism + Protest, family = binomial, data = france_clean, weights = round(weight)) # two predictors 

model3 <- glm(Boycott ~ Activism + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction

modelsummary(
  list(model1, model2, model3),
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL)

## Warning in n * resp: longer object length is not a multiple of shorter object
## length

## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length

## Warning in n * resp: longer object length is not a multiple of shorter object
## length

## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length

## Warning in n * resp: longer object length is not a multiple of shorter object
## length

## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length

tinytable_zkt89fpotgbs47fsy5go

	(1)	(2)	(3)
(Intercept)	0.4 (0.0)***	-0.3 (0.0)***	-2.2 (0.1)***
Activism2	0.6 (0.0)***	0.6 (0.0)***	2.5 (0.1)***
Activism3	0.4 (0.0)***	0.3 (0.0)***	2.2 (0.1)***
Activism4	-0.4 (0.0)***	-0.4 (0.0)***	1.8 (0.1)***
Activism5	-0.4 (0.0)***	-0.4 (0.0)***	2.0 (0.1)***
Protest2		0.9 (0.0)***	3.1 (0.1)***
Activism2 × Protest2			-2.3 (0.1)***
Activism3 × Protest2			-2.3 (0.1)***
Activism4 × Protest2			-2.6 (0.1)***
Activism5 × Protest2			-2.8 (0.1)***
Num.Obs.	1904	1904	1904
AIC	1797753.5	1756989.2	1766529.6
BIC	1797781.3	1757022.5	1766585.1
Log.Lik.	-898871.764	-878488.577	-883254.785
RMSE	0.48	0.47	0.47

#aesthetics

models <- list()
models[['SLR']]<- glm(Boycott ~ Activism, family = binomial, data = france_clean, weights = round(weight))  # single predictor 
models[['MLR']]<- glm(Boycott ~ Activism + Protest, family = binomial, data = france_clean, weights = round(weight)) # two predictors 
models[['Interaction']]<- glm(Boycott ~ Activism + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction

modelsummary(models,
  fmt = 1,
  estimate  = c( "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
  exponentiate = TRUE, ## NOTE: this is to exponentiate directly, otherwise leave out or set to FALSE
  coef_rename = c("Activism2" = "Much less than most", "Activism3" = "Less than most", "Activism4" = "More than most", "Activism5" = "Much more than most", "Protest2" = "No Protest"),
  title = 'Table 2. Regression models predicting Boycotting')

## Warning in n * resp: longer object length is not a multiple of shorter object
## length

## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length

## Warning in n * resp: longer object length is not a multiple of shorter object
## length

## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length

## Warning in n * resp: longer object length is not a multiple of shorter object
## length

## Warning in stats::dbinom(round(n * resp), round(n), predicted, log = TRUE) * :
## longer object length is not a multiple of shorter object length

tinytable_ejse87jpdl2xrcxlh0k1

Table 2. Regression models predicting Boycotting
	SLR	MLR	Interaction
(Intercept)	1.4 (0.0)***	0.7 (0.0)***	0.1 (0.0)***
Much less than most	1.9 (0.1)***	1.8 (0.1)***	12.7 (1.3)***
Less than most	1.4 (0.0)***	1.3 (0.0)***	8.8 (0.9)***
More than most	0.7 (0.0)***	0.7 (0.0)***	6.0 (0.6)***
Much more than most	0.7 (0.0)***	0.7 (0.0)***	7.3 (0.8)***
No Protest		2.4 (0.0)***	23.2 (2.4)***
Much less than most:No Protest			0.1 (0.0)***
Less than most:No Protest			0.1 (0.0)***
More than most:No Protest			0.1 (0.0)***
Much more than most:No Protest			0.1 (0.0)***
Num.Obs.	1904	1904	1904
AIC	1797753.5	1756989.2	1766529.6
BIC	1797781.3	1757022.5	1766585.1
Log.Lik.	-898871.764	-878488.577	-883254.785
RMSE	0.48	0.47	0.47

model4 <- glm(Boycott ~ polintr + Protest + Activism*Protest, family = binomial, data = france_clean, weights = round(weight)) # both predictors + interaction

##polintr = interest in politics

mydf <- predict_response(model4, terms = c("polintr", "Protest"))
print(mydf, collapse_tables = TRUE)

## # Predicted probabilities of Boycott
## 
## polintr | Protest | Predicted |     95% CI
## ------------------------------------------
##       1 |       1 |      0.09 | 0.08, 0.11
##       2 |         |      0.10 | 0.09, 0.12
##       3 |         |      0.12 | 0.10, 0.14
##       4 |         |      0.14 | 0.11, 0.16
##       7 |         |      0.20 | 0.17, 0.23
##       1 |       2 |      0.65 | 0.64, 0.67
##       2 |         |      0.69 | 0.67, 0.70
##       3 |         |      0.72 | 0.70, 0.73
##       4 |         |      0.75 | 0.73, 0.76
##       7 |         |      0.82 | 0.81, 0.83
## 
## Adjusted for:
## * Activism = 1

ggplot(mydf, aes(x, predicted, colour = group)) + geom_line()

Jaishankar_Arvindh_Research_Report

2024-04-15