Sum_ManLaiMannie_Research

Initial set-up

rm(list=ls()); gc()

##          used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 526513 28.2    1169600 62.5         NA   669445 35.8
## Vcells 968369  7.4    8388608 64.0      16384  1851710 14.2

# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "aod", "interactions", "kableExtra", "flextable", "scales", "broom") # add any you need here

# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Load the packages
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## 
## Loading required package: grid
## 
## Loading required package: Matrix
## 
## 
## Attaching package: 'Matrix'
## 
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Loading required package: survival
## 
## 
## Attaching package: 'survey'
## 
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
## 
## 
## 
## Attaching package: 'MASS'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## 
## 
## Attaching package: 'aod'
## 
## 
## The following object is masked from 'package:survival':
## 
##     rats
## 
## 
## 
## Attaching package: 'kableExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following objects are masked from 'package:kableExtra':
## 
##     as_image, footnote
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## Registered S3 methods overwritten by 'broom':
##   method            from  
##   tidy.glht         jtools
##   tidy.summary.glht jtools

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "infer"     "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "infer"     "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
##  [6] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [11] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [16] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[5]]
##  [1] "effects"      "carData"      "modelsummary" "fst"          "infer"       
##  [6] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [11] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [16] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [21] "methods"      "base"        
## 
## [[6]]
##  [1] "survey"       "survival"     "Matrix"       "grid"         "effects"     
##  [6] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"        
## 
## [[7]]
##  [1] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
##  [6] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [11] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [16] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [21] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [26] "methods"      "base"        
## 
## [[8]]
##  [1] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
##  [6] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [11] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [16] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [21] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [26] "datasets"     "methods"      "base"        
## 
## [[9]]
##  [1] "interactions" "aod"          "MASS"         "survey"       "survival"    
##  [6] "Matrix"       "grid"         "effects"      "carData"      "modelsummary"
## [11] "fst"          "infer"        "lubridate"    "forcats"      "stringr"     
## [16] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [21] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [26] "utils"        "datasets"     "methods"      "base"        
## 
## [[10]]
##  [1] "kableExtra"   "interactions" "aod"          "MASS"         "survey"      
##  [6] "survival"     "Matrix"       "grid"         "effects"      "carData"     
## [11] "modelsummary" "fst"          "infer"        "lubridate"    "forcats"     
## [16] "stringr"      "dplyr"        "purrr"        "readr"        "tidyr"       
## [21] "tibble"       "ggplot2"      "tidyverse"    "stats"        "graphics"    
## [26] "grDevices"    "utils"        "datasets"     "methods"      "base"        
## 
## [[11]]
##  [1] "flextable"    "kableExtra"   "interactions" "aod"          "MASS"        
##  [6] "survey"       "survival"     "Matrix"       "grid"         "effects"     
## [11] "carData"      "modelsummary" "fst"          "infer"        "lubridate"   
## [16] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [21] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [26] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [31] "base"        
## 
## [[12]]
##  [1] "scales"       "flextable"    "kableExtra"   "interactions" "aod"         
##  [6] "MASS"         "survey"       "survival"     "Matrix"       "grid"        
## [11] "effects"      "carData"      "modelsummary" "fst"          "infer"       
## [16] "lubridate"    "forcats"      "stringr"      "dplyr"        "purrr"       
## [21] "readr"        "tidyr"        "tibble"       "ggplot2"      "tidyverse"   
## [26] "stats"        "graphics"     "grDevices"    "utils"        "datasets"    
## [31] "methods"      "base"        
## 
## [[13]]
##  [1] "broom"        "scales"       "flextable"    "kableExtra"   "interactions"
##  [6] "aod"          "MASS"         "survey"       "survival"     "Matrix"      
## [11] "grid"         "effects"      "carData"      "modelsummary" "fst"         
## [16] "infer"        "lubridate"    "forcats"      "stringr"      "dplyr"       
## [21] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [26] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [31] "datasets"     "methods"      "base"

france_data <- read.fst("france_data.fst")

Context

table(france_data$stfdem)

## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88 
## 1374  779 1703 2228 2357 3563 2125 2298 1669  398  267   26  251

table(france_data$edulvla)

## 
##    1    2    3    4    5   77   88 
## 1449 1018 2879   13 2003    2    4

table(france_data$edulvlb)

## 
##    0  113  213  313  321  323  413  421  510  520  610  620  710  720  800 5555 
##   64 1826 1013 1365 2827  749   54   83  173 1267  114  485  475 1047   97   17 
## 7777 8888 
##   10    4

table(france_data$lrscale)

## 
##    0    1    2    3    4    5    6    7    8    9   10   77   88 
##  945  499 1269 1996 1838 5274 1594 1749 1458  409  686  381  940

france_clean <- france_data %>% 
  mutate(
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), # For 'stfdem', set values 77, 88, and 99 to NA.
     # Handle NAs for education levels
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
    edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
    lrscale = ifelse(lrscale %in% c(77, 88, 99), NA, lrscale)  # Convert lrscale values
  )

table(france_clean$stfdem)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 1374  779 1703 2228 2357 3563 2125 2298 1669  398  267

table(france_clean$edulvla)

## 
##    1    2    3    4    5 
## 1449 1018 2879   13 2003

table(france_clean$edulvlb)

## 
##    0  113  213  313  321  323  413  421  510  520  610  620  710  720  800 
##   64 1826 1013 1365 2827  749   54   83  173 1267  114  485  475 1047   97

table(france_clean$lrscale)

## 
##    0    1    2    3    4    5    6    7    8    9   10 
##  945  499 1269 1996 1838 5274 1594 1749 1458  409  686

table_summary <- datasummary_skim(france_clean %>% dplyr::select(stfdem, edulvla, edulvlb, lrscale))
table_summary

	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
stfdem	12	1	4.6	2.4	0.0	5.0	10.0
edulvla	6	61	3.0	1.4	1.0	3.0	5.0
edulvlb	16	39	373.1	193.8	0.0	321.0	800.0
lrscale	12	7	4.9	2.4	0.0	5.0	10.0

table1 <- datasummary_skim(france_clean %>% dplyr::select(stfdem, edulvla, edulvlb, lrscale), title = "Table 1. Descriptive statistics of human values items", output = "flextable")

## Warning: The histogram argument is only supported for (a) output types "default",
##   "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
##   ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
##   compiled to PDF (via kableExtra)  or HTML (via kableExtra or gt). Use
##   `histogram=FALSE` to silence this warning.

table1

Table 1. Descriptive statistics of human values items
	Unique (#)	Missing (%)	Mean	SD	Min	Median	Max
stfdem	12	1	4.6	2.4	0.0	5.0	10.0
edulvla	6	61	3.0	1.4	1.0	3.0	5.0
edulvlb	16	39	373.1	193.8	0.0	321.0	800.0
lrscale	12	7	4.9	2.4	0.0	5.0	10.0

Infer Package

france_clean <- france_data %>%
  mutate(
 # Recoding education
    educ.ba = case_when(
      essround < 5 & edulvla == 5 ~ "BA or more",
      essround >= 5 & edulvlb > 600 ~ "BA or more",
      TRUE ~ "No BA"
    ),
    
    # Handle NAs for education levels
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
    edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
    
    # Explicitly making 'No BA' the reference category
    educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),
    
    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), # For 'stfdem', set values 77, 88, and 99 to NA.
    
  )

test_stat <- france_clean %>%
  specify(explanatory = educ.ba, # change variable name for explanatory variable
          response = stfdem) %>% # change variable name for outcome of interest
  hypothesize(null = "independence") %>%
  calculate(stat = "t") # replace in between quotation marks appropriate test statistic

## Warning: Removed 277 rows containing missing values.

## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "No BA" - "BA or more", or divided in the order "No BA" / "BA or more"
## for ratio-based statistics. To specify this order yourself, supply `order =
## c("No BA", "BA or more")` to the calculate() function.

print(test_stat$stat)

##         t 
## -18.12345

null_distribution <- france_clean %>%
  specify(explanatory = educ.ba,
          response = stfdem) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>% # only line we are adding, use reps = 1000 as standard but change if too computationally demanding
  calculate(stat = "t")

## Warning: Removed 277 rows containing missing values.

## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "No BA" - "BA or more", or divided in the order "No BA" / "BA or more"
## for ratio-based statistics. To specify this order yourself, supply `order =
## c("No BA", "BA or more")` to the calculate() function.

p_val <- null_distribution %>% # replace name here if you assigned something other than null_distribution above
  get_pvalue(obs_stat = test_stat, direction = "two-sided") # would only replace test_stat if assigned another name in Step 1

## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.

p_val

## # A tibble: 1 × 1
##   p_value
##     <dbl>
## 1       0

conf_int <- null_distribution %>%
  get_confidence_interval(level = 0.95, type = "percentile")


null_distribution %>%
  visualize() +
  shade_p_value(obs_stat = test_stat, direction = "two-sided") +
  shade_confidence_interval(endpoints = conf_int)

null_distribution

## Response: stfdem (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
##    replicate    stat
##        <int>   <dbl>
##  1         1 -0.169 
##  2         2  0.594 
##  3         3 -0.446 
##  4         4 -0.488 
##  5         5  0.995 
##  6         6  0.931 
##  7         7  0.450 
##  8         8  0.479 
##  9         9  0.0388
## 10        10 -0.781 
## # ℹ 990 more rows

Regression Model and Visualization

model_data <- france_data %>%
  mutate(
  lrscale = case_when(
      lrscale %in% 0:3 ~ "Left",                    
      lrscale %in% 7:10 ~ "Right",                     
      lrscale %in% 4:6 ~ "Moderate",                  
      lrscale %in% c(77, 88, 99) ~ NA_character_      
    ),

 # Recoding education
    educ.ba = case_when(
      essround < 5 & edulvla == 5 ~ "BA or more",
      essround >= 5 & edulvlb > 600 ~ "BA or more",
      TRUE ~ "No BA"
    ),
    
    # Handle NAs for education levels
    edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
    edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
    
    # Explicitly making 'No BA' the reference category
    educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),

    stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem), # For 'stfdem', set values 77, 88, and 99 to NA.
    
  )

model1 <- lm(stfdem ~ educ.ba, data = model_data)
model2 <- lm(stfdem ~ lrscale, data = model_data)
model3 <- lm(stfdem ~ educ.ba * lrscale, data = model_data)

models <- list(
   "Model 1" = lm(stfdem ~ educ.ba, data = model_data),
   "Model 2" = lm(stfdem ~ lrscale, data = model_data),
   "Model 3" = lm(stfdem ~ educ.ba * lrscale, data = model_data))
modelsummary(models, fmt = 1,
  estimate  = c("{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}",
                "{estimate} ({std.error}){stars}"),
  statistic = NULL,
 title = 'Table 2. Regression models for the satisfaction with democracy in France')

Table 2. Regression models for the satisfaction with democracy in France
	Model 1	Model 2	Model 3
(Intercept)	4.4 (0.0)***	4.3 (0.0)***	4.1 (0.0)***
educ.baBA or more	0.7 (0.0)***		0.6 (0.1)***
lrscaleModerate		0.4 (0.0)***	0.4 (0.1)***
lrscaleRight		0.7 (0.1)***	0.8 (0.1)***
educ.baBA or more × lrscaleModerate			0.3 (0.1)**
educ.baBA or more × lrscaleRight			0.1 (0.1)
Num.Obs.	18761	17564	17564
R2	0.017	0.012	0.032
R2 Adj.	0.017	0.012	0.031
AIC	86113.6	80407.3	80060.4
BIC	86137.1	80438.4	80114.8
Log.Lik.	−43053.781	−40199.631	−40023.192
RMSE	2.40	2.39	2.36

interaction_plot <- effect("educ.ba*lrscale", model3, na.rm=TRUE)

plot(interaction_plot,
     main="Interaction effect",
     xlab="Left right scale",
     ylab="satisfaction with democracy")

interaction_plot

## 
##  educ.ba*lrscale effect
##             lrscale
## educ.ba          Left Moderate    Right
##   No BA      4.104274 4.464196 4.873706
##   BA or more 4.743867 5.378216 5.621839

Sum_ManLaiMannie_Research_Report

2024-04-09

Initial set-up

Context

Infer Package

Regression Model and Visualization