packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "performance", "flextable", "broom", "scales", "ggeffects", "marginaleffects", "interactions", "dplyr") # add any you need here
# Install packages if they aren't installed already
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
##
## Attaching package: 'interactions'
##
##
## The following object is masked from 'package:ggeffects':
##
## johnson_neyman
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "performance" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "flextable" "performance" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "broom" "flextable" "performance" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "scales" "broom" "flextable" "performance" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
##
## [[11]]
## [1] "ggeffects" "scales" "broom" "flextable" "performance"
## [6] "survey" "survival" "Matrix" "grid" "effects"
## [11] "carData" "modelsummary" "fst" "infer" "lubridate"
## [16] "forcats" "stringr" "dplyr" "purrr" "readr"
## [21] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [26] "graphics" "grDevices" "utils" "datasets" "methods"
## [31] "base"
##
## [[12]]
## [1] "marginaleffects" "ggeffects" "scales" "broom"
## [5] "flextable" "performance" "survey" "survival"
## [9] "Matrix" "grid" "effects" "carData"
## [13] "modelsummary" "fst" "infer" "lubridate"
## [17] "forcats" "stringr" "dplyr" "purrr"
## [21] "readr" "tidyr" "tibble" "ggplot2"
## [25] "tidyverse" "stats" "graphics" "grDevices"
## [29] "utils" "datasets" "methods" "base"
##
## [[13]]
## [1] "interactions" "marginaleffects" "ggeffects" "scales"
## [5] "broom" "flextable" "performance" "survey"
## [9] "survival" "Matrix" "grid" "effects"
## [13] "carData" "modelsummary" "fst" "infer"
## [17] "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble"
## [25] "ggplot2" "tidyverse" "stats" "graphics"
## [29] "grDevices" "utils" "datasets" "methods"
## [33] "base"
##
## [[14]]
## [1] "interactions" "marginaleffects" "ggeffects" "scales"
## [5] "broom" "flextable" "performance" "survey"
## [9] "survival" "Matrix" "grid" "effects"
## [13] "carData" "modelsummary" "fst" "infer"
## [17] "lubridate" "forcats" "stringr" "dplyr"
## [21] "purrr" "readr" "tidyr" "tibble"
## [25] "ggplot2" "tidyverse" "stats" "graphics"
## [29] "grDevices" "utils" "datasets" "methods"
## [33] "base"
spain_data <- read.fst("spain_data.fst")
spain_clean <- spain_data %>%
mutate(
lrscale = case_when(
lrscale %in% 0:3 ~ "Left",
lrscale %in% 4:6 ~ "Moderate",
lrscale %in% 7:10 ~ "Right",
lrscale %in% c(77, 88, 99) ~ NA_character_,
TRUE ~ NA_character_
),
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
gndr == 9 ~ NA_character_,
TRUE ~ NA_character_
),
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA",
TRUE ~NA_character_
),
edulvla = ifelse(edulvla %in% c(77, 88, 99), NA_integer_, edulvla),
edulvlb = ifelse(edulvlb %in% c(5555, 7777, 8888), NA_integer_, edulvlb),
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more")),
)%>%
filter(
!is.na(lrscale),
!is.na(gndr),
!is.na(educ.ba)
)
print(table(spain_clean$lrscale))
##
## Left Moderate Right
## 5609 8574 2726
print(table(spain_clean$gndr))
##
## Female Male
## 8451 8458
print(table(spain_clean$educ.ba))
##
## No BA BA or more
## 12982 3927
spain_clean <- spain_clean %>%
mutate(
PoliticalOrientation = lrscale,
Gender = gndr,
Education = educ.ba
)
table1 <- datasummary_skim(spain_clean %>% dplyr::select(PoliticalOrientation, Gender, Education), type = "categorical",
title = "Table 1. Descriptive statistics for main variables", output = "flextable")
table1
|
| N | % |
|---|---|---|---|
PoliticalOrientation | Left | 5609 | 33.2 |
Moderate | 8574 | 50.7 | |
Right | 2726 | 16.1 | |
Gender | Female | 8451 | 50.0 |
Male | 8458 | 50.0 | |
Education | No BA | 12982 | 76.8 |
BA or more | 3927 | 23.2 |
test_stat <- spain_clean %>%
specify(response = PoliticalOrientation, explanatory = Education) %>%
hypothesize(null = "independence") %>%
calculate(stat = "Chisq")
print(test_stat)
## Response: PoliticalOrientation (factor)
## Explanatory: Education (factor)
## Null Hypothesis: independence
## # A tibble: 1 × 1
## stat
## <dbl>
## 1 41.0
null_dist <- spain_clean %>%
specify(response = PoliticalOrientation, explanatory = Education) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>%
calculate(stat = "Chisq")
null_dist
## Response: PoliticalOrientation (factor)
## Explanatory: Education (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.428
## 2 2 0.0305
## 3 3 1.21
## 4 4 3.87
## 5 5 1.25
## 6 6 0.571
## 7 7 0.469
## 8 8 1.18
## 9 9 0.628
## 10 10 2.52
## # ℹ 990 more rows
p_val <- null_dist %>% #
get_pvalue(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an approximation
## based on the number of `reps` chosen in the `generate()` step.
## ℹ See `get_p_value()` (`?infer::get_p_value()`) for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
conf_int <- null_dist%>%
get_confidence_interval(level = 0.95, type = "percentile")
null_dist %>%
visualize(data, bins = 10, method = "simulation", dens_color = "black") +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int)
test_stat <- 960
conf_int <- null_dist %>%
get_confidence_interval(level = 0.95, type = "percentile")
null_dist %>%
visualize(bins = 15, method = "simulation", dens_color = "black") +
geom_vline(aes(xintercept = test_stat), color = "red", linetype = "dashed", size = 1) +
shade_p_value(obs_stat = test_stat, direction = "greater") +
shade_confidence_interval(endpoints = conf_int) +
coord_cartesian(xlim = c(0, max(null_dist$stat) * 1.1)) + # Set manual x-axis limits
labs(title = "Simulation-Based Null Distribution",
x = "Chi-Square",
y = "Count")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
spain_data <- read.fst("spain_data.fst")
spain_clean1 <- spain_data %>%
mutate(
lrscale = case_when(
lrscale %in% 0:3 ~ "1",#Assuming "1" for left
lrscale %in% 7:10 ~ "3", #Assuming "3" for Right
TRUE ~ NA_character_ # Handling values outside the specified range as NA
),
gndr = case_when(
gndr == 1 ~ "Male",
gndr == 2 ~ "Female",
TRUE ~ NA_character_ # Setting other values as NA
),
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ "BA or more",
essround >= 5 & edulvlb > 600 ~ "BA or more",
TRUE ~ "No BA" # Default category for education
),
educ.ba = factor(educ.ba, levels = c("No BA", "BA or more"))
) %>%
filter(
!is.na(lrscale),
!is.na(gndr),
!is.na(educ.ba)
)
spain_clean1$gndr <- factor(spain_clean1$gndr)
spain_clean1$educ.ba <- factor(spain_clean1$educ.ba)
spain_clean1$lrscale <- factor(spain_clean1$lrscale)
table(spain_clean1$gndr)
##
## Female Male
## 4147 4188
table(spain_clean1$educ.ba)
##
## No BA BA or more
## 6245 2090
spain_clean1$gndr <- factor(spain_clean1$gndr)
spain_clean1$educ.ba <- factor(spain_clean1$educ.ba)
table(spain_clean1$gndr)
##
## Female Male
## 4147 4188
table(spain_clean1$educ.ba)
##
## No BA BA or more
## 6245 2090
unique(spain_clean1$lrscale)
## [1] 1 3
## Levels: 1 3
summary(spain_clean1$weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 43.00 62.00 70.00 89.77 82.00 888.00 7514
spain_clean1$lrscale <- as.numeric(as.character(spain_clean1$lrscale))
model3 <- lm(lrscale ~ gndr + educ.ba, data = spain_clean1, weights = spain_clean1$weight)
model4 <- lm(lrscale ~ gndr * educ.ba, data = spain_clean1, weights = spain_clean1$weight)
modelsummary(
list(model3, model4),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL)
| Â (1) | Â Â (2) | |
|---|---|---|
| (Intercept) | 1.7 (0.1)*** | 1.8 (0.1)*** |
| gndrMale | −0.1 (0.1) | −0.2 (0.1)*** |
| educ.baBA or more | −0.2 (0.1)* | −0.5 (0.1)*** |
| gndrMale × educ.baBA or more | 0.6 (0.2)*** | |
| Num.Obs. | 821 | 821 |
| R2 | 0.007 | 0.028 |
| R2 Adj. | 0.005 | 0.024 |
| AIC | 2374.3 | 2358.8 |
| BIC | 2393.2 | 2382.4 |
| Log.Lik. | −1183.154 | −1174.410 |
| RMSE | 0.92 | 0.92 |
coef_rename <- c("gndrMale" = "Male", "educ.baBA or more" = "BA or more",
title = "Table 2. Regression models predicting protest")
interaction_plot <- effect("gndr*educ.ba", model4, xlevels=list(gndr=c("Male", "Female"), educ.ba=c("No BA", "BA or More")))
plot(interaction_plot,
main="Interaction Effect Between Gender and Education on Political Orientation",
xlab="Education Level",
ylab="Estimated Effect on Political Orientation Scale",
xaxt="n")
interaction_plot
##
## gndr*educ.ba effect
## educ.ba
## gndr No BA BA or more
## Female 1.817201 1.366017
## Male 1.568316 1.759876
str(spain_clean1$gndr)
## Factor w/ 2 levels "Female","Male": 1 2 2 2 2 1 2 2 1 2 ...
str(spain_clean1$educ.ba)
## Factor w/ 2 levels "No BA","BA or more": 1 1 1 1 2 1 1 1 1 1 ...
spain_clean1$gndr <- as.factor(spain_clean1$gndr)
spain_clean1$educ.ba <- as.factor(spain_clean1$educ.ba)
interaction_plot <- effect("gndr*educ.ba", model4, xlevels=list(gndr=c("Male", "Female"), educ.ba=c("No BA", "BA or More")))
interaction_df <- as.data.frame(interaction_plot)
interaction_plot <- ggplot(interaction_df, aes(x = educ.ba, y = fit, color = gndr, group = gndr)) +
geom_point(position = position_dodge(width = 0.1)) +
geom_line(position = position_dodge(width = 0.1))
labs(
title = "Interaction Effect Between Gender and Education on Political Orientation",
x = "Education Level",
y = "Estimated Effect on Political Orientation Scale"
) +
theme_minimal()
## NULL
print(interaction_plot)