Data
d <- read.csv("data/osfstorage-archive/1_WCHS.csv", header = TRUE)
# Apply exclusions (per Pratt et al., 2026)
d <- d %>%
filter(Consent == 1) %>%
filter(Progress == 100) %>%
mutate(Q_RecaptchaScore = as.numeric(as.character(Q_RecaptchaScore))) %>%
filter(is.na(Q_RecaptchaScore) | Q_RecaptchaScore >= 0.5) %>%
filter(EnglishFluency >= 3) %>%
filter(CorrectAttentionChecks >= 2)
# Score WCHS via single-factor ML extraction
wchs_items <- d %>% select(WCHS_1:WCHS_10)
fa_mod <- fa(wchs_items, nfactors = 1, rotate = "oblimin", fm = "ml")
d$WCHS_fa <- as.numeric(scale(factor.scores(wchs_items, fa_mod, method = "regression")$scores))
# Recode demographics and other variables (from original code)
d$SES[d$SES == 7] <- NA
d$subjstatus <- 8 - d$subjstatus
d$Education[d$Education == 7] <- NA
d$religdummy <- ifelse(d$Religion %in% 1:2, 0, ifelse(d$Religion %in% 3:7, 1, NA))
d$partydummy <- ifelse(d$PartyID == 1, 0, ifelse(d$PartyID == 2, 1, NA))
d$PoliticalIdeology_reversed <- 8 - d$PoliticalIdeology
# Bifactor model for psychopathology (GAD-7, PHQ-9, DERS, ASI, BRS reversed, neuroticism)
gad_items <- paste0("GAD7_", 1:7)
phq_items <- paste0("PHQ9_", 1:9)
ders_items <- c("DERS_2", "DERS_3", "DERS_5", paste0("DERS_", 7:18))
brs_items <- c("BRS_1", "BRS_2R", "BRS_3", "BRS_4R", "BRS_5", "BRS_6R")
asi_items <- paste0("ASI_", 1:16)
neuro_items <- c("TIPI_4R_ES", "TIPI_9_ES")
psych_data <- d[, c(gad_items, phq_items, ders_items, asi_items, brs_items, neuro_items)]
psych_data[, brs_items] <- 6 - psych_data[, brs_items] # reverse so higher = more pathology
psych_data$TIPI_9_ES <- 8 - psych_data$TIPI_9_ES # reverse so higher = more neurotic
# Fit bifactor CFA via lavaan (omega_h = .86, ECV = .63)
library(lavaan)
psych_cc <- psych_data[complete.cases(psych_data), ]
all_items_psych <- colnames(psych_cc)
bifactor_model <- paste0(
"g =~ ", paste(all_items_psych, collapse = " + "), "\n",
"F_anx_dep =~ ", paste(all_items_psych[grep("GAD7|PHQ9", all_items_psych)], collapse = " + "), "\n",
"F_anx_sens =~ ", paste(all_items_psych[grep("ASI", all_items_psych)], collapse = " + "), "\n",
"F_resil =~ ", paste(all_items_psych[grep("BRS|TIPI", all_items_psych)], collapse = " + "), "\n",
"F_dysreg =~ ", paste(all_items_psych[grep("DERS", all_items_psych)], collapse = " + "), "\n"
)
fit_bi <- cfa(bifactor_model, data = psych_cc, orthogonal = TRUE, estimator = "MLR", std.lv = TRUE)
scores_bi <- scale(lavPredict(fit_bi))
cc <- complete.cases(psych_data)
d$psychopathology <- NA_real_; d$psychopathology[cc] <- scores_bi[, "g"]
d$anx_dep <- NA_real_; d$anx_dep[cc] <- scores_bi[, "F_anx_dep"]
d$anx_sensitivity <- NA_real_; d$anx_sensitivity[cc] <- scores_bi[, "F_anx_sens"]
d$resilience <- NA_real_; d$resilience[cc] <- scores_bi[, "F_resil"]
d$dysregulation <- NA_real_; d$dysregulation[cc] <- scores_bi[, "F_dysreg"]
Analysis
Sex difference
d_sex <- d %>%
filter(Gender %in% c(1, 2)) %>%
mutate(Sex = factor(Gender, levels = c(1, 2), labels = c("Male", "Female")))
# Cohen's d (Female - Male)
m <- tapply(d_sex$WCHS_fa, d_sex$Sex, mean, na.rm = TRUE)
s <- tapply(d_sex$WCHS_fa, d_sex$Sex, sd, na.rm = TRUE)
n <- tapply(d_sex$WCHS_fa, d_sex$Sex, length)
pooled_sd <- sqrt(((n["Male"]-1)*s["Male"]^2 + (n["Female"]-1)*s["Female"]^2) / (sum(n)-2))
cohen_d <- (m["Female"] - m["Male"]) / pooled_sd
p <- ggplot(d_sex, aes(x = Sex, y = WCHS_fa)) +
geom_violin(aes(fill = Sex), alpha = 0.3, width = 0.7) +
geom_boxplot(width = 0.15, outlier.shape = NA) +
stat_summary(fun = mean, geom = "point", size = 3, shape = 18) +
scale_fill_manual(values = c("Male" = "steelblue", "Female" = "coral")) +
labs(x = NULL, y = "WCHS (factor score)", title = "Words Can Harm Scale by Sex") +
theme_bw() +
theme(legend.position = "none") +
annotate("text", x = 1.5, y = max(d_sex$WCHS_fa, na.rm = TRUE) * 0.95,
label = sprintf("d = %.2f", cohen_d), size = 4.5)
p
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_summary()`).[39m

ggsave("figs/wchs_sex_difference.png", p, width = 5, height = 5, dpi = 300)
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_summary()`).[39m
Race differences
d_race <- d %>%
mutate(Race = as.character(Race),
Race_single = ifelse(!grepl(",", Race) & Race %in% as.character(1:7), Race, NA),
Race_grp = case_when(
Race_single == "6" ~ "White",
Race_single == "4" ~ "Black",
Race_single == "2" ~ "Asian",
Race_single == "7" ~ "Hispanic",
TRUE ~ NA_character_
)) %>%
filter(!is.na(Race_grp))
race_order <- d_race %>% group_by(Race_grp) %>% summarise(m = mean(WCHS_fa, na.rm = TRUE)) %>% arrange(m)
d_race$Race_grp <- factor(d_race$Race_grp, levels = race_order$Race_grp)
race_stats <- d_race %>%
group_by(Race_grp) %>%
summarise(n = n(), m = mean(WCHS_fa, na.rm = TRUE), s = sd(WCHS_fa, na.rm = TRUE)) %>%
mutate(label = sprintf("n=%d\nM=%.2f", n, m))
p <- ggplot(d_race, aes(x = Race_grp, y = WCHS_fa)) +
geom_violin(aes(fill = Race_grp), alpha = 0.3, width = 0.7) +
geom_boxplot(width = 0.15, outlier.shape = NA) +
stat_summary(fun = mean, geom = "point", size = 3, shape = 18) +
geom_text(data = race_stats, aes(x = Race_grp, y = -3.3, label = label), size = 3.2) +
labs(x = NULL, y = "WCHS (factor score, z)", title = "Words Can Harm Scale by Race") +
theme_bw() +
theme(legend.position = "none")
p
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_summary()`).[39m

ggsave("figs/wchs_race_difference.png", p, width = 6, height = 5, dpi = 300)
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_summary()`).[39m
Age and sex
d_sex <- d %>%
filter(Gender %in% c(1, 2)) %>%
mutate(Sex = factor(Gender, levels = c(1, 2), labels = c("Male", "Female")))
p <- ggplot(d_sex, aes(x = Age, y = WCHS_fa, color = Sex)) +
geom_point(alpha = 0.3, size = 1.5) +
geom_smooth(method = "lm", se = TRUE) +
scale_color_manual(values = c("Male" = "steelblue", "Female" = "coral")) +
labs(x = "Age", y = "WCHS (factor score, z)", title = "Words Can Harm Scale by Age and Sex") +
theme_bw()
p
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
## Warning: [37mRemoved 2 rows containing non-finite outside the scale range
## (`stat_smooth()`).[39m
## Warning: [37mRemoved 2 rows containing missing values or values outside the scale range
## (`geom_point()`).[39m

ggsave("figs/wchs_age_sex.png", p, width = 7, height = 5, dpi = 300)
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
## Warning: [37mRemoved 2 rows containing non-finite outside the scale range (`stat_smooth()`).[39m
## [37mRemoved 2 rows containing missing values or values outside the scale range
## (`geom_point()`).[39m
Psychopathology
r_val <- cor(d$WCHS_fa, d$psychopathology, use = "complete.obs")
p <- ggplot(d_sex, aes(x = psychopathology, y = WCHS_fa, color = Sex)) +
geom_point(alpha = 0.3, size = 1.5) +
geom_smooth(method = "lm", se = TRUE) +
scale_color_manual(values = c("Male" = "steelblue", "Female" = "coral")) +
labs(x = "General psychopathology (z)", y = "WCHS (factor score, z)",
title = "Words Can Harm Scale vs. General Psychopathology") +
annotate("text", x = -0.5, y = 1.9, label = sprintf("r = %.2f", r_val), size = 4.5) +
theme_bw()
p
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
## Warning: [37mRemoved 3 rows containing non-finite outside the scale range
## (`stat_smooth()`).[39m
## Warning: [37mRemoved 3 rows containing missing values or values outside the scale range
## (`geom_point()`).[39m

ggsave("figs/wchs_psychopathology.png", p, width = 7, height = 5, dpi = 300)
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
## Warning: [37mRemoved 3 rows containing non-finite outside the scale range (`stat_smooth()`).[39m
## [37mRemoved 3 rows containing missing values or values outside the scale range
## (`geom_point()`).[39m
Political ideology
r_val <- cor(d$WCHS_fa, d$PoliticalIdeology_reversed, use = "complete.obs")
p <- ggplot(d_sex, aes(x = PoliticalIdeology_reversed, y = WCHS_fa, color = Sex)) +
geom_jitter(alpha = 0.3, size = 1.5, width = 0.15) +
geom_smooth(method = "lm", se = TRUE) +
scale_color_manual(values = c("Male" = "steelblue", "Female" = "coral")) +
scale_x_continuous(breaks = 1:7, labels = c("Very\ncons.", "Cons.", "Lean\ncons.", "Moderate", "Lean\nlib.", "Liberal", "Very\nlib.")) +
labs(x = "Political ideology", y = "WCHS (factor score, z)",
title = "Words Can Harm Scale vs. Political Ideology") +
annotate("text", x = 2, y = 1.9, label = sprintf("r = %.2f", r_val), size = 4.5) +
theme_bw()
p
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_smooth()`).[39m
## Warning: [37mRemoved 1 row containing missing values or values outside the scale range
## (`geom_point()`).[39m

ggsave("figs/wchs_politics.png", p, width = 7, height = 5, dpi = 300)
## [37m`geom_smooth()` using formula = 'y ~ x'[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range (`stat_smooth()`).[39m
## [37mRemoved 1 row containing missing values or values outside the scale range
## (`geom_point()`).[39m
Party affiliation
d_party <- d %>%
filter(PartyID %in% 1:5) %>%
mutate(Party = factor(PartyID, levels = c(1, 2, 3, 4, 5),
labels = c("Republican", "Democrat", "Independent", "Libertarian", "None")))
party_stats <- d_party %>%
group_by(Party) %>%
summarise(n = n(), m = mean(WCHS_fa, na.rm = TRUE)) %>%
mutate(label = sprintf("n=%d\nM=%.2f", n, m))
party_order <- party_stats %>% arrange(m)
d_party$Party <- factor(d_party$Party, levels = party_order$Party)
party_stats$Party <- factor(party_stats$Party, levels = party_order$Party)
p <- ggplot(d_party, aes(x = Party, y = WCHS_fa)) +
geom_violin(aes(fill = Party), alpha = 0.3, width = 0.7) +
geom_boxplot(width = 0.15, outlier.shape = NA) +
stat_summary(fun = mean, geom = "point", size = 3, shape = 18) +
geom_text(data = party_stats, aes(x = Party, y = -3.3, label = label), size = 3.2) +
labs(x = NULL, y = "WCHS (factor score, z)", title = "Words Can Harm Scale by Party Affiliation") +
theme_bw() +
theme(legend.position = "none")
p
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_summary()`).[39m

ggsave("figs/wchs_party.png", p, width = 7, height = 5, dpi = 300)
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_ydensity()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).[39m
## Warning: [37mRemoved 1 row containing non-finite outside the scale range
## (`stat_summary()`).[39m
Regression table
library(modelsummary)
# Standardized regression: progressive models predicting WCHS
d_reg <- d %>%
filter(Gender %in% c(1, 2)) %>%
mutate(
female = as.numeric(Gender == 2),
age = Age,
leftism = PoliticalIdeology_reversed,
Race = as.character(Race),
Race_single = ifelse(!grepl(",", Race) & Race %in% as.character(1:7), Race, NA),
race_grp = case_when(
Race_single == "6" ~ "White",
Race_single == "4" ~ "Black",
Race_single == "2" ~ "Asian",
Race_single == "7" ~ "Hispanic",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(race_grp)) %>%
mutate(
black = as.numeric(race_grp == "Black"),
asian = as.numeric(race_grp == "Asian"),
hispanic = as.numeric(race_grp == "Hispanic")
) %>%
select(WCHS_fa, female, black, asian, hispanic, age, leftism, psychopathology,
anx_dep, anx_sensitivity, resilience, dysregulation) %>%
na.omit()
d_z <- as.data.frame(scale(d_reg))
m1 <- lm(WCHS_fa ~ age + female + black + asian + hispanic, data = d_z)
m2 <- lm(WCHS_fa ~ age + female + black + asian + hispanic + leftism, data = d_z)
m3 <- lm(WCHS_fa ~ age + female + black + asian + hispanic + leftism + psychopathology, data = d_z)
m4 <- lm(WCHS_fa ~ age + female + black + asian + hispanic + leftism + psychopathology + anx_sensitivity, data = d_z)
m5 <- lm(WCHS_fa ~ age + female + black + asian + hispanic + leftism + psychopathology + anx_sensitivity + anx_dep + resilience + dysregulation, data = d_z)
models <- list("M1" = m1, "M2" = m2, "M3" = m3, "M4" = m4, "M5" = m5)
cm <- c(
"age" = "Age", "female" = "Female", "black" = "Black", "asian" = "Asian",
"hispanic" = "Hispanic", "leftism" = "Leftism", "psychopathology" = "Psychopathology (P)",
"anx_sensitivity" = "Anxiety sensitivity", "anx_dep" = "Anxiety/depression",
"resilience" = "Resilience", "dysregulation" = "Dysregulation",
"(Intercept)" = "(Intercept)"
)
gm <- list(
list(raw = "adj.r.squared", clean = "Adj. R²", fmt = 3),
list(raw = "nobs", clean = "N", fmt = 0)
)
tab <- modelsummary(
models, stars = c("*" = 0.01, "**" = 0.001), coef_map = cm, gof_map = gm,
title = "Predicting Words Can Harm Scale (standardized coefficients)",
notes = "* p < .01, ** p < .001. All variables standardized. Race baseline = White.",
output = "gt"
)
tab
Predicting Words Can Harm Scale (standardized coefficients)
| |
M1 |
M2 |
M3 |
M4 |
M5 |
| Age |
-0.072 |
-0.058 |
0.007 |
0.010 |
0.012 |
|
(0.034) |
(0.033) |
(0.035) |
(0.034) |
(0.034) |
| Female |
0.187** |
0.176** |
0.173** |
0.157** |
0.150** |
|
(0.034) |
(0.033) |
(0.032) |
(0.031) |
(0.032) |
| Black |
0.189** |
0.175** |
0.183** |
0.161** |
0.172** |
|
(0.034) |
(0.033) |
(0.033) |
(0.032) |
(0.033) |
| Asian |
-0.000 |
-0.004 |
-0.005 |
0.003 |
0.008 |
|
(0.034) |
(0.033) |
(0.032) |
(0.032) |
(0.032) |
| Hispanic |
0.055 |
0.049 |
0.043 |
0.046 |
0.051 |
|
(0.035) |
(0.034) |
(0.033) |
(0.032) |
(0.032) |
| Leftism |
|
0.244** |
0.245** |
0.256** |
0.250** |
|
|
(0.033) |
(0.032) |
(0.031) |
(0.032) |
| Psychopathology (P) |
|
|
0.186** |
0.171** |
0.169** |
|
|
|
(0.034) |
(0.033) |
(0.033) |
| Anxiety sensitivity |
|
|
|
0.222** |
0.217** |
|
|
|
|
(0.031) |
(0.032) |
| Anxiety/depression |
|
|
|
|
0.031 |
|
|
|
|
|
(0.032) |
| Resilience |
|
|
|
|
0.009 |
|
|
|
|
|
(0.033) |
| Dysregulation |
|
|
|
|
-0.029 |
|
|
|
|
|
(0.033) |
| (Intercept) |
-0.000 |
-0.000 |
-0.000 |
-0.000 |
-0.000 |
|
(0.034) |
(0.033) |
(0.032) |
(0.031) |
(0.031) |
| Adj. R² |
0.076 |
0.134 |
0.163 |
0.211 |
0.210 |
| N |
820 |
820 |
820 |
820 |
820 |
| * p < 0.01, ** p < 0.001 |
| * p < .01, ** p < .001. All variables standardized. Race baseline = White. |
gt::gtsave(tab, "figs/regression_table.png")
## file:////tmp/Rtmp2XHBx6/file242b1573066258.html screenshot completed
# Partial epsilon squared (Type II SS)
library(car)
library(effectsize)
eps <- epsilon_squared(Anova(m5, type = 2), alternative = "two.sided")
label_map <- c(
"age" = "Age", "female" = "Female", "black" = "Black", "asian" = "Asian",
"hispanic" = "Hispanic", "leftism" = "Leftism", "psychopathology" = "Psychopathology (P)",
"anx_sensitivity" = "Anxiety sensitivity", "anx_dep" = "Anxiety/depression",
"resilience" = "Resilience", "dysregulation" = "Dysregulation"
)
eps_df <- data.frame(predictor = eps$Parameter, eps2 = eps$Epsilon2_partial, eps = sqrt(eps$Epsilon2_partial))
eps_df$label <- label_map[as.character(eps_df$predictor)]
eps_df <- eps_df %>% arrange(eps) %>% mutate(label = factor(label, levels = label))
p <- ggplot(eps_df, aes(x = label, y = eps)) +
geom_col(fill = "steelblue", alpha = 0.7) +
geom_text(aes(label = sprintf("%.2f", eps)), hjust = -0.2, size = 3.5) +
coord_flip() +
labs(x = NULL, y = expression(sqrt(partial~epsilon^2)),
title = "Independent Effects on Words Can Harm Scale") +
theme_bw() +
ylim(0, max(eps_df$eps) * 1.15)
p

ggsave("figs/wchs_effect_sizes.png", p, width = 7, height = 5, dpi = 300)