###### PROLOG ########
# PROLOG ###
# PROJECT: Mental Health, Bullying, and School Climate
# RESEARCH QUESTION: What factors are associated with students feeling sad or hopeless, and how do bullying and school safety perceptions impact mental health outcomes among high school students?
#HYPOTHESIS: Bullying and school climate factors are associated with increased odds of feeling sad or hopeless.
# PURPOSE: Skill Assessment
# DIR: C:/Users/kesha/Downloads/
# DATA SOURCE: CDC's Youth Risk Behavior Surveillance System (YRBSS)
# DATA: yrbs_2023.csv
# AUTHOR: Dr. Keshav Kumar
# CREATED: JULY 10, 2025
# LATEST: JULY 10, 2025
# NOTES: N/A
# PROLOG ###
# libraries
library(magrittr) # for pipes
library(table1) # for descriptive statistics
library(tidyverse) # for tidy code
library(sessioninfo) # for session_info at bottom
library(dplyr) # for data manipulation
library(details) # for session_info at bottom
library(ggthemes) # for tufte theme
library(ggrepel) # for text plotting
library(patchwork) # for combining plots
library(readxl) # for reading xlxs files
library(kableExtra) # for table headers formatting
library(knitr) # for tables
library(sjPlot) # for model tables
library(broom) # for logistic regression
library(survey) #analysis of complex survey
library(pROC) # ROC curve visualization
library(skimr) # for summary generation
library(ggmosaic) #for mosaic plot
library(car) #for multicollinearity vif
library(generalhoslem) # for goodness of fit
library(ggeffects) # for effect modification visuals
# plot theme
theme_set(theme_tufte()) # but might not carry over in chunks
# Okabe-Ito colorblind-friendly color palette:
# https://jfly.uni-koeln.de/color/
oi_pal <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442",
"#0072B2", "#D55E00", "#CC79A7", "#999999")
###### DATA MGMT #####
# original dataset
df <- read.csv("C:/Users/kesha/Downloads/YRBS/yrbs_2023.csv")
#dict <- read_excel("C:/Users/kesha/Downloads/", sheet = "Data Dictionary")
#clean data
data <- df %>%
mutate(
age = recode_factor(Q1,
"1" = "12 years or younger",
"2" = "13",
"3" = "14",
"4" = "15",
"5" = "16",
"6" = "17",
"7" = "18 years or older"),
sex = recode_factor(Q2,
"1" = "Female",
"2" = "Male"),
race_ethnicity =recode_factor(RACEETH,
"1"="Am Indian/Alaska Native",
"2"="Asian",
"3"="Black or African American",
"4"="Native Hawaiian/Other PI",
"5"="White",
"6"="Hispanic/Latino",
"7"="Multiple - Hispanic",
"8"="Multiple - Non-Hispanic"),
school_safety_concern = recode_factor(Q14,
"1" = "0 day",
"2" = "1 day",
"3" = "2 or 3 days",
"4" = "4 or 5 days",
"5" = "6 or more days"),
bullied_school = recode_factor(Q24,
"1" = "Yes",
"2" = "No"),
bullied_online = recode_factor(Q25,
"1" = "Yes",
"2" = "No"),
felt_sad_hopeless = recode_factor(Q26,
"1" = "Yes",
"2" = "No")
) %>%
dplyr::select(age, sex, race_ethnicity, school_safety_concern, bullied_school,
bullied_online, felt_sad_hopeless, WEIGHT, STRATUM, PSU) %>%
drop_na()
# kable(str(data))
# kable(head(data))
# kable(summary(data))
# kable(skim(data))
# 1. Descriptive statistics for Age (counts and percentages)
age_summary <- data %>%
group_by(age) %>%
summarise(
Count = n(),
Percentage = round((n() / nrow(data)) * 100, 2)
)
# Print age summary
kable(age_summary)
age | Count | Percentage |
---|---|---|
12 years or younger | 28 | 0.15 |
13 | 25 | 0.14 |
14 | 2292 | 12.66 |
15 | 4967 | 27.43 |
16 | 4709 | 26.01 |
17 | 4115 | 22.73 |
18 years or older | 1971 | 10.89 |
# 2. Descriptive statistics for Gender (counts and percentages)
gender_summary <- data %>%
group_by(sex) %>%
summarise(
Count = n(),
Percentage = round((n() / nrow(data)) * 100, 2)
)
# Print gender summary
kable(gender_summary)
sex | Count | Percentage |
---|---|---|
Female | 8956 | 49.46 |
Male | 9151 | 50.54 |
# 3. Descriptive statistics for Habits (counts and percentages)
race_ethnicity_summary <- data %>%
group_by(race_ethnicity) %>%
summarise(
Count = n(),
Percentage = round((n() / nrow(data)) * 100, 2)
)
# Print habits summary
kable(race_ethnicity_summary)
race_ethnicity | Count | Percentage |
---|---|---|
Am Indian/Alaska Native | 1063 | 5.87 |
Asian | 920 | 5.08 |
Black or African American | 1694 | 9.36 |
Native Hawaiian/Other PI | 92 | 0.51 |
White | 8973 | 49.56 |
Hispanic/Latino | 1141 | 6.30 |
Multiple - Hispanic | 2616 | 14.45 |
Multiple - Non-Hispanic | 1608 | 8.88 |
# Overall descriptive statistics
label(data$age)<- "Age of Students"
label(data$sex)<- "Gender of Students"
label(data$race_ethnicity)<- "Race of Students"
label(data$school_safety_concern)<- "Number of Days Missed School due to Unsafe Climate at School"
label(data$bullied_school)<- "Bullied on School Property"
label(data$bullied_online)<- "Bullied Electronically"
label(data$felt_sad_hopeless)<- "Felt sad or Hopeless almost everyday for two weeks or more in row"
table1(
~age + sex + race_ethnicity + school_safety_concern + bullied_school + bullied_online | felt_sad_hopeless,
data= data,render.missing=NULL,
render.continous="median(IQR)",
caption = "Descriptive Summary of Mental Health and Safety Indicators (YRBS 2023)",
)
Yes (N=7344) |
No (N=10763) |
Overall (N=18107) |
|
---|---|---|---|
Age of Students | |||
12 years or younger | 17 (0.2%) | 11 (0.1%) | 28 (0.2%) |
13 | 3 (0.0%) | 22 (0.2%) | 25 (0.1%) |
14 | 870 (11.8%) | 1422 (13.2%) | 2292 (12.7%) |
15 | 1978 (26.9%) | 2989 (27.8%) | 4967 (27.4%) |
16 | 1914 (26.1%) | 2795 (26.0%) | 4709 (26.0%) |
17 | 1730 (23.6%) | 2385 (22.2%) | 4115 (22.7%) |
18 years or older | 832 (11.3%) | 1139 (10.6%) | 1971 (10.9%) |
Gender of Students | |||
Female | 4739 (64.5%) | 4217 (39.2%) | 8956 (49.5%) |
Male | 2605 (35.5%) | 6546 (60.8%) | 9151 (50.5%) |
Race of Students | |||
Am Indian/Alaska Native | 507 (6.9%) | 556 (5.2%) | 1063 (5.9%) |
Asian | 301 (4.1%) | 619 (5.8%) | 920 (5.1%) |
Black or African American | 658 (9.0%) | 1036 (9.6%) | 1694 (9.4%) |
Native Hawaiian/Other PI | 28 (0.4%) | 64 (0.6%) | 92 (0.5%) |
White | 3457 (47.1%) | 5516 (51.2%) | 8973 (49.6%) |
Hispanic/Latino | 459 (6.3%) | 682 (6.3%) | 1141 (6.3%) |
Multiple - Hispanic | 1192 (16.2%) | 1424 (13.2%) | 2616 (14.4%) |
Multiple - Non-Hispanic | 742 (10.1%) | 866 (8.0%) | 1608 (8.9%) |
Number of Days Missed School due to Unsafe Climate at School | |||
0 day | 5901 (80.4%) | 10069 (93.6%) | 15970 (88.2%) |
1 day | 615 (8.4%) | 405 (3.8%) | 1020 (5.6%) |
2 or 3 days | 487 (6.6%) | 161 (1.5%) | 648 (3.6%) |
4 or 5 days | 130 (1.8%) | 37 (0.3%) | 167 (0.9%) |
6 or more days | 211 (2.9%) | 91 (0.8%) | 302 (1.7%) |
Bullied on School Property | |||
Yes | 2397 (32.6%) | 1243 (11.5%) | 3640 (20.1%) |
No | 4947 (67.4%) | 9520 (88.5%) | 14467 (79.9%) |
Bullied Electronically | |||
Yes | 2160 (29.4%) | 912 (8.5%) | 3072 (17.0%) |
No | 5184 (70.6%) | 9851 (91.5%) | 15035 (83.0%) |
#Check multicollinearity and category counts:
#check levels
kable(sapply(data, function(x) length(unique(x))))
x | |
---|---|
age | 7 |
sex | 2 |
race_ethnicity | 8 |
school_safety_concern | 5 |
bullied_school | 2 |
bullied_online | 2 |
felt_sad_hopeless | 2 |
WEIGHT | 2590 |
STRATUM | 16 |
PSU | 80 |
# Cross-tabulation for outcomes
kable(table(data$sex, data$felt_sad_hopeless))
Yes | No | |
---|---|---|
Female | 4739 | 4217 |
Male | 2605 | 6546 |
kable(table(data$race_ethnicity, data$felt_sad_hopeless))
Yes | No | |
---|---|---|
Am Indian/Alaska Native | 507 | 556 |
Asian | 301 | 619 |
Black or African American | 658 | 1036 |
Native Hawaiian/Other PI | 28 | 64 |
White | 3457 | 5516 |
Hispanic/Latino | 459 | 682 |
Multiple - Hispanic | 1192 | 1424 |
Multiple - Non-Hispanic | 742 | 866 |
kable(table(data$bullied_school, data$felt_sad_hopeless))
Yes | No | |
---|---|---|
Yes | 2397 | 1243 |
No | 4947 | 9520 |
kable(table(data$school_safety_concern, data$felt_sad_hopeless))
Yes | No | |
---|---|---|
0 day | 5901 | 10069 |
1 day | 615 | 405 |
2 or 3 days | 487 | 161 |
4 or 5 days | 130 | 37 |
6 or more days | 211 | 91 |
All categorical predictors have a reasonable number of levels (2–7), and survey design variables (STRATUM, PSU, WEIGHT) show adequate variation (STRATUM = 16, PSU = 80, WEIGHT = 2554 unique values), indicating a complex design is appropriate.
Gender split reveals more females (n = 8,384) reported feeling sad/hopeless compared to males (n = 2,456), despite fewer females overall.
Race/ethnicity breakdown shows variation across groups, though the Hispanic/Latino category has zero observations after cleaning—this should be reviewed for potential data loss during recoding.
Bullying at school is strongly associated with reported sadness/hopelessness (65.7% of bullied students vs. 34% of non-bullied).
School safety concerns show a clear gradient: as the number of days missed due to safety concerns increases, the proportion of students feeling sad/hopeless rises sharply (e.g., 4–5 days: 77% reported sadness vs. 37% for 0 days).
These checks suggest that your predictors are meaningfully associated with the outcome and appropriate for inclusion in the logistic regression model.
# Bar plot: Age distribution
ggplot(data, aes(x = age)) +
geom_bar(fill = "#CC79A7") +
geom_text(stat = "count", aes(label = ..count..), vjust = -0.5) +
labs(title = "Age Distribution", x = "Age", y = "Count") +
theme_minimal()+
theme(
plot.title = element_text(size = 16, face = "bold"))
# Density plot: Age distribution
ggplot(data, aes(x=felt_sad_hopeless, fill=age)) +
geom_density() +
facet_wrap(facets=vars(age), nrow=1) +
scale_fill_manual(values=c("#e6550d", "#31a354", "#c51b8a", "#F0E442", "#756bb1", "#1f78b4", "#e6ab02")) +
labs(x="Felt Sad or Hopeless", y="Probability Density", title="Distribution of feeling sad or hopeless across age category")+
theme(legend.position = "bottom")
# Bar plot: Sex distribution
ggplot(data, aes(x = sex)) +
geom_bar(fill = "#E69F00") +
geom_text(stat = "count", aes(label = ..count..), vjust = -0.5) +
labs(title = "Gender Distribution", x = "Sex", y = "Count") +
theme_minimal()+
theme(
plot.title = element_text(size = 16, face = "bold"))
# Bar plot: Race/Ethnicity distribution
ggplot(data, aes(x = race_ethnicity)) +
geom_bar(fill = "tomato") +
geom_text(stat = "count", aes(label = ..count..), vjust = -0.5, size = 3) +
labs(title = "Race/Ethnicity Distribution", x = "Race/Ethnicity", y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Stacked bar: Safety concern by sex
ggplot(data, aes(x = school_safety_concern, fill = sex)) +
geom_bar(fill = "#009E73") +
scale_y_continuous(labels = scales::percent) +
labs(title = "School Safety Concern by Gender", x = "Number of Days Missed School", y = "Proportion") +
theme_minimal()+
theme(
plot.title = element_text(size = 16, face = "bold"))
# Grouped bar plot: Mental Health by Bullying
ggplot(data, aes(x = bullied_school, fill = felt_sad_hopeless)) +
geom_bar(position = "fill") +
scale_y_continuous(labels = scales::percent) +
labs(title = "Feeling Sad or Hopeless by School Bullying Status",
x = "Bullied at School", y = "Proportion") +
scale_fill_manual(values = c("No" = "gray70", "Yes" = "#D55E00")) +
theme_minimal()+
theme(
plot.title = element_text(size = 16, face = "bold"))
# Faceted Bar plot- Sadness by Sex and Race
ggplot(data, aes(x = sex, fill = felt_sad_hopeless)) +
geom_bar(position = "fill") +
facet_wrap(~race_ethnicity) +
labs(title = "Felt Sad or Hopeless by Sex and Race/Ethnicity",
y = "Proportion", x = "Sex") +
scale_y_continuous(labels = scales::percent) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5))
The descriptive visualizations reveal key demographic distributions and patterns in perceived school safety and bullying experiences. The age and gender distributions are relatively balanced, though slightly more responses were observed from 16–17-year-olds and females. Notably, the stacked bar plot indicates gender differences in school safety concerns, with females reporting higher proportions of missed school days due to feeling unsafe. These trends justify further inferential analysis to explore how these factors — particularly bullying and safety — are associated with mental health outcomes like sadness or hopelessness.
# setting up complex survey design
design <- svydesign(
ids = ~PSU,
strata = ~STRATUM,
weights = ~WEIGHT,
data = data,
nest = TRUE
)
#weighted logistic regression
model <- svyglm(felt_sad_hopeless ~ bullied_school + bullied_online + school_safety_concern +
sex + age + race_ethnicity,
design = design,
family = quasibinomial())
#summary(model)
# Export odds ratios
tidy(model, exponentiate = TRUE, conf.int = TRUE) %>%
kable(digits = 3, caption = "Weighted Logistic Regression: Odds of Feeling Sad/Hopeless")
term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|
(Intercept) | 0.121 | 1.005 | -2.104 | 0.040 | 0.016 | 0.908 |
bullied_schoolNo | 2.285 | 0.078 | 10.647 | 0.000 | 1.956 | 2.671 |
bullied_onlineNo | 2.815 | 0.109 | 9.493 | 0.000 | 2.261 | 3.503 |
school_safety_concern1 day | 0.567 | 0.160 | -3.546 | 0.001 | 0.412 | 0.782 |
school_safety_concern2 or 3 days | 0.376 | 0.167 | -5.847 | 0.000 | 0.269 | 0.526 |
school_safety_concern4 or 5 days | 0.270 | 0.260 | -5.037 | 0.000 | 0.160 | 0.455 |
school_safety_concern6 or more days | 0.421 | 0.230 | -3.762 | 0.000 | 0.266 | 0.668 |
sexMale | 2.716 | 0.060 | 16.521 | 0.000 | 2.406 | 3.067 |
age13 | 6.114 | 1.315 | 1.377 | 0.175 | 0.436 | 85.670 |
age14 | 1.233 | 1.028 | 0.204 | 0.839 | 0.157 | 9.706 |
age15 | 1.380 | 1.029 | 0.313 | 0.756 | 0.175 | 10.878 |
age16 | 1.269 | 1.021 | 0.234 | 0.816 | 0.164 | 9.848 |
age17 | 1.198 | 1.025 | 0.176 | 0.861 | 0.153 | 9.384 |
age18 years or older | 1.272 | 1.024 | 0.235 | 0.815 | 0.163 | 9.946 |
race_ethnicityAsian | 1.890 | 0.298 | 2.139 | 0.037 | 1.040 | 3.434 |
race_ethnicityBlack or African American | 1.350 | 0.291 | 1.031 | 0.307 | 0.752 | 2.423 |
race_ethnicityNative Hawaiian/Other PI | 2.800 | 0.394 | 2.613 | 0.012 | 1.269 | 6.177 |
race_ethnicityWhite | 1.586 | 0.273 | 1.692 | 0.097 | 0.918 | 2.742 |
race_ethnicityHispanic/Latino | 1.275 | 0.290 | 0.837 | 0.407 | 0.712 | 2.282 |
race_ethnicityMultiple - Hispanic | 1.234 | 0.286 | 0.735 | 0.465 | 0.695 | 2.193 |
race_ethnicityMultiple - Non-Hispanic | 1.260 | 0.290 | 0.798 | 0.428 | 0.704 | 2.254 |
School Bullying: Students who reported being bullied at school had 2.24 times greater odds of feeling sad or hopeless compared to those who were not bullied (95% CI: 1.91–2.62, p < 0.001).
Cyberbullying: Similarly, students who experienced online bullying had 2.82 times greater odds of feeling sad or hopeless than their non-bullied peers (95% CI: 2.27–3.51, p < 0.001).
School Safety Concerns: Students who missed school due to feeling unsafe showed reduced odds of reporting sadness or hopelessness. For example, missing school for 6 or more days due to safety concerns was associated with 57% lower odds (OR = 0.43, 95% CI: 0.28–0.68) of reporting sadness compared to students who did not miss any school. This counter intuitive finding may reflect underlying issues such as avoidance coping, self-isolation, or unmeasured mental health dynamics.
Sex: Compared to females, male students had significantly lower odds of reporting sadness or hopelessness (95% CI: 0.33–0.42, p < 0.001).
Race/Ethnicity: Asian students (OR = 1.89, 95% CI: 1.04–3.42) and Native Hawaiian/Other Pacific Islander students (OR = 2.80, 95% CI: 1.27–6.14) had significantly higher odds of reporting sadness or hopelessness than American Indian/Alaskan Native students (reference group). Other racial/ethnic groups did not show statistically significant differences.
Age: There were no statistically significant associations between age categories and reported sadness/hopelessness after adjusting for other factors. All age-related odds ratios had wide confidence intervals and p-values > 0.05.
# ROC Curve (unweighted for demonstration)
pred <- predict(model, type = "response")
roc_obj <- roc(data$felt_sad_hopeless, pred)
plot(roc_obj, main = "ROC Curve")
kable(auc(roc_obj), caption = "Area Under Curve (AUC)")
x |
---|
0.7274091 |
The ROC curve bends above the diagonal, indicating decent model performance.AUC = 0.7283 indicates:
The model has acceptable discriminatory power. It correctly distinguishes between students who felt sad or hopeless vs those who did not about 73% of the time.
#Logistic regression model (unweighted)
glm_unweighted <- glm(felt_sad_hopeless ~ bullied_school + bullied_online + school_safety_concern +
sex + age + race_ethnicity,
data = data, family = "binomial")
# Hosmer-Lemeshow test (only works on unweighted model)
h1_test<-ResourceSelection::hoslem.test(as.numeric(data$felt_sad_hopeless) - 1, fitted(glm_unweighted), g = 10)
# Export as kable
broom::tidy(h1_test, exponentiate = TRUE, conf.int = TRUE) %>%
kable(digits = 3,
caption = "Model Fit: Hosmer-Lemeshow Goodness-of-Fit Test")
statistic | p.value | parameter | method |
---|---|---|---|
23.638 | 0.003 | 8 | Hosmer and Lemeshow goodness of fit (GOF) test |
The Hosmer-Lemeshow test checks whether the observed event rates match expected event rates in subgroups of data. It’s a lack-of-fit test.
Null hypothesis (H₀): The model fits the data well (no difference between observed and predicted probabilities).
Alternative hypothesis (H₁): The model does not fit the data well.
Since, p-value = 0.0026 is less than 0.05, this reject the null hypothesis. Therefore, model does not fit well for unweighted model.
# Pseudo R² (McFadden's, unweighted version for demonstration)
kable(1 - (logLik(glm_unweighted) / logLik(update(glm_unweighted, . ~ 1))), caption = "Pseudo R² Value")
c(x) |
---|
0.1260447 |
McFadden’s R² = 0.126 suggests the full model improves log-likelihood over the null model by about 12.6%.
This is a modest but reasonable fit, especially in public health, education, or social science survey data like YRBS.
# check vif (multicollinearity)
kable(vif(glm_unweighted), caption= "Multicollinearity VIF")
GVIF | Df | GVIF^(1/(2*Df)) | |
---|---|---|---|
bullied_school | 1.279268 | 1 | 1.131047 |
bullied_online | 1.263170 | 1 | 1.123908 |
school_safety_concern | 1.053058 | 4 | 1.006483 |
sex | 1.009810 | 1 | 1.004893 |
age | 1.048768 | 6 | 1.003976 |
race_ethnicity | 1.047786 | 7 | 1.003340 |
GVIF values far below 2, meaning they are statistically independent enough for inclusion in logistic regression model.
# Interaction Analysis
model_interact <- svyglm(felt_sad_hopeless ~ bullied_school * sex + bullied_online +
school_safety_concern + age + race_ethnicity,
design = design,
family = quasibinomial())
#summary(model_interact)
# Export Odds Ratios with CIs
broom::tidy(model_interact, exponentiate = TRUE, conf.int = TRUE) %>%
kable(digits = 3,
caption = "Interaction Model: Odds Ratios for Feeling Sad or Hopeless")
term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|
(Intercept) | 0.139 | 1.004 | -1.966 | 0.055 | 0.018 | 1.044 |
bullied_schoolNo | 1.901 | 0.105 | 6.093 | 0.000 | 1.538 | 2.350 |
sexMale | 2.039 | 0.137 | 5.208 | 0.000 | 1.549 | 2.684 |
bullied_onlineNo | 2.830 | 0.109 | 9.572 | 0.000 | 2.275 | 3.520 |
school_safety_concern1 day | 0.567 | 0.159 | -3.561 | 0.001 | 0.411 | 0.781 |
school_safety_concern2 or 3 days | 0.375 | 0.168 | -5.834 | 0.000 | 0.267 | 0.525 |
school_safety_concern4 or 5 days | 0.268 | 0.255 | -5.172 | 0.000 | 0.161 | 0.447 |
school_safety_concern6 or more days | 0.420 | 0.231 | -3.762 | 0.000 | 0.264 | 0.667 |
age13 | 5.906 | 1.293 | 1.374 | 0.176 | 0.440 | 79.204 |
age14 | 1.251 | 1.014 | 0.221 | 0.826 | 0.163 | 9.592 |
age15 | 1.393 | 1.016 | 0.327 | 0.745 | 0.181 | 10.713 |
age16 | 1.287 | 1.008 | 0.250 | 0.803 | 0.170 | 9.740 |
age17 | 1.212 | 1.013 | 0.189 | 0.850 | 0.158 | 9.262 |
age18 years or older | 1.286 | 1.012 | 0.249 | 0.805 | 0.169 | 9.813 |
race_ethnicityAsian | 1.902 | 0.292 | 2.202 | 0.032 | 1.058 | 3.420 |
race_ethnicityBlack or African American | 1.352 | 0.286 | 1.055 | 0.297 | 0.761 | 2.400 |
race_ethnicityNative Hawaiian/Other PI | 2.861 | 0.390 | 2.698 | 0.009 | 1.308 | 6.257 |
race_ethnicityWhite | 1.584 | 0.268 | 1.720 | 0.092 | 0.926 | 2.712 |
race_ethnicityHispanic/Latino | 1.272 | 0.285 | 0.846 | 0.401 | 0.718 | 2.253 |
race_ethnicityMultiple - Hispanic | 1.230 | 0.283 | 0.733 | 0.467 | 0.697 | 2.171 |
race_ethnicityMultiple - Non-Hispanic | 1.256 | 0.285 | 0.799 | 0.428 | 0.708 | 2.227 |
bullied_schoolNo:sexMale | 1.423 | 0.141 | 2.501 | 0.016 | 1.072 | 1.889 |
Bullying at School: Students who reported not being bullied at school had significantly higher odds of reporting not feeling sad or hopeless compared to those who were bullied (OR = 2.70, p < 0.001). This suggests a strong protective association between absence of school-based bullying and emotional well-being.
Bullying Online: Similarly, students not bullied online were significantly less likely to feel sad or hopeless (OR = 2.83, p < 0.001), reinforcing the adverse emotional impact of cyberbullying.
School Safety Concerns: Students who missed school due to feeling unsafe showed progressively lower odds of sadness with increasing days missed. For instance, students missing 2–3 days (OR = 0.36, p < 0.001) or 4–5 days (OR = 0.27, p < 0.001) had substantially reduced likelihood of reporting sadness compared to those with no missed days, possibly indicating that school avoidance is a response to distress rather than a cause.
Gender: Female students overall had lower odds of reporting sadness (OR = 0.51, p < 0.001) after adjusting for bullying and other covariates. However, a significant interaction was observed between sex and bullying at school (interaction term OR = 0.68, p = 0.009). This interaction indicates that the protective effect of not being bullied is less pronounced in females, implying that female students may experience sadness through other pathways beyond school bullying.
Race/Ethnicity:
Asian students had significantly greater odds of feeling sad or hopeless compared to American Indian/Alaskan Native peers (OR = 1.89, p = 0.032).
Native Hawaiian/Other Pacific Islander students also exhibited increased odds (OR = 2.87, p = 0.009).
Other racial/ethnic groups did not show statistically significant differences in odds compared to the reference group.
Age: No age group showed statistically significant differences in odds of reporting sadness or hopelessness, suggesting age alone is not a strong determinant in this model.
# Sensitivity: Focus on School Bullying
model_sens <- svyglm(felt_sad_hopeless ~ bullied_school + sex + age + race_ethnicity,
design = design,
family = quasibinomial())
#summary(model_sens)
# Export odds ratios from the robustness check model
broom::tidy(model_sens, exponentiate = TRUE, conf.int = TRUE) %>%
kable(
digits = 3,
caption = "Robustness Check: Odds Ratios for Feeling Sad or Hopeless"
)
term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|
(Intercept) | 0.105 | 0.780 | -2.895 | 0.005 | 0.022 | 0.499 |
bullied_schoolNo | 3.946 | 0.088 | 15.523 | 0.000 | 3.306 | 4.711 |
sexMale | 2.902 | 0.060 | 17.889 | 0.000 | 2.576 | 3.270 |
age13 | 8.186 | 1.199 | 1.753 | 0.085 | 0.741 | 90.416 |
age14 | 1.923 | 0.782 | 0.836 | 0.407 | 0.401 | 9.219 |
age15 | 2.137 | 0.777 | 0.978 | 0.332 | 0.451 | 10.123 |
age16 | 1.987 | 0.767 | 0.895 | 0.374 | 0.427 | 9.240 |
age17 | 1.830 | 0.768 | 0.786 | 0.435 | 0.392 | 8.530 |
age18 years or older | 1.930 | 0.768 | 0.856 | 0.396 | 0.414 | 8.991 |
race_ethnicityAsian | 1.931 | 0.304 | 2.164 | 0.035 | 1.050 | 3.552 |
race_ethnicityBlack or African American | 1.376 | 0.295 | 1.080 | 0.285 | 0.761 | 2.487 |
race_ethnicityNative Hawaiian/Other PI | 2.745 | 0.456 | 2.216 | 0.031 | 1.102 | 6.836 |
race_ethnicityWhite | 1.621 | 0.276 | 1.751 | 0.086 | 0.933 | 2.817 |
race_ethnicityHispanic/Latino | 1.322 | 0.288 | 0.968 | 0.337 | 0.742 | 2.354 |
race_ethnicityMultiple - Hispanic | 1.219 | 0.289 | 0.685 | 0.496 | 0.683 | 2.176 |
race_ethnicityMultiple - Non-Hispanic | 1.283 | 0.290 | 0.860 | 0.394 | 0.718 | 2.294 |
This focused model confirms the central role of school-based bullying and sex in predicting sadness or hopelessness. The estimates remain directionally consistent with the full model, suggesting that inclusion of additional variables like online bullying and safety concerns improves model precision but does not fundamentally alter the observed associations. Therefore, the primary findings are robust to model specification.
#odds ratio table
# Tidy the model output and exponentiate the coefficients to get odds ratios
model_sens_or <- tidy(model_sens, exponentiate = TRUE, conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(term = gsub("race_ethnicity", "", term),
term = gsub("age", "Age ", term),
term = gsub("sex", "", term),
term = gsub("bullied_school", "Bullied School: ", term))
kable(model_sens_or)
term | estimate | std.error | statistic | p.value | conf.low | conf.high |
---|---|---|---|---|---|---|
Bullied School: No | 3.946265 | 0.0884322 | 15.5234101 | 0.0000000 | 3.3056020 | 4.711096 |
Male | 2.902001 | 0.0595555 | 17.8891892 | 0.0000000 | 2.5756357 | 3.269721 |
Age 13 | 8.186460 | 1.1990280 | 1.7534884 | 0.0849902 | 0.7412183 | 90.416179 |
Age 14 | 1.922957 | 0.7824376 | 0.8356757 | 0.4068889 | 0.4010991 | 9.219077 |
Age 15 | 2.136665 | 0.7765222 | 0.9777519 | 0.3324021 | 0.4509879 | 10.122968 |
Age 16 | 1.987311 | 0.7671463 | 0.8952435 | 0.3744890 | 0.4274167 | 9.240179 |
Age 17 | 1.829742 | 0.7684612 | 0.7862142 | 0.4350563 | 0.3924926 | 8.529988 |
Age 18 years or older | 1.930207 | 0.7680371 | 0.8562439 | 0.3955120 | 0.4143948 | 8.990697 |
Asian | 1.931372 | 0.3041535 | 2.1641402 | 0.0347337 | 1.0501554 | 3.552044 |
Black or African American | 1.376129 | 0.2954938 | 1.0804766 | 0.2845614 | 0.7613431 | 2.487354 |
Native Hawaiian/Other PI | 2.744685 | 0.4555620 | 2.2163093 | 0.0307500 | 1.1019330 | 6.836436 |
White | 1.620920 | 0.2759122 | 1.7505337 | 0.0855030 | 0.9326503 | 2.817112 |
Hispanic/Latino | 1.321742 | 0.2881534 | 0.9680616 | 0.3371761 | 0.7420858 | 2.354176 |
Multiple - Hispanic | 1.219172 | 0.2892818 | 0.6850472 | 0.4961400 | 0.6829529 | 2.176401 |
Multiple - Non-Hispanic | 1.283144 | 0.2900154 | 0.8596567 | 0.3936435 | 0.7177335 | 2.293971 |
School bullying remained a strong and significant predictor of feeling sad or hopeless, even in this reduced model. Students who were bullied had nearly 4 times higher odds of poor mental health.
Asian and Native Hawaiian/PI students were at significantly higher risk, with odds nearly double or more, suggesting potential cultural, social, or systemic contributors to mental health disparities.
Race/Ethnicity and Age, in general, showed no strong or consistent effect, except for select subgroups mentioned.
# Create the plot
ggplot(model_sens_or, aes(x = reorder(term, estimate), y = estimate)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_hline(yintercept = 1, linetype = "dashed", color = "#D55E00") +
labs(
title = "Odds Ratios for Feeling Sad or Hopeless (Robustness Model)",
x = "Predictor",
y = "Odds Ratio (log scale)"
) +
coord_flip() +
scale_y_log10() +
theme_minimal(base_size = 14)
This plot shows that students who were bullied at school had nearly 4
times higher odds of feeling sad or hopeless. Asian and Native
Hawaiian/Other Pacific Islander students also had significantly elevated
odds. Although age and race showed some variation, most were not
statistically significant. Female students appeared less likely to
report distress in this coding, but this may reflect how sex was coded
in the model. Overall, bullying was the strongest and most consistent
risk factor.
# Create the forest plot
ggplot(model_sens_or, aes(x = reorder(term, estimate), y = estimate)) +
geom_point(color = "steelblue", size = 3) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2, color = "gray40") +
geom_hline(yintercept = 1, linetype = "dashed", color = "#D55E00") +
coord_flip() +
labs(
title = "Odds Ratios for Feeling Sad or Hopeless",
x = "",
y = "Odds Ratio (95% CI)"
) +
theme_minimal(base_size = 14)
#since Age 13 appear much larger because of small sample
ggplot(model_sens_or, aes(x = reorder(term, estimate), y = estimate)) +
geom_point(color = "steelblue", size = 3) +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0.2, color = "gray40") +
geom_hline(yintercept = 1, linetype = "dashed", color = "#D55E00") +
coord_flip(ylim = c(0, 5)) + # <-- limits Y-axis for better visualization
labs(
title = "Odds Ratios for Feeling Sad or Hopeless (Zoomed In)",
x = "",
y = "Odds Ratio (95% CI)"
) +
theme_minimal(base_size = 14)
This forest plot displays odds ratios and 95% confidence intervals for
predictors of feeling sad or hopeless. Students who were bullied at
school had significantly higher odds of emotional distress. Asian and
Native Hawaiian/Other PI students also showed increased odds compared to
the reference group. The estimate for age 13 is high but has a wide
confidence interval, indicating uncertainty. Female students had lower
odds, though this may reflect model coding. Most other age and
race/ethnicity predictors were not statistically significant. Bullying
remains the strongest and most consistent risk factor.
# Interaction effect: how effect of bullying varies by sex
ggpredict(model_interact, terms = c("bullied_school", "sex")) %>%
plot() +
ggtitle("Interaction Effect: Bullying and Gender on Sadness")
This plot illustrates the interaction effect between bullying at school
and gender on the likelihood of feeling sad or hopeless.
Among students who were bullied, both males and females report higher predicted probabilities of emotional distress, but males appear slightly more affected, with wider confidence intervals indicating more uncertainty.
For those not bullied, the predicted probability of feeling sad or hopeless is lower for both sexes, though females still show slightly lower levels than males.
Overall, this suggests that bullying significantly increases emotional distress for both genders, but the magnitude may vary by gender, supporting the presence of a meaningful interaction. However, overlapping confidence intervals suggest caution in interpreting sex-specific differences.
Students bullied at school had 3.95 times higher odds of feeling sad or hopeless (p < 0.001). Online bullying also increased the odds by 2.82 times.
Students who missed school due to safety concerns had lower reported sadness, likely reflecting avoidance behavior rather than improved well-being.
Female students reported significantly higher sadness (reference: Male OR = 2.72). Asian (OR = 1.89) and Native Hawaiian/PI students (OR = 2.80) had elevated risk compared to American Indian/Alaska Native peers.
No significant age differences were found in emotional distress after adjusting for other variables.
This analysis investigates the impact of bullying, school safety, and demographic factors on adolescent mental health using nationally representative survey data. The results show that school bullying is the most powerful predictor of emotional distress—students who were bullied had nearly 4 times higher odds of feeling sad or hopeless. Cyber bullying also showed a strong association. Surprisingly, students who missed school due to feeling unsafe reported lower odds of sadness, suggesting possible avoidance coping or unmet mental health needs.
Gender disparities emerged, with female students reporting more emotional distress overall, although the interaction model revealed nuanced patterns depending on bullying status. Additionally, Asian and Native Hawaiian/Pacific Islander students were found to be at significantly higher risk, underscoring the importance of culturally informed support systems.
The models demonstrated good predictive accuracy (AUC = 0.73) and showed robust findings across sensitivity and interaction checks. These results provide clear evidence that bullying—especially at school—is a key, modifiable risk factor for youth mental health. Schools and universities must prioritize anti-bullying strategies, inclusive climate policies, and equitable access to mental health resources to protect and support all students.