library(tidyverse); library(moments)
# 1.1. Load raw data
raw_data <- read.csv("C:/Users/Admin/Documents/NCKH JC/Bài Jetlag/Bài 2. validation/Jet_validation.csv", stringsAsFactors = FALSE, na.strings = c("", "NA"))
# 1.2. Select, Clean, and Recode (The "Bulletproof" Transformation)
Jet_val <- raw_data %>%
select(Sex_raw = gender_code, Year_raw = year, Residence_raw = residence_code,
E1 = E.1_raw, E2 = E.2_raw, E3 = E.3_raw,
PHQ9_raw = PHQ9_total, ISI_raw = ISI_total, SJL_raw = SJL_hours,
Chronotype_raw = MEQ_evening2, Screen_time = D2_total_hours) %>%
drop_na(E1, E2, E3) %>%
mutate(
# [FIX 1] Rescale BPS-3 from 0-4 to 1-5 (Essential for Floor/Ceiling Analysis)
E1 = E1 + 1, E2 = E2 + 1, E3 = E3 + 1, BPS_Total = E1 + E2 + E3,
# [FIX 2] Residence Grouping (Merging groups < 5 to ensure statistical power)
Residence = factor(case_when(Residence_raw == 1 ~ "Dormitory", Residence_raw == 2 ~ "Rented",
Residence_raw %in% c(3, 4) ~ "Family/Others", TRUE ~ "Unknown"),
levels = c("Dormitory", "Rented", "Family/Others")),
# Demographic & Clinical Categorization
Sex = factor(Sex_raw, levels = c(0, 1), labels = c("Male", "Female")),
Year = factor(Year_raw),
Depression_Cat = factor(ifelse(PHQ9_raw >= 10, "Clinical Depression", "Normal"), levels = c("Normal", "Clinical Depression")),
Insomnia_Cat = factor(ifelse(ISI_raw >= 10, "Clinical Insomnia", "Normal"), levels = c("Normal", "Clinical Insomnia")),
SJL_Cat = factor(ifelse(SJL_raw >= 1, "SJL >= 1h", "SJL < 1h"), levels = c("SJL < 1h", "SJL >= 1h")),
Chronotype = factor(Chronotype_raw, levels = c("Non-evening", "Evening"))
)
# [FIX 3] Quick Check (Properly separated line)
cat("Dataset dimensions:", dim(Jet_val), "\n"); head(Jet_val, 5)Dataset dimensions: 113 19
Sex_raw Year_raw Residence_raw E1 E2 E3 PHQ9_raw ISI_raw SJL_raw
1 0 3 2 4 3 3 15 17 10.5666667
2 1 2 1 3 3 3 18 7 0.7500000
3 1 2 2 4 1 3 2 2 1.0416667
4 0 2 2 5 5 5 22 17 3.2500000
5 1 2 2 3 2 2 3 8 0.4583333
Chronotype_raw Screen_time BPS_Total Residence Sex Year
1 Non-evening 20.66667 10 Rented Male 3
2 Non-evening 5.25000 9 Dormitory Female 2
3 Non-evening 5.50000 8 Rented Female 2
4 Evening 9.00000 15 Rented Male 2
5 Non-evening 12.66667 7 Rented Female 2
Depression_Cat Insomnia_Cat SJL_Cat Chronotype
1 Clinical Depression Clinical Insomnia SJL >= 1h Non-evening
2 Clinical Depression Normal SJL < 1h Non-evening
3 Normal Normal SJL >= 1h Non-evening
4 Clinical Depression Clinical Insomnia SJL >= 1h Evening
5 Normal Normal SJL < 1h Non-evening
library(dplyr); library(ggplot2); library(ggpubr)
# 1. FUNCTION FOR AUTOMATED DIAGNOSTICS (Targeting 'Jet_val')
run_diag <- function(v, label) {
dat <- na.omit(Jet_val[[v]])
# Histogram & Q-Q Plot using ggpubr
p1 <- gghistogram(Jet_val, x = v, fill = "steelblue", color = "white", add = "mean", rug = TRUE, title = paste("Histogram:", label), xlab = label)
p2 <- ggqqplot(Jet_val, x = v, color = "steelblue", title = paste("Q-Q Plot:", label))
print(ggarrange(p1, p2, ncol = 2))
if(length(dat) >= 3 && length(dat) <= 5000) {
sw <- shapiro.test(dat)
cat(sprintf("=> [%s]: W = %.3f, p = %.2e (%s)\n", label, sw$statistic, sw$p.value, ifelse(sw$p.value < 0.05, "Non-normal", "Normal")))
}
}
# 2. VARIABLE LIST (Ensuring names match Jet_val columns)
test_list <- list(
c("BPS_Total", "Bedtime Procrastination (BPS-3)"),
c("PHQ9_raw", "Depressive Symptoms (PHQ-9)"),
c("ISI_raw", "Insomnia Severity (ISI)"),
c("SJL_raw", "Social Jetlag (Hours)"),
c("Screen_time", "Daily Screen Time (Hours)")
)
# 3. EXECUTE LOOP (x[1] for variable, x[2] for label)
invisible(lapply(test_list, function(x) run_diag(x[1], x[2])))=> [Bedtime Procrastination (BPS-3)]: W = 0.972, p = 1.70e-02 (Non-normal)
=> [Depressive Symptoms (PHQ-9)]: W = 0.946, p = 1.67e-04 (Non-normal)
=> [Insomnia Severity (ISI)]: W = 0.981, p = 1.11e-01 (Normal)
=> [Social Jetlag (Hours)]: W = 0.732, p = 4.50e-13 (Non-normal)
=> [Daily Screen Time (Hours)]: W = 0.911, p = 1.40e-06 (Non-normal)
library(dplyr); library(gtsummary); library(flextable); library(officer)
# 1. DATA PREP: Gộp nhóm và gán nhãn lại cho chuẩn quốc tế
Jet_val_t1 <- Jet_val %>% mutate(
Residence = factor(case_when(
Residence_raw == 1 ~ "Dormitory",
Residence_raw == 2 ~ "Private Rental",
Residence_raw %in% c(3, 4) ~ "Family/Others"),
levels = c("Dormitory", "Private Rental", "Family/Others")),
Year = factor(Year_raw, labels = c("First Year", "Second Year", "Third Year"))
)
# 2. XUẤT BẢNG TABLE 1 CHUẨN APA-7
theme_gtsummary_compact()
Table1_APA <- Jet_val_t1 %>%
select(Sex, Year, Residence, Chronotype, SJL_Cat, Insomnia_Cat, Depression_Cat,
BPS_Total, PHQ9_raw, ISI_raw, SJL_raw, Screen_time) %>%
tbl_summary(
type = list(all_continuous() ~ "continuous", all_categorical() ~ "categorical"),
statistic = list(
all_continuous() ~ "{median} ({p25}–{p75})", # Dùng dấu gạch ngang en-dash (–)
all_categorical() ~ "{n} ({p}%)"
),
label = list(
Sex ~ "Biological Sex",
Year ~ "Year of Study",
Residence ~ "Residential Status",
Chronotype ~ "Chronotype (Evening Preference)",
SJL_Cat ~ "Social Jetlag (SJL) Category",
Insomnia_Cat ~ "Insomnia Severity (ISI ≥ 10)",
Depression_Cat ~ "Depressive Symptoms (PHQ-9 ≥ 10)",
BPS_Total ~ "VN-BPS-3 Total Score",
PHQ9_raw ~ "PHQ-9 Total Score",
ISI_raw ~ "ISI Total Score",
SJL_raw ~ "Social Jetlag (hours)",
Screen_time ~ "Daily Screen Time (hours)"
)
) %>%
modify_header(label = "**Characteristics**", stat_0 = "**Total (N = 113)**") %>%
modify_footnote(everything() ~ "Values are presented as n (%) or Median (IQR).") %>%
bold_labels() %>%
as_flex_table() %>%
font(fontname = "Times New Roman", part = "all") %>%
fontsize(size = 11, part = "all") %>%
border_remove() %>%
hline_top(border = fp_border_default(width = 1.5), part = "header") %>%
hline_bottom(border = fp_border_default(width = 1), part = "header") %>%
hline_bottom(border = fp_border_default(width = 1.5), part = "body") %>%
autofit()
# Hiển thị bảng
Table1_APACharacteristics1 | Total (N = 113)1 |
|---|---|
Biological Sex | |
Male | 42 (37%) |
Female | 71 (63%) |
Year of Study | |
First Year | 51 (45%) |
Second Year | 31 (27%) |
Third Year | 31 (27%) |
Residential Status | |
Dormitory | 16 (14%) |
Private Rental | 82 (73%) |
Family/Others | 15 (13%) |
Chronotype (Evening Preference) | |
Non-evening | 101 (89%) |
Evening | 12 (11%) |
Social Jetlag (SJL) Category | |
SJL < 1h | 45 (40%) |
SJL >= 1h | 68 (60%) |
Insomnia Severity (ISI ≥ 10) | |
Normal | 62 (55%) |
Clinical Insomnia | 51 (45%) |
Depressive Symptoms (PHQ-9 ≥ 10) | |
Normal | 71 (63%) |
Clinical Depression | 42 (37%) |
VN-BPS-3 Total Score | 9.00 (7.00–12.00) |
PHQ-9 Total Score | 8 (5–12) |
ISI Total Score | 9 (6–12) |
Social Jetlag (hours) | 1.25 (0.63–2.00) |
Daily Screen Time (hours) | 7.3 (5.2–10.3) |
1Values are presented as n (%) or Median (IQR). | |
3.2
library(dplyr); library(psych); library(lavaan); library(flextable); library(officer)
Jet_val_table <- Jet_val
# 1. TÍNH TOÁN RELIABILITY & CITC & ALPHA IF DELETED
items <- Jet_val_table %>% select(E1, E2, E3); n_v <- nrow(na.omit(items))
alpha_res <- psych::alpha(items); omega_res <- suppressWarnings(psych::omega(items, nfactors = 1, plot = FALSE))
# 2. TÍNH TOÁN CFA FACTOR LOADINGS (WLSMV)
cfa_fit <- cfa('BPS =~ E1 + E2 + E3', data = Jet_val_table, ordered = c("E1", "E2", "E3"), estimator = "WLSMV")
cfa_loadings <- standardizedSolution(cfa_fit) %>% filter(op == "=~") %>% pull(est.std)
# 3. TÍNH TOÁN MEDIAN (IQR) THEO ĐỊNH DẠNG CHUẨN
med_iqr <- apply(items, 2, function(x) {
sprintf("%.0f (%.0f - %.0f)", median(x, na.rm=T), quantile(x, 0.25, na.rm=T), quantile(x, 0.75, na.rm=T)) })
# 4. TẠO DATAFRAME TỔNG HỢP (THE MONEY TABLE)
T2_data <- data.frame(
Item = c("Item 1: Later than intended", "Item 2: Other things at bedtime", "Item 3: Easily distracted"),
Median_IQR = med_iqr,
Floor = round(apply(items, 2, function(x) sum(x == 1, na.rm=T)/n_v*100), 1),
Ceiling = round(apply(items, 2, function(x) sum(x == 5, na.rm=T)/n_v*100), 1),
CITC = round(alpha_res$item.stats$r.drop, 2),
Alpha_Del = round(alpha_res$alpha.drop$raw_alpha, 2),
Loading = round(cfa_loadings, 2))
# 5. XUẤT FLEXTABLE VỪA KHÍT TRANG A4
Table2_APA <- flextable(T2_data) %>%
set_header_labels(Item="VN-BPS-3 Items", Median_IQR="Median (IQR)", Floor="Floor (%)", Ceiling="Ceiling (%)", Alpha_Del="α if deleted", Loading="Factor Loading") %>%
add_footer_lines(paste0("Note. Overall α = ", round(alpha_res$total$raw_alpha, 2), ", ω = ", round(omega_res$omega.tot, 2), ". All factor loadings p < 0.001.")) %>%
font(fontname = "Times New Roman", part = "all") %>% fontsize(size = 11, part = "all") %>% border_remove() %>%
hline_top(border = fp_border_default(width = 1.5), part = "header") %>%
hline_bottom(border = fp_border_default(width = 1), part = "header") %>%
hline_bottom(border = fp_border_default(width = 1.5), part = "body") %>%
align(j = 2:7, align = "center", part = "all") %>% set_table_properties(layout = "autofit", width = 1)
print(Table2_APA)a flextable object.
col_keys: `Item`, `Median_IQR`, `Floor`, `Ceiling`, `CITC`, `Alpha_Del`, `Loading`
header has 1 row(s)
body has 3 row(s)
original dataset sample:
'data.frame': 3 obs. of 7 variables:
$ Item : chr "Item 1: Later than intended" "Item 2: Other things at bedtime" "Item 3: Easily distracted"
$ Median_IQR: chr "3 (3 - 4)" "3 (2 - 4)" "3 (2 - 4)"
$ Floor : num 4.4 8.8 8
$ Ceiling : num 8.8 8.8 10.6
$ CITC : num 0.69 0.79 0.73
$ Alpha_Del : num 0.84 0.75 0.81
$ Loading : num 0.8 0.93 0.85
library(lavaan); library(semPlot)
# 1. MODEL ESTIMATION (WLSMV for Ordinal Data)
cfa_model <- 'BPS =~ E1 + E2 + E3'
fit_cfa <- cfa(cfa_model, data = Jet_val_table, ordered = c("E1","E2","E3"), estimator = "WLSMV")
# 2. HIGH-RESOLUTION EXPORT (300 DPI)
png("C:/Users/Admin/Downloads/Figure_1_Final_Q1.png", width = 2400, height = 1800, res = 300)
# 3. SEMPATHS WITH LISREL STYLE (SOLID LINES ONLY)
semPaths(fit_cfa,
whatLabels = "std", # Show standardized loadings
layout = "tree2", # Top-down tree structure
style = "lisrel", # Force solid lines for consistency
intercepts = F, residuals = F, thresholds = F,
edge.color = "black", edge.width = 1.5, edge.label.cex = 1.3,
color = list(lat = "#E8F0FE", man = "#FFFFFF"),
sizeLat = 12, sizeMan = 10,
nodeLabels = c("Item 1", "Item 2", "Item 3", "Bedtime\nProcrastination"),
label.cex = 1.2,
mar = c(15, 5, 8, 5)) # Increase bottom margin for text safety
# 4. ANNOTATION (Fixed coordinates to avoid overlapping with Item 1)
text(x = -1.1, y = -1.45, adj = 0, cex = 0.8, font = 3, col = "grey30",
labels = "Estimator: WLSMV\nModel Status: Just-identified (df = 0)\nStandardized Factor Loadings: all p < 0.001")
dev.off()png
2
# Render directly to Rmd for preview
semPaths(fit_cfa, whatLabels = "std", layout = "tree2", style = "lisrel", intercepts = F, residuals = F, thresholds = F, edge.color = "black", color = list(lat = "#E8F0FE", man = "#FFFFFF"), sizeLat = 12, sizeMan = 10, nodeLabels = c("Item 1", "Item 2", "Item 3", "BPS"), mar = c(5, 5, 5, 5))library(dplyr); library(ggplot2); library(ggpubr); library(reshape2)
# --- PART 1: RAW DATA AUDIT (CONSOLIDATED) ---
cat("=== 3.4A. CONVERGENT VALIDITY (SPEARMAN) ===\n")=== 3.4A. CONVERGENT VALIDITY (SPEARMAN) ===
cv_vars <- c("PHQ9_raw", "ISI_raw", "SJL_raw", "Screen_time")
labels_cv <- c("Depression", "Insomnia", "Social Jetlag", "Screen Time")
invisible(lapply(1:4, function(i) {
res <- cor.test(Jet_val_table$BPS_Total, Jet_val_table[[cv_vars[i]]], method="spearman", exact=F)
cat(sprintf("BPS-3 vs %-15s: rho=%.3f | p=%.2e %s\n", labels_cv[i], res$estimate, res$p.value, ifelse(res$p.value<0.05, "(*)", "(ns)"))) }))BPS-3 vs Depression : rho=0.468 | p=1.68e-07 (*)
BPS-3 vs Insomnia : rho=0.212 | p=2.42e-02 (*)
BPS-3 vs Social Jetlag : rho=0.357 | p=1.01e-04 (*)
BPS-3 vs Screen Time : rho=0.284 | p=2.33e-03 (*)
=== 3.4B. KNOWN-GROUPS VALIDITY (MANN-WHITNEY U) ===
kg_vars <- c("Depression_Cat", "Insomnia_Cat", "SJL_Cat", "Chronotype")
invisible(lapply(kg_vars, function(g) {
wt <- wilcox.test(as.formula(paste("BPS_Total ~", g)), data=Jet_val_table, exact=F)
cat(sprintf("[%s] p=%.2e %s\n", g, wt$p.value, ifelse(wt$p.value<0.05, "(*)", "(ns)"))) }))[Depression_Cat] p=1.22e-03 (*)
[Insomnia_Cat] p=4.87e-02 (*)
[SJL_Cat] p=9.63e-04 (*)
[Chronotype] p=9.18e-02 (ns)
# --- PART 2: FIGURE 2 - CUSTOM TRIANGULAR HEATMAP (APA 7th STYLE) ---
cor_data <- Jet_val_table %>% select(BPS_Total, PHQ9_raw, ISI_raw, SJL_raw, Screen_time) %>%
rename(`BPS-3`=BPS_Total, Depression=PHQ9_raw, Insomnia=ISI_raw, SJL=SJL_raw, `Screen Time`=Screen_time)
cormat <- cor(cor_data, method="spearman", use="complete.obs"); pmat <- ggcorrplot::cor_pmat(cor_data, method="spearman")
cormat[upper.tri(cormat)] <- NA; pmat[upper.tri(pmat)] <- NA
cor_melt <- reshape2::melt(cormat, na.rm=T); p_melt <- reshape2::melt(pmat, na.rm=T); cor_melt$pvalue <- p_melt$value
cor_melt <- cor_melt %>% mutate(sig = case_when(pvalue < 0.001 ~ "***", pvalue < 0.01 ~ "**", pvalue < 0.05 ~ "*", T ~ ""),
label = ifelse(Var1 == Var2, "", paste0(sprintf("%.2f", value), sig)))
Fig2 <- ggplot(cor_melt, aes(Var2, Var1, fill = value)) + geom_tile(color = "white", size = 1) +
scale_fill_gradient2(low = "#4575B4", high = "#D73027", mid = "white", midpoint = 0, limit = c(-1,1), name="Spearman's ρ") +
geom_text(aes(label = label), color = "black", size = 4.5, family="serif", fontface="bold") + theme_minimal() +
theme(text = element_text(family="serif", size=12), axis.title = element_blank(), panel.grid.major = element_blank())
# ggsave("C:/Users/Admin/Downloads/Figure_2_Heatmap_Final.png", plot=Fig2, width=6, height=6, dpi=300)
# --- PART 3: FIGURE 3 - MINIMALIST BOXPLOTS (APA-7 P-VALUE FORMAT) ---
Jet_val_clean <- Jet_val_table %>% mutate(
Dep = factor(ifelse(Depression_Cat == "Normal", "Normal", "Clinical\nDepression"), levels=c("Normal", "Clinical\nDepression")),
Ins = factor(ifelse(Insomnia_Cat == "Normal", "Normal", "Clinical\nInsomnia"), levels=c("Normal", "Clinical\nInsomnia")),
SJL = factor(ifelse(SJL_Cat == "SJL < 1h", "SJL < 1h", "SJL ≥ 1h"), levels=c("SJL < 1h", "SJL ≥ 1h")),
Chr = factor(Chronotype, levels=c("Non-evening", "Evening")))
plot_box <- function(data, x, y_lab="") {
ggboxplot(data, x=x, y="BPS_Total", fill=x, palette="npg", ylab=y_lab, xlab="", legend="none", width=0.6) +
scale_y_continuous(limits=c(3, 16), breaks=seq(3, 15, 3)) +
stat_compare_means(method="wilcox.test", aes(label = ifelse(..p.. < 0.001, "p < 0.001", sprintf("p = %.3f", ..p..))),
label.x=1.5, label.y=15.5, hjust=0.5, size=4, fontface="italic", family="serif") +
theme(text=element_text(family="serif", size=13)) }
p1 <- plot_box(Jet_val_clean, "Dep", "BPS-3 Score"); p2 <- plot_box(Jet_val_clean, "Ins", "")
p3 <- plot_box(Jet_val_clean, "SJL", "BPS-3 Score"); p4 <- plot_box(Jet_val_clean, "Chr", "")
Fig3 <- ggarrange(p1, p2, p3, p4, ncol=2, nrow=2, labels=c("A", "B", "C", "D"), font.label=list(size=15, face="bold", family="serif"))
# ggsave("C:/Users/Admin/Downloads/Figure_3_Boxplots_Final.png", plot=Fig3, width=10, height=8, dpi=300)
print(Fig2); print(Fig3)