library(tidyverse); library(moments)

# 1.1. Load raw data
raw_data <- read.csv("C:/Users/Admin/Documents/NCKH JC/Bài Jetlag/Bài 2. validation/Jet_validation.csv", stringsAsFactors = FALSE, na.strings = c("", "NA"))

# 1.2. Select, Clean, and Recode (The "Bulletproof" Transformation)
Jet_val <- raw_data %>%
  select(Sex_raw = gender_code, Year_raw = year, Residence_raw = residence_code,
         E1 = E.1_raw, E2 = E.2_raw, E3 = E.3_raw,
         PHQ9_raw = PHQ9_total, ISI_raw = ISI_total, SJL_raw = SJL_hours,
         Chronotype_raw = MEQ_evening2, Screen_time = D2_total_hours) %>%
  drop_na(E1, E2, E3) %>%
  mutate(
    # [FIX 1] Rescale BPS-3 from 0-4 to 1-5 (Essential for Floor/Ceiling Analysis)
    E1 = E1 + 1, E2 = E2 + 1, E3 = E3 + 1, BPS_Total = E1 + E2 + E3,
    
    # [FIX 2] Residence Grouping (Merging groups < 5 to ensure statistical power)
    Residence = factor(case_when(Residence_raw == 1 ~ "Dormitory", Residence_raw == 2 ~ "Rented", 
                                 Residence_raw %in% c(3, 4) ~ "Family/Others", TRUE ~ "Unknown"),
                       levels = c("Dormitory", "Rented", "Family/Others")),
    
    # Demographic & Clinical Categorization
    Sex = factor(Sex_raw, levels = c(0, 1), labels = c("Male", "Female")),
    Year = factor(Year_raw),
    Depression_Cat = factor(ifelse(PHQ9_raw >= 10, "Clinical Depression", "Normal"), levels = c("Normal", "Clinical Depression")),
    Insomnia_Cat = factor(ifelse(ISI_raw >= 10, "Clinical Insomnia", "Normal"), levels = c("Normal", "Clinical Insomnia")),
    SJL_Cat = factor(ifelse(SJL_raw >= 1, "SJL >= 1h", "SJL < 1h"), levels = c("SJL < 1h", "SJL >= 1h")),
    Chronotype = factor(Chronotype_raw, levels = c("Non-evening", "Evening"))
  )

# [FIX 3] Quick Check (Properly separated line)
cat("Dataset dimensions:", dim(Jet_val), "\n"); head(Jet_val, 5)
Dataset dimensions: 113 19 
  Sex_raw Year_raw Residence_raw E1 E2 E3 PHQ9_raw ISI_raw    SJL_raw
1       0        3             2  4  3  3       15      17 10.5666667
2       1        2             1  3  3  3       18       7  0.7500000
3       1        2             2  4  1  3        2       2  1.0416667
4       0        2             2  5  5  5       22      17  3.2500000
5       1        2             2  3  2  2        3       8  0.4583333
  Chronotype_raw Screen_time BPS_Total Residence    Sex Year
1    Non-evening    20.66667        10    Rented   Male    3
2    Non-evening     5.25000         9 Dormitory Female    2
3    Non-evening     5.50000         8    Rented Female    2
4        Evening     9.00000        15    Rented   Male    2
5    Non-evening    12.66667         7    Rented Female    2
       Depression_Cat      Insomnia_Cat   SJL_Cat  Chronotype
1 Clinical Depression Clinical Insomnia SJL >= 1h Non-evening
2 Clinical Depression            Normal  SJL < 1h Non-evening
3              Normal            Normal SJL >= 1h Non-evening
4 Clinical Depression Clinical Insomnia SJL >= 1h     Evening
5              Normal            Normal  SJL < 1h Non-evening

1.5. Normality Diagnostics (Comprehensive Triangulation)

library(dplyr); library(ggplot2); library(ggpubr)

# 1. FUNCTION FOR AUTOMATED DIAGNOSTICS (Targeting 'Jet_val')
run_diag <- function(v, label) {
  dat <- na.omit(Jet_val[[v]])
  
  # Histogram & Q-Q Plot using ggpubr
  p1 <- gghistogram(Jet_val, x = v, fill = "steelblue", color = "white", add = "mean", rug = TRUE, title = paste("Histogram:", label), xlab = label)
  p2 <- ggqqplot(Jet_val, x = v, color = "steelblue", title = paste("Q-Q Plot:", label))
  
  print(ggarrange(p1, p2, ncol = 2))
  
  if(length(dat) >= 3 && length(dat) <= 5000) {
    sw <- shapiro.test(dat)
    cat(sprintf("=> [%s]: W = %.3f, p = %.2e (%s)\n", label, sw$statistic, sw$p.value, ifelse(sw$p.value < 0.05, "Non-normal", "Normal")))
  }
}

# 2. VARIABLE LIST (Ensuring names match Jet_val columns)
test_list <- list(
  c("BPS_Total", "Bedtime Procrastination (BPS-3)"),
  c("PHQ9_raw", "Depressive Symptoms (PHQ-9)"),
  c("ISI_raw", "Insomnia Severity (ISI)"),
  c("SJL_raw", "Social Jetlag (Hours)"),
  c("Screen_time", "Daily Screen Time (Hours)")
)

# 3. EXECUTE LOOP (x[1] for variable, x[2] for label)
invisible(lapply(test_list, function(x) run_diag(x[1], x[2])))

=> [Bedtime Procrastination (BPS-3)]: W = 0.972, p = 1.70e-02 (Non-normal)

=> [Depressive Symptoms (PHQ-9)]: W = 0.946, p = 1.67e-04 (Non-normal)

=> [Insomnia Severity (ISI)]: W = 0.981, p = 1.11e-01 (Normal)

=> [Social Jetlag (Hours)]: W = 0.732, p = 4.50e-13 (Non-normal)

=> [Daily Screen Time (Hours)]: W = 0.911, p = 1.40e-06 (Non-normal)

3.1

library(dplyr); library(gtsummary); library(flextable); library(officer)

# 1. DATA PREP: Gộp nhóm và gán nhãn lại cho chuẩn quốc tế
Jet_val_t1 <- Jet_val %>% mutate(
    Residence = factor(case_when(
      Residence_raw == 1 ~ "Dormitory", 
      Residence_raw == 2 ~ "Private Rental", 
      Residence_raw %in% c(3, 4) ~ "Family/Others"),
      levels = c("Dormitory", "Private Rental", "Family/Others")),
    Year = factor(Year_raw, labels = c("First Year", "Second Year", "Third Year"))
  )

# 2. XUẤT BẢNG TABLE 1 CHUẨN APA-7
theme_gtsummary_compact()

Table1_APA <- Jet_val_t1 %>% 
  select(Sex, Year, Residence, Chronotype, SJL_Cat, Insomnia_Cat, Depression_Cat, 
         BPS_Total, PHQ9_raw, ISI_raw, SJL_raw, Screen_time) %>%
  tbl_summary(
    type = list(all_continuous() ~ "continuous", all_categorical() ~ "categorical"),
    statistic = list(
      all_continuous() ~ "{median} ({p25}–{p75})", # Dùng dấu gạch ngang en-dash (–)
      all_categorical() ~ "{n} ({p}%)"
    ),
    label = list(
      Sex ~ "Biological Sex",
      Year ~ "Year of Study",
      Residence ~ "Residential Status",
      Chronotype ~ "Chronotype (Evening Preference)",
      SJL_Cat ~ "Social Jetlag (SJL) Category",
      Insomnia_Cat ~ "Insomnia Severity (ISI ≥ 10)",
      Depression_Cat ~ "Depressive Symptoms (PHQ-9 ≥ 10)",
      BPS_Total ~ "VN-BPS-3 Total Score",
      PHQ9_raw ~ "PHQ-9 Total Score",
      ISI_raw ~ "ISI Total Score",
      SJL_raw ~ "Social Jetlag (hours)",
      Screen_time ~ "Daily Screen Time (hours)"
    )
  ) %>%
  modify_header(label = "**Characteristics**", stat_0 = "**Total (N = 113)**") %>%
  modify_footnote(everything() ~ "Values are presented as n (%) or Median (IQR).") %>%
  bold_labels() %>% 
  as_flex_table() %>%
  font(fontname = "Times New Roman", part = "all") %>% 
  fontsize(size = 11, part = "all") %>% 
  border_remove() %>%
  hline_top(border = fp_border_default(width = 1.5), part = "header") %>% 
  hline_bottom(border = fp_border_default(width = 1), part = "header") %>% 
  hline_bottom(border = fp_border_default(width = 1.5), part = "body") %>% 
  autofit()

# Hiển thị bảng
Table1_APA

Characteristics1

Total (N = 113)1

Biological Sex

Male

42 (37%)

Female

71 (63%)

Year of Study

First Year

51 (45%)

Second Year

31 (27%)

Third Year

31 (27%)

Residential Status

Dormitory

16 (14%)

Private Rental

82 (73%)

Family/Others

15 (13%)

Chronotype (Evening Preference)

Non-evening

101 (89%)

Evening

12 (11%)

Social Jetlag (SJL) Category

SJL < 1h

45 (40%)

SJL >= 1h

68 (60%)

Insomnia Severity (ISI ≥ 10)

Normal

62 (55%)

Clinical Insomnia

51 (45%)

Depressive Symptoms (PHQ-9 ≥ 10)

Normal

71 (63%)

Clinical Depression

42 (37%)

VN-BPS-3 Total Score

9.00 (7.00–12.00)

PHQ-9 Total Score

8 (5–12)

ISI Total Score

9 (6–12)

Social Jetlag (hours)

1.25 (0.63–2.00)

Daily Screen Time (hours)

7.3 (5.2–10.3)

1Values are presented as n (%) or Median (IQR).

# save_as_docx(Table1_APA, path = "C:/Users/Admin/Downloads/Table_1_APA.docx")

3.2

library(dplyr); library(psych); library(lavaan); library(flextable); library(officer)

Jet_val_table <- Jet_val

# 1. TÍNH TOÁN RELIABILITY & CITC & ALPHA IF DELETED
items <- Jet_val_table %>% select(E1, E2, E3); n_v <- nrow(na.omit(items))
alpha_res <- psych::alpha(items); omega_res <- suppressWarnings(psych::omega(items, nfactors = 1, plot = FALSE))

# 2. TÍNH TOÁN CFA FACTOR LOADINGS (WLSMV)
cfa_fit <- cfa('BPS =~ E1 + E2 + E3', data = Jet_val_table, ordered = c("E1", "E2", "E3"), estimator = "WLSMV")
cfa_loadings <- standardizedSolution(cfa_fit) %>% filter(op == "=~") %>% pull(est.std)

# 3. TÍNH TOÁN MEDIAN (IQR) THEO ĐỊNH DẠNG CHUẨN
med_iqr <- apply(items, 2, function(x) {
  sprintf("%.0f (%.0f - %.0f)", median(x, na.rm=T), quantile(x, 0.25, na.rm=T), quantile(x, 0.75, na.rm=T)) })

# 4. TẠO DATAFRAME TỔNG HỢP (THE MONEY TABLE)
T2_data <- data.frame(
  Item = c("Item 1: Later than intended", "Item 2: Other things at bedtime", "Item 3: Easily distracted"),
  Median_IQR = med_iqr,
  Floor = round(apply(items, 2, function(x) sum(x == 1, na.rm=T)/n_v*100), 1),
  Ceiling = round(apply(items, 2, function(x) sum(x == 5, na.rm=T)/n_v*100), 1),
  CITC = round(alpha_res$item.stats$r.drop, 2),
  Alpha_Del = round(alpha_res$alpha.drop$raw_alpha, 2),
  Loading = round(cfa_loadings, 2))

# 5. XUẤT FLEXTABLE VỪA KHÍT TRANG A4
Table2_APA <- flextable(T2_data) %>%
  set_header_labels(Item="VN-BPS-3 Items", Median_IQR="Median (IQR)", Floor="Floor (%)", Ceiling="Ceiling (%)", Alpha_Del="α if deleted", Loading="Factor Loading") %>%
  add_footer_lines(paste0("Note. Overall α = ", round(alpha_res$total$raw_alpha, 2), ", ω = ", round(omega_res$omega.tot, 2), ". All factor loadings p < 0.001.")) %>%
  font(fontname = "Times New Roman", part = "all") %>% fontsize(size = 11, part = "all") %>% border_remove() %>%
  hline_top(border = fp_border_default(width = 1.5), part = "header") %>%
  hline_bottom(border = fp_border_default(width = 1), part = "header") %>%
  hline_bottom(border = fp_border_default(width = 1.5), part = "body") %>%
  align(j = 2:7, align = "center", part = "all") %>% set_table_properties(layout = "autofit", width = 1)

print(Table2_APA)
a flextable object.
col_keys: `Item`, `Median_IQR`, `Floor`, `Ceiling`, `CITC`, `Alpha_Del`, `Loading` 
header has 1 row(s) 
body has 3 row(s) 
original dataset sample: 
'data.frame':   3 obs. of  7 variables:
 $ Item      : chr  "Item 1: Later than intended" "Item 2: Other things at bedtime" "Item 3: Easily distracted"
 $ Median_IQR: chr  "3 (3 - 4)" "3 (2 - 4)" "3 (2 - 4)"
 $ Floor     : num  4.4 8.8 8
 $ Ceiling   : num  8.8 8.8 10.6
 $ CITC      : num  0.69 0.79 0.73
 $ Alpha_Del : num  0.84 0.75 0.81
 $ Loading   : num  0.8 0.93 0.85
# save_as_docx(Table2_APA, path = "C:/Users/Admin/Downloads/Table_2_Psychometrics_A4.docx")
library(lavaan); library(semPlot)

# 1. MODEL ESTIMATION (WLSMV for Ordinal Data)
cfa_model <- 'BPS =~ E1 + E2 + E3'
fit_cfa <- cfa(cfa_model, data = Jet_val_table, ordered = c("E1","E2","E3"), estimator = "WLSMV")

# 2. HIGH-RESOLUTION EXPORT (300 DPI)
png("C:/Users/Admin/Downloads/Figure_1_Final_Q1.png", width = 2400, height = 1800, res = 300)

# 3. SEMPATHS WITH LISREL STYLE (SOLID LINES ONLY)
semPaths(fit_cfa, 
         whatLabels = "std",           # Show standardized loadings
         layout = "tree2",             # Top-down tree structure
         style = "lisrel",             # Force solid lines for consistency
         intercepts = F, residuals = F, thresholds = F,
         edge.color = "black", edge.width = 1.5, edge.label.cex = 1.3,
         color = list(lat = "#E8F0FE", man = "#FFFFFF"), 
         sizeLat = 12, sizeMan = 10,
         nodeLabels = c("Item 1", "Item 2", "Item 3", "Bedtime\nProcrastination"),
         label.cex = 1.2, 
         mar = c(15, 5, 8, 5))         # Increase bottom margin for text safety

# 4. ANNOTATION (Fixed coordinates to avoid overlapping with Item 1)
text(x = -1.1, y = -1.45, adj = 0, cex = 0.8, font = 3, col = "grey30",
     labels = "Estimator: WLSMV\nModel Status: Just-identified (df = 0)\nStandardized Factor Loadings: all p < 0.001")

dev.off()
png 
  2 
# Render directly to Rmd for preview
semPaths(fit_cfa, whatLabels = "std", layout = "tree2", style = "lisrel", intercepts = F, residuals = F, thresholds = F, edge.color = "black", color = list(lat = "#E8F0FE", man = "#FFFFFF"), sizeLat = 12, sizeMan = 10, nodeLabels = c("Item 1", "Item 2", "Item 3", "BPS"), mar = c(5, 5, 5, 5))

3.3. Construct Validity (Heatmap & Minimalist Boxplots)

library(dplyr); library(ggplot2); library(ggpubr); library(reshape2)
# --- PART 1: RAW DATA AUDIT (CONSOLIDATED) ---
cat("=== 3.4A. CONVERGENT VALIDITY (SPEARMAN) ===\n")
=== 3.4A. CONVERGENT VALIDITY (SPEARMAN) ===
cv_vars <- c("PHQ9_raw", "ISI_raw", "SJL_raw", "Screen_time")
labels_cv <- c("Depression", "Insomnia", "Social Jetlag", "Screen Time")
invisible(lapply(1:4, function(i) {
    res <- cor.test(Jet_val_table$BPS_Total, Jet_val_table[[cv_vars[i]]], method="spearman", exact=F)
    cat(sprintf("BPS-3 vs %-15s: rho=%.3f | p=%.2e %s\n", labels_cv[i], res$estimate, res$p.value, ifelse(res$p.value<0.05, "(*)", "(ns)"))) }))
BPS-3 vs Depression     : rho=0.468 | p=1.68e-07 (*)
BPS-3 vs Insomnia       : rho=0.212 | p=2.42e-02 (*)
BPS-3 vs Social Jetlag  : rho=0.357 | p=1.01e-04 (*)
BPS-3 vs Screen Time    : rho=0.284 | p=2.33e-03 (*)
cat("\n=== 3.4B. KNOWN-GROUPS VALIDITY (MANN-WHITNEY U) ===\n")

=== 3.4B. KNOWN-GROUPS VALIDITY (MANN-WHITNEY U) ===
kg_vars <- c("Depression_Cat", "Insomnia_Cat", "SJL_Cat", "Chronotype")
invisible(lapply(kg_vars, function(g) {
    wt <- wilcox.test(as.formula(paste("BPS_Total ~", g)), data=Jet_val_table, exact=F)
    cat(sprintf("[%s] p=%.2e %s\n", g, wt$p.value, ifelse(wt$p.value<0.05, "(*)", "(ns)"))) }))
[Depression_Cat] p=1.22e-03 (*)
[Insomnia_Cat] p=4.87e-02 (*)
[SJL_Cat] p=9.63e-04 (*)
[Chronotype] p=9.18e-02 (ns)
# --- PART 2: FIGURE 2 - CUSTOM TRIANGULAR HEATMAP (APA 7th STYLE) ---
cor_data <- Jet_val_table %>% select(BPS_Total, PHQ9_raw, ISI_raw, SJL_raw, Screen_time) %>%
    rename(`BPS-3`=BPS_Total, Depression=PHQ9_raw, Insomnia=ISI_raw, SJL=SJL_raw, `Screen Time`=Screen_time)
cormat <- cor(cor_data, method="spearman", use="complete.obs"); pmat <- ggcorrplot::cor_pmat(cor_data, method="spearman")
cormat[upper.tri(cormat)] <- NA; pmat[upper.tri(pmat)] <- NA
cor_melt <- reshape2::melt(cormat, na.rm=T); p_melt <- reshape2::melt(pmat, na.rm=T); cor_melt$pvalue <- p_melt$value
cor_melt <- cor_melt %>% mutate(sig = case_when(pvalue < 0.001 ~ "***", pvalue < 0.01 ~ "**", pvalue < 0.05 ~ "*", T ~ ""),
                                label = ifelse(Var1 == Var2, "", paste0(sprintf("%.2f", value), sig)))
Fig2 <- ggplot(cor_melt, aes(Var2, Var1, fill = value)) + geom_tile(color = "white", size = 1) +
    scale_fill_gradient2(low = "#4575B4", high = "#D73027", mid = "white", midpoint = 0, limit = c(-1,1), name="Spearman's ρ") +
    geom_text(aes(label = label), color = "black", size = 4.5, family="serif", fontface="bold") + theme_minimal() +
    theme(text = element_text(family="serif", size=12), axis.title = element_blank(), panel.grid.major = element_blank())
# ggsave("C:/Users/Admin/Downloads/Figure_2_Heatmap_Final.png", plot=Fig2, width=6, height=6, dpi=300)

# --- PART 3: FIGURE 3 - MINIMALIST BOXPLOTS (APA-7 P-VALUE FORMAT) ---
Jet_val_clean <- Jet_val_table %>% mutate(
    Dep = factor(ifelse(Depression_Cat == "Normal", "Normal", "Clinical\nDepression"), levels=c("Normal", "Clinical\nDepression")),
    Ins = factor(ifelse(Insomnia_Cat == "Normal", "Normal", "Clinical\nInsomnia"), levels=c("Normal", "Clinical\nInsomnia")),
    SJL = factor(ifelse(SJL_Cat == "SJL < 1h", "SJL < 1h", "SJL ≥ 1h"), levels=c("SJL < 1h", "SJL ≥ 1h")),
    Chr = factor(Chronotype, levels=c("Non-evening", "Evening")))
plot_box <- function(data, x, y_lab="") {
    ggboxplot(data, x=x, y="BPS_Total", fill=x, palette="npg", ylab=y_lab, xlab="", legend="none", width=0.6) +
        scale_y_continuous(limits=c(3, 16), breaks=seq(3, 15, 3)) +
        stat_compare_means(method="wilcox.test", aes(label = ifelse(..p.. < 0.001, "p < 0.001", sprintf("p = %.3f", ..p..))),
                           label.x=1.5, label.y=15.5, hjust=0.5, size=4, fontface="italic", family="serif") +
        theme(text=element_text(family="serif", size=13)) }
p1 <- plot_box(Jet_val_clean, "Dep", "BPS-3 Score"); p2 <- plot_box(Jet_val_clean, "Ins", "")
p3 <- plot_box(Jet_val_clean, "SJL", "BPS-3 Score"); p4 <- plot_box(Jet_val_clean, "Chr", "")
Fig3 <- ggarrange(p1, p2, p3, p4, ncol=2, nrow=2, labels=c("A", "B", "C", "D"), font.label=list(size=15, face="bold", family="serif"))
# ggsave("C:/Users/Admin/Downloads/Figure_3_Boxplots_Final.png", plot=Fig3, width=10, height=8, dpi=300)
print(Fig2); print(Fig3)