Nạp thư viện

library(readxl)
library(dplyr)
library(stringr)
library(gtsummary)
library(rstatix)
library(gtsummary)
library(dplyr)
library(ggplot2)
library(extrafont)
library(pROC)

Nhập dữ liệu

library(readxl)

df <- read_excel("data1.xlsx", sheet=1, na=".")

Chuẩn bị biến

#Tạo biến phân nhóm có biến chứng - Biến outcomes
df <- df %>%
  mutate(
    early_neuro_deriotation = if_else(
      NIHSS_24_48h - NIHSS_score_in >= 4,1,0
    )
  ) 

df <- df %>%
  mutate(phan_loai_complication = case_when(
    BCh_xuat_huyet_noi_so_3 == 1 | 
      Ctscan2_xhns3_trieuchung == 1 |
      nhoi_mau_ac_tinh == 1 |
      early_neuro_deriotation == 1 ~1,
    TRUE ~ 0
  ))

#Tạo biến Dual phase CTA, biến age per 10 years
df <- df %>%
  mutate(
    dual_phase_delayed = case_when(
      THBH_cta_p2 == 1 | THBH_cta_p2 == 2 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  mutate(tuoi_10 = tuoi/10)

#Hồi quy logistic đơn biến

bang4_donbien <- df %>%
  select(
    phan_loai_complication,
    tuoi_10,
    NIHSS_score_in,
    aspect_score,
    CTh_so_lan_lay_huyet_khoi,
    dual_phase_delayed
      ) %>%
  tbl_uvregression(
    method = glm,
    y = phan_loai_complication,
    method.args = list(family = binomial),
    exponentiate = TRUE,
    label = list(
      tuoi_10 ~ "Tuổi 10 năm",
      NIHSS_score_in ~ "Điểm NIHSS nền",
      aspect_score ~ "ASPECTS",
      CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK",
      dual_phase_delayed ~ "Delayed phase"
    )
  ) %>%
  modify_header(
    label ~ "**Variable**",
    estimate ~ "**OR**",
    conf.low ~ "**95% CI**",
    p.value ~ "**p value**"
  ) %>%
  bold_labels()

bang4_donbien
Variable N OR 95% CI p value
Tuổi 10 năm 80 1.75 1.15, 2.80 0.012
Điểm NIHSS nền 80 1.16 1.03, 1.31 0.016
ASPECTS 80 0.30 0.16, 0.50 <0.001
Số lần lấy HK 80 1.16 0.72, 1.88 0.5
Delayed phase 80 5.21 1.98, 14.7 0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

#Hồi quy logistic đa biến #Mô hình 1: Clinical + NCCT + Dual phase CTA

model_dabien1 <- glm(
  phan_loai_complication ~ tuoi_10 + NIHSS_score_in + aspect_score + CTh_so_lan_lay_huyet_khoi + dual_phase_delayed ,
  data = df,
  family = binomial
)
bang4_dabien1 <- model_dabien1 %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      tuoi_10 ~ "Tuổi 10 năm",
      NIHSS_score_in ~ "NIHSS",
      aspect_score ~ "ASPECTS",
      CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK",
      dual_phase_delayed ~ "Delayed phase"
    )
  ) %>%
  modify_header(
    label ~ "**Variable**",
    estimate ~ "**OR hiệu chỉnh**",
    conf.low ~ "**95% CI**",
    p.value ~ "**p value**"
  ) %>%
  bold_labels()

bang4_dabien1
Variable OR hiệu chỉnh 95% CI p value
Tuổi 10 năm 2.37 1.32, 4.78 0.007
NIHSS 1.05 0.90, 1.24 0.6
ASPECTS 0.31 0.15, 0.57 <0.001
Số lần lấy HK 1.15 0.59, 2.33 0.7
Delayed phase 4.66 1.32, 18.3 0.020
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

#Mô hình 2: Clinical + NCCT

model_dabien2 <- glm(
  phan_loai_complication ~ tuoi_10 + NIHSS_score_in + aspect_score + CTh_so_lan_lay_huyet_khoi,
  data = df,
  family = binomial
)
bang4_dabien2 <- model_dabien2 %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      tuoi_10 ~ "Tuổi 10 năm",
      NIHSS_score_in ~ "NIHSS",
      aspect_score ~ "ASPECTS",
      CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK"
    )
  ) %>%
  modify_header(
    label ~ "**Variable**",
    estimate ~ "**OR hiệu chỉnh**",
    conf.low ~ "**95% CI**",
    p.value ~ "**p value**"
  ) %>%
  bold_labels()

bang4_dabien2
Variable OR hiệu chỉnh 95% CI p value
Tuổi 10 năm 2.19 1.28, 4.15 0.008
NIHSS 1.07 0.93, 1.24 0.4
ASPECTS 0.29 0.14, 0.52 <0.001
Số lần lấy HK 1.08 0.59, 2.04 0.8
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

#Mô hình 3: Clinical model

model_dabien3 <- glm(
  phan_loai_complication ~ tuoi_10 + NIHSS_score_in + CTh_so_lan_lay_huyet_khoi,
  data = df,
  family = binomial
)
bang4_dabien3 <- model_dabien3 %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      tuoi_10 ~ "Tuổi 10 năm",
      NIHSS_score_in ~ "NIHSS",
      CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK"
    )
  ) %>%
  modify_header(
    label ~ "**Variable**",
    estimate ~ "**OR hiệu chỉnh**",
    conf.low ~ "**95% CI**",
    p.value ~ "**p value**"
  ) %>%
  bold_labels()

bang4_dabien3
Variable OR hiệu chỉnh 95% CI p value
Tuổi 10 năm 1.90 1.21, 3.14 0.008
NIHSS 1.17 1.04, 1.35 0.013
Số lần lấy HK 1.20 0.71, 2.05 0.5
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

#Mô hình 4: Mô hình 3 biến: Outcomes ~ Age + ASPECTS + Dual phase CTA

model_dabien4 <- glm(
  phan_loai_complication ~ tuoi_10 + aspect_score + dual_phase_delayed ,
  data = df,
  family = binomial
)
bang4_dabien4 <- model_dabien4 %>%
  tbl_regression(
    exponentiate = TRUE,
    label = list(
      tuoi_10 ~ "Tuổi 10 năm",
      aspect_score ~ "ASPECTS",
           dual_phase_delayed ~ "Delayed phase"
    )
  ) %>%
  modify_header(
    label ~ "**Variable**",
    estimate ~ "**OR hiệu chỉnh**",
    conf.low ~ "**95% CI**",
    p.value ~ "**p value**"
  ) %>%
  bold_labels()

bang4_dabien4
Variable OR hiệu chỉnh 95% CI p value
Tuổi 10 năm 2.32 1.30, 4.60 0.008
ASPECTS 0.29 0.14, 0.52 <0.001
Delayed phase 4.78 1.38, 18.5 0.017
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

AUC và ROC curve

# 1. Tạo predicted probability
df_roc <- df %>%
  mutate(
    pred_m1 = predict(model_dabien1, type = "response"),
    pred_m2 = predict(model_dabien2, type = "response"),
    pred_m3 = predict(model_dabien3, type = "response"),
    pred_m4 = predict(model_dabien4, type = "response")
  )

# =========================
# 2. Tạo ROC object
# =========================

roc_m1 <- roc(df_roc$phan_loai_complication, df_roc$pred_m1)
roc_m2 <- roc(df_roc$phan_loai_complication, df_roc$pred_m2)
roc_m3 <- roc(df_roc$phan_loai_complication, df_roc$pred_m3)
roc_m4 <- roc(df_roc$phan_loai_complication, df_roc$pred_m4)

# Xem AUC từng mô hình
auc(roc_m1)
## Area under the curve: 0.897
auc(roc_m2)
## Area under the curve: 0.8616
auc(roc_m3)
## Area under the curve: 0.7655
auc(roc_m4)
## Area under the curve: 0.8891
ci.auc(roc_m1)
## 95% CI: 0.8287-0.9652 (DeLong)
ci.auc(roc_m2)
## 95% CI: 0.7754-0.9478 (DeLong)
ci.auc(roc_m3)
## 95% CI: 0.6559-0.875 (DeLong)
ci.auc(roc_m4)
## 95% CI: 0.8167-0.9615 (DeLong)
auc_table <- tibble(
  Model = c(
    "Model 1: Full model",
    "Model 2: Without delayed phase",
    "Model 3: Clinical/procedural model",
    "Model 4: Final reduced model"
  ),
  AUC = c(
    as.numeric(auc(roc_m1)),
    as.numeric(auc(roc_m2)),
    as.numeric(auc(roc_m3)),
    as.numeric(auc(roc_m4))
  ),
  CI_low = c(
    ci.auc(roc_m1)[1],
    ci.auc(roc_m2)[1],
    ci.auc(roc_m3)[1],
    ci.auc(roc_m4)[1]
  ),
  CI_high = c(
    ci.auc(roc_m1)[3],
    ci.auc(roc_m2)[3],
    ci.auc(roc_m3)[3],
    ci.auc(roc_m4)[3]
  )
)

auc_table
## # A tibble: 4 × 4
##   Model                                AUC CI_low CI_high
##   <chr>                              <dbl>  <dbl>   <dbl>
## 1 Model 1: Full model                0.897  0.829   0.965
## 2 Model 2: Without delayed phase     0.862  0.775   0.948
## 3 Model 3: Clinical/procedural model 0.765  0.656   0.875
## 4 Model 4: Final reduced model       0.889  0.817   0.961

Biểu đồ ROC curve

smooth_m1 <- smooth(roc_m1)
smooth_m2 <- smooth(roc_m2)
smooth_m3 <- smooth(roc_m3)
smooth_m4 <- smooth(roc_m4)

auc_m1 <- round(as.numeric(auc(roc_m1)), 3)
auc_m2 <- round(as.numeric(auc(roc_m2)), 3)
auc_m3 <- round(as.numeric(auc(roc_m3)), 3)
auc_m4 <- round(as.numeric(auc(roc_m4)), 3)

ci_m1 <- round(as.numeric(ci.auc(roc_m1)), 3)
ci_m2 <- round(as.numeric(ci.auc(roc_m2)), 3)
ci_m3 <- round(as.numeric(ci.auc(roc_m3)), 3)
ci_m4 <- round(as.numeric(ci.auc(roc_m4)), 3)

label_m1 <- paste0(
  "Clinical + NCCT + Dual-phase CTA: AUC = ", auc_m1,
  " (95% CI: ", ci_m1[1], "–", ci_m1[3], ")"
)

label_m2 <- paste0(
  "Clinal + NCCT: AUC = ", auc_m2,
  " (95% CI: ", ci_m2[1], "–", ci_m2[3], ")"
)

label_m3 <- paste0(
  "Clinical model: AUC = ", auc_m3,
  " (95% CI: ", ci_m3[1], "–", ci_m3[3], ")"
)

label_m4 <- paste0(
  "Age + ASPECT + Dual-phase CTA: AUC = ", auc_m4,
  " (95% CI: ", ci_m4[1], "–", ci_m4[3], ")"
)

roc_smooth_df <- bind_rows(
  data.frame(
    specificity = smooth_m1$specificities,
    sensitivity = smooth_m1$sensitivities,
    model = label_m1
  ),
  data.frame(
    specificity = smooth_m2$specificities,
    sensitivity = smooth_m2$sensitivities,
    model = label_m2
  ),
  data.frame(
    specificity = smooth_m3$specificities,
    sensitivity = smooth_m3$sensitivities,
    model = label_m3
  ),
  data.frame(
    specificity = smooth_m4$specificities,
    sensitivity = smooth_m4$sensitivities,
    model = label_m4
  )
) %>%
  mutate(
    one_minus_specificity = 1 - specificity
  )

roc_smooth_df <- roc_smooth_df %>%
  mutate(
    model = factor(
      model,
      levels = c(label_m3, label_m2, label_m1, label_m4)
    )
  )

color_values <- setNames(
  c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "black"),
  c(label_m1, label_m2, label_m3, label_m4, "Reference")
)

p_roc <- ggplot(
  roc_smooth_df,
  aes(
    x = one_minus_specificity,
    y = sensitivity,
    color = model
  )
) +
  geom_line(linewidth = 1.2) +
  geom_abline(
    aes(color = "Reference"),
    intercept = 0,
    slope = 1,
    linetype = "dashed",
    linewidth = 0.8
  ) +
  scale_color_manual(values = color_values) +
  coord_equal(xlim = c(0, 1), ylim = c(0, 1)) +
  theme_bw(base_family = "Times New Roman") +
  labs(
    title = "ROC curves for prediction of reperfusion-related complications",
    x = "1 - Specificity",
    y = "Sensitivity",
    color = NULL
  ) +
  theme(
    plot.title = element_text(size = 13, hjust = 0.5),
    axis.title = element_text(size = 13),
    axis.text = element_text(size = 11),
    legend.position = c(0.65, 0.12),
    legend.background = element_rect(
      color = "grey70",
      fill = "white",
      linewidth = 0.4
    ),
    legend.text = element_text(size = 7.5),
    legend.key.width = unit(0.8, "cm"),
    legend.key.height = unit(0.45, "cm"),
    legend.spacing.y = unit(0.08, "cm"),
    legend.margin = margin(3,4,3,4),
    plot.margin = margin(10,10,10,10)
    
  )

p_roc