library(readxl)
library(dplyr)
library(stringr)
library(gtsummary)
library(rstatix)
library(gtsummary)
library(dplyr)
library(ggplot2)
library(extrafont)
library(pROC)
library(readxl)
df <- read_excel("data1.xlsx", sheet=1, na=".")
#Tạo biến phân nhóm có biến chứng - Biến outcomes
df <- df %>%
mutate(
early_neuro_deriotation = if_else(
NIHSS_24_48h - NIHSS_score_in >= 4,1,0
)
)
df <- df %>%
mutate(phan_loai_complication = case_when(
BCh_xuat_huyet_noi_so_3 == 1 |
Ctscan2_xhns3_trieuchung == 1 |
nhoi_mau_ac_tinh == 1 |
early_neuro_deriotation == 1 ~1,
TRUE ~ 0
))
#Tạo biến Dual phase CTA, biến age per 10 years
df <- df %>%
mutate(
dual_phase_delayed = case_when(
THBH_cta_p2 == 1 | THBH_cta_p2 == 2 ~ 1,
TRUE ~ 0
)
) %>%
mutate(tuoi_10 = tuoi/10)
#Hồi quy logistic đơn biến
bang4_donbien <- df %>%
select(
phan_loai_complication,
tuoi_10,
NIHSS_score_in,
aspect_score,
CTh_so_lan_lay_huyet_khoi,
dual_phase_delayed
) %>%
tbl_uvregression(
method = glm,
y = phan_loai_complication,
method.args = list(family = binomial),
exponentiate = TRUE,
label = list(
tuoi_10 ~ "Tuổi 10 năm",
NIHSS_score_in ~ "Điểm NIHSS nền",
aspect_score ~ "ASPECTS",
CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK",
dual_phase_delayed ~ "Delayed phase"
)
) %>%
modify_header(
label ~ "**Variable**",
estimate ~ "**OR**",
conf.low ~ "**95% CI**",
p.value ~ "**p value**"
) %>%
bold_labels()
bang4_donbien
| Variable | N | OR | 95% CI | p value |
|---|---|---|---|---|
| Tuổi 10 năm | 80 | 1.75 | 1.15, 2.80 | 0.012 |
| Điểm NIHSS nền | 80 | 1.16 | 1.03, 1.31 | 0.016 |
| ASPECTS | 80 | 0.30 | 0.16, 0.50 | <0.001 |
| Số lần lấy HK | 80 | 1.16 | 0.72, 1.88 | 0.5 |
| Delayed phase | 80 | 5.21 | 1.98, 14.7 | 0.001 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | ||||
#Hồi quy logistic đa biến #Mô hình 1: Clinical + NCCT + Dual phase CTA
model_dabien1 <- glm(
phan_loai_complication ~ tuoi_10 + NIHSS_score_in + aspect_score + CTh_so_lan_lay_huyet_khoi + dual_phase_delayed ,
data = df,
family = binomial
)
bang4_dabien1 <- model_dabien1 %>%
tbl_regression(
exponentiate = TRUE,
label = list(
tuoi_10 ~ "Tuổi 10 năm",
NIHSS_score_in ~ "NIHSS",
aspect_score ~ "ASPECTS",
CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK",
dual_phase_delayed ~ "Delayed phase"
)
) %>%
modify_header(
label ~ "**Variable**",
estimate ~ "**OR hiệu chỉnh**",
conf.low ~ "**95% CI**",
p.value ~ "**p value**"
) %>%
bold_labels()
bang4_dabien1
| Variable | OR hiệu chỉnh | 95% CI | p value |
|---|---|---|---|
| Tuổi 10 năm | 2.37 | 1.32, 4.78 | 0.007 |
| NIHSS | 1.05 | 0.90, 1.24 | 0.6 |
| ASPECTS | 0.31 | 0.15, 0.57 | <0.001 |
| Số lần lấy HK | 1.15 | 0.59, 2.33 | 0.7 |
| Delayed phase | 4.66 | 1.32, 18.3 | 0.020 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
#Mô hình 2: Clinical + NCCT
model_dabien2 <- glm(
phan_loai_complication ~ tuoi_10 + NIHSS_score_in + aspect_score + CTh_so_lan_lay_huyet_khoi,
data = df,
family = binomial
)
bang4_dabien2 <- model_dabien2 %>%
tbl_regression(
exponentiate = TRUE,
label = list(
tuoi_10 ~ "Tuổi 10 năm",
NIHSS_score_in ~ "NIHSS",
aspect_score ~ "ASPECTS",
CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK"
)
) %>%
modify_header(
label ~ "**Variable**",
estimate ~ "**OR hiệu chỉnh**",
conf.low ~ "**95% CI**",
p.value ~ "**p value**"
) %>%
bold_labels()
bang4_dabien2
| Variable | OR hiệu chỉnh | 95% CI | p value |
|---|---|---|---|
| Tuổi 10 năm | 2.19 | 1.28, 4.15 | 0.008 |
| NIHSS | 1.07 | 0.93, 1.24 | 0.4 |
| ASPECTS | 0.29 | 0.14, 0.52 | <0.001 |
| Số lần lấy HK | 1.08 | 0.59, 2.04 | 0.8 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
#Mô hình 3: Clinical model
model_dabien3 <- glm(
phan_loai_complication ~ tuoi_10 + NIHSS_score_in + CTh_so_lan_lay_huyet_khoi,
data = df,
family = binomial
)
bang4_dabien3 <- model_dabien3 %>%
tbl_regression(
exponentiate = TRUE,
label = list(
tuoi_10 ~ "Tuổi 10 năm",
NIHSS_score_in ~ "NIHSS",
CTh_so_lan_lay_huyet_khoi ~ "Số lần lấy HK"
)
) %>%
modify_header(
label ~ "**Variable**",
estimate ~ "**OR hiệu chỉnh**",
conf.low ~ "**95% CI**",
p.value ~ "**p value**"
) %>%
bold_labels()
bang4_dabien3
| Variable | OR hiệu chỉnh | 95% CI | p value |
|---|---|---|---|
| Tuổi 10 năm | 1.90 | 1.21, 3.14 | 0.008 |
| NIHSS | 1.17 | 1.04, 1.35 | 0.013 |
| Số lần lấy HK | 1.20 | 0.71, 2.05 | 0.5 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
#Mô hình 4: Mô hình 3 biến: Outcomes ~ Age + ASPECTS + Dual phase CTA
model_dabien4 <- glm(
phan_loai_complication ~ tuoi_10 + aspect_score + dual_phase_delayed ,
data = df,
family = binomial
)
bang4_dabien4 <- model_dabien4 %>%
tbl_regression(
exponentiate = TRUE,
label = list(
tuoi_10 ~ "Tuổi 10 năm",
aspect_score ~ "ASPECTS",
dual_phase_delayed ~ "Delayed phase"
)
) %>%
modify_header(
label ~ "**Variable**",
estimate ~ "**OR hiệu chỉnh**",
conf.low ~ "**95% CI**",
p.value ~ "**p value**"
) %>%
bold_labels()
bang4_dabien4
| Variable | OR hiệu chỉnh | 95% CI | p value |
|---|---|---|---|
| Tuổi 10 năm | 2.32 | 1.30, 4.60 | 0.008 |
| ASPECTS | 0.29 | 0.14, 0.52 | <0.001 |
| Delayed phase | 4.78 | 1.38, 18.5 | 0.017 |
| Abbreviations: CI = Confidence Interval, OR = Odds Ratio | |||
# 1. Tạo predicted probability
df_roc <- df %>%
mutate(
pred_m1 = predict(model_dabien1, type = "response"),
pred_m2 = predict(model_dabien2, type = "response"),
pred_m3 = predict(model_dabien3, type = "response"),
pred_m4 = predict(model_dabien4, type = "response")
)
# =========================
# 2. Tạo ROC object
# =========================
roc_m1 <- roc(df_roc$phan_loai_complication, df_roc$pred_m1)
roc_m2 <- roc(df_roc$phan_loai_complication, df_roc$pred_m2)
roc_m3 <- roc(df_roc$phan_loai_complication, df_roc$pred_m3)
roc_m4 <- roc(df_roc$phan_loai_complication, df_roc$pred_m4)
# Xem AUC từng mô hình
auc(roc_m1)
## Area under the curve: 0.897
auc(roc_m2)
## Area under the curve: 0.8616
auc(roc_m3)
## Area under the curve: 0.7655
auc(roc_m4)
## Area under the curve: 0.8891
ci.auc(roc_m1)
## 95% CI: 0.8287-0.9652 (DeLong)
ci.auc(roc_m2)
## 95% CI: 0.7754-0.9478 (DeLong)
ci.auc(roc_m3)
## 95% CI: 0.6559-0.875 (DeLong)
ci.auc(roc_m4)
## 95% CI: 0.8167-0.9615 (DeLong)
auc_table <- tibble(
Model = c(
"Model 1: Full model",
"Model 2: Without delayed phase",
"Model 3: Clinical/procedural model",
"Model 4: Final reduced model"
),
AUC = c(
as.numeric(auc(roc_m1)),
as.numeric(auc(roc_m2)),
as.numeric(auc(roc_m3)),
as.numeric(auc(roc_m4))
),
CI_low = c(
ci.auc(roc_m1)[1],
ci.auc(roc_m2)[1],
ci.auc(roc_m3)[1],
ci.auc(roc_m4)[1]
),
CI_high = c(
ci.auc(roc_m1)[3],
ci.auc(roc_m2)[3],
ci.auc(roc_m3)[3],
ci.auc(roc_m4)[3]
)
)
auc_table
## # A tibble: 4 × 4
## Model AUC CI_low CI_high
## <chr> <dbl> <dbl> <dbl>
## 1 Model 1: Full model 0.897 0.829 0.965
## 2 Model 2: Without delayed phase 0.862 0.775 0.948
## 3 Model 3: Clinical/procedural model 0.765 0.656 0.875
## 4 Model 4: Final reduced model 0.889 0.817 0.961
smooth_m1 <- smooth(roc_m1)
smooth_m2 <- smooth(roc_m2)
smooth_m3 <- smooth(roc_m3)
smooth_m4 <- smooth(roc_m4)
auc_m1 <- round(as.numeric(auc(roc_m1)), 3)
auc_m2 <- round(as.numeric(auc(roc_m2)), 3)
auc_m3 <- round(as.numeric(auc(roc_m3)), 3)
auc_m4 <- round(as.numeric(auc(roc_m4)), 3)
ci_m1 <- round(as.numeric(ci.auc(roc_m1)), 3)
ci_m2 <- round(as.numeric(ci.auc(roc_m2)), 3)
ci_m3 <- round(as.numeric(ci.auc(roc_m3)), 3)
ci_m4 <- round(as.numeric(ci.auc(roc_m4)), 3)
label_m1 <- paste0(
"Clinical + NCCT + Dual-phase CTA: AUC = ", auc_m1,
" (95% CI: ", ci_m1[1], "–", ci_m1[3], ")"
)
label_m2 <- paste0(
"Clinal + NCCT: AUC = ", auc_m2,
" (95% CI: ", ci_m2[1], "–", ci_m2[3], ")"
)
label_m3 <- paste0(
"Clinical model: AUC = ", auc_m3,
" (95% CI: ", ci_m3[1], "–", ci_m3[3], ")"
)
label_m4 <- paste0(
"Age + ASPECT + Dual-phase CTA: AUC = ", auc_m4,
" (95% CI: ", ci_m4[1], "–", ci_m4[3], ")"
)
roc_smooth_df <- bind_rows(
data.frame(
specificity = smooth_m1$specificities,
sensitivity = smooth_m1$sensitivities,
model = label_m1
),
data.frame(
specificity = smooth_m2$specificities,
sensitivity = smooth_m2$sensitivities,
model = label_m2
),
data.frame(
specificity = smooth_m3$specificities,
sensitivity = smooth_m3$sensitivities,
model = label_m3
),
data.frame(
specificity = smooth_m4$specificities,
sensitivity = smooth_m4$sensitivities,
model = label_m4
)
) %>%
mutate(
one_minus_specificity = 1 - specificity
)
roc_smooth_df <- roc_smooth_df %>%
mutate(
model = factor(
model,
levels = c(label_m3, label_m2, label_m1, label_m4)
)
)
color_values <- setNames(
c("#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "black"),
c(label_m1, label_m2, label_m3, label_m4, "Reference")
)
p_roc <- ggplot(
roc_smooth_df,
aes(
x = one_minus_specificity,
y = sensitivity,
color = model
)
) +
geom_line(linewidth = 1.2) +
geom_abline(
aes(color = "Reference"),
intercept = 0,
slope = 1,
linetype = "dashed",
linewidth = 0.8
) +
scale_color_manual(values = color_values) +
coord_equal(xlim = c(0, 1), ylim = c(0, 1)) +
theme_bw(base_family = "Times New Roman") +
labs(
title = "ROC curves for prediction of reperfusion-related complications",
x = "1 - Specificity",
y = "Sensitivity",
color = NULL
) +
theme(
plot.title = element_text(size = 13, hjust = 0.5),
axis.title = element_text(size = 13),
axis.text = element_text(size = 11),
legend.position = c(0.65, 0.12),
legend.background = element_rect(
color = "grey70",
fill = "white",
linewidth = 0.4
),
legend.text = element_text(size = 7.5),
legend.key.width = unit(0.8, "cm"),
legend.key.height = unit(0.45, "cm"),
legend.spacing.y = unit(0.08, "cm"),
legend.margin = margin(3,4,3,4),
plot.margin = margin(10,10,10,10)
)
p_roc