ICU

# Cài package nếu chưa có
#install.packages("aplore3")
#install.packages("tidyverse")
#install.packages("gtsummary")    # Tạo bảng thống kê đẹp
#install.packages("pROC")         # Vẽ ROC, tính AUC
#install.packages("ResourceSelection") # Hosmer-Lemeshow test

# Load library
library(aplore3)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(gtsummary)

# Gọi dataset
data("icu")

# Xem cấu trúc
glimpse(icu)

## Rows: 200
## Columns: 21
## $ id     <int> 4, 8, 12, 14, 27, 28, 32, 38, 40, 41, 42, 47, 50, 51, 52, 53, 5…
## $ sta    <fct> Died, Lived, Lived, Lived, Died, Lived, Lived, Lived, Lived, Li…
## $ age    <int> 87, 27, 59, 77, 76, 54, 87, 69, 63, 30, 35, 78, 70, 55, 63, 48,…
## $ gender <fct> Female, Female, Male, Male, Female, Male, Female, Male, Male, F…
## $ race   <fct> White, White, White, White, White, White, White, White, White, …
## $ ser    <fct> Surgical, Medical, Medical, Surgical, Surgical, Medical, Surgic…
## $ can    <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes, No, No, Ye…
## $ crn    <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, Yes, No…
## $ inf    <fct> Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes,…
## $ cpr    <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, No, No, No,…
## $ sys    <int> 80, 142, 112, 100, 128, 142, 110, 110, 104, 144, 108, 130, 138,…
## $ hra    <int> 96, 88, 80, 70, 90, 103, 154, 132, 66, 110, 60, 132, 103, 86, 1…
## $ pre    <fct> No, No, Yes, No, Yes, No, Yes, No, No, No, No, No, No, Yes, Yes…
## $ type   <fct> Emergency, Emergency, Emergency, Elective, Emergency, Emergency…
## $ fra    <fct> Yes, No, No, No, No, Yes, No, No, No, No, No, No, No, No, No, N…
## $ po2    <fct> <= 60, > 60, > 60, > 60, > 60, > 60, > 60, <= 60, > 60, > 60, >…
## $ ph     <fct> < 7.25, >= 7.25, >= 7.25, >= 7.25, >= 7.25, >= 7.25, >= 7.25, >…
## $ pco    <fct> > 45, <= 45, <= 45, <= 45, <= 45, <= 45, <= 45, <= 45, <= 45, <…
## $ bic    <fct> >= 18, >= 18, >= 18, >= 18, >= 18, >= 18, >= 18, < 18, >= 18, >…
## $ cre    <fct> <= 2.0, <= 2.0, <= 2.0, <= 2.0, <= 2.0, <= 2.0, <= 2.0, <= 2.0,…
## $ loc    <fct> Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, …

# Xem 6 dòng đầu
head(icu)

##   id   sta age gender  race      ser can crn inf cpr sys hra pre      type fra
## 1  4  Died  87 Female White Surgical  No  No Yes  No  80  96  No Emergency Yes
## 2  8 Lived  27 Female White  Medical  No  No Yes  No 142  88  No Emergency  No
## 3 12 Lived  59   Male White  Medical  No  No  No  No 112  80 Yes Emergency  No
## 4 14 Lived  77   Male White Surgical  No  No  No  No 100  70  No  Elective  No
## 5 27  Died  76 Female White Surgical  No  No Yes  No 128  90 Yes Emergency  No
## 6 28 Lived  54   Male White  Medical  No  No Yes  No 142 103  No Emergency Yes
##     po2      ph   pco   bic    cre     loc
## 1 <= 60  < 7.25  > 45 >= 18 <= 2.0 Nothing
## 2  > 60 >= 7.25 <= 45 >= 18 <= 2.0 Nothing
## 3  > 60 >= 7.25 <= 45 >= 18 <= 2.0 Nothing
## 4  > 60 >= 7.25 <= 45 >= 18 <= 2.0 Nothing
## 5  > 60 >= 7.25 <= 45 >= 18 <= 2.0 Nothing
## 6  > 60 >= 7.25 <= 45 >= 18 <= 2.0 Nothing

# Tóm tắt thống kê
summary(icu)

##        id           sta           age           gender       race    
##  Min.   :  4.0   Lived:160   Min.   :16.00   Male  :124   White:175  
##  1st Qu.:210.2   Died : 40   1st Qu.:46.75   Female: 76   Black: 15  
##  Median :412.5               Median :63.00                Other: 10  
##  Mean   :444.8               Mean   :57.55                           
##  3rd Qu.:671.8               3rd Qu.:72.00                           
##  Max.   :929.0               Max.   :92.00                           
##        ser       can       crn       inf       cpr           sys       
##  Medical : 93   No :180   No :181   No :116   No :187   Min.   : 36.0  
##  Surgical:107   Yes: 20   Yes: 19   Yes: 84   Yes: 13   1st Qu.:110.0  
##                                                         Median :130.0  
##                                                         Mean   :132.3  
##                                                         3rd Qu.:150.0  
##                                                         Max.   :256.0  
##       hra          pre             type      fra         po2            ph     
##  Min.   : 39.00   No :170   Elective : 53   No :185   > 60 :184   >= 7.25:187  
##  1st Qu.: 80.00   Yes: 30   Emergency:147   Yes: 15   <= 60: 16   < 7.25 : 13  
##  Median : 96.00                                                                
##  Mean   : 98.92                                                                
##  3rd Qu.:118.25                                                                
##  Max.   :192.00                                                                
##     pco         bic          cre           loc     
##  <= 45:180   >= 18:185   <= 2.0:190   Nothing:185  
##  > 45 : 20   < 18 : 15   > 2.0 : 10   Stupor :  5  
##                                       Coma   : 10  
##                                                    
##                                                    
##

library(aplore3)
library(tidyverse)
library(gtsummary)

data("icu")

# Xem cấu trúc outcome
table(icu$sta)

## 
## Lived  Died 
##   160    40

# Mặc định: Died = 1, Lived = 0 (cần kiểm tra)

# Chuyển outcome về dạng numeric 0/1
icu <- icu %>%
  mutate(
    outcome = ifelse(sta == "Died", 1, 0),
    
    # Chuẩn hóa một số biến factor
    loc = factor(loc),   # Mức độ ý thức (quan trọng!)
    ser = factor(ser),
    type = factor(type)
  )

# Kiểm tra
table(icu$outcome)  # 0 = Sống, 1 = Tử vong

## 
##   0   1 
## 160  40

#Chạy đơn biến tổng hợp
# Bảng phân tích đơn biến tất cả biến
# Cài các package phụ thuộc còn thiếu
#install.packages("broom.helpers")
#install.packages("broom")

# Sau đó load lại
library(gtsummary)
tbl_univariate <- icu %>%
  select(outcome, age, gender, ser, can, crn, inf,
         cpr, sys, hra, pre, type, fra, loc) %>%
  tbl_uvregression(
    method = glm,
    y = outcome,
    method.args = list(family = binomial),
    exponentiate = TRUE,   # Hiển thị OR thay vì log(OR)
    pvalue_fun = ~style_pvalue(.x, digits = 3)
  ) %>%
  bold_p(t = 0.05) %>%
  bold_labels() %>%
  modify_header(label = "**Biến số**") %>%
  modify_caption("**Phân tích đơn biến — Tiên lượng tử vong ICU**")

## There was a warning running `tbl_regression()` for variable "loc". See message
## below.
## ! glm.fit: fitted probabilities numerically 0 or 1 occurred, glm.fit: fitted
##   probabilities numerically 0 or 1 occurred, glm.fit: fitted probabilities
##   numerically 0 or 1 occurred, glm.fit: fitted probabilities numerically 0 or 1
##   occurred, glm.fit: fitted probabilities numerically 0 or 1 occurred, glm.fit:
##   fitted probabilities numerically 0 or 1 occurred, glm.fit: fitted
##   probabilities numerically 0 or 1 occurred, glm.fit: fitted probabilities
##   numerically 0 or 1 occurred, glm.fit: fitted probabilities numerically 0 or 1
##   occurred, glm.fit: fitted probabilities numerically 0 or 1 occurred, glm.fit:
##   fitted probabilities numerically 0 or 1 occurred, glm.fit: fitted
##   probabilities numerically 0 or 1 occurred, glm.fit: fitted probabilities
##   numerically 0 or 1 occurred, and glm.fit: fitted probabilities numerically 0
##   or 1 occurred

# Xem bảng
tbl_univariate

**Phân tích đơn biến — Tiên lượng tử vong ICU**
Biến số	N	OR	95% CI	p-value
age	200	1.03	1.01, 1.05	0.009
gender	200
Male		—	—
Female		1.11	0.54, 2.24	0.771
ser	200
Medical		—	—
Surgical		0.39	0.18, 0.79	0.010
can	200
No		—	—
Yes		1.00	0.27, 2.93	>0.999
crn	200
No		—	—
Yes		3.39	1.22, 9.06	0.015
inf	200
No		—	—
Yes		2.50	1.24, 5.16	0.011
cpr	200
No		—	—
Yes		5.44	1.70, 17.9	0.004
sys	200	0.98	0.97, 0.99	0.005
hra	200	1.00	0.99, 1.02	0.653
pre	200
No		—	—
Yes		1.26	0.47, 3.07	0.621
type	200
Elective		—	—
Emergency		8.89	2.58, 56.0	0.003
fra	200
No		—	—
Yes		1.00	0.22, 3.34	>0.999
loc	200
Nothing		—	—
Stupor		91,589,445	0.00,	0.986
Coma		23.4	5.52, 161	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

#install.packages("BMA")
#install.packages("BAS")   # Bayesian Adaptive Sampling — mạnh hơn BMA package

library(BMA)

## Loading required package: survival

## Loading required package: leaps

## Loading required package: robustbase

## 
## Attaching package: 'robustbase'

## The following object is masked from 'package:survival':
## 
##     heart

## Loading required package: inline

## Loading required package: rrcov

## Scalable Robust Estimators with High Breakdown Point (version 1.7-7)

library(BAS)

# Chạy BMA logistic regression
bma_model <- bas.glm(
  outcome ~ age + can + crn + inf + cpr + sys + type + loc,
  data = icu,
  family = binomial(link = "logit"),
  method = "MCMC",           # MCMC sampling cho không gian mô hình lớn
  MCMC.iterations = 100000,
  betaprior = bic.prior(),   # Prior phổ biến trong y văn
  modelprior = uniform()     # Prior đồng đều cho các mô hình
)

# Xem tóm tắt
summary(bma_model)

##               P(B != 0 | Y)   model 1     model 2     model 3     model 4
## Intercept        1.00000000   1.00000   1.0000000   1.0000000   1.0000000
## age              0.91283791   1.00000   1.0000000   1.0000000   1.0000000
## canYes           0.72592519   1.00000   1.0000000   0.0000000   0.0000000
## crnYes           0.11238903   0.00000   0.0000000   0.0000000   0.0000000
## infYes           0.09311721   0.00000   0.0000000   0.0000000   0.0000000
## cprYes           0.13756608   0.00000   0.0000000   0.0000000   0.0000000
## sys              0.62373067   1.00000   0.0000000   1.0000000   0.0000000
## typeEmergency    0.99194015   1.00000   1.0000000   1.0000000   1.0000000
## locStupor        0.99931172   1.00000   1.0000000   1.0000000   1.0000000
## locComa          0.88119701   1.00000   1.0000000   1.0000000   1.0000000
## BF                       NA   1.00000   0.6519453   0.2808813   0.2422451
## PostProbs                NA   0.26580   0.1760000   0.0765000   0.0684000
## R2                       NA   0.35830   0.3276000   0.3192000   0.2912000
## dim                      NA   7.00000   6.0000000   6.0000000   5.0000000
## logmarg                  NA -80.11515 -80.5429478 -81.3849764 -81.5329585
##                   model 5
## Intercept       1.0000000
## age             1.0000000
## canYes          1.0000000
## crnYes          0.0000000
## infYes          0.0000000
## cprYes          0.0000000
## sys             1.0000000
## typeEmergency   1.0000000
## locStupor       1.0000000
## locComa         0.0000000
## BF              0.1295497
## PostProbs       0.0348000
## R2              0.3114000
## dim             6.0000000
## logmarg       -82.1588442

# Inclusion probability — chỉ số quan trọng nhất của BMA
plot(bma_model, which = 4,  # Inclusion probabilities
     ask = FALSE,
     cex.axis = 0.7)

# Xem dạng bảng
bma_model$probne0  # Xác suất posterior mỗi biến ≠ 0

##  [1] 1.00000000 0.91283791 0.72592519 0.11238903 0.09311721 0.13756608
##  [7] 0.62373067 0.99194015 0.99931172 0.88119701

# Coefficient trung bình theo BMA
coef_bma <- coef(bma_model)
print(coef_bma)

## 
##  Marginal Posterior Summaries of Coefficients: 
## 
##  Using  BMA 
## 
##  Based on the top  202 models 
##                post mean   post SD     post p(B != 0)
## Intercept        -5.57521     2.23551     1.00000    
## age               0.03388     0.01619     0.91284    
## canYes            1.80274     1.37544     0.72593    
## crnYes            0.08372     0.32976     0.11239    
## infYes            0.03615     0.18739     0.09312    
## cprYes            0.14204     0.45935     0.13757    
## sys              -0.01140     0.01068     0.62373    
## typeEmergency     3.46968     1.34458     0.99194    
## locStupor        20.89150  1429.40283     0.99931    
## locComa           2.21051     1.16324     0.88120

# Visualize coefficient
plot(coef_bma,
     ask = FALSE)

library(BMA)

# Cần chuẩn bị matrix biến độc lập
X <- icu %>%
  select(age, can, crn, inf, cpr, sys, type, loc) %>%
  mutate(across(where(is.factor), as.numeric)) %>%
  as.data.frame()

bma_model2 <- bic.glm(
  x = X,
  y = icu$outcome,
  glm.family = binomial(link = "logit"),
  strict = FALSE
)

# Xem kết quả
summary(bma_model2)

## 
## Call:
## bic.glm.data.frame(x = X, y = icu$outcome, glm.family = binomial(link = "logit"),     strict = FALSE)
## 
## 
##   12  models were selected
##  Best  5  models (cumulative posterior probability =  0.7917 ): 
## 
##            p!=0    EV         SD        model 1     model 2     model 3   
## Intercept  100    -11.088458  3.080617  -1.326e+01  -9.544e+00  -1.177e+01
## age         92.9    0.031635  0.014670   3.584e-02   3.291e-02   3.508e-02
## can         50.6    0.966395  1.119466   1.925e+00       .       1.945e+00
## crn          5.6    0.024246  0.175746       .           .           .    
## inf          7.0    0.030049  0.158869       .           .           .    
## cpr          5.4    0.029808  0.225111       .           .           .    
## sys         13.3   -0.001267  0.004018       .           .      -9.646e-03
## type       100.0    2.494913  0.909742   2.893e+00   2.188e+00   2.790e+00
## loc        100.0    1.876222  0.534174   1.927e+00   1.834e+00   1.915e+00
##                                                                           
## nVar                                       4           3           5      
## BIC                                     -8.876e+02  -8.875e+02  -8.845e+02
## post prob                                0.317       0.298       0.070    
##            model 4     model 5   
## Intercept  -8.029e+00  -7.144e+00
## age         3.215e-02       .    
## can             .           .    
## crn             .           .    
## inf             .           .    
## cpr             .           .    
## sys        -9.375e-03       .    
## type        2.064e+00   1.926e+00
## loc         1.822e+00   1.894e+00
##                                  
## nVar          4           2      
## BIC        -8.844e+02  -8.836e+02
## post prob   0.063       0.043

print(bma_model2)

## 
## Call:
## bic.glm.data.frame(x = X, y = icu$outcome, glm.family = binomial(link = "logit"),     strict = FALSE)
## 
## 
##  Posterior probabilities(%): 
##   age   can   crn   inf   cpr   sys  type   loc 
##  92.9  50.6   5.6   7.0   5.4  13.3 100.0 100.0 
## 
##  Coefficient posterior expected values: 
## (Intercept)          age          can          crn          inf          cpr  
##  -11.088458     0.031635     0.966395     0.024246     0.030049     0.029808  
##         sys         type          loc  
##   -0.001267     2.494913     1.876222

# Mô hình cuối từ kết quả BMA
model_final <- glm(
  outcome ~ age + can + type + loc,
  data = icu,
  family = binomial(link = "logit")
)

# Trình bày chuẩn y văn
tbl_final <- model_final %>%
  tbl_regression(
    exponentiate = TRUE,
    pvalue_fun = ~style_pvalue(.x, digits = 3)
  ) %>%
  bold_p(t = 0.05) %>%
  bold_labels() %>%
  modify_header(label = "**Biến số**") %>%
  modify_caption("**Mô hình cuối — BMA Selected Variables**")

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

tbl_final

**Mô hình cuối — BMA Selected Variables**
Biến số	OR	95% CI	p-value
age	1.04	1.01, 1.07	0.003
can
No	—	—
Yes	11.4	1.97, 92.0	0.009
type
Elective	—	—
Emergency	48.6	6.64, 1,169	0.002
loc
Nothing	—	—
Stupor	1,101,550,279	0.00,	0.989
Coma	15.9	3.57, 112	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

library(ResourceSelection)

## ResourceSelection 0.3-6   2023-06-27

# Predicted probability
icu$pred_prob <- predict(model_final, type = "response")

# 1. AUC - ROC
roc_obj <- roc(icu$outcome, icu$pred_prob)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

auc(roc_obj)

## Area under the curve: 0.8552

# Vẽ ROC
plot(roc_obj,
     print.auc = TRUE,
     col = "steelblue",
     lwd = 2,
     main = "ROC Curve — ICU Mortality Prediction")

# 2. Hosmer-Lemeshow test (Calibration)
hl_test <- hoslem.test(icu$outcome, icu$pred_prob, g = 10)
print(hl_test)

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  icu$outcome, icu$pred_prob
## X-squared = 4.568, df = 8, p-value = 0.8026

# p > 0.05 = mô hình calibrate tốt

# Xem coefficients
coef(model_final)

##   (Intercept)           age        canYes typeEmergency     locStupor 
##   -7.68055261    0.03785479    2.43193239    3.88421704   20.81998437 
##       locComa 
##    2.76516818

# Kiểm tra phân phối loc vs outcome
table(icu$loc, icu$outcome)

##          
##             0   1
##   Nothing 158  27
##   Stupor    0   5
##   Coma      2   8

# Kiểm tra số lượng
icu %>%
  count(loc, outcome) %>%
  pivot_wider(names_from = outcome, 
              values_from = n,
              values_fill = 0) %>%
  rename(Survived = `0`, Died = `1`)

## # A tibble: 3 × 3
##   loc     Survived  Died
##   <fct>      <int> <int>
## 1 Nothing      158    27
## 2 Stupor         0     5
## 3 Coma           2     8

# Mô hình cuối bỏ loc
model_final3 <- glm(
  outcome ~ age + can + type,
  data = icu,
  family = binomial(link = "logit")
)

# Xem coefficients
coef(model_final3)

##   (Intercept)           age        canYes typeEmergency 
##   -6.28626263    0.03604018    1.61315724    3.03773520

# Trình bày bảng
model_final3 %>%
  tbl_regression(
    exponentiate = TRUE,
    pvalue_fun = ~style_pvalue(.x, digits = 3)
  ) %>%
  bold_p(t = 0.05) %>%
  bold_labels() %>%
  modify_caption("**Mô hình cuối — age + can + type**")

**Mô hình cuối — age + can + type**
Characteristic	OR	95% CI	p-value
age	1.04	1.02, 1.06	0.001
can
No	—	—
Yes	5.02	1.02, 27.6	0.046
type
Elective	—	—
Emergency	20.9	4.84, 166	<0.001
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

# Cập nhật predicted probability
icu$pred_prob <- predict(model_final3, type = "response")

# Kiểm tra lại AUC
roc_obj <- roc(icu$outcome, icu$pred_prob)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

auc(roc_obj)

## Area under the curve: 0.7609

coef(model_final3)

##   (Intercept)           age        canYes typeEmergency 
##   -6.28626263    0.03604018    1.61315724    3.03773520

auc(roc_obj)

## Area under the curve: 0.7609

# Coefficients (bỏ intercept)
beta <- coef(model_final3)[-1]

# Lấy beta nhỏ nhất làm đơn vị
beta_min <- min(abs(beta))
# = 0.03604 (age)

# Tính raw score
raw_score <- beta / beta_min
print(round(raw_score, 1))

##           age        canYes typeEmergency 
##           1.0          44.8          84.3

# age     = 0.036 / 0.036 = 1.0  → 1 điểm/năm tuổi
# can     = 1.613 / 0.036 = 44.8 → quá lớn!
# type    = 3.038 / 0.036 = 84.3 → quá lớn!

# Nhóm tuổi phổ biến trong y văn ICU
icu <- icu %>%
  mutate(
    age_group = case_when(
      age < 40  ~ 0,
      age < 50  ~ 1,
      age < 60  ~ 2,
      age < 70  ~ 3,
      age >= 70 ~ 4
    )
  )

# Kiểm tra phân phối
table(icu$age_group, icu$outcome)

##    
##      0  1
##   0 35  2
##   1 19  3
##   2 20  6
##   3 40 11
##   4 46 18

model_score <- glm(
  outcome ~ age_group + can + type,
  data = icu,
  family = binomial(link = "logit")
)

coef(model_score)

##   (Intercept)     age_group        canYes typeEmergency 
##    -5.5810100     0.5183759     1.6546616     3.1562033

beta2     <- coef(model_score)[-1]
beta_min2 <- min(abs(beta2))

raw_score2     <- beta2 / beta_min2
integer_score2 <- round(raw_score2, 0)

print(integer_score2)

##     age_group        canYes typeEmergency 
##             1             3             6

icu <- icu %>%
  mutate(
    pt_age  = age_group * integer_score2["age_group"],
    pt_can  = ifelse(can == "Yes", integer_score2["canYes"], 0),
    pt_type = ifelse(type == "Emergency", integer_score2["typeEmergency"], 0),
    total_score = pt_age + pt_can + pt_type
  )

summary(icu$total_score)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   6.000   7.000   7.125   9.000  13.000

hist(icu$total_score,
     breaks = 10,
     col    = "steelblue",
     main   = "Phân phối tổng điểm",
     xlab   = "Tổng điểm")

roc_score <- roc(icu$outcome, icu$total_score)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

plot(roc_score,
     print.auc = TRUE,
     col  = "tomato",
     lwd  = 2,
     main = "ROC: Scoring System vs Original Model")

lines(roc_obj, col = "steelblue", lwd = 2, lty = 2)

legend("bottomright",
       legend = c(
         paste("Scoring AUC =", round(auc(roc_score), 3)),
         paste("Model AUC   =", round(auc(roc_obj), 3))
       ),
       col = c("tomato", "steelblue"),
       lwd = 2)

library(pROC)

# ROC và ngưỡng tối ưu
roc_score <- roc(icu$outcome, icu$total_score)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

best_cut <- coords(
  roc_score,
  "best",
  best.method = "youden",
  ret = c("threshold", "sensitivity", 
          "specificity", "ppv", "npv")
)
print(best_cut)

##           threshold sensitivity specificity       ppv       npv
## threshold       7.5       0.825         0.6 0.3402062 0.9320388

# Hosmer-Lemeshow cho bảng điểm
hl_score <- hoslem.test(icu$outcome, 
                        fitted(model_score), 
                        g = 10)

## Warning in hoslem.test(icu$outcome, fitted(model_score), g = 10): The data did
## not allow for the requested number of bins.

print(hl_score)

## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  icu$outcome, fitted(model_score)
## X-squared = 3.1805, df = 7, p-value = 0.8678

# Tỷ lệ tử vong theo từng mức điểm
score_perf <- icu %>%
  group_by(total_score) %>%
  summarise(
    n          = n(),
    n_died     = sum(outcome),
    mortality  = round(mean(outcome) * 100, 1)
  ) %>%
  arrange(total_score)

print(score_perf)

## # A tibble: 13 × 4
##    total_score     n n_died mortality
##          <dbl> <int>  <dbl>     <dbl>
##  1           0     2      0       0  
##  2           2     6      0       0  
##  3           3    15      0       0  
##  4           4    20      1       5  
##  5           5     1      0       0  
##  6           6    36      2       5.6
##  7           7    23      4      17.4
##  8           8    18      5      27.8
##  9           9    34     10      29.4
## 10          10    42     15      35.7
## 11          11     1      1     100  
## 12          12     1      1     100  
## 13          13     1      1     100

# Sau khi có best_cut từ bước 5.7
# Ví dụ ngưỡng = 3 (cập nhật theo kết quả thực tế)

icu <- icu %>%
  mutate(
    risk_group = case_when(
      total_score <= 2 ~ "Thấp",
      total_score <= 5 ~ "Trung bình", 
      total_score >= 6 ~ "Cao"
    ),
    risk_group = factor(risk_group, 
                        levels = c("Thấp", "Trung bình", "Cao"))
  )

# Bảng tỷ lệ tử vong theo nhóm nguy cơ
icu %>%
  group_by(risk_group) %>%
  summarise(
    n         = n(),
    n_died    = sum(outcome),
    mortality = paste0(round(mean(outcome) * 100, 1), "%")
  )

## # A tibble: 3 × 4
##   risk_group     n n_died mortality
##   <fct>      <int>  <dbl> <chr>    
## 1 Thấp           8      0 0%       
## 2 Trung bình    36      1 2.8%     
## 3 Cao          156     39 25%

# Cập nhật ngưỡng theo Youden (≥8 = nguy cơ cao)
icu <- icu %>%
  mutate(
    risk_group2 = case_when(
      total_score < 8  ~ "Thấp — Trung bình",
      total_score >= 8 ~ "Cao"
    ),
    risk_group2 = factor(risk_group2,
                         levels = c("Thấp — Trung bình", "Cao"))
  )

# Bảng tử vong theo nhóm mới
icu %>%
  group_by(risk_group2) %>%
  summarise(
    n         = n(),
    n_died    = sum(outcome),
    mortality = paste0(round(mean(outcome) * 100, 1), "%")
  )

## # A tibble: 2 × 4
##   risk_group2           n n_died mortality
##   <fct>             <int>  <dbl> <chr>    
## 1 Thấp — Trung bình   103      7 6.8%     
## 2 Cao                  97     33 34%

#install.packages("rms")
library(rms)

## Loading required package: Hmisc

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

dd <- datadist(icu)
options(datadist = "dd")

model_rms <- lrm(
  outcome ~ age_group + can + type,
  data    = icu,
  x       = TRUE,
  y       = TRUE
)

# Bootstrap 1000 lần
set.seed(123)
val <- validate(model_rms, B = 1000)
print(val)

##           index.orig training    test optimism index.corrected   Lower  Upper
## Dxy           0.5256   0.5380  0.5184   0.0197          0.5060  0.3497 0.6551
## R2            0.2393   0.2596  0.2123   0.0473          0.1920 -0.0133 0.3354
## Intercept     0.0000   0.0000 -0.1882   0.1882         -0.1882 -1.3184 0.7353
## Slope         1.0000   1.0000  0.8177   0.1823          0.8177 -0.3258 1.4806
## Emax          0.0000   0.0000  0.1646  -0.1646          0.1646 -0.0856 0.9655
## D             0.1591   0.1750  0.1395   0.0355          0.1236 -0.0289 0.2333
## U            -0.0100  -0.0100  0.0448  -0.0548          0.0448 -0.0685 0.5983
## Q             0.1691   0.1850  0.0948   0.0903          0.0788 -0.5453 0.2725
## B             0.1357   0.1330  0.1392  -0.0062          0.1420  0.1137 0.1751
## g             1.5092   2.4503  1.3960   1.0543          0.4549 -8.1767 2.4463
## gp            0.1748   0.1767  0.1579   0.0188          0.1560  0.0521 0.2253
##              n
## Dxy       1000
## R2        1000
## Intercept 1000
## Slope     1000
## Emax      1000
## D         1000
## U         1000
## Q         1000
## B         1000
## g         1000
## gp        1000

# Tính AUC hiệu chỉnh
dxy_optimism_corrected <- val["Dxy", "index.corrected"]
auc_corrected <- dxy_optimism_corrected / 2 + 0.5
cat("AUC sau bootstrap correction:", round(auc_corrected, 3), "\n")

## AUC sau bootstrap correction: 0.753

# Calibration plot chuẩn y văn
cal <- calibrate(model_rms, B = 1000)
plot(cal,
     xlab = "Predicted Probability",
     ylab = "Observed Probability",
     main = "Calibration Plot — Bootstrap 1000 lần")

## 
## n=200   Mean absolute error=0.021   Mean squared error=0.00069
## 0.9 Quantile of absolute error=0.046

library(pROC)
library(ggplot2)

# 1. ROC Curve đẹp bằng ggplot2
roc_data <- data.frame(
  fpr = 1 - roc_score$specificities,
  tpr = roc_score$sensitivities
)

ggplot(roc_data, aes(x = fpr, y = tpr)) +
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_abline(linetype = "dashed", color = "gray50") +
  geom_point(aes(x = 1 - 0.6, y = 0.825),  # Điểm tối ưu
             color = "tomato", size = 3) +
  annotate("text",
           x    = 0.55, y = 0.75,
           label = paste0("AUC = 0.761\nCorrected AUC = 0.753"),
           size  = 4, color = "steelblue") +
  labs(
    title    = "ROC Curve — ICU Mortality Scoring System",
    x        = "1 - Specificity (False Positive Rate)",
    y        = "Sensitivity (True Positive Rate)"
  ) +
  theme_minimal(base_size = 13)

## Warning in geom_point(aes(x = 1 - 0.6, y = 0.825), color = "tomato", size = 3): All aesthetics have length 1, but the data has 14 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

# 2. Tỷ lệ tử vong theo tổng điểm
score_perf %>%
  ggplot(aes(x = factor(total_score), y = mortality)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  geom_text(aes(label = paste0(mortality, "%")),
            vjust = -0.5, size = 3.5) +
  labs(
    title = "Tỷ lệ tử vong theo tổng điểm",
    x     = "Tổng điểm",
    y     = "Tỷ lệ tử vong (%)"
  ) +
  theme_minimal(base_size = 13)

# Bảng điểm hoàn chỉnh
final_score_table <- data.frame(
  "Yếu tố"      = c("Tuổi < 40", "Tuổi 40–49", "Tuổi 50–59",
                     "Tuổi 60–69", "Tuổi ≥ 70",
                     "Ung thư: Không", "Ung thư: Có",
                     "Nhập viện chương trình", "Nhập viện cấp cứu"),
  "Điểm số"     = c(0, 1, 2, 3, 4, 0, 3, 0, 6),
  check.names   = FALSE
)

print(final_score_table)

##                   Yếu tố Điểm số
## 1              Tuổi < 40       0
## 2             Tuổi 40–49       1
## 3             Tuổi 50–59       2
## 4             Tuổi 60–69       3
## 5              Tuổi ≥ 70       4
## 6         Ung thư: Không       0
## 7            Ung thư: Có       3
## 8 Nhập viện chương trình       0
## 9      Nhập viện cấp cứu       6

# Bảng phân tầng nguy cơ
risk_table <- data.frame(
  "Tổng điểm"    = c("0 – 7", "≥ 8"),
  "Nhóm nguy cơ" = c("Thấp – Trung bình", "Cao"),
  "Tỷ lệ tử vong" = c("< 5%", "~25%"),
  check.names    = FALSE
)

print(risk_table)

##   Tổng điểm      Nhóm nguy cơ Tỷ lệ tử vong
## 1     0 – 7 Thấp – Trung bình          < 5%
## 2       ≥ 8               Cao          ~25%

ICU

NhuHao

2026-05-08