---
title: "AUC Analysis for Pneumonia Severity Scores in Predicting Death"
author: "Tung Lam"
date: "2025-03-28"
output: pdf_document
---
## Introduction
This report presents the AUC analysis for pneumonia severity scores (`CURB65`, `PSI`, `NEWS`, `NEWSL`, `SMARTCOP`) in predicting death among patients diagnosed with pneumonia. The analysis includes an ROC curve plot and a summary table with AUC, standard error, confidence intervals, sensitivity, specificity, cut-off values, Youden Index, and p-values.
## Load Required Packages and Data
``` r
# Load required packages
library(readxl) # For reading Excel files
library(pROC) # For ROC analysis
library(knitr) # For table formatting
# Read the Excel file
file_path <- "C:/Users/Lenovo/Documents/Zalo Received Files/Danh Sach Benh Nhan viêm phổi 2022.xls"
data <- read_excel(file_path)
# Standardize column names
colnames(data) <- make.names(colnames(data))
if ("CURB.65" %in% colnames(data)) {
names(data)[names(data) == "CURB.65"] <- "CURB65"
}
# Prepare the data
data$DEATH <- as.numeric(data$DEATH) # Outcome: 0 = survival, 1 = death
data$CURB65 <- as.numeric(data$CURB65)
data$PSI <- as.numeric(data$PSI)
data$NEWS <- as.numeric(data$NEWS)
data$NEWSL <- as.numeric(data$NEWSL)
data$SMARTCOP <- as.numeric(data$SMARTCOP)
data <- na.omit(data[, c("DEATH", "CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")])
The following plot shows the ROC curves for each severity score, with the x-axis as 1 - Specificity (False Positive Rate) and the y-axis as Sensitivity, starting at (0,0).
# Define severity scores
severity_scores <- c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")
# Compute ROC curves
roc_CURB65 <- roc(data$DEATH, data$CURB65)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_PSI <- roc(data$DEATH, data$PSI)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_NEWS <- roc(data$DEATH, data$NEWS)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_NEWSL <- roc(data$DEATH, data$NEWSL)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_SMARTCOP <- roc(data$DEATH, data$SMARTCOP)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Extract coordinates
fpr_CURB65 <- 1 - roc_CURB65$specificities
sens_CURB65 <- roc_CURB65$sensitivities
fpr_PSI <- 1 - roc_PSI$specificities
sens_PSI <- roc_PSI$sensitivities
fpr_NEWS <- 1 - roc_NEWS$specificities
sens_NEWS <- roc_NEWS$sensitivities
fpr_NEWSL <- 1 - roc_NEWSL$specificities
sens_NEWSL <- roc_NEWSL$sensitivities
fpr_SMARTCOP <- 1 - roc_SMARTCOP$specificities
sens_SMARTCOP <- roc_SMARTCOP$sensitivities
# Plot ROC curves
plot(fpr_CURB65, sens_CURB65, type = "l", col = "red", lwd = 2,
xlim = c(0, 1), ylim = c(0, 1),
xlab = "1 - Specificity (False Positive Rate)",
ylab = "Sensitivity",
main = "ROC Curves for Death Prediction")
lines(fpr_PSI, sens_PSI, col = "blue", lwd = 2)
lines(fpr_NEWS, sens_NEWS, col = "green", lwd = 2)
lines(fpr_NEWSL, sens_NEWSL, col = "purple", lwd = 2)
lines(fpr_SMARTCOP, sens_SMARTCOP, col = "orange", lwd = 2)
# Add reference line
abline(a = 0, b = 1, lty = 2, col = "gray")
# Add legend
legend("bottomright", legend = c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP"),
col = c("red", "blue", "green", "purple", "orange"), lwd = 2)
The table below summarizes the AUC analysis for each severity score in predicting death.
# Create results table
results <- data.frame(
Scores = character(),
AUC = numeric(),
SE = numeric(),
`95% CI` = character(),
`Sensitivity (%)` = numeric(),
`Specificity (%)` = numeric(),
`Cut-off` = numeric(),
`Youden Index J` = numeric(),
P = numeric(),
stringsAsFactors = FALSE
)
# Compute metrics for each score
for (score in severity_scores) {
roc_obj <- roc(data$DEATH, data[[score]], ci = TRUE)
auc_value <- as.numeric(roc_obj$auc)
# Compute SE using (upper CI - lower CI)/(2*1.96)
ci_values <- ci.auc(roc_obj)
se_value <- (ci_values[3] - ci_values[1]) / (2 * 1.96)
# Format 95% CI
ci_text <- sprintf("%.4f - %.4f", ci_values[1], ci_values[3])
# Compute Youden Index and optimal cut-off
youden <- roc_obj$sensitivities + roc_obj$specificities - 1
optimal_idx <- which.max(youden)
optimal_sensitivity <- roc_obj$sensitivities[optimal_idx] * 100
optimal_specificity <- roc_obj$specificities[optimal_idx] * 100
optimal_cutoff <- roc_obj$thresholds[optimal_idx]
youden_index <- youden[optimal_idx]
# Compute p-value (compare AUC to 0.5)
p_value <- roc.test(roc_obj, roc(data$DEATH, runif(length(data$DEATH)), levels = c(0, 1)))$p.value
# Add row to results
results <- rbind(results, data.frame(
Scores = score,
AUC = round(auc_value, 4),
SE = round(se_value, 4),
`95% CI` = ci_text,
`Sensitivity (%)` = round(optimal_sensitivity, 2),
`Specificity (%)` = round(optimal_specificity, 2),
`Cut-off` = round(optimal_cutoff, 2),
`Youden Index J` = round(youden_index, 4),
P = p_value
))
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
# Display the table
kable(results, caption = "Summary of AUC Analysis for Death Prediction")
| Scores | AUC | SE | X95..CI | Sensitivity…. | Specificity…. | Cut.off | Youden.Index.J | P |
|---|---|---|---|---|---|---|---|---|
| CURB65 | 0.8155 | 0.0414 | 0.7343 - 0.8967 | 86.36 | 63.11 | 1.50 | 0.4947 | 0.0001116 |
| PSI | 0.9160 | 0.0342 | 0.8490 - 0.9831 | 95.45 | 78.35 | 116.50 | 0.7381 | 0.0000000 |
| NEWS | 0.8579 | 0.0352 | 0.7888 - 0.9270 | 86.36 | 67.38 | 5.50 | 0.5374 | 0.0002458 |
| NEWSL | 0.8525 | 0.0359 | 0.7822 - 0.9229 | 86.36 | 73.78 | 8.35 | 0.6014 | 0.0000453 |
| SMARTCOP | 0.8011 | 0.0342 | 0.7341 - 0.8682 | 90.91 | 62.50 | 3.50 | 0.5341 | 0.0009664 |