---
title: "AUC Analysis for Pneumonia Severity Scores in Predicting Death"
author: "Tung Lam"
date: "2025-03-28"
output: pdf_document
---




## Introduction

This report presents the AUC analysis for pneumonia severity scores (`CURB65`, `PSI`, `NEWS`, `NEWSL`, `SMARTCOP`) in predicting death among patients diagnosed with pneumonia. The analysis includes an ROC curve plot and a summary table with AUC, standard error, confidence intervals, sensitivity, specificity, cut-off values, Youden Index, and p-values.

## Load Required Packages and Data


``` r
# Load required packages
library(readxl)  # For reading Excel files
library(pROC)    # For ROC analysis
library(knitr)   # For table formatting

# Read the Excel file
file_path <- "C:/Users/Lenovo/Documents/Zalo Received Files/Danh Sach Benh Nhan viêm phổi 2022.xls"
data <- read_excel(file_path)

# Standardize column names
colnames(data) <- make.names(colnames(data))
if ("CURB.65" %in% colnames(data)) {
  names(data)[names(data) == "CURB.65"] <- "CURB65"
}

# Prepare the data
data$DEATH  <- as.numeric(data$DEATH)  # Outcome: 0 = survival, 1 = death
data$CURB65 <- as.numeric(data$CURB65)
data$PSI    <- as.numeric(data$PSI)
data$NEWS   <- as.numeric(data$NEWS)
data$NEWSL  <- as.numeric(data$NEWSL)
data$SMARTCOP <- as.numeric(data$SMARTCOP)
data <- na.omit(data[, c("DEATH", "CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")])

ROC Curve Plot

The following plot shows the ROC curves for each severity score, with the x-axis as 1 - Specificity (False Positive Rate) and the y-axis as Sensitivity, starting at (0,0).

# Define severity scores
severity_scores <- c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")

# Compute ROC curves
roc_CURB65 <- roc(data$DEATH, data$CURB65)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_PSI    <- roc(data$DEATH, data$PSI)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_NEWS   <- roc(data$DEATH, data$NEWS)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_NEWSL  <- roc(data$DEATH, data$NEWSL)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_SMARTCOP <- roc(data$DEATH, data$SMARTCOP)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Extract coordinates
fpr_CURB65 <- 1 - roc_CURB65$specificities
sens_CURB65 <- roc_CURB65$sensitivities
fpr_PSI    <- 1 - roc_PSI$specificities
sens_PSI    <- roc_PSI$sensitivities
fpr_NEWS   <- 1 - roc_NEWS$specificities
sens_NEWS   <- roc_NEWS$sensitivities
fpr_NEWSL  <- 1 - roc_NEWSL$specificities
sens_NEWSL  <- roc_NEWSL$sensitivities
fpr_SMARTCOP <- 1 - roc_SMARTCOP$specificities
sens_SMARTCOP <- roc_SMARTCOP$sensitivities

# Plot ROC curves
plot(fpr_CURB65, sens_CURB65, type = "l", col = "red", lwd = 2,
     xlim = c(0, 1), ylim = c(0, 1),
     xlab = "1 - Specificity (False Positive Rate)",
     ylab = "Sensitivity",
     main = "ROC Curves for Death Prediction")
lines(fpr_PSI, sens_PSI, col = "blue", lwd = 2)
lines(fpr_NEWS, sens_NEWS, col = "green", lwd = 2)
lines(fpr_NEWSL, sens_NEWSL, col = "purple", lwd = 2)
lines(fpr_SMARTCOP, sens_SMARTCOP, col = "orange", lwd = 2)

# Add reference line
abline(a = 0, b = 1, lty = 2, col = "gray")

# Add legend
legend("bottomright", legend = c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP"),
       col = c("red", "blue", "green", "purple", "orange"), lwd = 2)

Summary Table

The table below summarizes the AUC analysis for each severity score in predicting death.

# Create results table
results <- data.frame(
  Scores = character(),
  AUC = numeric(),
  SE = numeric(),
  `95% CI` = character(),
  `Sensitivity (%)` = numeric(),
  `Specificity (%)` = numeric(),
  `Cut-off` = numeric(),
  `Youden Index J` = numeric(),
  P = numeric(),
  stringsAsFactors = FALSE
)

# Compute metrics for each score
for (score in severity_scores) {
  roc_obj <- roc(data$DEATH, data[[score]], ci = TRUE)
  auc_value <- as.numeric(roc_obj$auc)
  
  # Compute SE using (upper CI - lower CI)/(2*1.96)
  ci_values <- ci.auc(roc_obj)
  se_value <- (ci_values[3] - ci_values[1]) / (2 * 1.96)
  
  # Format 95% CI
  ci_text <- sprintf("%.4f - %.4f", ci_values[1], ci_values[3])
  
  # Compute Youden Index and optimal cut-off
  youden <- roc_obj$sensitivities + roc_obj$specificities - 1
  optimal_idx <- which.max(youden)
  optimal_sensitivity <- roc_obj$sensitivities[optimal_idx] * 100
  optimal_specificity <- roc_obj$specificities[optimal_idx] * 100
  optimal_cutoff <- roc_obj$thresholds[optimal_idx]
  youden_index <- youden[optimal_idx]
  
  # Compute p-value (compare AUC to 0.5)
  p_value <- roc.test(roc_obj, roc(data$DEATH, runif(length(data$DEATH)), levels = c(0, 1)))$p.value
  
  # Add row to results
  results <- rbind(results, data.frame(
    Scores = score,
    AUC = round(auc_value, 4),
    SE = round(se_value, 4),
    `95% CI` = ci_text,
    `Sensitivity (%)` = round(optimal_sensitivity, 2),
    `Specificity (%)` = round(optimal_specificity, 2),
    `Cut-off` = round(optimal_cutoff, 2),
    `Youden Index J` = round(youden_index, 4),
    P = p_value
  ))
}
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting direction: controls > cases
# Display the table
kable(results, caption = "Summary of AUC Analysis for Death Prediction")
Summary of AUC Analysis for Death Prediction
Scores AUC SE X95..CI Sensitivity…. Specificity…. Cut.off Youden.Index.J P
CURB65 0.8155 0.0414 0.7343 - 0.8967 86.36 63.11 1.50 0.4947 0.0001116
PSI 0.9160 0.0342 0.8490 - 0.9831 95.45 78.35 116.50 0.7381 0.0000000
NEWS 0.8579 0.0352 0.7888 - 0.9270 86.36 67.38 5.50 0.5374 0.0002458
NEWSL 0.8525 0.0359 0.7822 - 0.9229 86.36 73.78 8.35 0.6014 0.0000453
SMARTCOP 0.8011 0.0342 0.7341 - 0.8682 90.91 62.50 3.50 0.5341 0.0009664