Introduction

This report analyzes pneumonia severity scores (CURB65, PSI, NEWS, NEWSL, SMARTCOP) in patients diagnosed with pneumonia. It includes: 1. A table summarizing the distribution of patients across score classifications, with mean ± SD and p-values comparing death vs. survival and ICU vs. non-ICU admission. 2. An ROC plot and AUC summary table for predicting death.

Load Required Packages and Data

# Load required packages
library(readxl)  # For reading Excel files
library(pROC)    # For ROC analysis
library(knitr)   # For table formatting

# Read the Excel file
file_path <- "C:/Users/Lenovo/Documents/Zalo Received Files/Danh Sach Benh Nhan viêm phổi 2022 fix 20_3_25.xls"
data <- read_excel(file_path)

# Standardize column names
colnames(data) <- make.names(colnames(data))
if ("CURB.65" %in% colnames(data)) {
  names(data)[names(data) == "CURB.65"] <- "CURB65"
}

# Prepare the data
data$DEATH  <- as.numeric(data$DEATH)  # AQ column: 0 = survive, 1 = death
data$ICU    <- as.numeric(data$ICU)    # DM column: 0 = non-ICU, 1 = ICU
data$CURB65 <- as.numeric(data$CURB65) # AR column: CURB-65 score
data$PSI    <- as.numeric(data$PSI)    # AT column: PSI score
data$NEWS   <- as.numeric(data$NEWS)   # DJ column: NEWS score
data$NEWSL  <- as.numeric(data$NEWSL)  # DL column: NEWSL score
data$SMARTCOP <- as.numeric(data$SMARTCOP) # Assumed column: SMARTCOP score
data$CURB65_CLASS <- as.factor(data$CURB65) # Assumed column: CURB-65 classes
data$PSI_CLASS <- as.factor(data$PSI_CLASS)       # Assumed column: PSI classes
data$NEWS_CLASS <- as.factor(data$NEWS_CLASS)     # Assumed column: NEWS classes
data$NEWSL_CLASS <- as.factor(data$NEWSL_CLASS)   # Assumed column: NEWSL classes
data$SMARTCOP_CLASS <- as.factor(data$SMARTCOP_CLASS) # Assumed column: SMARTCOP classes

Distribution of Patients Across Severity Score Classifications

The table below shows the mean ± SD for each score and the number (percentage) of patients in each classification, across all patients, death vs. survival, and ICU vs. non-ICU admission groups. P-values compare death vs. survival and ICU vs. non-ICU admission for each score.

# Calculate group sizes
n_all <- nrow(data)
n_death <- sum(data$DEATH == 1, na.rm = TRUE)
n_survived <- sum(data$DEATH == 0, na.rm = TRUE)
n_icu <- sum(data$ICU == 1, na.rm = TRUE)
n_non_icu <- sum(data$ICU == 0, na.rm = TRUE)

# Define scores and corresponding classification columns
scores <- c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")
class_cols <- c("CURB65_CLASS", "PSI_CLASS", "NEWS_CLASS", "NEWSL_CLASS", "SMARTCOP_CLASS")

# Define class levels for each score
classes_list <- list(
  CURB65 = 1:3,
  PSI = 1:5,
  NEWS = 1:3,
  NEWSL = 1:4,
  SMARTCOP = 1:4
)

# Helper functions
calc_mean_sd <- function(score, subset) {
  mean_val <- mean(subset[[score]], na.rm = TRUE)
  sd_val <- sd(subset[[score]], na.rm = TRUE)
  paste0(round(mean_val, 2), "±", round(sd_val, 2))
}

calc_n_pct <- function(class_col, class_value, subset) {
  n <- sum(subset[[class_col]] == class_value, na.rm = TRUE)
  total <- nrow(subset)
  if (total == 0) return("0 (0.0)")  # Handle empty subsets
  pct <- (n / total) * 100
  paste0(n, " (", round(pct, 1), ")")
}

calc_p_value <- function(score, group_var, data) {
  group1 <- data[[score]][data[[group_var]] == 1]
  group0 <- data[[score]][data[[group_var]] == 0]
  if (length(group1) < 2 || length(group0) < 2) return("NA")  # Avoid t-test errors
  test <- t.test(group1, group0)
  if (test$p.value < 0.001) {
    return("P<.001")
  } else {
    return(sprintf("%.3f", test$p.value))
  }
}

# Initialize table data frame
table_df <- data.frame(
  Scores = character(),
  All_Patients = character(),
  DEATH = character(),
  Survive = character(),
  P_Death_Survive = character(),
  ICU_Admission = character(),
  Non_ICU_Admission = character(),
  P_ICU_NonICU = character(),
  stringsAsFactors = FALSE
)

# Populate table
for (i in 1:length(scores)) {
  score <- scores[i]
  class_col <- class_cols[i]
  classes <- classes_list[[score]]
  
  # Add mean±SD row
  mean_sd_all <- calc_mean_sd(score, data)
  mean_sd_death <- calc_mean_sd(score, data[data$DEATH == 1, ])
  mean_sd_survive <- calc_mean_sd(score, data[data$DEATH == 0, ])
  p_death <- calc_p_value(score, "DEATH", data)
  mean_sd_icu <- calc_mean_sd(score, data[data$ICU == 1, ])
  mean_sd_non_icu <- calc_mean_sd(score, data[data$ICU == 0, ])
  p_icu <- calc_p_value(score, "ICU", data)
  
  table_df <- rbind(table_df, data.frame(
    Scores = paste0(score, " (mean±SD)"),
    All_Patients = mean_sd_all,
    DEATH = mean_sd_death,
    Survive = mean_sd_survive,
    P_Death_Survive = p_death,
    ICU_Admission = mean_sd_icu,
    Non_ICU_Admission = mean_sd_non_icu,
    P_ICU_NonICU = p_icu
  ))
  
  # Add class rows
  for (cls in classes) {
    n_pct_all <- calc_n_pct(class_col, cls, data)
    n_pct_death <- calc_n_pct(class_col, cls, data[data$DEATH == 1, ])
    n_pct_survive <- calc_n_pct(class_col, cls, data[data$DEATH == 0, ])
    n_pct_icu <- calc_n_pct(class_col, cls, data[data$ICU == 1, ])
    n_pct_non_icu <- calc_n_pct(class_col, cls, data[data$ICU == 0, ])
    
    table_df <- rbind(table_df, data.frame(
      Scores = paste0("Class ", cls),
      All_Patients = n_pct_all,
      DEATH = n_pct_death,
      Survive = n_pct_survive,
      P_Death_Survive = "",
      ICU_Admission = n_pct_icu,
      Non_ICU_Admission = n_pct_non_icu,
      P_ICU_NonICU = ""
    ))
  }
}

# Display the table
kable(table_df, caption = "Distribution of Patients Across Severity Score Classifications")
Distribution of Patients Across Severity Score Classifications
Scores All_Patients DEATH Survive P_Death_Survive ICU_Admission Non_ICU_Admission P_ICU_NonICU
CURB65 (mean±SD) 1.35±0.98 2.59±1.18 1.26±0.9 P<.001 2.36±1.07 1.07±0.74 P<.001
Class 1 145 (33.5) 3 (2.9) 142 (34.5) 12 (7.6) 133 (37.2)
Class 2 106 (24.5) 9 (8.6) 97 (23.6) 30 (19) 76 (21.2)
Class 3 26 (6) 7 (6.7) 19 (4.6) 23 (14.6) 3 (0.8)
PSI (mean±SD) 97.85±33.06 150.95±27.47 94.29±30.26 P<.001 132.21±28.64 88.48±27.56 P<.001
Class 1 19 (4.4) 0 (0) 19 (4.6) 0 (0) 19 (5.3)
Class 2 53 (12.2) 0 (0) 53 (12.9) 2 (1.3) 51 (14.2)
Class 3 86 (19.9) 1 (1) 85 (20.7) 1 (0.6) 85 (23.7)
Class 4 139 (32.1) 3 (2.9) 136 (33.1) 36 (22.8) 103 (28.8)
Class 5 53 (12.2) 18 (17.1) 35 (8.5) 36 (22.8) 17 (4.7)
NEWS (mean±SD) 4.56±3.54 9.23±3.05 4.24±3.35 P<.001 8.16±3.47 3.57±2.86 P<.001
Class 1 190 (43.9) 1 (1) 189 (46) 10 (6.3) 180 (50.3)
Class 2 52 (12) 4 (3.8) 48 (11.7) 12 (7.6) 40 (11.2)
Class 3 108 (24.9) 17 (16.2) 91 (22.1) 53 (33.5) 55 (15.4)
NEWSL (mean±SD) 6.35±4.16 11.38±3.37 6.01±3.99 P<.001 10.53±4.45 5.21±3.25 P<.001
Class 1 100 (23.1) 0 (0) 100 (24.3) 4 (2.5) 96 (26.8)
Class 2 68 (15.7) 1 (1) 67 (16.3) 5 (3.2) 63 (17.6)
Class 3 69 (15.9) 2 (1.9) 67 (16.3) 12 (7.6) 57 (15.9)
Class 4 113 (26.1) 19 (18.1) 94 (22.9) 54 (34.2) 59 (16.5)
SMARTCOP (mean±SD) 3.32±1.71 5±1.51 3.21±1.66 P<.001 4.87±1.77 2.9±1.43 P<.001
Class 1 117 (27) 0 (0) 117 (28.5) 3 (1.9) 114 (31.8)
Class 2 156 (36) 10 (9.5) 146 (35.5) 33 (20.9) 123 (34.4)
Class 3 61 (14.1) 9 (8.6) 52 (12.7) 25 (15.8) 36 (10.1)
Class 4 16 (3.7) 3 (2.9) 13 (3.2) 14 (8.9) 2 (0.6)