Table2

Introduction

This report analyzes pneumonia severity scores (CURB65, PSI, NEWS, NEWSL, SMARTCOP) in patients diagnosed with pneumonia. It includes: 1. A table summarizing the distribution of patients across score classifications, with mean ± SD and p-values comparing death vs. survival and ICU vs. non-ICU admission. 2. An ROC plot and AUC summary table for predicting death.

Load Required Packages and Data

# Load required packages
library(readxl)  # For reading Excel files
library(pROC)    # For ROC analysis
library(knitr)   # For table formatting

# Read the Excel file
file_path <- "C:/Users/Lenovo/Documents/Zalo Received Files/Danh Sach Benh Nhan viêm phổi 2022 fix 20_3_25.xls"
data <- read_excel(file_path)

# Standardize column names
colnames(data) <- make.names(colnames(data))
if ("CURB.65" %in% colnames(data)) {
  names(data)[names(data) == "CURB.65"] <- "CURB65"
}

# Prepare the data
data$DEATH  <- as.numeric(data$DEATH)  # AQ column: 0 = survive, 1 = death
data$ICU    <- as.numeric(data$ICU)    # DM column: 0 = non-ICU, 1 = ICU
data$CURB65 <- as.numeric(data$CURB65) # AR column: CURB-65 score
data$PSI    <- as.numeric(data$PSI)    # AT column: PSI score
data$NEWS   <- as.numeric(data$NEWS)   # DJ column: NEWS score
data$NEWSL  <- as.numeric(data$NEWSL)  # DL column: NEWSL score
data$SMARTCOP <- as.numeric(data$SMARTCOP) # Assumed column: SMARTCOP score
data$CURB65_CLASS <- as.factor(data$CURB65) # Assumed column: CURB-65 classes
data$PSI_CLASS <- as.factor(data$PSI_CLASS)       # Assumed column: PSI classes
data$NEWS_CLASS <- as.factor(data$NEWS_CLASS)     # Assumed column: NEWS classes
data$NEWSL_CLASS <- as.factor(data$NEWSL_CLASS)   # Assumed column: NEWSL classes
data$SMARTCOP_CLASS <- as.factor(data$SMARTCOP_CLASS) # Assumed column: SMARTCOP classes

Distribution of Patients Across Severity Score Classifications

The table below shows the mean ± SD for each score and the number (percentage) of patients in each classification, across all patients, death vs. survival, and ICU vs. non-ICU admission groups. P-values compare death vs. survival and ICU vs. non-ICU admission for each score.

# Calculate group sizes
n_all <- nrow(data)
n_death <- sum(data$DEATH == 1, na.rm = TRUE)
n_survived <- sum(data$DEATH == 0, na.rm = TRUE)
n_icu <- sum(data$ICU == 1, na.rm = TRUE)
n_non_icu <- sum(data$ICU == 0, na.rm = TRUE)

# Define scores and corresponding classification columns
scores <- c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")
class_cols <- c("CURB65_CLASS", "PSI_CLASS", "NEWS_CLASS", "NEWSL_CLASS", "SMARTCOP_CLASS")

# Define class levels for each score
classes_list <- list(
  CURB65 = 1:3,
  PSI = 1:5,
  NEWS = 1:3,
  NEWSL = 1:4,
  SMARTCOP = 1:4
)

# Helper functions
calc_mean_sd <- function(score, subset) {
  mean_val <- mean(subset[[score]], na.rm = TRUE)
  sd_val <- sd(subset[[score]], na.rm = TRUE)
  paste0(round(mean_val, 2), "±", round(sd_val, 2))
}

calc_n_pct <- function(class_col, class_value, subset) {
  n <- sum(subset[[class_col]] == class_value, na.rm = TRUE)
  total <- nrow(subset)
  if (total == 0) return("0 (0.0)")  # Handle empty subsets
  pct <- (n / total) * 100
  paste0(n, " (", round(pct, 1), ")")
}

calc_p_value <- function(score, group_var, data) {
  group1 <- data[[score]][data[[group_var]] == 1]
  group0 <- data[[score]][data[[group_var]] == 0]
  if (length(group1) < 2 || length(group0) < 2) return("NA")  # Avoid t-test errors
  test <- t.test(group1, group0)
  if (test$p.value < 0.001) {
    return("P<.001")
  } else {
    return(sprintf("%.3f", test$p.value))
  }
}

# Initialize table data frame
table_df <- data.frame(
  Scores = character(),
  All_Patients = character(),
  DEATH = character(),
  Survive = character(),
  P_Death_Survive = character(),
  ICU_Admission = character(),
  Non_ICU_Admission = character(),
  P_ICU_NonICU = character(),
  stringsAsFactors = FALSE
)

# Populate table
for (i in 1:length(scores)) {
  score <- scores[i]
  class_col <- class_cols[i]
  classes <- classes_list[[score]]
  
  # Add mean±SD row
  mean_sd_all <- calc_mean_sd(score, data)
  mean_sd_death <- calc_mean_sd(score, data[data$DEATH == 1, ])
  mean_sd_survive <- calc_mean_sd(score, data[data$DEATH == 0, ])
  p_death <- calc_p_value(score, "DEATH", data)
  mean_sd_icu <- calc_mean_sd(score, data[data$ICU == 1, ])
  mean_sd_non_icu <- calc_mean_sd(score, data[data$ICU == 0, ])
  p_icu <- calc_p_value(score, "ICU", data)
  
  table_df <- rbind(table_df, data.frame(
    Scores = paste0(score, " (mean±SD)"),
    All_Patients = mean_sd_all,
    DEATH = mean_sd_death,
    Survive = mean_sd_survive,
    P_Death_Survive = p_death,
    ICU_Admission = mean_sd_icu,
    Non_ICU_Admission = mean_sd_non_icu,
    P_ICU_NonICU = p_icu
  ))
  
  # Add class rows
  for (cls in classes) {
    n_pct_all <- calc_n_pct(class_col, cls, data)
    n_pct_death <- calc_n_pct(class_col, cls, data[data$DEATH == 1, ])
    n_pct_survive <- calc_n_pct(class_col, cls, data[data$DEATH == 0, ])
    n_pct_icu <- calc_n_pct(class_col, cls, data[data$ICU == 1, ])
    n_pct_non_icu <- calc_n_pct(class_col, cls, data[data$ICU == 0, ])
    
    table_df <- rbind(table_df, data.frame(
      Scores = paste0("Class ", cls),
      All_Patients = n_pct_all,
      DEATH = n_pct_death,
      Survive = n_pct_survive,
      P_Death_Survive = "",
      ICU_Admission = n_pct_icu,
      Non_ICU_Admission = n_pct_non_icu,
      P_ICU_NonICU = ""
    ))
  }
}

# Display the table
kable(table_df, caption = "Distribution of Patients Across Severity Score Classifications")

Distribution of Patients Across Severity Score Classifications
Scores	All_Patients	DEATH	Survive	P_Death_Survive	ICU_Admission	Non_ICU_Admission	P_ICU_NonICU
CURB65 (mean±SD)	1.35±0.98	2.59±1.18	1.26±0.9	P<.001	2.36±1.07	1.07±0.74	P<.001
Class 1	145 (33.5)	3 (2.9)	142 (34.5)		12 (7.6)	133 (37.2)
Class 2	106 (24.5)	9 (8.6)	97 (23.6)		30 (19)	76 (21.2)
Class 3	26 (6)	7 (6.7)	19 (4.6)		23 (14.6)	3 (0.8)
PSI (mean±SD)	97.85±33.06	150.95±27.47	94.29±30.26	P<.001	132.21±28.64	88.48±27.56	P<.001
Class 1	19 (4.4)	0 (0)	19 (4.6)		0 (0)	19 (5.3)
Class 2	53 (12.2)	0 (0)	53 (12.9)		2 (1.3)	51 (14.2)
Class 3	86 (19.9)	1 (1)	85 (20.7)		1 (0.6)	85 (23.7)
Class 4	139 (32.1)	3 (2.9)	136 (33.1)		36 (22.8)	103 (28.8)
Class 5	53 (12.2)	18 (17.1)	35 (8.5)		36 (22.8)	17 (4.7)
NEWS (mean±SD)	4.56±3.54	9.23±3.05	4.24±3.35	P<.001	8.16±3.47	3.57±2.86	P<.001
Class 1	190 (43.9)	1 (1)	189 (46)		10 (6.3)	180 (50.3)
Class 2	52 (12)	4 (3.8)	48 (11.7)		12 (7.6)	40 (11.2)
Class 3	108 (24.9)	17 (16.2)	91 (22.1)		53 (33.5)	55 (15.4)
NEWSL (mean±SD)	6.35±4.16	11.38±3.37	6.01±3.99	P<.001	10.53±4.45	5.21±3.25	P<.001
Class 1	100 (23.1)	0 (0)	100 (24.3)		4 (2.5)	96 (26.8)
Class 2	68 (15.7)	1 (1)	67 (16.3)		5 (3.2)	63 (17.6)
Class 3	69 (15.9)	2 (1.9)	67 (16.3)		12 (7.6)	57 (15.9)
Class 4	113 (26.1)	19 (18.1)	94 (22.9)		54 (34.2)	59 (16.5)
SMARTCOP (mean±SD)	3.32±1.71	5±1.51	3.21±1.66	P<.001	4.87±1.77	2.9±1.43	P<.001
Class 1	117 (27)	0 (0)	117 (28.5)		3 (1.9)	114 (31.8)
Class 2	156 (36)	10 (9.5)	146 (35.5)		33 (20.9)	123 (34.4)
Class 3	61 (14.1)	9 (8.6)	52 (12.7)		25 (15.8)	36 (10.1)
Class 4	16 (3.7)	3 (2.9)	13 (3.2)		14 (8.9)	2 (0.6)

Table2

Tung Lam

2025-03-31

Introduction

Load Required Packages and Data

Distribution of Patients Across Severity Score Classifications