This report analyzes pneumonia severity scores (CURB65, PSI, NEWS, NEWSL, SMARTCOP) in patients diagnosed with pneumonia. It includes: 1. A table summarizing the distribution of patients across score classifications, with mean ± SD and p-values comparing death vs. survival and ICU vs. non-ICU admission. 2. An ROC plot and AUC summary table for predicting death.
# Load required packages
library(readxl) # For reading Excel files
library(pROC) # For ROC analysis
library(knitr) # For table formatting
# Read the Excel file
file_path <- "C:/Users/Lenovo/Documents/Zalo Received Files/Danh Sach Benh Nhan viêm phổi 2022 fix 20_3_25.xls"
data <- read_excel(file_path)
# Standardize column names
colnames(data) <- make.names(colnames(data))
if ("CURB.65" %in% colnames(data)) {
names(data)[names(data) == "CURB.65"] <- "CURB65"
}
# Prepare the data
data$DEATH <- as.numeric(data$DEATH) # AQ column: 0 = survive, 1 = death
data$ICU <- as.numeric(data$ICU) # DM column: 0 = non-ICU, 1 = ICU
data$CURB65 <- as.numeric(data$CURB65) # AR column: CURB-65 score
data$PSI <- as.numeric(data$PSI) # AT column: PSI score
data$NEWS <- as.numeric(data$NEWS) # DJ column: NEWS score
data$NEWSL <- as.numeric(data$NEWSL) # DL column: NEWSL score
data$SMARTCOP <- as.numeric(data$SMARTCOP) # Assumed column: SMARTCOP score
data$CURB65_CLASS <- as.factor(data$CURB65) # Assumed column: CURB-65 classes
data$PSI_CLASS <- as.factor(data$PSI_CLASS) # Assumed column: PSI classes
data$NEWS_CLASS <- as.factor(data$NEWS_CLASS) # Assumed column: NEWS classes
data$NEWSL_CLASS <- as.factor(data$NEWSL_CLASS) # Assumed column: NEWSL classes
data$SMARTCOP_CLASS <- as.factor(data$SMARTCOP_CLASS) # Assumed column: SMARTCOP classes
The table below shows the mean ± SD for each score and the number (percentage) of patients in each classification, across all patients, death vs. survival, and ICU vs. non-ICU admission groups. P-values compare death vs. survival and ICU vs. non-ICU admission for each score.
# Calculate group sizes
n_all <- nrow(data)
n_death <- sum(data$DEATH == 1, na.rm = TRUE)
n_survived <- sum(data$DEATH == 0, na.rm = TRUE)
n_icu <- sum(data$ICU == 1, na.rm = TRUE)
n_non_icu <- sum(data$ICU == 0, na.rm = TRUE)
# Define scores and corresponding classification columns
scores <- c("CURB65", "PSI", "NEWS", "NEWSL", "SMARTCOP")
class_cols <- c("CURB65_CLASS", "PSI_CLASS", "NEWS_CLASS", "NEWSL_CLASS", "SMARTCOP_CLASS")
# Define class levels for each score
classes_list <- list(
CURB65 = 1:3,
PSI = 1:5,
NEWS = 1:3,
NEWSL = 1:4,
SMARTCOP = 1:4
)
# Helper functions
calc_mean_sd <- function(score, subset) {
mean_val <- mean(subset[[score]], na.rm = TRUE)
sd_val <- sd(subset[[score]], na.rm = TRUE)
paste0(round(mean_val, 2), "±", round(sd_val, 2))
}
calc_n_pct <- function(class_col, class_value, subset) {
n <- sum(subset[[class_col]] == class_value, na.rm = TRUE)
total <- nrow(subset)
if (total == 0) return("0 (0.0)") # Handle empty subsets
pct <- (n / total) * 100
paste0(n, " (", round(pct, 1), ")")
}
calc_p_value <- function(score, group_var, data) {
group1 <- data[[score]][data[[group_var]] == 1]
group0 <- data[[score]][data[[group_var]] == 0]
if (length(group1) < 2 || length(group0) < 2) return("NA") # Avoid t-test errors
test <- t.test(group1, group0)
if (test$p.value < 0.001) {
return("P<.001")
} else {
return(sprintf("%.3f", test$p.value))
}
}
# Initialize table data frame
table_df <- data.frame(
Scores = character(),
All_Patients = character(),
DEATH = character(),
Survive = character(),
P_Death_Survive = character(),
ICU_Admission = character(),
Non_ICU_Admission = character(),
P_ICU_NonICU = character(),
stringsAsFactors = FALSE
)
# Populate table
for (i in 1:length(scores)) {
score <- scores[i]
class_col <- class_cols[i]
classes <- classes_list[[score]]
# Add mean±SD row
mean_sd_all <- calc_mean_sd(score, data)
mean_sd_death <- calc_mean_sd(score, data[data$DEATH == 1, ])
mean_sd_survive <- calc_mean_sd(score, data[data$DEATH == 0, ])
p_death <- calc_p_value(score, "DEATH", data)
mean_sd_icu <- calc_mean_sd(score, data[data$ICU == 1, ])
mean_sd_non_icu <- calc_mean_sd(score, data[data$ICU == 0, ])
p_icu <- calc_p_value(score, "ICU", data)
table_df <- rbind(table_df, data.frame(
Scores = paste0(score, " (mean±SD)"),
All_Patients = mean_sd_all,
DEATH = mean_sd_death,
Survive = mean_sd_survive,
P_Death_Survive = p_death,
ICU_Admission = mean_sd_icu,
Non_ICU_Admission = mean_sd_non_icu,
P_ICU_NonICU = p_icu
))
# Add class rows
for (cls in classes) {
n_pct_all <- calc_n_pct(class_col, cls, data)
n_pct_death <- calc_n_pct(class_col, cls, data[data$DEATH == 1, ])
n_pct_survive <- calc_n_pct(class_col, cls, data[data$DEATH == 0, ])
n_pct_icu <- calc_n_pct(class_col, cls, data[data$ICU == 1, ])
n_pct_non_icu <- calc_n_pct(class_col, cls, data[data$ICU == 0, ])
table_df <- rbind(table_df, data.frame(
Scores = paste0("Class ", cls),
All_Patients = n_pct_all,
DEATH = n_pct_death,
Survive = n_pct_survive,
P_Death_Survive = "",
ICU_Admission = n_pct_icu,
Non_ICU_Admission = n_pct_non_icu,
P_ICU_NonICU = ""
))
}
}
# Display the table
kable(table_df, caption = "Distribution of Patients Across Severity Score Classifications")
| Scores | All_Patients | DEATH | Survive | P_Death_Survive | ICU_Admission | Non_ICU_Admission | P_ICU_NonICU |
|---|---|---|---|---|---|---|---|
| CURB65 (mean±SD) | 1.35±0.98 | 2.59±1.18 | 1.26±0.9 | P<.001 | 2.36±1.07 | 1.07±0.74 | P<.001 |
| Class 1 | 145 (33.5) | 3 (2.9) | 142 (34.5) | 12 (7.6) | 133 (37.2) | ||
| Class 2 | 106 (24.5) | 9 (8.6) | 97 (23.6) | 30 (19) | 76 (21.2) | ||
| Class 3 | 26 (6) | 7 (6.7) | 19 (4.6) | 23 (14.6) | 3 (0.8) | ||
| PSI (mean±SD) | 97.85±33.06 | 150.95±27.47 | 94.29±30.26 | P<.001 | 132.21±28.64 | 88.48±27.56 | P<.001 |
| Class 1 | 19 (4.4) | 0 (0) | 19 (4.6) | 0 (0) | 19 (5.3) | ||
| Class 2 | 53 (12.2) | 0 (0) | 53 (12.9) | 2 (1.3) | 51 (14.2) | ||
| Class 3 | 86 (19.9) | 1 (1) | 85 (20.7) | 1 (0.6) | 85 (23.7) | ||
| Class 4 | 139 (32.1) | 3 (2.9) | 136 (33.1) | 36 (22.8) | 103 (28.8) | ||
| Class 5 | 53 (12.2) | 18 (17.1) | 35 (8.5) | 36 (22.8) | 17 (4.7) | ||
| NEWS (mean±SD) | 4.56±3.54 | 9.23±3.05 | 4.24±3.35 | P<.001 | 8.16±3.47 | 3.57±2.86 | P<.001 |
| Class 1 | 190 (43.9) | 1 (1) | 189 (46) | 10 (6.3) | 180 (50.3) | ||
| Class 2 | 52 (12) | 4 (3.8) | 48 (11.7) | 12 (7.6) | 40 (11.2) | ||
| Class 3 | 108 (24.9) | 17 (16.2) | 91 (22.1) | 53 (33.5) | 55 (15.4) | ||
| NEWSL (mean±SD) | 6.35±4.16 | 11.38±3.37 | 6.01±3.99 | P<.001 | 10.53±4.45 | 5.21±3.25 | P<.001 |
| Class 1 | 100 (23.1) | 0 (0) | 100 (24.3) | 4 (2.5) | 96 (26.8) | ||
| Class 2 | 68 (15.7) | 1 (1) | 67 (16.3) | 5 (3.2) | 63 (17.6) | ||
| Class 3 | 69 (15.9) | 2 (1.9) | 67 (16.3) | 12 (7.6) | 57 (15.9) | ||
| Class 4 | 113 (26.1) | 19 (18.1) | 94 (22.9) | 54 (34.2) | 59 (16.5) | ||
| SMARTCOP (mean±SD) | 3.32±1.71 | 5±1.51 | 3.21±1.66 | P<.001 | 4.87±1.77 | 2.9±1.43 | P<.001 |
| Class 1 | 117 (27) | 0 (0) | 117 (28.5) | 3 (1.9) | 114 (31.8) | ||
| Class 2 | 156 (36) | 10 (9.5) | 146 (35.5) | 33 (20.9) | 123 (34.4) | ||
| Class 3 | 61 (14.1) | 9 (8.6) | 52 (12.7) | 25 (15.8) | 36 (10.1) | ||
| Class 4 | 16 (3.7) | 3 (2.9) | 13 (3.2) | 14 (8.9) | 2 (0.6) |