📥 Load Libraries and Dataset

library(tidyverse)
library(readxl)
library(rstatix)
library(ggpubr)
library(broom)
library(pROC)
library(caret)
library(rmda)
library(kableExtra)
library(MASS)
library(dcurves)

covid_data <- read_excel("covidpredict-1.xlsx")
covid_data <- covid_data %>% mutate(mortality = factor(mortality, levels = c(0,1), labels = c("Alive", "Dead")))
str(covid_data)

## tibble [5,782 × 11] (S3: tbl_df/tbl/data.frame)
##  $ rr         : num [1:5782] 33 25 26 14 17 42 19 22 33 22 ...
##  $ oxygen_sat : num [1:5782] 90 92 89 80 90 92 91 92 99 94 ...
##  $ urea       : num [1:5782] 26.2 21.3 5.6 24.4 6.7 17.5 4.1 21.2 3.7 10.3 ...
##  $ crp        : num [1:5782] 130 171 164 137 296 ...
##  $ gcs        : num [1:5782] 13 15 15 15 15 13 15 15 15 15 ...
##  $ age        : num [1:5782] 54 86 86 88 86 55 76 85 70 91 ...
##  $ female     : num [1:5782] 0 0 1 0 0 1 0 1 0 0 ...
##  $ comorbidity: num [1:5782] 2 2 1 2 0 1 1 1 2 2 ...
##  $ date       : POSIXct[1:5782], format: "2020-02-06 00:00:00" "2020-02-06 00:42:14" ...
##  $ set        : chr [1:5782] "dev" "dev" "dev" "dev" ...
##  $ mortality  : Factor w/ 2 levels "Alive","Dead": 1 2 2 2 1 2 2 1 2 1 ...

📊 Data distribution

numerical_vars <- c("age", "rr", "oxygen_sat", "gcs", "urea", "crp")
categorical_vars <- c("female", "comorbidity", "set")


boxplots <- lapply(numerical_vars, function(var) {
  ggplot(covid_data, aes(x = mortality, y = .data[[var]], fill = mortality)) +
    geom_boxplot(outlier.color = "red") +
    labs(title = paste("Boxplot of", var, "by Mortality")) +
    theme_minimal()
})
ggarrange(plotlist = boxplots, ncol = 2)

## $`1`

## 
## $`2`

## 
## $`3`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

📊 Density Plot

density_plots <- lapply(numerical_vars, function(var) {
  ggplot(covid_data, aes(x = .data[[var]], fill = mortality, color = mortality)) +
    geom_density(alpha = 0.5) +
    labs(title = paste("Density Plot of", var)) +
    theme_minimal()
})
ggarrange(plotlist = density_plots, ncol = 2)

## $`1`

## 
## $`2`

## 
## $`3`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

🔬 Normality Test: Shapiro-Wilk

normality_results <- map_dfr(numerical_vars, function(var) {
  train_vals <- na.omit(covid_data %>% filter(set == "dev") %>% pull(var))
  val_vals <- na.omit(covid_data %>% filter(set == "val") %>% pull(var))
  tibble(
    Variable = var,
    Shapiro_W_p_train = shapiro.test(train_vals)$p.value,
    Shapiro_W_stat_train = shapiro.test(train_vals)$statistic,
    Shapiro_W_p_val = shapiro.test(val_vals)$p.value,
    Shapiro_W_stat_val = shapiro.test(val_vals)$statistic
  )
})
kable(normality_results, digits = 4) %>% kable_styling()

Variable	Shapiro_W_stat_train	Shapiro_W_stat_val
age	0.9663	0.9652
rr	0.9734	0.9749
oxygen_sat	0.9708	0.9739
gcs	0.5469	0.5482
urea	0.8704	0.8846
crp	0.9135	0.9203

📉 Mann-Whitney U Test

mannwhitney_results <- map_df(numerical_vars, function(var) {
  res <- wilcox_test(covid_data, as.formula(paste(var, "~ mortality")))
  res %>% mutate(Variable = var) 
})
kable(mannwhitney_results, digits = 3) %>% kable_styling()

.y.	group1	group2	n1	n2	statistic	Variable
age	Alive	Dead	3527	2255	2729391	age
rr	Alive	Dead	3527	2255	2874189	rr
oxygen_sat	Alive	Dead	3527	2255	4986468	oxygen_sat
gcs	Alive	Dead	3527	2255	4689462	gcs
urea	Alive	Dead	3527	2255	2672199	urea
crp	Alive	Dead	3527	2255	2868646	crp

📊 Chi-square Test with Stacked Bar Plot

cat_plots <- lapply(categorical_vars, function(var) {
  ggplot(covid_data, aes(x = .data[[var]], fill = mortality)) +
    geom_bar(position = "fill") +
    labs(title = paste("Stacked Bar Plot of", var, "by Mortality")) +
    ylab("Proportion") +
    theme_minimal()
})
ggarrange(plotlist = cat_plots, ncol = 2)

## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

comorbidity_test <-table(covid_data$comorbidity, covid_data$mortality)
chisq.test(comorbidity_test)

## 
##  Pearson's Chi-squared test
## 
## data:  comorbidity_test
## X-squared = 52.511, df = 2, p-value = 3.958e-12

female_test <-table(covid_data$female, covid_data$mortality)
chisq.test(female_test)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  female_test
## X-squared = 1.3282, df = 1, p-value = 0.2491

The data-set is composed by 6 numerical variables and 2 categorical variables (numbers of comorbidities has been treated as categorical for simplicity).

Shapiro-Wilk tests showed that none of the numerical variables is normally distributed.

Mann-Whitney U-test and Chi-Squared test showed that all the variables have a significant effect on mortality except sex.

🤖 Logistic Regression

train_data <- covid_data %>% filter(set == "dev")
test_data <- covid_data %>% filter(set == "val")

model <- glm(mortality ~ age + rr + oxygen_sat + gcs + urea + crp + female + comorbidity, 
             data = train_data, family = binomial)

model_table <- tidy(model) %>%
  mutate(across(where(is.numeric), ~signif(.x, 4)))
kable(model_table, digits = 4) %>% kable_styling()

term	estimate	std.error	statistic	p.value
(Intercept)	0.2427	0.7869	0.3085	0.7577
age	0.0463	0.0027	17.0300	0.0000
rr	0.0287	0.0047	6.1590	0.0000
oxygen_sat	-0.0496	0.0073	-6.8000	0.0000
gcs	-0.1126	0.0255	-4.4190	0.0000
urea	0.0355	0.0040	8.7720	0.0000
crp	0.0027	0.0004	6.4120	0.0000
female	-0.1424	0.0791	-1.8000	0.0718
comorbidity	0.3176	0.0488	6.5020	0.0000

Logistic regression model results confirm what expected: sex is the only variable with no significant effect of mortality (however, p-value is very close to significance being p=0.07).

📈 ROC Curve and Confusion Matrix

thrreshold = 0.5

# Predict probabilities and classes
prob_test <- predict(model, newdata = test_data, type = "response")
pred_class <- ifelse(prob_test > 0.5, "Dead", "Alive") %>% factor(levels = c("Alive", "Dead"))

# ROC Curve and AUC
roc_curve <- roc(test_data$mortality, prob_test)
auc_value <- auc(roc_curve)
plot(roc_curve, main = "ROC Curve - Validation Set")

cat("AUC =", round(auc_value, 3), "\n")

## AUC = 0.775

# Confusion Matrix
conf_matrix <- confusionMatrix(pred_class, test_data$mortality, positive = "Dead")
print(conf_matrix)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Dead
##      Alive  1134  421
##      Dead    209  472
##                                           
##                Accuracy : 0.7182          
##                  95% CI : (0.6991, 0.7368)
##     No Information Rate : 0.6006          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3884          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.5286          
##             Specificity : 0.8444          
##          Pos Pred Value : 0.6931          
##          Neg Pred Value : 0.7293          
##              Prevalence : 0.3994          
##          Detection Rate : 0.2111          
##    Detection Prevalence : 0.3046          
##       Balanced Accuracy : 0.6865          
##                                           
##        'Positive' Class : Dead            
##

cm_df <- as.data.frame(as.table(conf_matrix))

ggplot(cm_df, aes(Prediction, Reference, fill = Freq)) + 
  geom_tile() + 
  geom_text(aes(label = Freq), vjust = 1) + 
  scale_fill_gradient(low = "white", high = "blue") + 
  labs(x = "Predicted", y = "Actual", fill = "Frequency") +
  theme_minimal() + 
  ggtitle("Confusion Matrix Heatmap")

# Precision, Recall
precision_value <- precision(test_data$mortality, pred_class, positive = "Dead")
recall_value <- recall(test_data$mortality, pred_class, positive = "Dead")

cat("Precision: ", round(precision_value, 3), "\n")

## Precision:  0.844

cat("Recall: ", round(recall_value, 3), "\n")

## Recall:  0.729

threshold = 0.1

# Predict probabilities and classes
prob_test <- predict(model, newdata = test_data, type = "response")
pred_class <- ifelse(prob_test > 0.1, "Dead", "Alive") %>% factor(levels = c("Alive", "Dead"))

# Confusion Matrix
conf_matrix <- confusionMatrix(pred_class, test_data$mortality, positive = "Dead")
print(conf_matrix)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Dead
##      Alive   157   10
##      Dead   1186  883
##                                          
##                Accuracy : 0.4651         
##                  95% CI : (0.4443, 0.486)
##     No Information Rate : 0.6006         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.0866         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.9888         
##             Specificity : 0.1169         
##          Pos Pred Value : 0.4268         
##          Neg Pred Value : 0.9401         
##              Prevalence : 0.3994         
##          Detection Rate : 0.3949         
##    Detection Prevalence : 0.9253         
##       Balanced Accuracy : 0.5529         
##                                          
##        'Positive' Class : Dead           
##

cm_df <- as.data.frame(as.table(conf_matrix))

ggplot(cm_df, aes(Prediction, Reference, fill = Freq)) + 
  geom_tile() + 
  geom_text(aes(label = Freq), vjust = 1) + 
  scale_fill_gradient(low = "white", high = "blue") + 
  labs(x = "Predicted", y = "Actual", fill = "Frequency") +
  theme_minimal() + 
  ggtitle("Confusion Matrix Heatmap")

# Precision, Recall
precision_value <- precision(test_data$mortality, pred_class, positive = "Dead")
recall_value <- recall(test_data$mortality, pred_class, positive = "Dead")

cat("Precision: ", round(precision_value, 3), "\n")

## Precision:  0.117

cat("Recall: ", round(recall_value, 3), "\n")

## Recall:  0.94

At the clinically relevant threshold of 0.10 (ICU admission risk cutoff), the model achieves:

High sensitivity (recall = 0.99) for high-risk patients, which is valuable for not missing severe cases.
Very low precision (0.42) and overall low accuracy (0.46) indicate a high false positive rate, risking unnecessary ICU admissions.

In contrast, at a 0.50 threshold:

Performance is more balanced (accuracy = 0.73), but this is less clinically relevant for early ICU triage.

📊 Decision Curve Analysis

covid_data <- covid_data %>%
  mutate(
    pred_prob = predict(model, newdata = covid_data, type = "response")
  )

dca_result <- dca( mortality ~ pred_prob,
                  data = covid_data,
                  thresholds = seq(0, 0.50, by = 0.01),
                  label = list(pred_prob = "Prediction Model"))

plot(dca_result, smooth = TRUE, main = "Multivariable Decision Curve Analysis")

The decision curve shows that the prediction model provides higher net benefit than both “treat all” and “treat none” strategies across all threshold probabilities. This indicates that the model is clinically useful for guiding treatment decisions within this range, offering improved patient stratification and reducing unnecessary interventions.

📊 EVPI

bootstrap_evpi_nb <- function(data, outcome_var = "mortality", pred_probs, pt = 0.1, n_boot = 1000, seed = 42) {
  set.seed(seed)
  N <- nrow(data)
  odds_pt <- pt / (1 - pt)

  net_benefit <- function(pred, true) {
    TP <- sum(pred == 1 & true == 1)
    FP <- sum(pred == 1 & true == 0)
    NB <- (TP / N) - (FP / N) * odds_pt
    return(NB)
  }

  outcome <- data[[outcome_var]]
  pred_labels <- ifelse(pred_probs >= pt, 1, 0)
  NB_all <- net_benefit(rep(1, N), outcome)
  NB_none <- 0  # no one treated

  boot_max_nb <- numeric(n_boot)
  nb_model_boot <- numeric(n_boot)

  for (i in 1:n_boot) {
    idx <- sample(1:N, N, replace = TRUE)
    y_boot <- outcome[idx]
    p_boot <- pred_probs[idx]
    pred_boot <- ifelse(p_boot >= pt, 1, 0)

    nb_model <- net_benefit(pred_boot, y_boot)
    nb_all <- net_benefit(rep(1, N), y_boot)

    nb_model_boot[i] <- nb_model
    boot_max_nb[i] <- max(nb_model, nb_all, NB_none)
  }

  EVPI <- mean(boot_max_nb) - max(mean(nb_model_boot), NB_all, NB_none)

  return(list(
    EVPI = EVPI,
    mean_model_nb = mean(nb_model_boot),
    NB_all = NB_all,
    NB_none = NB_none,
    expected_max_nb = mean(boot_max_nb)
  ))
}

pred_probs <- predict(model, newdata = covid_data, type = "response")


result <- bootstrap_evpi_nb(data = covid_data, outcome_var = "mortality", pred_probs = pred_probs, pt = 0.1)

print(result$EVPI)

## [1] 0

evpi_dist <- result$expected_max_nb - pmax(result$mean_model_nb, result$NB_all, result$NB_none)
quantile(evpi_dist, c(0.025, 0.975))

##  2.5% 97.5% 
##     0     0

EVPI = 0.00, suggesting limited added value from further model refinement under current conditions.

Discussion

Recommendation

While the model shows good discrimination and performs well overall, UK-specific development call for external validation in Dutch hospitals. Differences in admission practices, resource availability, and patient profiles may affect real-world performance.

Conclusion:

The model can support early ICU decision-making, but external validation is essential before deployment in the Netherlands to ensure safe, effective use.

COVID-19 Mortality Prediction Analysis

Dario_De_Caro