📥 Load Libraries and Dataset

library(tidyverse)
library(readxl)
library(rstatix)
library(ggpubr)
library(broom)
library(pROC)
library(caret)
library(rmda)
library(kableExtra)
library(MASS)
library(dcurves)
covid_data <- read_excel("covidpredict-1.xlsx")
covid_data <- covid_data %>% mutate(mortality = factor(mortality, levels = c(0,1), labels = c("Alive", "Dead")))
str(covid_data)
## tibble [5,782 × 11] (S3: tbl_df/tbl/data.frame)
##  $ rr         : num [1:5782] 33 25 26 14 17 42 19 22 33 22 ...
##  $ oxygen_sat : num [1:5782] 90 92 89 80 90 92 91 92 99 94 ...
##  $ urea       : num [1:5782] 26.2 21.3 5.6 24.4 6.7 17.5 4.1 21.2 3.7 10.3 ...
##  $ crp        : num [1:5782] 130 171 164 137 296 ...
##  $ gcs        : num [1:5782] 13 15 15 15 15 13 15 15 15 15 ...
##  $ age        : num [1:5782] 54 86 86 88 86 55 76 85 70 91 ...
##  $ female     : num [1:5782] 0 0 1 0 0 1 0 1 0 0 ...
##  $ comorbidity: num [1:5782] 2 2 1 2 0 1 1 1 2 2 ...
##  $ date       : POSIXct[1:5782], format: "2020-02-06 00:00:00" "2020-02-06 00:42:14" ...
##  $ set        : chr [1:5782] "dev" "dev" "dev" "dev" ...
##  $ mortality  : Factor w/ 2 levels "Alive","Dead": 1 2 2 2 1 2 2 1 2 1 ...

📊 Data distribution

numerical_vars <- c("age", "rr", "oxygen_sat", "gcs", "urea", "crp")
categorical_vars <- c("female", "comorbidity", "set")


boxplots <- lapply(numerical_vars, function(var) {
  ggplot(covid_data, aes(x = mortality, y = .data[[var]], fill = mortality)) +
    geom_boxplot(outlier.color = "red") +
    labs(title = paste("Boxplot of", var, "by Mortality")) +
    theme_minimal()
})
ggarrange(plotlist = boxplots, ncol = 2)
## $`1`

## 
## $`2`

## 
## $`3`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

📊 Density Plot

density_plots <- lapply(numerical_vars, function(var) {
  ggplot(covid_data, aes(x = .data[[var]], fill = mortality, color = mortality)) +
    geom_density(alpha = 0.5) +
    labs(title = paste("Density Plot of", var)) +
    theme_minimal()
})
ggarrange(plotlist = density_plots, ncol = 2)
## $`1`

## 
## $`2`

## 
## $`3`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

🔬 Normality Test: Shapiro-Wilk

normality_results <- map_dfr(numerical_vars, function(var) {
  train_vals <- na.omit(covid_data %>% filter(set == "dev") %>% pull(var))
  val_vals <- na.omit(covid_data %>% filter(set == "val") %>% pull(var))
  tibble(
    Variable = var,
    Shapiro_W_p_train = shapiro.test(train_vals)$p.value,
    Shapiro_W_stat_train = shapiro.test(train_vals)$statistic,
    Shapiro_W_p_val = shapiro.test(val_vals)$p.value,
    Shapiro_W_stat_val = shapiro.test(val_vals)$statistic
  )
})
kable(normality_results, digits = 4) %>% kable_styling()
Variable Shapiro_W_p_train Shapiro_W_stat_train Shapiro_W_p_val Shapiro_W_stat_val
age 0 0.9663 0 0.9652
rr 0 0.9734 0 0.9749
oxygen_sat 0 0.9708 0 0.9739
gcs 0 0.5469 0 0.5482
urea 0 0.8704 0 0.8846
crp 0 0.9135 0 0.9203

📉 Mann-Whitney U Test

mannwhitney_results <- map_df(numerical_vars, function(var) {
  res <- wilcox_test(covid_data, as.formula(paste(var, "~ mortality")))
  res %>% mutate(Variable = var) 
})
kable(mannwhitney_results, digits = 3) %>% kable_styling()
.y. group1 group2 n1 n2 statistic p Variable
age Alive Dead 3527 2255 2729391 0 age
rr Alive Dead 3527 2255 2874189 0 rr
oxygen_sat Alive Dead 3527 2255 4986468 0 oxygen_sat
gcs Alive Dead 3527 2255 4689462 0 gcs
urea Alive Dead 3527 2255 2672199 0 urea
crp Alive Dead 3527 2255 2868646 0 crp

📊 Chi-square Test with Stacked Bar Plot

cat_plots <- lapply(categorical_vars, function(var) {
  ggplot(covid_data, aes(x = .data[[var]], fill = mortality)) +
    geom_bar(position = "fill") +
    labs(title = paste("Stacked Bar Plot of", var, "by Mortality")) +
    ylab("Proportion") +
    theme_minimal()
})
ggarrange(plotlist = cat_plots, ncol = 2)
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"
comorbidity_test <-table(covid_data$comorbidity, covid_data$mortality)
chisq.test(comorbidity_test)
## 
##  Pearson's Chi-squared test
## 
## data:  comorbidity_test
## X-squared = 52.511, df = 2, p-value = 3.958e-12
female_test <-table(covid_data$female, covid_data$mortality)
chisq.test(female_test)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  female_test
## X-squared = 1.3282, df = 1, p-value = 0.2491

The data-set is composed by 6 numerical variables and 2 categorical variables (numbers of comorbidities has been treated as categorical for simplicity).

Shapiro-Wilk tests showed that none of the numerical variables is normally distributed.

Mann-Whitney U-test and Chi-Squared test showed that all the variables have a significant effect on mortality except sex.

🤖 Logistic Regression

train_data <- covid_data %>% filter(set == "dev")
test_data <- covid_data %>% filter(set == "val")

model <- glm(mortality ~ age + rr + oxygen_sat + gcs + urea + crp + female + comorbidity, 
             data = train_data, family = binomial)

model_table <- tidy(model) %>%
  mutate(across(where(is.numeric), ~signif(.x, 4)))
kable(model_table, digits = 4) %>% kable_styling()
term estimate std.error statistic p.value
(Intercept) 0.2427 0.7869 0.3085 0.7577
age 0.0463 0.0027 17.0300 0.0000
rr 0.0287 0.0047 6.1590 0.0000
oxygen_sat -0.0496 0.0073 -6.8000 0.0000
gcs -0.1126 0.0255 -4.4190 0.0000
urea 0.0355 0.0040 8.7720 0.0000
crp 0.0027 0.0004 6.4120 0.0000
female -0.1424 0.0791 -1.8000 0.0718
comorbidity 0.3176 0.0488 6.5020 0.0000

Logistic regression model results confirm what expected: sex is the only variable with no significant effect of mortality (however, p-value is very close to significance being p=0.07).

📈 ROC Curve and Confusion Matrix

thrreshold = 0.5

# Predict probabilities and classes
prob_test <- predict(model, newdata = test_data, type = "response")
pred_class <- ifelse(prob_test > 0.5, "Dead", "Alive") %>% factor(levels = c("Alive", "Dead"))

# ROC Curve and AUC
roc_curve <- roc(test_data$mortality, prob_test)
auc_value <- auc(roc_curve)
plot(roc_curve, main = "ROC Curve - Validation Set")

cat("AUC =", round(auc_value, 3), "\n")
## AUC = 0.775
# Confusion Matrix
conf_matrix <- confusionMatrix(pred_class, test_data$mortality, positive = "Dead")
print(conf_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Dead
##      Alive  1134  421
##      Dead    209  472
##                                           
##                Accuracy : 0.7182          
##                  95% CI : (0.6991, 0.7368)
##     No Information Rate : 0.6006          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.3884          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
##                                           
##             Sensitivity : 0.5286          
##             Specificity : 0.8444          
##          Pos Pred Value : 0.6931          
##          Neg Pred Value : 0.7293          
##              Prevalence : 0.3994          
##          Detection Rate : 0.2111          
##    Detection Prevalence : 0.3046          
##       Balanced Accuracy : 0.6865          
##                                           
##        'Positive' Class : Dead            
## 
cm_df <- as.data.frame(as.table(conf_matrix))

ggplot(cm_df, aes(Prediction, Reference, fill = Freq)) + 
  geom_tile() + 
  geom_text(aes(label = Freq), vjust = 1) + 
  scale_fill_gradient(low = "white", high = "blue") + 
  labs(x = "Predicted", y = "Actual", fill = "Frequency") +
  theme_minimal() + 
  ggtitle("Confusion Matrix Heatmap")

# Precision, Recall
precision_value <- precision(test_data$mortality, pred_class, positive = "Dead")
recall_value <- recall(test_data$mortality, pred_class, positive = "Dead")

cat("Precision: ", round(precision_value, 3), "\n")
## Precision:  0.844
cat("Recall: ", round(recall_value, 3), "\n")
## Recall:  0.729

threshold = 0.1

# Predict probabilities and classes
prob_test <- predict(model, newdata = test_data, type = "response")
pred_class <- ifelse(prob_test > 0.1, "Dead", "Alive") %>% factor(levels = c("Alive", "Dead"))

# Confusion Matrix
conf_matrix <- confusionMatrix(pred_class, test_data$mortality, positive = "Dead")
print(conf_matrix)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Alive Dead
##      Alive   157   10
##      Dead   1186  883
##                                          
##                Accuracy : 0.4651         
##                  95% CI : (0.4443, 0.486)
##     No Information Rate : 0.6006         
##     P-Value [Acc > NIR] : 1              
##                                          
##                   Kappa : 0.0866         
##                                          
##  Mcnemar's Test P-Value : <2e-16         
##                                          
##             Sensitivity : 0.9888         
##             Specificity : 0.1169         
##          Pos Pred Value : 0.4268         
##          Neg Pred Value : 0.9401         
##              Prevalence : 0.3994         
##          Detection Rate : 0.3949         
##    Detection Prevalence : 0.9253         
##       Balanced Accuracy : 0.5529         
##                                          
##        'Positive' Class : Dead           
## 
cm_df <- as.data.frame(as.table(conf_matrix))

ggplot(cm_df, aes(Prediction, Reference, fill = Freq)) + 
  geom_tile() + 
  geom_text(aes(label = Freq), vjust = 1) + 
  scale_fill_gradient(low = "white", high = "blue") + 
  labs(x = "Predicted", y = "Actual", fill = "Frequency") +
  theme_minimal() + 
  ggtitle("Confusion Matrix Heatmap")

# Precision, Recall
precision_value <- precision(test_data$mortality, pred_class, positive = "Dead")
recall_value <- recall(test_data$mortality, pred_class, positive = "Dead")

cat("Precision: ", round(precision_value, 3), "\n")
## Precision:  0.117
cat("Recall: ", round(recall_value, 3), "\n")
## Recall:  0.94

At the clinically relevant threshold of 0.10 (ICU admission risk cutoff), the model achieves:

In contrast, at a 0.50 threshold:

📊 Decision Curve Analysis

covid_data <- covid_data %>%
  mutate(
    pred_prob = predict(model, newdata = covid_data, type = "response")
  )

dca_result <- dca( mortality ~ pred_prob,
                  data = covid_data,
                  thresholds = seq(0, 0.50, by = 0.01),
                  label = list(pred_prob = "Prediction Model"))

plot(dca_result, smooth = TRUE, main = "Multivariable Decision Curve Analysis")

The decision curve shows that the prediction model provides higher net benefit than both “treat all” and “treat none” strategies across all threshold probabilities. This indicates that the model is clinically useful for guiding treatment decisions within this range, offering improved patient stratification and reducing unnecessary interventions.

📊 EVPI

bootstrap_evpi_nb <- function(data, outcome_var = "mortality", pred_probs, pt = 0.1, n_boot = 1000, seed = 42) {
  set.seed(seed)
  N <- nrow(data)
  odds_pt <- pt / (1 - pt)

  net_benefit <- function(pred, true) {
    TP <- sum(pred == 1 & true == 1)
    FP <- sum(pred == 1 & true == 0)
    NB <- (TP / N) - (FP / N) * odds_pt
    return(NB)
  }

  outcome <- data[[outcome_var]]
  pred_labels <- ifelse(pred_probs >= pt, 1, 0)
  NB_all <- net_benefit(rep(1, N), outcome)
  NB_none <- 0  # no one treated

  boot_max_nb <- numeric(n_boot)
  nb_model_boot <- numeric(n_boot)

  for (i in 1:n_boot) {
    idx <- sample(1:N, N, replace = TRUE)
    y_boot <- outcome[idx]
    p_boot <- pred_probs[idx]
    pred_boot <- ifelse(p_boot >= pt, 1, 0)

    nb_model <- net_benefit(pred_boot, y_boot)
    nb_all <- net_benefit(rep(1, N), y_boot)

    nb_model_boot[i] <- nb_model
    boot_max_nb[i] <- max(nb_model, nb_all, NB_none)
  }

  EVPI <- mean(boot_max_nb) - max(mean(nb_model_boot), NB_all, NB_none)

  return(list(
    EVPI = EVPI,
    mean_model_nb = mean(nb_model_boot),
    NB_all = NB_all,
    NB_none = NB_none,
    expected_max_nb = mean(boot_max_nb)
  ))
}

pred_probs <- predict(model, newdata = covid_data, type = "response")


result <- bootstrap_evpi_nb(data = covid_data, outcome_var = "mortality", pred_probs = pred_probs, pt = 0.1)

print(result$EVPI)
## [1] 0
evpi_dist <- result$expected_max_nb - pmax(result$mean_model_nb, result$NB_all, result$NB_none)
quantile(evpi_dist, c(0.025, 0.975))
##  2.5% 97.5% 
##     0     0

Discussion

Recommendation

While the model shows good discrimination and performs well overall, UK-specific development call for external validation in Dutch hospitals. Differences in admission practices, resource availability, and patient profiles may affect real-world performance.

Conclusion:

The model can support early ICU decision-making, but external validation is essential before deployment in the Netherlands to ensure safe, effective use.