library(tidyverse)
library(knitr)
library(kableExtra)
library(cowplot)
library(scales)
library(DT)
library(plotly)
library(lubridate)
library(gganimate)
library(DiagrammeR)



theme_set(theme_classic(base_size = 12) + 
              background_grid(color.major = "grey90", 
                              color.minor = "grey95", 
                              minor = "xy", major = "xy") +
              theme(legend.position = "none"))
draws <- read_csv("Data/Posterior_Draws_2020-03-24.csv") %>% 
    select(days, iter, cases_new = new_cases)

Introduction

In the previous notes we saw how to obtain posterior predictions for future numbers of new COVID-19 cases in Iceland. Here we will look at how to turn those predictions into predictions for the corresponding health-care burden, that is; hospital and ICU admissions.

Method

The figure below shows a summary of the prediction model for hospital and ICU admissions.

grViz("digraph flowchart {
      # node definitions with substituted label text
      node [fontname = Helvetica, shape = rectangle]        
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      tab3 [label = '@@3']
      tab4 [label = '@@4']
      tab5 [label = '@@5']

      # edge definitions with the node IDs
      tab1 -> tab2 -> tab3 -> tab4 -> tab5;
      }

      [1]: 'Obtain posterior predictions for daily diagnosed cases'
      [2]: 'Split those predictions into age-groups'
      [3]: 'Sample hospital admissions from casecounts within each age-group'
      [4]: 'Sample ICU admissions from hospital admissions within each age-group'
      [5]: 'Use guesses based on expert knowledge for how long patients are in the hospital or ICU'
      ")

1. Predictions for daily diagnosed cases

First we will need future predictions for daily diagnosed cases. We obtained them in the last notes by performing calculations with samples from the posterior distribution of parameters from our logistic model.

draws %>% 
    filter(iter == 1) %>% 
    mutate(date = ymd("2020-03-02") + days) %>% 
    select(-days) %>% 
    select(Date = date, "New diagnosed cases" = cases_new) %>% 
    kable(caption = "One of our 4000 posterior samples for diagnosed cases") %>% 
    kable_styling(bootstrap_options = c("striped", "hover")) %>% 
    scroll_box(height = "500px")
One of our 4000 posterior samples for diagnosed cases
Date New diagnosed cases
2020-03-02 3
2020-03-03 2
2020-03-04 12
2020-03-05 2
2020-03-06 5
2020-03-07 15
2020-03-08 7
2020-03-09 17
2020-03-10 14
2020-03-11 15
2020-03-12 30
2020-03-13 23
2020-03-14 30
2020-03-15 17
2020-03-16 25
2020-03-17 58
2020-03-18 49
2020-03-19 50
2020-03-20 43
2020-03-21 57
2020-03-22 25
2020-03-23 88
2020-03-24 82
2020-03-25 116
2020-03-26 66
2020-03-27 44
2020-03-28 91
2020-03-29 115
2020-03-30 140
2020-03-31 63
2020-04-01 75
2020-04-02 129
2020-04-03 189
2020-04-04 79
2020-04-05 66
2020-04-06 125
2020-04-07 101
2020-04-08 157
2020-04-09 157
2020-04-10 124
2020-04-11 144
2020-04-12 85
2020-04-13 63
2020-04-14 85
2020-04-15 75
2020-04-16 78
2020-04-17 98
2020-04-18 32
2020-04-19 29
2020-04-20 57
2020-04-21 72
2020-04-22 27
2020-04-23 35
2020-04-24 24
2020-04-25 29
2020-04-26 16
2020-04-27 25
2020-04-28 7
2020-04-29 6
2020-04-30 7
2020-05-01 16
2020-05-02 2
2020-05-03 9
2020-05-04 21
2020-05-05 3
2020-05-06 2
2020-05-07 6
2020-05-08 8
2020-05-09 1
2020-05-10 2
2020-05-11 1
2020-05-12 1
2020-05-13 2
2020-05-14 0
2020-05-15 3
2020-05-16 1
2020-05-17 4
2020-05-18 1
2020-05-19 0
2020-05-20 4
2020-05-21 1
2020-05-22 0
2020-05-23 0
2020-05-24 0
2020-05-25 1
2020-05-26 0
2020-05-27 0
2020-05-28 0
2020-05-29 0
2020-05-30 0
2020-05-31 0
2020-06-01 0

2. Age distribution of new cases

Then we need the age distribution of cases in Iceland. We can obtain this by taking the number of cases in each age category and dividing it by the total number of cases. Then we get the age distribution below

ages <- c("0 - 9", "10 - 19", "20 - 29", "30 - 39", "40 - 49", "50 - 59", "60 - 69", "70 - 79", "80+")
age_cases <- c(46, 149, 265, 254, 324, 264, 191, 52, 17)
age_dist <- age_cases / sum(age_cases)
hospital_dist <- c(0.001, 0.003, 0.012, 0.032, 0.049, 0.102, 0.166, 0.243, 0.273)
icu_dist <- c(0.05, 0.05, 0.05, 0.05, 0.06, 0.12, 0.27, 0.43, 0.709)
age_dat <- tibble(age = ages, 
                  age_dist = age_dist,
                  hospital_dist = hospital_dist,
                  icu_dist = icu_dist)
ggplot(age_dat, aes(age, age_dist, group = "none")) +
    geom_col() +
    background_grid(major = "none", minor = "none") +
    labs(title = "Age distribution of diagnosed cases in Iceland",
         subtitle = "Calculated simply as number in each group divided by total number") +
    theme(axis.title = element_blank())

3. Sampling future age distributions

Imagine that we have predicted 100 new cases on some day. Given the age distribution above we can split those cases randomly into age groups by sampling from the multinomial distribution. The multinomial distribution is just like the binomial, except for that there can be more than two outcomes. Instead of just a success and failure we have many outcomes, for example one outcome per age group.

samples <- matrix(0, nrow = 9, ncol = 10)
rownames(samples) <- ages
for (i in 1:10) {
    set.seed(i)
    sampled_cases <- rmultinom(n = 1, size = 120, prob = age_dist)
    samples[, i] <- as.numeric(sampled_cases)
}
samples %>% 
    as_tibble(rownames = "age") %>% 
    pivot_longer(c(-age), names_to = "sample", values_to = "value") %>% 
    ggplot(aes(age, value)) +
    geom_col() +
    transition_states(sample, transition_length = 0.01, state_length = 0.02) +
    ease_aes() +
    labs(title = "Imagined future number of cases in each age group",
         subtitle = "Obtained by sampling from the multinomial distribution") +
    theme(axis.title = element_blank())

4. Imperial College estimates of Hospital and ICU admission rates in Wuhan

Next we want to use the age-grouped samples to predict hospital and ICU admissions. Luckily for us Imperial College London published on the \(16^{th}\) of March a report on the Impact of non-pharmaceutical interventions (NPIs) to reduce COVID-19 mortality and healthcare demand. NPIs are not of direct relevance to us, but Table 1 (shown below) is of interest.

knitr::include_graphics("Table1_ICL.png")

We can use these estimates to sample possible numbers of hospital and ICU admissions based on our age-grouped numbers of cases.

5. Sampling Hospital and ICU Admissions

5.1 Hospital Admissions

To continue with our example above, imagine that we’ve predicted 120 new cases on some day and split them into age-groups as below

sampled_age_cases <- rmultinom(n = 1, size = 120, prob = age_dist)

tibble(age = ages,
       age_cases = as.numeric(sampled_age_cases)) %>% 
    ggplot(aes(age, age_cases)) +
    geom_col() +
    labs(title = "Imagined future number of cases in each age group",
         subtitle = "Obtained by sampling from the multinomial distribution") +
    theme(axis.title = element_blank())

For each age-group we can then condition on the number of cases and use the binomial distribution to sample possible numbers of hospital admissions.

samples <- matrix(0, nrow = 9, ncol = 10)
rownames(samples) <- ages
for (i in 1:10) {
        set.seed(i)
        sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
        samples[, i] <- as.numeric(sampled_cases)
    
}
samples %>% 
    as_tibble(rownames = "age") %>% 
    pivot_longer(c(-age), names_to = "sample", values_to = "value") %>% 
    ggplot(aes(age, value)) +
    geom_col() +
    transition_states(sample, transition_length = 0.01, state_length = 0.02) +
    ease_aes() +
    scale_y_continuous(breaks = 0:20) +
    labs(title = "Imagined future number of hospital admissions in each age-group",
         subtitle = "Obtained by sampling from one binomial distribution for each age-group") +
    theme(axis.title = element_blank())

Or we can look at the total number of hospital admissions, which is just the sum over all age-groups. What’s very convenient about this workflow is that we don’t need to exactly know the distribution of hospital admissions to sample from it. By performing all the necessary steps abive we can sample from the distribution without knowing exactly which distribution it is.

samples <- matrix(0, nrow = 9, ncol = 3000)
rownames(samples) <- ages
for (i in 1:3000) {
    sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
    samples[, i] <- as.numeric(sampled_cases)
    
}

total_admissions <- colSums(samples)
tibble(admissions = total_admissions) %>% 
    count(admissions) %>% 
    mutate(p = n / sum(n)) %>% 
    ggplot(aes(admissions, p)) +
    geom_area(alpha = 0.5) +
    geom_line() +
    scale_y_continuous(labels = percent) +
    scale_x_continuous(breaks = pretty_breaks(8)) +
    labs(x = "Total admissions", y = "Probability",
         title = "Distribution of daily total hospital admissions with 120 new cases and the age distribution above.")

5.2 ICU Admissions

To sample the ICU admissions we also use the binomial distribution, but now \(n\) is the number of hospital admissions and \(p\) is the ICU distribution from Imperial College.

sampled_hospital_cases <- numeric(9)
sampled_hospital_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)

samples <- matrix(0, nrow = 9, ncol = 10)
rownames(samples) <- ages
for (i in 1:10) {
    sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
    samples[, i] <- as.numeric(sampled_cases)
    
}

samples %>% 
    as_tibble(rownames = "age") %>% 
    pivot_longer(c(-age), names_to = "sample", values_to = "value") %>% 
    ggplot(aes(age, value)) +
    geom_col() +
    transition_states(sample, transition_length = 0.01, state_length = 0.02) +
    ease_aes() +
    scale_y_continuous(breaks = 0:5) +
    labs(title = "Imagined future number of ICU admissions in each age-group",
         subtitle = "Obtained by sampling from one binomial distribution for each age-group") +
    theme(axis.title = element_blank())

Or for the total number of ICU admissions.

samples <- matrix(0, nrow = 9, ncol = 3000)
rownames(samples) <- ages
for (i in 1:3000) {
    sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_hospital_cases), prob = icu_dist)
    samples[, i] <- as.numeric(sampled_cases)
    
}

total_admissions <- colSums(samples)
tibble(admissions = total_admissions) %>% 
    count(admissions) %>% 
    mutate(p = n / sum(n)) %>% 
    ggplot(aes(admissions, p)) +
    geom_area(alpha = 0.5) +
    geom_line() +
    scale_y_continuous(labels = percent) +
    scale_x_continuous(breaks = 0:10) +
    labs(x = "Total ICU admissions", y = "Probability",
         title = "Distribution of daily total ICU admissions with 120 new cases and the age+hospital distributions above.")

6. Using educated guesses for length-of-stay

Now we have predictions for new hospital admissions, but we can’t let those patients stay in the hospital forever. This is were domain expertise in science becomes extremely valuable. We worked with scientists from the Directorate of Health, National University Hospital and Center for Public Health with considerable expertise. The came up with the following assumptions:

  • Time from infection to hospital admission: 7 days
  • Time from hospital admission to ICU admission: 3 days
  • Time from infection to becoming healthy: 21 days
  • Length of hospital stay: 14 days
  • Length of ICU stay: 10 days

Using these numbers and our predictions for daily new cases and daily admissions we can calculate everything we need:

  1. Cumulative cases and admissions: The cumulative sum of new cases. So if there are 1, 6 and 7 cases then the cumulative sum is 1, 7, 14 *(1, 1 + 6, 1 + 6 + 7).
  2. Recovered cases and hospital/ICU discharges: If people take 21 days to recover then the number of recoveries on day t is just the number of new cases on day t - 21. The same goes for hospital discharges.
  3. Active cases and hospital/ICU stays: Take the cumulative number of cases and subtract the cumulative number of cases 21 days ago (recovered cases). Then you will have an estimate of the currently active cases. The same goes for hospital/ICU stays.