library(tidyverse)
library(knitr)
library(kableExtra)
library(cowplot)
library(scales)
library(DT)
library(plotly)
library(lubridate)
library(gganimate)
library(DiagrammeR)
theme_set(theme_classic(base_size = 12) +
background_grid(color.major = "grey90",
color.minor = "grey95",
minor = "xy", major = "xy") +
theme(legend.position = "none"))
draws <- read_csv("Data/Posterior_Draws_2020-03-24.csv") %>%
select(days, iter, cases_new = new_cases)
In the previous notes we saw how to obtain posterior predictions for future numbers of new COVID-19 cases in Iceland. Here we will look at how to turn those predictions into predictions for the corresponding health-care burden, that is; hospital and ICU admissions.
The figure below shows a summary of the prediction model for hospital and ICU admissions.
grViz("digraph flowchart {
# node definitions with substituted label text
node [fontname = Helvetica, shape = rectangle]
tab1 [label = '@@1']
tab2 [label = '@@2']
tab3 [label = '@@3']
tab4 [label = '@@4']
tab5 [label = '@@5']
# edge definitions with the node IDs
tab1 -> tab2 -> tab3 -> tab4 -> tab5;
}
[1]: 'Obtain posterior predictions for daily diagnosed cases'
[2]: 'Split those predictions into age-groups'
[3]: 'Sample hospital admissions from casecounts within each age-group'
[4]: 'Sample ICU admissions from hospital admissions within each age-group'
[5]: 'Use guesses based on expert knowledge for how long patients are in the hospital or ICU'
")
First we will need future predictions for daily diagnosed cases. We obtained them in the last notes by performing calculations with samples from the posterior distribution of parameters from our logistic model.
draws %>%
filter(iter == 1) %>%
mutate(date = ymd("2020-03-02") + days) %>%
select(-days) %>%
select(Date = date, "New diagnosed cases" = cases_new) %>%
kable(caption = "One of our 4000 posterior samples for diagnosed cases") %>%
kable_styling(bootstrap_options = c("striped", "hover")) %>%
scroll_box(height = "500px")
| Date | New diagnosed cases |
|---|---|
| 2020-03-02 | 3 |
| 2020-03-03 | 2 |
| 2020-03-04 | 12 |
| 2020-03-05 | 2 |
| 2020-03-06 | 5 |
| 2020-03-07 | 15 |
| 2020-03-08 | 7 |
| 2020-03-09 | 17 |
| 2020-03-10 | 14 |
| 2020-03-11 | 15 |
| 2020-03-12 | 30 |
| 2020-03-13 | 23 |
| 2020-03-14 | 30 |
| 2020-03-15 | 17 |
| 2020-03-16 | 25 |
| 2020-03-17 | 58 |
| 2020-03-18 | 49 |
| 2020-03-19 | 50 |
| 2020-03-20 | 43 |
| 2020-03-21 | 57 |
| 2020-03-22 | 25 |
| 2020-03-23 | 88 |
| 2020-03-24 | 82 |
| 2020-03-25 | 116 |
| 2020-03-26 | 66 |
| 2020-03-27 | 44 |
| 2020-03-28 | 91 |
| 2020-03-29 | 115 |
| 2020-03-30 | 140 |
| 2020-03-31 | 63 |
| 2020-04-01 | 75 |
| 2020-04-02 | 129 |
| 2020-04-03 | 189 |
| 2020-04-04 | 79 |
| 2020-04-05 | 66 |
| 2020-04-06 | 125 |
| 2020-04-07 | 101 |
| 2020-04-08 | 157 |
| 2020-04-09 | 157 |
| 2020-04-10 | 124 |
| 2020-04-11 | 144 |
| 2020-04-12 | 85 |
| 2020-04-13 | 63 |
| 2020-04-14 | 85 |
| 2020-04-15 | 75 |
| 2020-04-16 | 78 |
| 2020-04-17 | 98 |
| 2020-04-18 | 32 |
| 2020-04-19 | 29 |
| 2020-04-20 | 57 |
| 2020-04-21 | 72 |
| 2020-04-22 | 27 |
| 2020-04-23 | 35 |
| 2020-04-24 | 24 |
| 2020-04-25 | 29 |
| 2020-04-26 | 16 |
| 2020-04-27 | 25 |
| 2020-04-28 | 7 |
| 2020-04-29 | 6 |
| 2020-04-30 | 7 |
| 2020-05-01 | 16 |
| 2020-05-02 | 2 |
| 2020-05-03 | 9 |
| 2020-05-04 | 21 |
| 2020-05-05 | 3 |
| 2020-05-06 | 2 |
| 2020-05-07 | 6 |
| 2020-05-08 | 8 |
| 2020-05-09 | 1 |
| 2020-05-10 | 2 |
| 2020-05-11 | 1 |
| 2020-05-12 | 1 |
| 2020-05-13 | 2 |
| 2020-05-14 | 0 |
| 2020-05-15 | 3 |
| 2020-05-16 | 1 |
| 2020-05-17 | 4 |
| 2020-05-18 | 1 |
| 2020-05-19 | 0 |
| 2020-05-20 | 4 |
| 2020-05-21 | 1 |
| 2020-05-22 | 0 |
| 2020-05-23 | 0 |
| 2020-05-24 | 0 |
| 2020-05-25 | 1 |
| 2020-05-26 | 0 |
| 2020-05-27 | 0 |
| 2020-05-28 | 0 |
| 2020-05-29 | 0 |
| 2020-05-30 | 0 |
| 2020-05-31 | 0 |
| 2020-06-01 | 0 |
Then we need the age distribution of cases in Iceland. We can obtain this by taking the number of cases in each age category and dividing it by the total number of cases. Then we get the age distribution below
ages <- c("0 - 9", "10 - 19", "20 - 29", "30 - 39", "40 - 49", "50 - 59", "60 - 69", "70 - 79", "80+")
age_cases <- c(46, 149, 265, 254, 324, 264, 191, 52, 17)
age_dist <- age_cases / sum(age_cases)
hospital_dist <- c(0.001, 0.003, 0.012, 0.032, 0.049, 0.102, 0.166, 0.243, 0.273)
icu_dist <- c(0.05, 0.05, 0.05, 0.05, 0.06, 0.12, 0.27, 0.43, 0.709)
age_dat <- tibble(age = ages,
age_dist = age_dist,
hospital_dist = hospital_dist,
icu_dist = icu_dist)
ggplot(age_dat, aes(age, age_dist, group = "none")) +
geom_col() +
background_grid(major = "none", minor = "none") +
labs(title = "Age distribution of diagnosed cases in Iceland",
subtitle = "Calculated simply as number in each group divided by total number") +
theme(axis.title = element_blank())
Imagine that we have predicted 100 new cases on some day. Given the age distribution above we can split those cases randomly into age groups by sampling from the multinomial distribution. The multinomial distribution is just like the binomial, except for that there can be more than two outcomes. Instead of just a success and failure we have many outcomes, for example one outcome per age group.
samples <- matrix(0, nrow = 9, ncol = 10)
rownames(samples) <- ages
for (i in 1:10) {
set.seed(i)
sampled_cases <- rmultinom(n = 1, size = 120, prob = age_dist)
samples[, i] <- as.numeric(sampled_cases)
}
samples %>%
as_tibble(rownames = "age") %>%
pivot_longer(c(-age), names_to = "sample", values_to = "value") %>%
ggplot(aes(age, value)) +
geom_col() +
transition_states(sample, transition_length = 0.01, state_length = 0.02) +
ease_aes() +
labs(title = "Imagined future number of cases in each age group",
subtitle = "Obtained by sampling from the multinomial distribution") +
theme(axis.title = element_blank())
Next we want to use the age-grouped samples to predict hospital and ICU admissions. Luckily for us Imperial College London published on the \(16^{th}\) of March a report on the Impact of non-pharmaceutical interventions (NPIs) to reduce COVID-19 mortality and healthcare demand. NPIs are not of direct relevance to us, but Table 1 (shown below) is of interest.
knitr::include_graphics("Table1_ICL.png")
We can use these estimates to sample possible numbers of hospital and ICU admissions based on our age-grouped numbers of cases.
To continue with our example above, imagine that we’ve predicted 120 new cases on some day and split them into age-groups as below
sampled_age_cases <- rmultinom(n = 1, size = 120, prob = age_dist)
tibble(age = ages,
age_cases = as.numeric(sampled_age_cases)) %>%
ggplot(aes(age, age_cases)) +
geom_col() +
labs(title = "Imagined future number of cases in each age group",
subtitle = "Obtained by sampling from the multinomial distribution") +
theme(axis.title = element_blank())
For each age-group we can then condition on the number of cases and use the binomial distribution to sample possible numbers of hospital admissions.
samples <- matrix(0, nrow = 9, ncol = 10)
rownames(samples) <- ages
for (i in 1:10) {
set.seed(i)
sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
samples[, i] <- as.numeric(sampled_cases)
}
samples %>%
as_tibble(rownames = "age") %>%
pivot_longer(c(-age), names_to = "sample", values_to = "value") %>%
ggplot(aes(age, value)) +
geom_col() +
transition_states(sample, transition_length = 0.01, state_length = 0.02) +
ease_aes() +
scale_y_continuous(breaks = 0:20) +
labs(title = "Imagined future number of hospital admissions in each age-group",
subtitle = "Obtained by sampling from one binomial distribution for each age-group") +
theme(axis.title = element_blank())
Or we can look at the total number of hospital admissions, which is just the sum over all age-groups. What’s very convenient about this workflow is that we don’t need to exactly know the distribution of hospital admissions to sample from it. By performing all the necessary steps abive we can sample from the distribution without knowing exactly which distribution it is.
samples <- matrix(0, nrow = 9, ncol = 3000)
rownames(samples) <- ages
for (i in 1:3000) {
sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
samples[, i] <- as.numeric(sampled_cases)
}
total_admissions <- colSums(samples)
tibble(admissions = total_admissions) %>%
count(admissions) %>%
mutate(p = n / sum(n)) %>%
ggplot(aes(admissions, p)) +
geom_area(alpha = 0.5) +
geom_line() +
scale_y_continuous(labels = percent) +
scale_x_continuous(breaks = pretty_breaks(8)) +
labs(x = "Total admissions", y = "Probability",
title = "Distribution of daily total hospital admissions with 120 new cases and the age distribution above.")
To sample the ICU admissions we also use the binomial distribution, but now \(n\) is the number of hospital admissions and \(p\) is the ICU distribution from Imperial College.
sampled_hospital_cases <- numeric(9)
sampled_hospital_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
samples <- matrix(0, nrow = 9, ncol = 10)
rownames(samples) <- ages
for (i in 1:10) {
sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_age_cases), prob = hospital_dist)
samples[, i] <- as.numeric(sampled_cases)
}
samples %>%
as_tibble(rownames = "age") %>%
pivot_longer(c(-age), names_to = "sample", values_to = "value") %>%
ggplot(aes(age, value)) +
geom_col() +
transition_states(sample, transition_length = 0.01, state_length = 0.02) +
ease_aes() +
scale_y_continuous(breaks = 0:5) +
labs(title = "Imagined future number of ICU admissions in each age-group",
subtitle = "Obtained by sampling from one binomial distribution for each age-group") +
theme(axis.title = element_blank())
Or for the total number of ICU admissions.
samples <- matrix(0, nrow = 9, ncol = 3000)
rownames(samples) <- ages
for (i in 1:3000) {
sampled_cases <- rbinom(n = 9, size = as.numeric(sampled_hospital_cases), prob = icu_dist)
samples[, i] <- as.numeric(sampled_cases)
}
total_admissions <- colSums(samples)
tibble(admissions = total_admissions) %>%
count(admissions) %>%
mutate(p = n / sum(n)) %>%
ggplot(aes(admissions, p)) +
geom_area(alpha = 0.5) +
geom_line() +
scale_y_continuous(labels = percent) +
scale_x_continuous(breaks = 0:10) +
labs(x = "Total ICU admissions", y = "Probability",
title = "Distribution of daily total ICU admissions with 120 new cases and the age+hospital distributions above.")
Now we have predictions for new hospital admissions, but we can’t let those patients stay in the hospital forever. This is were domain expertise in science becomes extremely valuable. We worked with scientists from the Directorate of Health, National University Hospital and Center for Public Health with considerable expertise. The came up with the following assumptions:
Using these numbers and our predictions for daily new cases and daily admissions we can calculate everything we need: