First, download the data and convert the date columns to the Date class.
if (!file.exists("new_Dengue.csv")) {
  download.file(url = "https://od.cdc.gov.tw/eic/Dengue_Daily_last12m_EN.csv",
                destfile = "new_Dengue.csv", method = "curl")
}
library(dplyr)
data <- read.csv("new_Dengue.csv") %>%
  mutate(across(starts_with("Date"), ~ as.Date(., format = "%Y/%m/%d")))
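A quick optional check (not part of the original pipeline) that the conversion worked:
# sanity check: the parsed columns should be of class Date with a plausible range
class(data$Date_Onset)
range(data$Date_Onset, na.rm = TRUE)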
Filter to cases living in Kaohsiung City (KHC) that were not imported from abroad, and add a day column counting from the earliest onset date.
data %>%
  filter(County_living == "Kaohsiung City", Imported == "N") %>%
  mutate(day = as.integer(Date_Onset - min(Date_Onset)) + 1) %>%
  select(Date_Onset, day, County_infected) %>%
  mutate(KHC_infected = County_infected %in% c("Kaohsiung City", "None")) -> KHC
The KHC data now holds one case per row, and the KHC_infected column records whether the case was infected within KHC (a County_infected of "Kaohsiung City" or "None") or in another city.
head(KHC)
## Date_Onset day County_infected KHC_infected
## 1 2023-06-21 1 Tainan City FALSE
## 2 2023-06-28 8 Tainan City FALSE
## 3 2023-06-30 10 Tainan City FALSE
## 4 2023-06-30 10 Tainan City FALSE
## 5 2023-06-30 10 Tainan City FALSE
## 6 2023-07-01 11 Tainan City FALSE
Break the KHC data into two datasets of daily counts: locally infected cases (infected within KHC) and imported cases (infected in other Taiwanese cities).
KHC %>% filter(KHC_infected) %>% select(-KHC_infected) %>%
  group_by(day) %>% summarise(local_count = n()) -> KHC_local_count
KHC %>% filter(!KHC_infected) %>% select(-KHC_infected) %>%
  group_by(day) %>% summarise(imported_count = n()) -> KHC_imported_count
The data are now aggregated by day for further analysis; check the imported series and the totals for both groups.
head(KHC_imported_count)
## # A tibble: 6 × 2
## day imported_count
## <dbl> <int>
## 1 1 1
## 2 8 1
## 3 10 3
## 4 11 1
## 5 13 1
## 6 14 2
c(colSums(KHC_local_count)[2],colSums(KHC_imported_count)[2])
## local_count imported_count
## 2959 306
Merge them into a new dataset, KHC_2, covering every day in the range.
# take min/max over the day columns explicitly; min/max over the whole
# data frames would mix the count columns in
day_range <- min(KHC_local_count$day, KHC_imported_count$day):max(KHC_local_count$day, KHC_imported_count$day)
# seq() keeps the Date class; `:` on Dates coerces to numeric
date_range <- seq(min(KHC$Date_Onset), max(KHC$Date_Onset), by = "day")
library(tidyr)
expand.grid(day = day_range) %>%
left_join(KHC_local_count, by = join_by(day)) %>%
left_join(KHC_imported_count, by = join_by(day)) %>%
replace_na(list(local_count = 0, imported_count = 0)) %>%
mutate(date = date_range, total_count = local_count + imported_count) -> KHC_2
head(KHC_2)
## day local_count imported_count date total_count
## 1 1 0 1 2023-06-21 1
## 2 2 0 0 2023-06-22 0
## 3 3 0 0 2023-06-23 0
## 4 4 0 0 2023-06-24 0
## 5 5 0 0 2023-06-25 0
## 6 6 0 0 2023-06-26 0
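As an optional consistency check (not in the original code), the merged grid should have exactly one row per calendar day, and its totals should agree with the case-level data:
# optional: one row per day, and totals match the KHC case list
stopifnot(nrow(KHC_2) == length(date_range),
          sum(KHC_2$total_count) == nrow(KHC))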
Use 7-day windows, adding an Rt_index column that maps each day to its window.
# example: day 8 falls in window (8 - 1) %/% 7 + 1 = 2
# (%/% is the integer division operator)
Rt_step <- 7
KHC_2 %>%
  mutate(Rt_index = ((day - 1) %/% Rt_step) + 1) -> KHC_2
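For illustration, here is the window arithmetic applied to the first 14 days: days 1-7 fall in window 1, days 8-14 in window 2.
((1:14 - 1) %/% 7) + 1
## [1] 1 1 1 1 1 1 1 2 2 2 2 2 2 2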
Prepare the data list for the Stan model.
# generation-time parameters (gamma shape and scale) from the paper
gt_shape <- 3.44
gt_scale <- 5.12
data_list <- list(N = nrow(KHC_2),
                  local_n = KHC_2$local_count,
                  imported_n = KHC_2$imported_count,
                  total_n = KHC_2$total_count,
                  Rt_index = KHC_2$Rt_index,
                  gt_shape = gt_shape, gt_scale = gt_scale)
This is the Stan code chunk; note that the generated quantities block is left empty in this case.
data {
  int<lower=1> N;
  array[N] int<lower=0> local_n;
  array[N] int<lower=0> imported_n;
  array[N] int<lower=0> total_n;
  array[N] int<lower=1> Rt_index;
  real<lower=0> gt_shape;
  real<lower=0> gt_scale;
}
transformed data {
  int M = max(Rt_index);
  // discretized generation interval: successive differences of the
  // gamma CDF (Stan's gamma takes a rate, hence 1 / gt_scale)
  vector[N-1] gt;
  {
    vector[N] gt_cdf;
    for (t in 1:N)
      gt_cdf[t] = exp(gamma_lcdf(t | gt_shape, 1.0 / gt_scale));
    gt = tail(gt_cdf, N-1) - head(gt_cdf, N-1);
  }
  vector[N-1] gt_rev = reverse(gt);
  // renewal-equation convolution: conv[t] = sum_{s<t} total_n[s] * gt[t-s]
  vector<lower=0>[N] conv;
  conv[1] = 0.0;
  conv[2] = total_n[1] * gt[1];
  for (t in 3:N)
    conv[t] = dot_product(head(to_vector(total_n), t-1), tail(gt_rev, t-1));
}
parameters {
  vector[M] logRt;
  real<lower=0> phi;
}
transformed parameters {
  vector[M] Rt = exp(logRt);
}
model {
  // priors
  logRt ~ normal(1, 2);
  phi ~ cauchy(0, 5);
  // likelihood: local cases are negative binomial with mean Rt * conv
  // (days with zero local cases are skipped)
  for (t in 2:N)
    if (local_n[t] > 0)
      target += neg_binomial_2_lpmf(local_n[t] | Rt[Rt_index[t]] * conv[t], phi);
}
generated quantities {
}
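To make the transformed data block concrete, here is a small R sketch (illustration only, not part of the original pipeline) of the same computation: the generation-interval weights gt are differences of the gamma CDF, and conv[t] is the infection pressure on day t, so the model's expected local count on day t is Rt[Rt_index[t]] * conv[t].
# R sketch of the Stan transformed data block (illustration only)
N  <- data_list$N
gt <- diff(pgamma(1:N, shape = gt_shape, scale = gt_scale))  # discretized generation interval
conv <- c(0, sapply(2:N, function(t)
  sum(data_list$total_n[1:(t - 1)] * gt[(t - 1):1])))        # sum_{s<t} total_n[s] * gt[t-s]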
Fit the model with Stan.
library(rstan)
# KHC_noise is the compiled model from the Stan chunk above
# (e.g. compiled via the chunk option output.var = "KHC_noise")
fit <- sampling(KHC_noise, data = data_list)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000161 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.61 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 2.325 seconds (Warm-up)
## Chain 1: 1.139 seconds (Sampling)
## Chain 1: 3.464 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000115 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.15 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 1.995 seconds (Warm-up)
## Chain 2: 1.136 seconds (Sampling)
## Chain 2: 3.131 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 9e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.9 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 1.866 seconds (Warm-up)
## Chain 3: 0.951 seconds (Sampling)
## Chain 3: 2.817 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 8.7e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.87 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 2.059 seconds (Warm-up)
## Chain 4: 1.066 seconds (Sampling)
## Chain 4: 3.125 seconds (Total)
## Chain 4:
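Before plotting, it is worth a quick look at the sampler diagnostics (optional; both calls are standard rstan helpers):
# optional: divergence/treedepth/energy checks, plus a summary with R-hat
check_hmc_diagnostics(fit)
print(fit, pars = c("Rt", "phi"), probs = c(0.025, 0.5, 0.975))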
Finally, plot the results. We can see that Rt may not be stable at the beginning and the end of the outbreak, when case counts are low.
Rt_summary <- summary(fit, probs = c(0.025, 0.5, 0.975), pars = "Rt")
Rt_by_step <- as.data.frame(Rt_summary$summary) %>%
  mutate(Rt_index = 1:n()) %>%
  select(mean, `2.5%`, `50%`, `97.5%`, Rt_index) %>%
  mutate(Rt_index = as.factor(Rt_index))
# map each date to its Rt window
KHC_2 %>% select(Rt_index, date) -> date_index
# daily counts by infection origin, joined with the stepwise Rt estimates and credible intervals
KHC %>% group_by(Date_Onset, KHC_infected) %>%
  summarise(count = n(), .groups = "drop") %>%
  rename(date = Date_Onset) %>%
  left_join(date_index, by = join_by(date)) %>%
  mutate(Rt_index = as.factor(Rt_index)) %>%
  left_join(Rt_by_step, by = join_by(Rt_index)) -> plot_data
Make a simple barplot first!
library(ggplot2)
plot_data$KHC_infected <- factor(ifelse(plot_data$KHC_infected, "local", "imported"),
                                 levels = c("local", "imported"))
plot_data %>% ggplot(aes(x = date, y = count, fill = KHC_infected)) +
  geom_col(width = 1, alpha = 0.5) +
  scale_fill_manual(values = c("steelblue", "pink"), name = "KHC cases") +
  scale_x_date(date_labels = "%b %d", date_breaks = "3 weeks", expand = c(0, 0)) +
  theme_minimal()
plot_data %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = count, fill = KHC_infected), alpha = 0.5, width = 1) +
  scale_fill_manual(values = c("steelblue", "pink"), name = "KHC cases") +
  # Rt is drawn on the count scale multiplied by 14; the secondary axis
  # divides by the same factor so it reads in Rt units
  scale_y_continuous(name = "New cases", expand = c(0, 0),
                     sec.axis = sec_axis(~ . / 14, name = "Effective Reproduction Number (Rt)")) +
  coord_cartesian(ylim = c(0, 65)) +
  labs(x = "Date") +
  geom_step(aes(y = mean * 14)) +
  geom_ribbon(aes(ymin = `2.5%` * 14, ymax = `97.5%` * 14), alpha = 0.3, fill = "lightgray") +
  scale_x_date(date_labels = "%b %d", date_breaks = "3 weeks", expand = c(0, 0)) +
  theme_minimal()