First, download the data and convert the date columns to the Date class.
if (!file.exists("new_Dengue.csv")) {
  download.file(url = "https://od.cdc.gov.tw/eic/Dengue_Daily_last12m_EN.csv",
                destfile = "new_Dengue.csv", method = "curl")
}
library(dplyr)
data <- read.csv("new_Dengue.csv") %>%
  mutate(across(starts_with("Date"), ~ as.Date(., format = "%Y/%m/%d")))
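A quick optional check (not part of the original pipeline) that the conversion worked:
# sanity check: the parsed columns should be of class Date with a plausible range
class(data$Date_Onset)
range(data$Date_Onset, na.rm = TRUE)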
Filter to cases living in Kaohsiung City (KHC) that were not imported from abroad, and add a day column counting from the earliest onset date.
data %>%
  filter(County_living == "Kaohsiung City", Imported == "N") %>%
  mutate(day = as.integer(Date_Onset - min(Date_Onset)) + 1) %>%
  select(Date_Onset, day, County_infected) %>%
  mutate(KHC_infected = County_infected %in% c("Kaohsiung City", "None")) -> KHC
The KHC data now holds one case per row, and the KHC_infected column records whether the case was infected within KHC (a County_infected of "Kaohsiung City" or "None") or in another city.
head(KHC)
## Date_Onset day County_infected KHC_infected
## 1 2023-06-21 1 Tainan City FALSE
## 2 2023-06-28 8 Tainan City FALSE
## 3 2023-06-30 10 Tainan City FALSE
## 4 2023-06-30 10 Tainan City FALSE
## 5 2023-06-30 10 Tainan City FALSE
## 6 2023-07-01 11 Tainan City FALSE
Break the KHC data into two datasets of daily counts: locally infected cases (infected within KHC) and imported cases (infected in other Taiwanese cities).
KHC %>% filter(KHC_infected) %>% select(-KHC_infected) %>%
  group_by(day) %>% summarise(local_count = n()) -> KHC_local_count
KHC %>% filter(!KHC_infected) %>% select(-KHC_infected) %>%
  group_by(day) %>% summarise(imported_count = n()) -> KHC_imported_count
The data are now aggregated by day for further analysis; check the imported series and the totals for both groups.
head(KHC_imported_count)
## # A tibble: 6 × 2
## day imported_count
## <dbl> <int>
## 1 1 1
## 2 8 1
## 3 10 3
## 4 11 1
## 5 13 1
## 6 14 2
c(colSums(KHC_local_count)[2],colSums(KHC_imported_count)[2])
## local_count imported_count
## 2959 306
Merge them into a new dataset, KHC_2, covering every day in the range.
# take min/max over the day columns explicitly; min/max over the whole
# data frames would mix the count columns in
day_range <- min(KHC_local_count$day, KHC_imported_count$day):max(KHC_local_count$day, KHC_imported_count$day)
# seq() keeps the Date class; `:` on Dates coerces to numeric
date_range <- seq(min(KHC$Date_Onset), max(KHC$Date_Onset), by = "day")
library(tidyr)
expand.grid(day = day_range) %>%
left_join(KHC_local_count, by = join_by(day)) %>%
left_join(KHC_imported_count, by = join_by(day)) %>%
replace_na(list(local_count = 0, imported_count = 0)) %>%
mutate(date = date_range, total_count = local_count + imported_count) -> KHC_2
head(KHC_2)
## day local_count imported_count date total_count
## 1 1 0 1 2023-06-21 1
## 2 2 0 0 2023-06-22 0
## 3 3 0 0 2023-06-23 0
## 4 4 0 0 2023-06-24 0
## 5 5 0 0 2023-06-25 0
## 6 6 0 0 2023-06-26 0
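As an optional consistency check (not in the original code), the merged grid should have exactly one row per calendar day, and its totals should agree with the case-level data:
# optional: one row per day, and totals match the KHC case list
stopifnot(nrow(KHC_2) == length(date_range),
          sum(KHC_2$total_count) == nrow(KHC))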
Use 7-day windows, adding an Rt_index column that maps each day to its window.
# example: day 8 falls in window (8 - 1) %/% 7 + 1 = 2
# (%/% is the integer division operator)
Rt_step <- 7
KHC_2 %>%
  mutate(Rt_index = ((day - 1) %/% Rt_step) + 1) -> KHC_2
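For illustration, here is the window arithmetic applied to the first 14 days: days 1-7 fall in window 1, days 8-14 in window 2.
((1:14 - 1) %/% 7) + 1
## [1] 1 1 1 1 1 1 1 2 2 2 2 2 2 2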
Prepare the data list for the Stan model.
# generation-time parameters (gamma shape and scale) from the paper
gt_shape <- 3.44
gt_scale <- 5.12
data_list <- list(N = nrow(KHC_2),
                  local_n = KHC_2$local_count,
                  imported_n = KHC_2$imported_count,
                  total_n = KHC_2$total_count,
                  Rt_index = KHC_2$Rt_index,
                  gt_shape = gt_shape, gt_scale = gt_scale)
This is the Stan code chunk; note that the generated quantities block is left empty in this case.
data {
  int<lower=1> N;
  array[N] int<lower=0> local_n;
  array[N] int<lower=0> imported_n;
  array[N] int<lower=0> total_n;
  array[N] int<lower=1> Rt_index;
  real<lower=0> gt_shape;
  real<lower=0> gt_scale;
}
transformed data {
  int M = max(Rt_index);
  // discretized generation interval: successive differences of the
  // gamma CDF (Stan's gamma takes a rate, hence 1 / gt_scale)
  vector[N-1] gt;
  {
    vector[N] gt_cdf;
    for (t in 1:N)
      gt_cdf[t] = exp(gamma_lcdf(t | gt_shape, 1.0 / gt_scale));
    gt = tail(gt_cdf, N-1) - head(gt_cdf, N-1);
  }
  vector[N-1] gt_rev = reverse(gt);
  // renewal-equation convolution: conv[t] = sum_{s<t} total_n[s] * gt[t-s]
  vector<lower=0>[N] conv;
  conv[1] = 0.0;
  conv[2] = total_n[1] * gt[1];
  for (t in 3:N)
    conv[t] = dot_product(head(to_vector(total_n), t-1), tail(gt_rev, t-1));
}
parameters {
  vector[M] logRt;
  real<lower=0> phi;
}
transformed parameters {
  vector[M] Rt = exp(logRt);
}
model {
  // priors
  logRt ~ normal(1, 2);
  phi ~ cauchy(0, 5);
  // likelihood: local cases are negative binomial with mean Rt * conv
  // (days with zero local cases are skipped)
  for (t in 2:N)
    if (local_n[t] > 0)
      target += neg_binomial_2_lpmf(local_n[t] | Rt[Rt_index[t]] * conv[t], phi);
}
generated quantities {
}
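To make the transformed data block concrete, here is a small R sketch (illustration only, not part of the original pipeline) of the same computation: the generation-interval weights gt are differences of the gamma CDF, and conv[t] is the infection pressure on day t, so the model's expected local count on day t is Rt[Rt_index[t]] * conv[t].
# R sketch of the Stan transformed data block (illustration only)
N  <- data_list$N
gt <- diff(pgamma(1:N, shape = gt_shape, scale = gt_scale))  # discretized generation interval
conv <- c(0, sapply(2:N, function(t)
  sum(data_list$total_n[1:(t - 1)] * gt[(t - 1):1])))        # sum_{s<t} total_n[s] * gt[t-s]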
Fit the model with Stan.
library(rstan)
# KHC_noise is the compiled model from the Stan chunk above
# (e.g. compiled via the chunk option output.var = "KHC_noise")
fit <- sampling(KHC_noise, data = data_list)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000161 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 1.61 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 2.325 seconds (Warm-up)
## Chain 1: 1.139 seconds (Sampling)
## Chain 1: 3.464 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000115 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 1.15 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 1.995 seconds (Warm-up)
## Chain 2: 1.136 seconds (Sampling)
## Chain 2: 3.131 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 9e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.9 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 1.866 seconds (Warm-up)
## Chain 3: 0.951 seconds (Sampling)
## Chain 3: 2.817 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 8.7e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.87 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
## Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
## Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
## Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
## Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 2.059 seconds (Warm-up)
## Chain 4: 1.066 seconds (Sampling)
## Chain 4: 3.125 seconds (Total)
## Chain 4:
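Before plotting, it is worth a quick look at the sampler diagnostics (optional; both calls are standard rstan helpers):
# optional: divergence/treedepth/energy checks, plus a summary with R-hat
check_hmc_diagnostics(fit)
print(fit, pars = c("Rt", "phi"), probs = c(0.025, 0.5, 0.975))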
Finally, plot the results. We can see that Rt may not be stable at the beginning and the end of the outbreak, when case counts are low.
Rt_summary <- summary(fit, probs = c(0.025, 0.5, 0.975), pars = "Rt")
Rt_by_step <- as.data.frame(Rt_summary$summary) %>%
  mutate(Rt_index = 1:n()) %>%
  select(mean, `2.5%`, `50%`, `97.5%`, Rt_index) %>%
  mutate(Rt_index = as.factor(Rt_index))
# map each date to its Rt window
KHC_2 %>% select(Rt_index, date) -> date_index
# daily counts by infection origin, joined with the stepwise Rt estimates and credible intervals
KHC %>% group_by(Date_Onset, KHC_infected) %>%
  summarise(count = n(), .groups = "drop") %>%
  rename(date = Date_Onset) %>%
  left_join(date_index, by = join_by(date)) %>%
  mutate(Rt_index = as.factor(Rt_index)) %>%
  left_join(Rt_by_step, by = join_by(Rt_index)) -> plot_data
Make a simple barplot first!
library(ggplot2)
plot_data$KHC_infected <- factor(ifelse(plot_data$KHC_infected, "local", "imported"),
                                 levels = c("local", "imported"))
plot_data %>% ggplot(aes(x = date, y = count, fill = KHC_infected)) +
  geom_col(width = 1, alpha = 0.5) +
  scale_fill_manual(values = c("steelblue", "pink"), name = "KHC cases") +
  scale_x_date(date_labels = "%b %d", date_breaks = "3 weeks", expand = c(0, 0)) +
  theme_minimal()
plot_data %>%
  ggplot(aes(x = date)) +
  geom_col(aes(y = count, fill = KHC_infected), alpha = 0.5, width = 1) +
  scale_fill_manual(values = c("steelblue", "pink"), name = "KHC cases") +
  # Rt is drawn on the count scale multiplied by 14; the secondary axis
  # divides by the same factor so it reads in Rt units
  scale_y_continuous(name = "New cases", expand = c(0, 0),
                     sec.axis = sec_axis(~ . / 14, name = "Effective Reproduction Number (Rt)")) +
  coord_cartesian(ylim = c(0, 65)) +
  labs(x = "Date") +
  geom_step(aes(y = mean * 14)) +
  geom_ribbon(aes(ymin = `2.5%` * 14, ymax = `97.5%` * 14), alpha = 0.3, fill = "lightgray") +
  scale_x_date(date_labels = "%b %d", date_breaks = "3 weeks", expand = c(0, 0)) +
  theme_minimal()