suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(incidence))
suppressPackageStartupMessages(library(projections))
suppressPackageStartupMessages(library(earlyR))
suppressPackageStartupMessages(library(EpiCurve))
## Warning: package 'scales' was built under R version 4.0.2
suppressPackageStartupMessages(library(EpiModel))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(gridExtra))
library(dplyr)
dn = read_excel("~/Dropbox/Bao chi/Coronavirus articles/Da Nang Data/Du lieu coronavirus DaNang cleaned.xlsx")
dn$Date = as.Date(dn$DateAnnounced, "%d/%m/%Y")
dn$YoB = as.numeric(as.character(dn$YoB))
dn$Age = 2020-dn$YoB
today = as.Date("2020-08-22")
i = incidence(dn$Date, last_date = today)
plot(i,border="white", color="blue") + labs(x="Date", y="Number of confirmed cases")
dn %>% group_by(Date) %>% summarise(Cases = n()) %>% ggplot(aes(x=Date, y=Cases)) + geom_bar(stat="identity", col="white", fill="blue")
## `summarise()` ungrouping output (override with `.groups` argument)
For estimating R, we need estimates of the mean and standard deviation of the serial interval, i.e. the delay between primary and secondary symptom onset dates. This data are derived from Ann Int Med
# https://annals.org/aim/fullarticle/2762808/incubation-period-coro# navirus-disease-2019-covid-19-from-publicly-reported
mu = 5.1 # mean in days days
sigma = 2.32 # standard deviation in days
# Estimating the most likely value of R0
res = get_R(i, si_mean = mu, si_sd = sigma)
res
##
## /// Early estimate of reproduction number (R) //
## // class: earlyR, list
##
## // Maximum-Likelihood estimate of R ($R_ml):
## [1] 1.041041
##
##
## // $lambda:
## 0.1348761 0.4594212 2.05651 3.310493 4.601753 5.674742...
##
## // $dates:
## [1] "2020-07-26" "2020-07-27" "2020-07-28" "2020-07-29" "2020-07-30"
## [6] "2020-07-31"
## ...
##
## // $si (serial interval):
## A discrete distribution
## name: gamma
## parameters:
## shape: 4.83241676575505
## scale: 1.05537254901961
plot(res, xlim=c(0, 4))
# Possible values of R0
# Based on this figure and on the estimated R, we can wonder if new cases will be seen in the near future. How likely is this?
R_val = sample_R(res, 1000)
summary(R_val)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8809 1.0110 1.0410 1.0459 1.0811 1.3013
quantile(R_val, c(0.025, 0.975))
## 2.5% 97.5%
## 0.9507007 1.1511512
ggplot() + geom_bar(aes(x=R_val), col="white", fill="blue") + labs(x="Reproduction Ratio", y="Frequency")
onset = dn$Date
plot(res, "lambdas", scale = length(onset) + 1)
future_i = project(i, R=R_val, n_sim=1000, si=res$si, n_days=60)
future_i
##
## /// Incidence projections //
##
## // class: projections, matrix
## // 60 dates (rows); 1,000 simulations (columns)
##
## // first rows/columns:
## [,1] [,2] [,3] [,4] [,5] [,6]
## 2020-08-23 6 11 7 8 2 8
## 2020-08-24 5 4 6 10 6 5
## 2020-08-25 5 7 9 7 3 6
## 2020-08-26 3 5 8 3 3 11
## .
## .
## .
##
## // dates:
## [1] "2020-08-23" "2020-08-24" "2020-08-25" "2020-08-26" "2020-08-27"
## [6] "2020-08-28" "2020-08-29" "2020-08-30" "2020-08-31" "2020-09-01"
## [11] "2020-09-02" "2020-09-03" "2020-09-04" "2020-09-05" "2020-09-06"
## [16] "2020-09-07" "2020-09-08" "2020-09-09" "2020-09-10" "2020-09-11"
## [21] "2020-09-12" "2020-09-13" "2020-09-14" "2020-09-15" "2020-09-16"
## [26] "2020-09-17" "2020-09-18" "2020-09-19" "2020-09-20" "2020-09-21"
## [31] "2020-09-22" "2020-09-23" "2020-09-24" "2020-09-25" "2020-09-26"
## [36] "2020-09-27" "2020-09-28" "2020-09-29" "2020-09-30" "2020-10-01"
## [41] "2020-10-02" "2020-10-03" "2020-10-04" "2020-10-05" "2020-10-06"
## [46] "2020-10-07" "2020-10-08" "2020-10-09" "2020-10-10" "2020-10-11"
## [51] "2020-10-12" "2020-10-13" "2020-10-14" "2020-10-15" "2020-10-16"
## [56] "2020-10-17" "2020-10-18" "2020-10-19" "2020-10-20" "2020-10-21"
mean(future_i)
## [1] 7.33655
plot(future_i)
predicted_n <- colSums(future_i)
summary(predicted_n)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.0 298.0 424.5 440.2 560.2 1283.0
hist(predicted_n, col = "blue", border = "white", main = "Dự báo 60 ngà y", ylab="Tần số", xlab = "Số ca mới")
# Cumulative cases
future.cum = cumulate(future_i)
plot(future.cum) + labs(x="TÃnh từ 3/4/2020", y="Số ca tÃch lÅ©y")
# ggplot2
df <- as.data.frame(future_i, long = TRUE)
ggplot(df, aes(x = date, y = incidence)) + geom_jitter(alpha = .3) + geom_smooth() + labs(x="Ngà y (từ 2/4/2020)", y="Số ca mới")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'