Marcelo Ponce, Amit Sandhel (2020). covid19.analytics: An R Package to Obtain, Analyze and Visualize Data from the Corona Virus Disease Pandemic. URL https://arxiv.org/abs/2009.01091
predictionDays <- 90
country <- "Canada"
population <- 37590000
noCache <- TRUE
limNbCases <- 1500
limNbDeaths <- 2000
limNbDeathsYear <- 150000
## [1] "Data dated from: 2022-06-06"
Looking at if new cases can predict hospitalisation 13 days in future.
# 13 jours Test
casCovidParJour <- confirmes_quebec
casCovidParJour <- casCovidParJour[order(casCovidParJour$Date), ]
casCovidParJour$Date <- as.Date(casCovidParJour$Date)
hospTreizeJours <- hosp_quebec
merg <- inner_join(casCovidParJour, hospTreizeJours, by = c("Date"))
merg <- merg[order(merg$Date), ]
# Remove start of pandemy irregular
merg <- merg[-1:-144, ]
newCases <- head(merg$Nb_Nvx_Cas, -13)
icu <- tail(merg$ACT_Si_RSS99, -13)
hosp <- tail(merg$ACT_Hsi_RSS99, -13)
corrNewCasesIcu <- data.frame(newCases, icu)
corrNewCasesHosp <- data.frame(newCases, hosp)
ratioIcu <- mean(newCases / icu)
ratioHosp <- mean(newCases / hosp)
# Mean ratio new cases / hosp + 13 days
# ratioHosp <- 1.97
# ratioIcu <- 10.6
m.icu <- lm(newCases ~ icu, data = corrNewCasesIcu)
coeffIcu <- m.icu$coefficients[2]
interceptIcu <- m.icu$coefficients[1]
m.hosp <- lm(newCases ~ hosp, data = corrNewCasesHosp)
coeffHosp <- m.hosp$coefficients[2]
interceptHosp <- m.hosp$coefficients[1]
merg$Date <- as.Date(merg$Date) + 13
last13Days <- tail(merg, 13)
last13Days$Pred_ACT_Hsi_RSS99 <- as.integer(interceptHosp + (last13Days$Nb_Nvx_Cas/coeffHosp))
last13Days$Pred_ACT_Si_RSS99 <- as.integer(interceptIcu + (last13Days$Nb_Nvx_Cas/coeffIcu))
last13Days <- last13Days %>% select(Date, Nb_Nvx_Cas, ACT_Hsi_RSS99, ACT_Si_RSS99, Pred_ACT_Hsi_RSS99, Pred_ACT_Si_RSS99)
last13Days$ACT_Hsi_RSS99 <- rev(last13Days$ACT_Hsi_RSS99)
last13Days$ACT_Si_RSS99 <- rev(last13Days$ACT_Si_RSS99)
Test de corrélation entre les nouveaux cas et le nombre de patients en soins intensifs 13 jours plus tard
plot(newCases ~ icu, ylab = "Nouveaux cas", xlab = "Soins Internes 13 jours + tard")
plot(rstudent(m.icu) ~ fitted(m.icu),
ylab = "Résidus de Student",
xlab = "Valeurs prédites",
main = "Homogénéité des variances"
)
qqnorm(rstudent(m.icu),
ylab = "Quantiles observés",
xlab = "Quantiles théoriques", main = "Normalité des résidus"
)
qqline(rstudent(m.icu))
summary(m.hosp)
##
## Call:
## lm(formula = newCases ~ hosp, data = corrNewCasesHosp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3500.4 -411.1 103.7 304.5 12125.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -219.41898 95.08262 -2.308 0.0213 *
## hosp 2.48019 0.09233 26.861 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1681 on 626 degrees of freedom
## Multiple R-squared: 0.5354, Adjusted R-squared: 0.5347
## F-statistic: 721.5 on 1 and 626 DF, p-value: < 0.00000000000000022
summary(m.icu)
##
## Call:
## lm(formula = newCases ~ icu, data = corrNewCasesIcu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3252.7 -942.4 -50.5 631.7 12017.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1218.512 136.273 -8.942 <0.0000000000000002 ***
## icu 28.858 1.196 24.133 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1775 on 626 degrees of freedom
## Multiple R-squared: 0.4819, Adjusted R-squared: 0.4811
## F-statistic: 582.4 on 1 and 626 DF, p-value: < 0.00000000000000022
cor.test(corrNewCasesIcu$newCases, corrNewCasesIcu$icu)
##
## Pearson's product-moment correlation
##
## data: corrNewCasesIcu$newCases and corrNewCasesIcu$icu
## t = 24.133, df = 626, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6513669 0.7326695
## sample estimates:
## cor
## 0.6942262
Test de corrélation entre les nouveaux cas et le nombre de patients hospitalisés 13 jours plus tard
cor.test(newCases, hosp)
##
## Pearson's product-moment correlation
##
## data: newCases and hosp
## t = 26.861, df = 626, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6931850 0.7661166
## sample estimates:
## cor
## 0.7317385
plot(hosp_quebec$ACT_Si_RSS99, ylim = c(0, max(hosp_quebec$ACT_Si_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations Soins Intensifs", xlab = "Jours", main = "Quebec: Actuel - Soins Intensifs")
previsions(hosp_quebec$Date, hosp_quebec$ACT_Si_RSS99, "Québec - Prévision Soins Intensifs ", "Date", "Patients", 90)
plot(hosp_quebec$ACT_Hsi_RSS99, ylim = c(0, max(hosp_quebec$ACT_Hsi_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations", xlab = "Jours", main = "Quebec: Actuel - Hospitalisations")
previsions(hosp_quebec$Date, hosp_quebec$ACT_Hsi_RSS99, "Québec - Prévision Hospitalisations ", "Date", "Patients", 90)
plot(confirmes_quebec$Nb_Nvx_Cas, ylim = c(0, max(confirmes_quebec$Nb_Nvx_Cas)), xlim = c(0, nrow(confirmes_quebec)), ylab = "Cas", xlab = "Jours", main = "Quebec : Actuel - Cas")
previsions(confirmes_quebec$Date, confirmes_quebec$Nb_Nvx_Cas, "Québec - Prévision - Cas ", "Date", "Cas", 90)
plot(deces_quebec$total_jour, ylim = c(0, max(deces_quebec$total_jour)), xlim = c(0, nrow(deces_quebec)), ylab = "Décès", xlab = "Jours", main = "Quebec : Actuel - Décès")
previsions(deces_quebec$Date, deces_quebec$total_jour, "Québec - Prévision - Décès ", "Date", "Décès", 90)
plot(canada$total_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, nrow(canada)), ylim = c(0, max(canada$total_deaths, na.rm = T)), main = paste(country, " - Total Morts"))
plot(canada$total_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, nrow(canada)), , ylim = c(0, max(canada$total_cases)), main = paste(country, " - Total Cas"))
plot(canada$new_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, nrow(canada)), ylim = c(0, 400), main = paste(country, " - Morts / Jour"))
plot(canada$new_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, nrow(canada)), main = paste(country, " - Nouveaux Cas / Jour"))
plot(canada$total_cases_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, nrow(canada)), , main = "Total Cases /1 Million Population", col = "green", ylim = c(0, max(usa$total_cases_per_million, na.rm = T)), cex.axis = 0.75)
points(usa$total_cases_per_million, col = "red")
legend(1, 40000, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
plot(canada$total_deaths_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, nrow(canada)), main = "Total Deaths /1 Million Population", col = "green", ylim = c(0, max(usa$total_deaths_per_million, na.rm = T)), cex.axis = 0.75)
points(usa$total_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
plot(canada$new_cases_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, nrow(canada)), main = "New Cases/1 Million Population per Day", col = "green", ylim = c(0, limNbCases), cex.axis = 0.75)
points(usa$new_cases_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
plot(canada$new_deaths_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, nrow(canada)), main = "New Deaths/1 Million Population per Day", col = "green", ylim = c(0, 15), cex.axis = 0.75)
points(usa$new_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)
tsCan <- ts(canada$new_deaths_per_million, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan), h = 90), sub = "Forecast 90 jours")
tsCan2 <- ts(canada$new_deaths, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan2), h = 90), sub = "Forecast 90 jours", ylab = "Nb de morts par jour", xlab = "Jours")
tsc_us <- tsc %>% filter(Country.Region == country)
tsc_us <- data.frame(t(tsc_us))
tsc_us <- cbind(rownames(tsc_us), data.frame(tsc_us, row.names = NULL))
colnames(tsc_us) <- c("Date", "Confirmed")
tsc_us <- tsc_us[-c(1:4), ]
tsc_us$Date <- ymd(tsc_us$Date)
tsc_us$Confirmed <- as.numeric(tsc_us$Confirmed)
str(tsc_us)
tsd_us <- tsd %>% filter(Country.Region == country)
tsd_us <- data.frame(t(tsd_us))
tsd_us <- cbind(rownames(tsd_us), data.frame(tsd_us, row.names = NULL))
colnames(tsd_us) <- c("Date", "Confirmed")
tsd_us <- tsd_us[-c(1:4), ]
tsd_us$Date <- ymd(tsd_us$Date)
tsd_us$Confirmed <- as.numeric(tsd_us$Confirmed)
str(tsd_us)
qplot(Date, Confirmed, data = tsc_us, main = paste("Covid19 - Number of confirmed Cases in ", country))
qplot(Date, Confirmed, data = tsd_us, main = paste("Covid19 - Number of confirmed deaths in ", country))
ds <- tsc_us$Date
y <- tsc_us$Confirmed
df <- data.frame(ds, y)
colnames(df) <- c("ds", "y")
ds2 <- tsd_us$Date
y2 <- tsd_us$Confirmed
df2 <- data.frame(ds2, y2)
colnames(df2) <- c("ds", "y")
tsd_us$delta <- c(NA, diff(tsd_us$Confirmed, lag = 1))
tsd_us$month <- format(as.Date(tsd_us$Date), "%m")
tsd_us$week <- week(as.Date(tsd_us$Date))
tsd_us$delta[is.na(tsd_us$delta)] <- 0
dfChunked <- df[-(1:100), , drop = FALSE]
m <- prophet(dfChunked, yearly.seasonality = F, daily.seasonality = F)
future <- make_future_dataframe(m, periods = predictionDays)
forcast <- predict(m, future)
plot(m, forcast, xlabel = "Date", ylabel = "Cases") + ggtitle(paste(country, " - Predicted Covid Caseses ", predictionDays, " days"))
dfChunked2 <- df2[-(1:100), , drop = FALSE]
m2 <- prophet(dfChunked2, yearly.seasonality = F, daily.seasonality = F)
future2 <- make_future_dataframe(m2, periods = predictionDays)
forcast2 <- predict(m2, future2)
plot(m2, forcast2, xlabel = "Date", ylabel = "Deaths") + ggtitle(paste(country, " - Predicted Covid Deaths ", predictionDays, " days"))
dyplot.prophet(m, forcast,
main = paste(country, " - Predicted Covid Cases ", predictionDays, " days - FB Prophet"),
xlab = "Date", ylab = "Cases"
) %>% dyOptions(maxNumberWidth = 20)
dyplot.prophet(m2, forcast2,
main = paste(country, " - Predicted Covid Deaths ", predictionDays, " days - FB Prophet"),
xlab = "Date", ylab = "Deaths"
) %>% dyOptions(maxNumberWidth = 20)
prophet_plot_components(m, forcast)
prophet_plot_components(m2, forcast2)
l <- nrow(dfChunked)
pred <- forcast$yhat[1:l]
actual <- m$history$y
plot(actual, pred, main = paste(country, " Cases - Prediction vs Actual"))
abline(lm(pred ~ actual), col = "red")
pred2 <- forcast2$yhat[1:l]
actual2 <- m2$history$y
plot(actual2, pred2, main = paste(country, " Deaths - Prediction vs Actual"))
abline(lm(pred2 ~ actual2), col = "red")
summary(lm(pred ~ actual))
##
## Call:
## lm(formula = pred ~ actual)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43854 -7934 -1433 7501 57952
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1868.07660 905.32142 2.063 0.0394 *
## actual 0.99165 0.00311 318.818 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16030 on 765 degrees of freedom
## Multiple R-squared: 0.9925, Adjusted R-squared: 0.9925
## F-statistic: 1.016e+05 on 1 and 765 DF, p-value: < 0.00000000000000022
summary(lm(pred2 ~ actual2))
##
## Call:
## lm(formula = pred2 ~ actual2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -108.873 -8.312 -0.516 9.785 99.330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1375175 1.8491694 0.615 0.539
## actual2 0.9994374 0.0007511 1330.594 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 29.09 on 765 degrees of freedom
## Multiple R-squared: 0.9996, Adjusted R-squared: 0.9996
## F-statistic: 1.77e+06 on 1 and 765 DF, p-value: < 0.00000000000000022
rmarkdown::render(“COVID_CANADA.Rmd”)