Marcelo Ponce, Amit Sandhel (2020). covid19.analytics: An R Package to Obtain, Analyze and Visualize Data from the Corona Virus Disease Pandemic. URL https://arxiv.org/abs/2009.01091

Analysis of current COVID data and predictions

predictionDays <- 90
country <- "Canada"
population <- 37590000
noCache <- TRUE
limNbCases <- 1500
limNbDeaths <- 2000
limNbDeathsYear <- 150000
## [1] "Data dated from:  2022-06-06"

Looking at if new cases can predict hospitalisation 13 days in future.

# 13 jours Test
casCovidParJour <- confirmes_quebec
casCovidParJour <- casCovidParJour[order(casCovidParJour$Date), ]
casCovidParJour$Date <- as.Date(casCovidParJour$Date)
hospTreizeJours <- hosp_quebec

merg <- inner_join(casCovidParJour, hospTreizeJours, by = c("Date"))
merg <- merg[order(merg$Date), ]
# Remove start of pandemy irregular
merg <- merg[-1:-144, ]

newCases <- head(merg$Nb_Nvx_Cas, -13)
icu <- tail(merg$ACT_Si_RSS99, -13)
hosp <- tail(merg$ACT_Hsi_RSS99, -13)

corrNewCasesIcu <- data.frame(newCases, icu)
corrNewCasesHosp <- data.frame(newCases, hosp)

ratioIcu <- mean(newCases / icu)
ratioHosp <- mean(newCases / hosp)

# Mean ratio new cases / hosp + 13 days
# ratioHosp <- 1.97
# ratioIcu <- 10.6


m.icu <- lm(newCases ~ icu, data = corrNewCasesIcu)
coeffIcu <- m.icu$coefficients[2]
interceptIcu <- m.icu$coefficients[1]

m.hosp <- lm(newCases ~ hosp, data = corrNewCasesHosp)
coeffHosp <- m.hosp$coefficients[2]
interceptHosp <- m.hosp$coefficients[1]

merg$Date <- as.Date(merg$Date) + 13
last13Days <- tail(merg, 13)
last13Days$Pred_ACT_Hsi_RSS99 <- as.integer(interceptHosp + (last13Days$Nb_Nvx_Cas/coeffHosp))
last13Days$Pred_ACT_Si_RSS99 <- as.integer(interceptIcu + (last13Days$Nb_Nvx_Cas/coeffIcu))
last13Days <- last13Days %>% select(Date, Nb_Nvx_Cas, ACT_Hsi_RSS99, ACT_Si_RSS99, Pred_ACT_Hsi_RSS99, Pred_ACT_Si_RSS99)
last13Days$ACT_Hsi_RSS99 <- rev(last13Days$ACT_Hsi_RSS99)
last13Days$ACT_Si_RSS99 <- rev(last13Days$ACT_Si_RSS99)

Test de corrélation entre les nouveaux cas et le nombre de patients en soins intensifs 13 jours plus tard

plot(newCases ~ icu, ylab = "Nouveaux cas", xlab = "Soins Internes 13 jours + tard")

plot(rstudent(m.icu) ~ fitted(m.icu),
  ylab = "Résidus de Student",
  xlab = "Valeurs prédites",
  main = "Homogénéité des variances"
)

qqnorm(rstudent(m.icu),
  ylab = "Quantiles observés",
  xlab = "Quantiles théoriques", main = "Normalité des résidus"
)
qqline(rstudent(m.icu))

summary(m.hosp)
## 
## Call:
## lm(formula = newCases ~ hosp, data = corrNewCasesHosp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3500.4  -411.1   103.7   304.5 12125.2 
## 
## Coefficients:
##               Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) -219.41898   95.08262  -2.308              0.0213 *  
## hosp           2.48019    0.09233  26.861 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1681 on 626 degrees of freedom
## Multiple R-squared:  0.5354, Adjusted R-squared:  0.5347 
## F-statistic: 721.5 on 1 and 626 DF,  p-value: < 0.00000000000000022
summary(m.icu)
## 
## Call:
## lm(formula = newCases ~ icu, data = corrNewCasesIcu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3252.7  -942.4   -50.5   631.7 12017.8 
## 
## Coefficients:
##              Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) -1218.512    136.273  -8.942 <0.0000000000000002 ***
## icu            28.858      1.196  24.133 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1775 on 626 degrees of freedom
## Multiple R-squared:  0.4819, Adjusted R-squared:  0.4811 
## F-statistic: 582.4 on 1 and 626 DF,  p-value: < 0.00000000000000022
cor.test(corrNewCasesIcu$newCases, corrNewCasesIcu$icu)
## 
##  Pearson's product-moment correlation
## 
## data:  corrNewCasesIcu$newCases and corrNewCasesIcu$icu
## t = 24.133, df = 626, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6513669 0.7326695
## sample estimates:
##       cor 
## 0.6942262

Test de corrélation entre les nouveaux cas et le nombre de patients hospitalisés 13 jours plus tard

cor.test(newCases, hosp)
## 
##  Pearson's product-moment correlation
## 
## data:  newCases and hosp
## t = 26.861, df = 626, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6931850 0.7661166
## sample estimates:
##       cor 
## 0.7317385
plot(hosp_quebec$ACT_Si_RSS99, ylim = c(0, max(hosp_quebec$ACT_Si_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations Soins Intensifs", xlab = "Jours", main = "Quebec: Actuel - Soins Intensifs")

previsions(hosp_quebec$Date, hosp_quebec$ACT_Si_RSS99, "Québec - Prévision Soins Intensifs ", "Date", "Patients", 90)

plot(hosp_quebec$ACT_Hsi_RSS99, ylim = c(0, max(hosp_quebec$ACT_Hsi_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations", xlab = "Jours", main = "Quebec: Actuel - Hospitalisations")

previsions(hosp_quebec$Date, hosp_quebec$ACT_Hsi_RSS99, "Québec - Prévision Hospitalisations ", "Date", "Patients", 90)

plot(confirmes_quebec$Nb_Nvx_Cas, ylim = c(0, max(confirmes_quebec$Nb_Nvx_Cas)), xlim = c(0, nrow(confirmes_quebec)), ylab = "Cas", xlab = "Jours", main = "Quebec : Actuel - Cas")

previsions(confirmes_quebec$Date, confirmes_quebec$Nb_Nvx_Cas, "Québec - Prévision - Cas ", "Date", "Cas", 90)

plot(deces_quebec$total_jour, ylim = c(0, max(deces_quebec$total_jour)), xlim = c(0, nrow(deces_quebec)), ylab = "Décès", xlab = "Jours", main = "Quebec : Actuel - Décès")

previsions(deces_quebec$Date, deces_quebec$total_jour, "Québec - Prévision - Décès ", "Date", "Décès", 90)

plot(canada$total_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, nrow(canada)), ylim = c(0, max(canada$total_deaths, na.rm = T)), main = paste(country, " - Total Morts"))

plot(canada$total_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, nrow(canada)), , ylim = c(0, max(canada$total_cases)), main = paste(country, " - Total Cas"))

plot(canada$new_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, nrow(canada)), ylim = c(0, 400), main = paste(country, " - Morts / Jour"))

plot(canada$new_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, nrow(canada)), main = paste(country, " - Nouveaux Cas / Jour"))

plot(canada$total_cases_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, nrow(canada)), , main = "Total Cases /1 Million Population", col = "green", ylim = c(0, max(usa$total_cases_per_million, na.rm = T)), cex.axis = 0.75)
points(usa$total_cases_per_million, col = "red")
legend(1, 40000, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

plot(canada$total_deaths_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, nrow(canada)), main = "Total Deaths /1 Million Population", col = "green", ylim = c(0, max(usa$total_deaths_per_million, na.rm = T)), cex.axis = 0.75)
points(usa$total_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

plot(canada$new_cases_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, nrow(canada)), main = "New Cases/1 Million Population per Day", col = "green", ylim = c(0, limNbCases), cex.axis = 0.75)
points(usa$new_cases_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

plot(canada$new_deaths_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, nrow(canada)), main = "New Deaths/1 Million Population per Day", col = "green", ylim = c(0, 15), cex.axis = 0.75)
points(usa$new_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

tsCan <- ts(canada$new_deaths_per_million, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan), h = 90), sub = "Forecast 90 jours")

tsCan2 <- ts(canada$new_deaths, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan2), h = 90), sub = "Forecast 90 jours", ylab = "Nb de morts par jour", xlab = "Jours")

tsc_us <- tsc %>% filter(Country.Region == country)
tsc_us <- data.frame(t(tsc_us))
tsc_us <- cbind(rownames(tsc_us), data.frame(tsc_us, row.names = NULL))
colnames(tsc_us) <- c("Date", "Confirmed")
tsc_us <- tsc_us[-c(1:4), ]
tsc_us$Date <- ymd(tsc_us$Date)
tsc_us$Confirmed <- as.numeric(tsc_us$Confirmed)
str(tsc_us)
tsd_us <- tsd %>% filter(Country.Region == country)
tsd_us <- data.frame(t(tsd_us))
tsd_us <- cbind(rownames(tsd_us), data.frame(tsd_us, row.names = NULL))
colnames(tsd_us) <- c("Date", "Confirmed")
tsd_us <- tsd_us[-c(1:4), ]
tsd_us$Date <- ymd(tsd_us$Date)
tsd_us$Confirmed <- as.numeric(tsd_us$Confirmed)
str(tsd_us)
qplot(Date, Confirmed, data = tsc_us, main = paste("Covid19 - Number of confirmed Cases in ", country))

qplot(Date, Confirmed, data = tsd_us, main = paste("Covid19 - Number of confirmed deaths in ", country))

ds <- tsc_us$Date
y <- tsc_us$Confirmed
df <- data.frame(ds, y)
colnames(df) <- c("ds", "y")

ds2 <- tsd_us$Date
y2 <- tsd_us$Confirmed
df2 <- data.frame(ds2, y2)
colnames(df2) <- c("ds", "y")

tsd_us$delta <- c(NA, diff(tsd_us$Confirmed, lag = 1))
tsd_us$month <- format(as.Date(tsd_us$Date), "%m")
tsd_us$week <- week(as.Date(tsd_us$Date))
tsd_us$delta[is.na(tsd_us$delta)] <- 0
dfChunked <- df[-(1:100), , drop = FALSE]
m <- prophet(dfChunked, yearly.seasonality = F, daily.seasonality = F)
future <- make_future_dataframe(m, periods = predictionDays)
forcast <- predict(m, future)
plot(m, forcast, xlabel = "Date", ylabel = "Cases") + ggtitle(paste(country, " - Predicted Covid Caseses ", predictionDays, " days"))

dfChunked2 <- df2[-(1:100), , drop = FALSE]
m2 <- prophet(dfChunked2, yearly.seasonality = F, daily.seasonality = F)
future2 <- make_future_dataframe(m2, periods = predictionDays)
forcast2 <- predict(m2, future2)
plot(m2, forcast2, xlabel = "Date", ylabel = "Deaths") + ggtitle(paste(country, " - Predicted Covid Deaths ", predictionDays, " days"))

dyplot.prophet(m, forcast,
  main = paste(country, " - Predicted Covid Cases ", predictionDays, " days - FB Prophet"),
  xlab = "Date", ylab = "Cases"
) %>% dyOptions(maxNumberWidth = 20)
dyplot.prophet(m2, forcast2,
  main = paste(country, " - Predicted Covid Deaths ", predictionDays, " days - FB Prophet"),
  xlab = "Date", ylab = "Deaths"
) %>% dyOptions(maxNumberWidth = 20)

FB Prophet

prophet_plot_components(m, forcast)

US Deaths

prophet_plot_components(m2, forcast2)

l <- nrow(dfChunked)
pred <- forcast$yhat[1:l]
actual <- m$history$y
plot(actual, pred, main = paste(country, " Cases - Prediction vs Actual"))
abline(lm(pred ~ actual), col = "red")

pred2 <- forcast2$yhat[1:l]
actual2 <- m2$history$y
plot(actual2, pred2, main = paste(country, " Deaths - Prediction vs Actual"))
abline(lm(pred2 ~ actual2), col = "red")

summary(lm(pred ~ actual))
## 
## Call:
## lm(formula = pred ~ actual)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -43854  -7934  -1433   7501  57952 
## 
## Coefficients:
##               Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) 1868.07660  905.32142   2.063              0.0394 *  
## actual         0.99165    0.00311 318.818 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16030 on 765 degrees of freedom
## Multiple R-squared:  0.9925, Adjusted R-squared:  0.9925 
## F-statistic: 1.016e+05 on 1 and 765 DF,  p-value: < 0.00000000000000022
summary(lm(pred2 ~ actual2))
## 
## Call:
## lm(formula = pred2 ~ actual2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -108.873   -8.312   -0.516    9.785   99.330 
## 
## Coefficients:
##              Estimate Std. Error  t value            Pr(>|t|)    
## (Intercept) 1.1375175  1.8491694    0.615               0.539    
## actual2     0.9994374  0.0007511 1330.594 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.09 on 765 degrees of freedom
## Multiple R-squared:  0.9996, Adjusted R-squared:  0.9996 
## F-statistic: 1.77e+06 on 1 and 765 DF,  p-value: < 0.00000000000000022

rmarkdown::render(“COVID_CANADA.Rmd”)