COVID-19 cases data was obtained from the Kaggle website;

https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge

coronavirus_confirmed_cases <- read.csv("time_series_covid_19_confirmed.csv")

Inspect data.

str(coronavirus_confirmed_cases)
## 'data.frame':    258 obs. of  76 variables:
##  $ Province.State: chr  "" "" "" "" ...
##  $ Country.Region: chr  "Afghanistan" "Albania" "Algeria" "Andorra" ...
##  $ Lat           : num  33 41.2 28 42.5 -11.2 ...
##  $ Long          : num  65 20.17 1.66 1.52 17.87 ...
##  $ X1.22.20      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X1.23.20      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X1.24.20      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X1.25.20      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ X1.26.20      : int  0 0 0 0 0 0 0 0 0 3 ...
##  $ X1.27.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X1.28.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X1.29.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X1.30.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X1.31.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.1.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.2.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.3.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.4.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.5.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.6.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.7.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.8.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.9.20       : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.10.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.11.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.12.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.13.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.14.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.15.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.16.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.17.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.18.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.19.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.20.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.21.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.22.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.23.20      : int  0 0 0 0 0 0 0 0 0 4 ...
##  $ X2.24.20      : int  1 0 0 0 0 0 0 0 0 4 ...
##  $ X2.25.20      : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ X2.26.20      : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ X2.27.20      : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ X2.28.20      : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ X2.29.20      : int  1 0 1 0 0 0 0 0 0 4 ...
##  $ X3.1.20       : int  1 0 1 0 0 0 0 1 0 6 ...
##  $ X3.2.20       : int  1 0 3 1 0 0 0 1 0 6 ...
##  $ X3.3.20       : int  1 0 5 1 0 0 1 1 0 13 ...
##  $ X3.4.20       : int  1 0 12 1 0 0 1 1 0 22 ...
##  $ X3.5.20       : int  1 0 12 1 0 0 1 1 0 22 ...
##  $ X3.6.20       : int  1 0 17 1 0 0 2 1 0 26 ...
##  $ X3.7.20       : int  1 0 17 1 0 0 8 1 0 28 ...
##  $ X3.8.20       : int  4 0 19 1 0 0 12 1 0 38 ...
##  $ X3.9.20       : int  4 2 20 1 0 0 12 1 0 48 ...
##  $ X3.10.20      : int  5 10 20 1 0 0 17 1 0 55 ...
##  $ X3.11.20      : int  7 12 20 1 0 0 19 1 0 65 ...
##  $ X3.12.20      : int  7 23 24 1 0 0 19 4 0 65 ...
##  $ X3.13.20      : int  7 33 26 1 0 1 31 8 1 92 ...
##  $ X3.14.20      : int  11 38 37 1 0 1 34 18 1 112 ...
##  $ X3.15.20      : int  16 42 48 1 0 1 45 26 1 134 ...
##  $ X3.16.20      : int  21 51 54 2 0 1 56 52 2 171 ...
##  $ X3.17.20      : int  22 55 60 39 0 1 68 78 2 210 ...
##  $ X3.18.20      : int  22 59 74 39 0 1 79 84 3 267 ...
##  $ X3.19.20      : int  22 64 87 53 0 1 97 115 4 307 ...
##  $ X3.20.20      : int  24 70 90 75 1 1 128 136 6 353 ...
##  $ X3.21.20      : int  24 76 139 88 2 1 158 160 9 436 ...
##  $ X3.22.20      : int  40 89 201 113 2 1 266 194 19 669 ...
##  $ X3.23.20      : int  40 104 230 133 3 3 301 235 32 669 ...
##  $ X3.24.20      : int  74 123 264 164 3 3 387 249 39 818 ...
##  $ X3.25.20      : int  84 146 302 188 3 3 387 265 39 1029 ...
##  $ X3.26.20      : int  94 174 367 224 4 7 502 290 53 1219 ...
##  $ X3.27.20      : int  110 186 409 267 4 7 589 329 62 1405 ...
##  $ X3.28.20      : int  110 197 454 308 5 7 690 407 71 1617 ...
##  $ X3.29.20      : int  120 212 511 334 7 7 745 424 77 1791 ...
##  $ X3.30.20      : int  170 223 584 370 7 7 820 482 78 2032 ...
##  $ X3.31.20      : int  174 243 716 376 7 7 1054 532 80 2032 ...
##  $ X4.1.20       : int  237 259 847 390 8 7 1054 571 84 2182 ...
##  $ X4.2.20       : int  273 277 986 428 8 9 1133 663 87 2298 ...

Select cases for Japan and tidy the data.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.1     ✓ dplyr   1.0.6
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
japan <- coronavirus_confirmed_cases %>% filter(Country.Region=="Japan") %>%
  select(-c(1:4)) %>%
  gather("date","cases",1:72) 
japan$date <- str_sub(japan$date, 2) %>% mdy()
glimpse(japan)
## Rows: 72
## Columns: 2
## $ date  <date> 2020-01-22, 2020-01-23, 2020-01-24, 2020-01-25, 2020-01-26, 202…
## $ cases <int> 2, 2, 2, 2, 4, 4, 7, 7, 11, 15, 20, 20, 20, 22, 22, 22, 25, 25, …

Plot cases over time.

japan %>% ggplot(aes(x=date, y=cases)) +
  geom_point() +
  labs(title = "Japan Coronavirus Known Cases",
       x = "",
       y = "") +
  theme_minimal()

Plot log to the base 10 of cases.

japan %>% ggplot(aes(x=date, y=cases)) +
  geom_line() +
  scale_y_log10() +
  labs(title = "Japan Coronavirus Cases",
       subtitle = "Log Scale Base 10",
       x = "",
       y = "") +
  theme_minimal()

Are COVID-19 confirmed cases increasing exponentially in Japan?

model <- lm(log(cases)~date, data = japan)
summary(model)
## 
## Call:
## lm(formula = log(cases) ~ date, data = japan)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1067 -0.2789  0.1624  0.2921  0.5299 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.738e+03  3.979e+01  -43.68   <2e-16 ***
## date         9.514e-02  2.172e-03   43.80   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.383 on 70 degrees of freedom
## Multiple R-squared:  0.9648, Adjusted R-squared:  0.9643 
## F-statistic:  1919 on 1 and 70 DF,  p-value: < 2.2e-16

At this early stage, the growth (above) appears to be exponential.

Predict future growth based on the exponential model.

test <- data.frame(date = seq(ymd('2020-02-01'),ymd('2020-07-01'),by='day'))
test$preds <- exp(predict(model, newdata=test))

Plot estimated growth together with known cases.

test %>% ggplot(aes(x=date, y=preds)) +
  geom_line() +
  scale_y_log10() +
  labs(title = "Coronavirus predictions together with known cases (red)",
       x="",
       y="Log Scale 10") +
    geom_point(aes(x=date,y=cases), data=japan, col="red")

Conclusion

If the spread of COVID-19 continues unabated in Japan, there will be 100,000 confirmed cases by May 2020, and 10 million by July.