R Markdown

#Read csv file

# Load the daily case counts; header = TRUE treats the first CSV row as the
# column names. TRUE is spelled out because T is an ordinary variable that
# can be reassigned.
Ncovid_case <- read.csv("E:/R/ncovid19.csv", header = TRUE)

# Show the first six rows as a quick sanity check of the import.
head(Ncovid_case, n = 6)
##        Date Day Cases Daily Previous Deaths
## 1 3/10/2020   1     2     2        0      0
## 2 3/11/2020   2     2     0        2      0
## 3 3/12/2020   3    12    10        0      0
## 4 3/13/2020   4    25    13       10      0
## 5 3/14/2020   5    33     8       13      0
## 6 3/15/2020   6    53    20        8      0
#Create sample from the data

# Randomly pick 80% of the first 16 row indices as the training subset.
# sample() draws at random, so the selection (and anything fitted on it)
# changes from run to run.
# 16 * 0.8 is 12.8; floor() states the intended whole-number size explicitly
# instead of relying on implicit truncation of a fractional size.
# NOTE: the variable name shadows base::sample(); it is kept because the
# model-fitting step indexes the data with `sample`.
sample <- sample(seq_len(16), size = floor(16 * 0.8))

#Visualize

# Plot cumulative cases against the day number as a line with point markers.
# The colour is a fixed value, so it is set on the geoms rather than inside
# aes(): inside aes() the string would be treated as a one-level categorical
# variable, producing a default-palette colour and a meaningless legend.
Ncovid_case %>%
  ggplot(aes(x = Day, y = Cases)) +
  geom_line(size = 1, colour = "red") +
  geom_point(size = 1.6, colour = "red")
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

#The curve looks exponential, so we fit a linear regression on log(Cases)

# Fit a quadratic trend in Day to the logarithm of the case counts, using
# only the rows selected for training.
model <- lm(log(Cases) ~ Day + I(Day^2), data = Ncovid_case[sample, ])

# Print the coefficient table, residual quantiles and fit statistics.
summary(model)
## 
## Call:
## lm(formula = log(Cases) ~ Day + I(Day^2), data = Ncovid_case[sample, 
##     ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52566 -0.24344  0.08756  0.25385  0.35938 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.373252   0.364881   1.023 0.333043    
## Day          0.610037   0.096070   6.350 0.000133 ***
## I(Day^2)    -0.008648   0.005682  -1.522 0.162340    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3464 on 9 degrees of freedom
## Multiple R-squared:  0.9763, Adjusted R-squared:  0.971 
## F-statistic:   185 on 2 and 9 DF,  p-value: 4.898e-08
#Predict for new data

# Predict log(Cases) for days 1 through 21, then exponentiate to get back to
# the original case-count scale.
prediction <- exp(predict(model, newdata = data.frame(Day = seq_len(21))))

# Print the predicted case counts.
prediction
##            1            2            3            4            5 
##     2.650216     4.752795     8.377317    14.512721    24.710455 
##            6            7            8            9           10 
##    41.352393    68.015606   109.952347   174.698207   272.810074 
##           11           12           13           14           15 
##   418.716783   631.638139   936.492563  1364.672159  1954.521000 
##           16           17           18           19           20 
##  2751.315289  3806.522430  5176.121187  6917.807764  9087.000543 
##           21 
## 11731.691378
#Gather actual and predicted values into a data frame

# Combine the forecast with the observed counts, one row per day (1 to 21).
# `floor.prediction.` is the name data.frame() would auto-generate for an
# unnamed floor(prediction) column; it is written out so the column name is
# visible where it is created.
# Indexing Cases with 1:21 yields NA for any day past the end of the data, so
# `actual` always has 21 entries no matter how many rows the CSV holds; a
# fixed number of padding NAs would only fit one particular row count.
df <- data.frame(
  Day = seq_len(21),
  floor.prediction. = floor(prediction),
  actual = Ncovid_case$Cases[seq_len(21)]
)

# Show the last six rows of the combined table.
tail(df, n = 6)
##    Day floor.prediction. actual
## 16  16              2751   2294
## 17  17              3806     NA
## 18  18              5176     NA
## 19  19              6917     NA
## 20  20              9087     NA
## 21  21             11731     NA
#Plot actual and predicted results

# Overlay predicted (black) and observed (red) case counts per day.
# The second layer gets its own data: the Day/actual columns with the
# rows that have no observed value dropped.
ggplot(df, aes(x = Day, y = floor.prediction.)) +
  geom_point(colour = "black") +
  geom_point(
    mapping = aes(x = Day, y = actual),
    data = na.omit(df[, -2]),
    colour = "red"
  ) +
  ggtitle("COVID-19 Prediction - Actual - Red, Pred - Black") +
  theme(plot.title = element_text(hjust = 0.5))

Including Plots