R Markdown
# Read the case data from CSV; the first line of the file supplies the column
# names.
# NOTE(review): the path is absolute and machine-specific -- confirm it exists
# on the machine running this script.
Ncovid_case <- read.csv("E:/R/ncovid19.csv", header = TRUE)
# Preview the first six rows.
head(Ncovid_case, n = 6)
## Date Day Cases Daily Previous Deaths
## 1 3/10/2020 1 2 2 0 0
## 2 3/11/2020 2 2 0 2 0
## 3 3/12/2020 3 12 10 0 0
## 4 3/13/2020 4 25 13 10 0
## 5 3/14/2020 5 33 8 13 0
## 6 3/15/2020 6 53 20 8 0
# Create a training sample from the data: 80% of the first 16 rows, drawn
# without replacement.
# NOTE(review): 16 looks like the number of rows with an observed Cases value
# (the last row appears to be NA) -- confirm against the CSV.
# Fix the RNG seed so the split, the fitted model and the predictions can be
# reproduced from run to run.
set.seed(2020)
# floor() makes the rounding of 16 * 0.8 = 12.8 down to 12 rows explicit.
# The variable keeps the name `sample` because the model fit indexes the data
# with it; base::sample() is called explicitly to avoid confusion with it.
sample <- base::sample(seq_len(16), size = floor(16 * 0.8))
# Visualize Cases against Day as a line with point markers.
# The colour is a fixed property of the layers, so it is set outside aes();
# inside aes() the string "Red" is treated as a data value, which produces a
# legend entry labelled "Red" instead of colouring the geoms red.
ggplot(Ncovid_case, aes(x = Day, y = Cases)) +
  geom_line(colour = "red", size = 1) +
  geom_point(colour = "red", size = 1.6)
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

# The curve looks roughly exponential, so fit an ordinary linear model to
# log(Cases) with a linear and a quadratic term in Day, using only the sampled
# training rows.
model <- lm(log(Cases) ~ Day + I(Day^2), data = Ncovid_case[sample, ])
# Print coefficients, residual summary and goodness-of-fit statistics.
summary(model)
##
## Call:
## lm(formula = log(Cases) ~ Day + I(Day^2), data = Ncovid_case[sample,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52566 -0.24344 0.08756 0.25385 0.35938
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.373252 0.364881 1.023 0.333043
## Day 0.610037 0.096070 6.350 0.000133 ***
## I(Day^2) -0.008648 0.005682 -1.522 0.162340
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3464 on 9 degrees of freedom
## Multiple R-squared: 0.9763, Adjusted R-squared: 0.971
## F-statistic: 185 on 2 and 9 DF, p-value: 4.898e-08
# Predict for days 1 through 21. The model is fitted on log(Cases), so the
# predictions are exponentiated to bring them back to the case-count scale.
prediction <- exp(predict(model, newdata = data.frame(Day = seq_len(21))))
prediction
## 1 2 3 4 5
## 2.650216 4.752795 8.377317 14.512721 24.710455
## 6 7 8 9 10
## 41.352393 68.015606 109.952347 174.698207 272.810074
## 11 12 13 14 15
## 418.716783 631.638139 936.492563 1364.672159 1954.521000
## 16 17 18 19 20
## 2751.315289 3806.522430 5176.121187 6917.807764 9087.000543
## 21
## 11731.691378
# Gather the predicted and actual values into one data frame, one row per
# predicted day.
df <- data.frame(
  Day = seq_along(prediction),
  # Named explicitly to match the name data.frame() would auto-generate for an
  # unnamed floor(prediction) column; the plotting code refers to it.
  floor.prediction. = floor(prediction),
  # Indexing past the end of Cases yields NA, so the observed values are padded
  # (or truncated) to the prediction length without a hard-coded NA count.
  actual = Ncovid_case$Cases[seq_along(prediction)]
)
# Show the last six rows, where the observed values end.
tail(df, n = 6)
## Day floor.prediction. actual
## 16 16 2751 2294
## 17 17 3806 NA
## 18 18 5176 NA
## 19 19 6917 NA
## 20 20 9087 NA
## 21 21 11731 NA
# Plot the predicted values (black) together with the actual values (red).
ggplot(data = df, mapping = aes(x = Day, y = floor.prediction.)) +
  geom_point(colour = "black") +
  # Second layer: column 2 (the predictions) is dropped and rows with a missing
  # value are removed, so only days with an actual count are drawn.
  geom_point(
    mapping = aes(x = Day, y = actual),
    data = na.omit(df[, -2]),
    colour = "red"
  ) +
  ggtitle("COVID-19 Prediction - Actual - Red, Pred - Black") +
  theme(plot.title = element_text(hjust = 0.5))
