# 우한-코로나 바이러스 감염자 시간 경과에 따른 추세 분석 (Not Cumulative, Count)
# Reset the global environment, attach readxl, and read the case-count
# workbook from the project directory.
# NOTE(review): `rm(list = ls())` and the hard-coded `setwd()` tie this
# script to one session/machine; both are kept here so behaviour is unchanged.
rm(list = ls())
library(readxl)
setwd("/Users/wooddekk/Desktop/project_R/for_fun/Wuhan")
wuhan_df <- read_xlsx(path = "data/20200207.xlsx")
## New names:
## * Released -> Released...12
## * Released -> Released...13
# Preview the first rows of the imported table.
head(x = wuhan_df)
## # A tibble: 6 x 14
## Date Suspected Confirmed `Daily Incread … Serious Deaths
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2019-12-31 00:00:00 27 NA NA NA NA
## 2 2020-01-03 00:00:00 44 NA NA NA NA
## 3 2020-01-05 00:00:00 59 NA NA NA NA
## 4 2020-01-10 00:00:00 NA 41 NA NA 1
## 5 2020-01-11 00:00:00 NA 41 NA NA 1
## 6 2020-01-12 00:00:00 NA 41 NA NA 1
## # … with 8 more variables: Recovered <dbl>, `Deaths+Recovered` <dbl>,
## # `D/(D+R)` <dbl>, `D/C` <dbl>, Quarantined <dbl>, Released...12 <dbl>,
## # Released...13 <dbl>, Total <dbl>
# Keep only the columns used below and add a 1-based row index that acts as
# the time axis of the regression. `seq_len()` is used so an empty table
# yields an empty index instead of c(1, 0).
wuhan_df <- data.frame(
  Index = seq_len(nrow(wuhan_df)),
  Date = wuhan_df$Date,
  Confirmed = wuhan_df$Confirmed,
  Deaths = wuhan_df$Deaths
)

# Row-to-row increase of the cumulative confirmed count.
# The previous code built the lagged vector with `1:n-1`, which parses as
# `(1:n) - 1` (i.e. 0:(n-1)) and only worked because R silently drops index 0;
# `diff()` computes the same x[i] - x[i-1] without that trap.
# Note: increments are per row, not per calendar day (the first dates in the
# sheet are not consecutive).
confirm_diff <- diff(wuhan_df$Confirmed)

# Attach each increment to the later row of its pair (rows 2..n).
wuhan_diff <- wuhan_df[-1, ]
wuhan_diff$diff_confirm <- confirm_diff

# Fit only from the 9th differenced row onward.
# NOTE(review): presumably this skips the early rows where `Confirmed` is
# missing or flat -- confirm the intended cut-off.
wuhan_diff_2 <- wuhan_diff[9:nrow(wuhan_diff), ]

# Linear trend of the per-row increase against the row index.
fit_diff <- lm(diff_confirm ~ Index, data = wuhan_diff_2)
summary(fit_diff)
##
## Call:
## lm(formula = diff_confirm ~ Index, data = wuhan_diff_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -495.20 -319.70 -97.86 285.98 795.65
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2586.77 267.18 -9.682 5.43e-09 ***
## Index 202.68 12.45 16.279 5.27e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 370.5 on 20 degrees of freedom
## Multiple R-squared: 0.9298, Adjusted R-squared: 0.9263
## F-statistic: 265 on 1 and 20 DF, p-value: 5.274e-13
# Scatter of the per-row confirmed increase against the row index, overlaid
# with the fitted regression line.
plot(
  x = wuhan_diff_2$Index,
  y = wuhan_diff_2$diff_confirm
)
abline(reg = fit_diff)
#################################################
# Future Prediction
#################################################
# Number of index steps to extrapolate beyond the fitted data.
horizon <- 22

# Future indices start right after the last fitted index instead of at a
# hard-coded 32, so the forecast stays aligned when rows are added to the
# workbook.
fut.idx.vec <- max(wuhan_diff_2$Index) + seq_len(horizon)

# Placeholder observations for the future range: NA so that only the
# predicted line is drawn there.
fut.confirm.vec <- rep(NA, horizon)
fut_df <- data.frame(Index = fut.idx.vec, Confirmed = fut.confirm.vec)

# In-sample fitted values and out-of-sample extrapolation from the same model.
pred_present <- predict(fit_diff)
pred_fut <- predict(fit_diff, newdata = fut_df)

# Observed increases (future part is NA, so it only reserves x-axis room).
plot(c(wuhan_diff_2$Index, fut.idx.vec),
     c(wuhan_diff_2$diff_confirm, fut.confirm.vec),
     main = "Index - Confirmed",
     ylim = c(0, 10000))
# Blue: fit over the observed range; red: extrapolation over the horizon.
lines(wuhan_diff_2$Index, pred_present, col = "blue", lwd = 3)
lines(fut.idx.vec, pred_fut, col = "red", lwd = 3)
legend("topleft", legend = c("Train", "Test"),
       col = c("blue", "red"), lty = 1, cex = 0.8)