台灣 COVID-19 的確診數日漸攀升,可以用什麼變項來解釋本土確診數呢?

How about 海外確診數?

# Day labels for the 20 daily observations, kept as text.
# NOTE(review): labels look like month-day codes ("324" = Mar 24) — confirm.
DATE <- c(
  "324", "325", "326", "327", "328", "329", "330", "331", "401", "402",
  "403", "404", "405", "406", "407", "408", "409", "410", "411", "412"
)

# Domestic (locally acquired) confirmed cases, one value per day label.
Domestic <- c(
  15, 14, 21, 83, 34, 33, 56, 87, 104, 160,
  183, 133, 216, 281, 382, 384, 442, 431, 439, 551
)

# Overseas (imported) confirmed cases, one value per day label.
Oversea <- c(
  124, 122, 82, 120, 93, 63, 107, 152, 132, 244,
  97, 142, 65, 78, 149, 123, 136, 144, 191, 112
)

# Assemble the analysis table; the day label is stored as a factor column
# while the standalone DATE vector stays character for later reuse.
cvdta <- data.frame(DATE = factor(DATE), Domestic, Oversea)

##檢視資料

# Scatterplot of domestic against overseas cases with the `smooth` option off.
# FALSE is spelled out because T/F are ordinary variables that can be
# reassigned, whereas TRUE/FALSE are reserved words.
# NOTE(review): scatterplot() is presumably car::scatterplot — confirm the
# package is attached in setup code not visible here.
scatterplot(Domestic ~ Oversea, data = cvdta, smooth = FALSE)

#本土與境外確診數變異不一樣,就上圖分布來說似乎偏向正相關

# Scatter of domestic against overseas cases with a fitted straight line
# (method = "lm") and its uncertainty band (se = TRUE).
# Data is passed first and the mapping second, matching ggplot()'s argument
# order; TRUE is spelled out because T can be reassigned.
ggplot(data = cvdta, mapping = aes(x = Oversea, y = Domestic)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

#灰色部分(迴歸線的信賴區間)很寬,表示迴歸線估計的 standard error 大

##Fitting the simple linear regression model

# Simple linear regression: domestic cases as the response, overseas cases as
# the single predictor, fitted on the cvdta table.
cvmod <- lm(
  formula = Domestic ~ Oversea,
  data = cvdta
)

# Print coefficient estimates, residual summary and fit statistics.
summary(cvmod)
## 
## Call:
## lm(formula = Domestic ~ Oversea, data = cvdta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -187.66 -136.95  -97.39  156.82  360.77 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   74.210    122.743   0.605    0.553
## Oversea        1.036      0.940   1.102    0.285
## 
## Residual standard error: 174.5 on 18 degrees of freedom
## Multiple R-squared:  0.0632, Adjusted R-squared:  0.01115 
## F-statistic: 1.214 on 1 and 18 DF,  p-value: 0.285

#t value =1.102 <2

#p value=0.285 > 0.05

#Multiple R-squared: 0.0632(Adjusted R-squared: 0.01115)

#相關未達顯著,海外確診數僅能解釋本土確診數約6%的變異(調整後約1%),還有其他變項…

##how about 台北每日最高溫預測?

# Forecast daily high temperature for Taipei, one value per day label.
# NOTE(review): units are presumably degrees Celsius — confirm.
Tem <- c(
  21, 28, 25, 17, 18, 24, 26, 23, 17, 15,
  17, 21, 26, 26, 23, 24, 28, 30, 29, 31
)

# Assemble the temperature analysis table from the existing DATE and Domestic
# vectors; the day label is stored as a factor column.
cvtdta <- data.frame(DATE = factor(DATE), Domestic, Tem)
# Scatterplot of domestic cases against temperature with the `smooth` option
# off. FALSE is spelled out because T/F are ordinary variables that can be
# reassigned, whereas TRUE/FALSE are reserved words.
# NOTE(review): scatterplot() is presumably car::scatterplot — confirm the
# package is attached in setup code not visible here.
scatterplot(Domestic ~ Tem, data = cvtdta, smooth = FALSE)

##本土確診數與氣溫變異似乎沒有不一樣。而就上圖分布來說似乎偏向正相關

# Scatter of domestic cases against temperature with a fitted straight line
# (method = "lm") and its uncertainty band (se = TRUE).
# Data is passed first and the mapping second, matching ggplot()'s argument
# order; TRUE is spelled out because T can be reassigned.
ggplot(data = cvtdta, mapping = aes(x = Tem, y = Domestic)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

# Simple linear regression: domestic cases as the response, temperature as
# the single predictor, fitted on the cvtdta table.
cvtmod <- lm(
  formula = Domestic ~ Tem,
  data = cvtdta
)

# Print coefficient estimates, residual summary and fit statistics.
summary(cvtmod)
## 
## Call:
## lm(formula = Domestic ~ Tem, data = cvtdta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -279.56 -114.43   18.59  125.75  197.38 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -267.089    173.710  -1.538    0.142  
## Tem           20.023      7.266   2.756    0.013 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 151.2 on 18 degrees of freedom
## Multiple R-squared:  0.2967, Adjusted R-squared:  0.2576 
## F-statistic: 7.594 on 1 and 18 DF,  p-value: 0.01301

#t value =2.756 >2,斜率估計值距 0 已超過兩個標準誤,達顯著

#p value=0.013 < 0.05,達顯著

#Multiple R-squared: 0.2967(Adjusted R-squared: 0.2576)

#相關達顯著,每日最高溫預測能解釋本土確診數約30%的變異(調整後約26%)