# Dates, stored as string labels
# (presumably month-day without a leading zero -- TODO confirm).
DATE <- c(
  "324", "325", "326", "327", "328", "329", "330", "331", "401", "402",
  "403", "404", "405", "406", "407", "408", "409", "410", "411", "412"
)
# Number of domestic (locally acquired) cases per date.
Domestic <- c(
  15, 14, 21, 83, 34, 33, 56, 87, 104, 160,
  183, 133, 216, 281, 382, 384, 442, 431, 439, 551
)
# Number of imported cases per date.
Oversea <- c(
  124, 122, 82, 120, 93, 63, 107, 152, 132, 244,
  97, 142, 65, 78, 149, 123, 136, 144, 191, 112
)
# Number of locally acquired confirmed cases in Kaohsiung per date.
Kaohsiung <- c(
  13, 0, 4, 2, 2, 4, 1, 4, 2, 9,
  1, 0, 9, 37, 59, 58, 38, 26, 29, 61
)
# Combine the four series into one data frame; DATE is stored as a factor.
cvdta <- data.frame(
  DATE = factor(DATE),
  Domestic = Domestic,
  Oversea = Oversea,
  Kaohsiung = Kaohsiung
)
# Domestic vs. imported cases: two visual checks, then a simple linear
# regression of Domestic on Oversea.
# NOTE(review): scatterplot() is presumably car::scatterplot() and ggplot()
# comes from ggplot2; neither package is attached in the visible code --
# confirm both are loaded before this point.
# TRUE/FALSE are spelled out below because `T` and `F` are ordinary variables
# that can be reassigned.
scatterplot(
  Domestic ~ Oversea,
  data = cvdta,
  smooth = FALSE
)
# Same pair of variables with a fitted straight line; se = TRUE draws the
# confidence band around it.
ggplot(cvdta, aes(x = Oversea, y = Domestic)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Fit the regression and print its summary.
cvmod <- lm(Domestic ~ Oversea, data = cvdta)
summary(cvmod)
##
## Call:
## lm(formula = Domestic ~ Oversea, data = cvdta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -187.66 -136.95 -97.39 156.82 360.77
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.210 122.743 0.605 0.553
## Oversea 1.036 0.940 1.102 0.285
##
## Residual standard error: 174.5 on 18 degrees of freedom
## Multiple R-squared: 0.0632, Adjusted R-squared: 0.01115
## F-statistic: 1.214 on 1 and 18 DF, p-value: 0.285
# Kaohsiung vs. domestic cases: two visual checks, then a simple linear
# regression of Kaohsiung on Domestic.
# NOTE(review): scatterplot() is presumably car::scatterplot() and ggplot()
# comes from ggplot2; neither package is attached in the visible code --
# confirm both are loaded before this point.
# TRUE/FALSE are spelled out below because `T` and `F` are ordinary variables
# that can be reassigned.
scatterplot(
  Kaohsiung ~ Domestic,
  data = cvdta,
  smooth = FALSE
)
# Same pair of variables with a fitted straight line; se = TRUE draws the
# confidence band around it.
ggplot(cvdta, aes(x = Domestic, y = Kaohsiung)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Fit the regression and print its summary.
cvmod2 <- lm(Kaohsiung ~ Domestic, data = cvdta)
summary(cvmod2)
##
## Call:
## lm(formula = Kaohsiung ~ Domestic, data = cvdta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.053 -6.770 -1.640 5.462 22.115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.4002 3.9495 -0.861 0.401
## Domestic 0.1055 0.0149 7.077 1.34e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.4 on 18 degrees of freedom
## Multiple R-squared: 0.7356, Adjusted R-squared: 0.7209
## F-statistic: 50.08 on 1 and 18 DF, p-value: 1.342e-06
從 ggplot 圖和散佈圖來看,境外移入數與本土案例數的迴歸線周圍灰色區域(信賴區間)較寬,資料點也較分散,因此兩者的相關程度可能較低。相比之下,高雄本土確診數與本土案例數的資料點較貼近迴歸線,相關程度較高。
從簡單線性迴歸模型的結果來看,境外移入數與本土案例數這一組的 R 平方為 0.0632,高雄本土確診數與本土案例數這一組的 R 平方為 0.7356;後者比前者更接近 1,模型的解釋力較強。此外,前者的 p 值為 0.285,未達顯著水準;後者的 p 值小於 0.001,具有統計上的顯著意義。因此,境外移入數無法有效預測本土案例數,而本土案例數可以預測高雄本土確診數。