# Observation labels, one per row. Presumably month-day codes running from
# 3/24 to 4/12 -- confirm against the data source.
DATE <- as.character(c(324:331, 401:412))

# Daily domestic case counts, aligned element-by-element with DATE.
Domestic <- c(
  15, 14, 21, 83, 34, 33, 56, 87, 104, 160,
  183, 133, 216, 281, 382, 384, 442, 431, 439, 551
)

# Daily overseas (imported) case counts, aligned with DATE.
Oversea <- c(
  124, 122, 82, 120, 93, 63, 107, 152, 132, 244,
  97, 142, 65, 78, 149, 123, 136, 144, 191, 112
)

# Assemble the analysis table; DATE is stored as a factor rather than text.
cvdta <- data.frame(
  DATE = factor(DATE),
  Domestic = Domestic,
  Oversea = Oversea
)
## Inspect the data
# Scatter plot of domestic against overseas counts with the smoother turned
# off. `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, which would silently change the plot.
# NOTE(review): scatterplot() is presumably car::scatterplot -- confirm that
# the car package is attached before this line runs.
scatterplot(Domestic ~ Oversea, data = cvdta, smooth = FALSE)
#本土與境外確診數變異不一樣,就上圖分布來說似乎偏向正相關
# Scatter plot of domestic (y) against overseas (x) counts, overlaid with a
# fitted linear-model line and its standard-error band.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
ggplot(data = cvdta, mapping = aes(x = Oversea, y = Domestic)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
#灰色部分大,standard error大
## Fit the simple linear regression model
# Response: Domestic; single predictor: Oversea; both columns come from cvdta.
cvmod <- lm(
  formula = Domestic ~ Oversea,
  data = cvdta
)
# Display the coefficient table and fit statistics for the model.
summary(object = cvmod)
##
## Call:
## lm(formula = Domestic ~ Oversea, data = cvdta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -187.66 -136.95 -97.39 156.82 360.77
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.210 122.743 0.605 0.553
## Oversea 1.036 0.940 1.102 0.285
##
## Residual standard error: 174.5 on 18 degrees of freedom
## Multiple R-squared: 0.0632, Adjusted R-squared: 0.01115
## F-statistic: 1.214 on 1 and 18 DF, p-value: 0.285
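# t value = 1.102 < 2
# p value = 0.285 > 0.05
# Multiple R-squared: 0.0632; Adjusted R-squared: 0.01115
# The relationship is not significant: overseas case counts explain only about 6% of the variance in domestic case counts (about 1% after adjustment), so other variables must be involved…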
## How about Taipei's forecast daily high temperature as a predictor?
# One temperature reading per entry of DATE (20 values).
Tem <- c(
  21, 28, 25, 17, 18, 24, 26, 23, 17, 15,
  17, 21, 26, 26, 23, 24, 28, 30, 29, 31
)

# Pair the temperatures with the domestic counts; DATE is stored as a factor.
cvtdta <- data.frame(
  DATE = factor(DATE),
  Domestic = Domestic,
  Tem = Tem
)
# Scatter plot of domestic counts against temperature with the smoother
# turned off. `FALSE` is spelled out because `F` is an ordinary variable that
# can be reassigned.
# NOTE(review): scatterplot() is presumably car::scatterplot -- confirm that
# the car package is attached before this line runs.
scatterplot(Domestic ~ Tem, data = cvtdta, smooth = FALSE)
##本土確診數與氣溫變異似乎沒有不一樣。而就上圖分布來說似乎偏向正相關
# Scatter plot of domestic counts (y) against temperature (x), overlaid with
# a fitted linear-model line and its standard-error band.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
ggplot(data = cvtdta, mapping = aes(x = Tem, y = Domestic)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'
# Simple linear regression of domestic counts on temperature, using cvtdta.
cvtmod <- lm(
  formula = Domestic ~ Tem,
  data = cvtdta
)
# Display the coefficient table and fit statistics for the model.
summary(object = cvtmod)
##
## Call:
## lm(formula = Domestic ~ Tem, data = cvtdta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -279.56 -114.43 18.59 125.75 197.38
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -267.089 173.710 -1.538 0.142
## Tem 20.023 7.266 2.756 0.013 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 151.2 on 18 degrees of freedom
## Multiple R-squared: 0.2967, Adjusted R-squared: 0.2576
## F-statistic: 7.594 on 1 and 18 DF, p-value: 0.01301
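# t value = 2.756 > 2: the estimate is more than two standard errors from zero, significant
# p value = 0.013 < 0.05, significant
# Multiple R-squared: 0.2967; Adjusted R-squared: 0.2576
# The relationship is significant: the forecast daily high temperature explains about 30% of the variance in domestic case counts (about 26% after adjustment)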