Load packages
library('lattice')
library('ggplot2')
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.4
Descriptive Statistics
# Preview the first 6 rows. The data were re-imported after the regression
# equation was obtained, so they already contain the residual and y-hat columns.
head(dta, n = 6L)
## res X Y y.hat
## 1 2.6666667 5 63 60.3335
## 2 -0.3333333 8 67 67.3334
## 3 -0.3333333 11 74 74.3333
## 4 -1.0000000 7 64 65.0001
## 5 -4.0000000 13 75 78.9999
## 6 -7.6666667 12 69 76.6666
## res X Y y.hat
## Min. :-7.6667 Min. : 5.00 Min. :60.00 Min. :60.33
## 1st Qu.:-3.0000 1st Qu.: 6.75 1st Qu.:63.75 1st Qu.:64.42
## Median :-0.6667 Median : 9.50 Median :68.00 Median :70.83
## Mean : 0.0000 Mean : 9.25 Mean :70.25 Mean :70.25
## 3rd Qu.: 0.4167 3rd Qu.:12.00 3rd Qu.:74.25 3rd Qu.:76.67
## Max. :13.3333 Max. :13.00 Max. :90.00 Max. :79.00
# Descriptive statistics for every column of dta: basic counts, range and sum
# plus the descriptive measures (median, mean, SE, 95% CI of the mean,
# variance, standard deviation, coefficient of variation). Normality
# statistics are switched off.
stat.desc(
  dta,
  basic = TRUE,
  desc = TRUE,
  norm = FALSE,
  p = 0.95
)
## res X Y y.hat
## nbr.val 8.000000e+00 8.0000000 8.0000000 8.000000
## nbr.null 0.000000e+00 0.0000000 0.0000000 0.000000
## nbr.na 0.000000e+00 0.0000000 0.0000000 0.000000
## min -7.666667e+00 5.0000000 60.0000000 60.333500
## max 1.333333e+01 13.0000000 90.0000000 78.999900
## range 2.100000e+01 8.0000000 30.0000000 18.666400
## sum -7.771561e-16 74.0000000 562.0000000 562.000200
## median -6.666667e-01 9.5000000 68.0000000 70.833350
## mean -9.714451e-17 9.2500000 70.2500000 70.250025
## SE.mean 2.187628e+00 1.0978876 3.3687111 2.561701
## CI.mean.0.95 5.172917e+00 2.5960916 7.9657359 6.057461
## var 3.828571e+01 9.6428571 90.7857143 52.498500
## std.dev 6.187545e+00 3.1052950 9.5281538 7.245585
## coef.var -6.369423e+16 0.3357076 0.1356321 0.103140
X, Y Correlation
# Keep only X and Y for the correlation analysis.
# The columns are selected by name rather than by position, so the subset stays
# correct even if the column order of dta changes (dta was re-imported with the
# extra res and y.hat columns, which is exactly when positions can shift).
mydata <- dta[, c("X", "Y")]
head(mydata, 6) # inspect the first 6 rows
## X Y
## 1 5 63
## 2 8 67
## 3 11 74
## 4 7 64
## 5 13 75
## 6 12 69
# Pearson correlation matrix of X and Y, displayed to four decimal places.
res <- cor(mydata, method = "pearson")
round(res, digits = 4)
## X Y
## X 1.0000 0.7605
## Y 0.7605 1.0000
Scatter diagram
Y ~ X
# Scatter plot of Y against X: points ("p") on a reference grid ("g") with a
# fitted regression line ("r").
xyplot(
  Y ~ X,
  data = dta,
  xlab = "X",
  ylab = "Y",
  type = c("p", "g", "r")
)

res ~ X
# Residual plot: residuals against the predictor X, drawn as points ("p") on a
# reference grid ("g") with a fitted regression line ("r").
# The y-axis label previously read "resdual" (typo); it is now "residual".
xyplot(
  res ~ X,
  data = dta,
  ylab = "residual",
  xlab = "X",
  type = c("p", "g", "r")
)

res ~ y.hat
# Residual plot: residuals against the fitted values (y.hat), drawn as points
# ("p") on a reference grid ("g") with a fitted regression line ("r").
xyplot(
  res ~ y.hat,
  data = dta,
  xlab = "y.hat",
  ylab = "res",
  type = c("p", "g", "r")
)

Linear models
# Fit the simple linear regression of Y on X, then print the model summary
# (residual quantiles, coefficient table, R-squared and F-statistic).
model <- lm(Y ~ X, data = dta)
summary(model)
##
## Call:
## lm(formula = Y ~ X, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6667 -3.0000 -0.6667 0.4167 13.3333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.6667 7.8869 6.171 0.000832 ***
## X 2.3333 0.8135 2.868 0.028487 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.683 on 6 degrees of freedom
## Multiple R-squared: 0.5783, Adjusted R-squared: 0.508
## F-statistic: 8.228 on 1 and 6 DF, p-value: 0.02849
# Extract the residuals of the fitted model; the residuals obtained in this
# step are the ones imported back into the original data.
residuals(model)
## 1 2 3 4 5 6 7
## 2.6666667 -0.3333333 -0.3333333 -1.0000000 -4.0000000 -7.6666667 13.3333333
## 8
## -2.6666667
Data visualisation
# Scatter plot of Y against X with a linear-model ("lm") smoother drawn
# underneath the points. Both layers share the same x/y mapping, so it is
# declared once at the plot level.
ggplot(dta, aes(x = X, y = Y)) +
  geom_smooth(method = "lm") +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'

The end