1 load packages

library('lattice')
library('ggplot2')
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.4

2 input data

# Read the whitespace-delimited data file; the first line holds the column
# names. `header = TRUE` is spelled out in full: the abbreviated `h = T`
# relied on partial argument matching and on `T`, which can be reassigned.
dta <- read.table("C:/Users/pc/Desktop/ch412.txt", header = TRUE)

3 Descriptive Statistics

# Preview the first 6 rows (original note: the seventh observation was
# removed from this data set).
head(dta, n = 6L)
##    X  Y
## 1  5 63
## 2  8 67
## 3 11 74
## 4  7 64
## 5 13 75
## 6 12 69
# Per-column summary: minimum, quartiles, median, mean, and maximum.
summary(object = dta)
##        X                Y        
##  Min.   : 5.000   Min.   :60.00  
##  1st Qu.: 6.500   1st Qu.:63.50  
##  Median : 8.000   Median :67.00  
##  Mean   : 8.857   Mean   :67.43  
##  3rd Qu.:11.500   3rd Qu.:71.50  
##  Max.   :13.000   Max.   :75.00
# Descriptive statistics table from pastecs: basic counts and descriptive
# measures are requested, normality statistics are not, and the confidence
# interval on the mean uses the 0.95 level.
stat.desc(
  dta,
  basic = TRUE,
  desc = TRUE,
  norm = FALSE,
  p = 0.95
)
##                       X            Y
## nbr.val       7.0000000   7.00000000
## nbr.null      0.0000000   0.00000000
## nbr.na        0.0000000   0.00000000
## min           5.0000000  60.00000000
## max          13.0000000  75.00000000
## range         8.0000000  15.00000000
## sum          62.0000000 472.00000000
## median        8.0000000  67.00000000
## mean          8.8571429  67.42857143
## SE.mean       1.1837908   2.12532511
## CI.mean.0.95  2.8966316   5.20048319
## var           9.8095238  31.61904762
## std.dev       3.1320159   5.62308168
## coef.var      0.3536147   0.08339316

4 X,Y Correlation

# Keep columns 1 and 2 of the data (X and Y) for the correlation analysis.
mydata <- dta[, 1:2]

# Inspect the first 6 rows of the selected columns.
head(mydata, n = 6L)
##    X  Y
## 1  5 63
## 2  8 67
## 3 11 74
## 4  7 64
## 5 13 75
## 6 12 69
# Correlation matrix of the selected columns.
res <- cor(mydata)

# Display the matrix rounded to four decimal places.
round(res, digits = 4)
##        X      Y
## X 1.0000 0.9031
## Y 0.9031 1.0000

5 Scatter diagram

5.1 Y ~ X

# Lattice scatter plot of Y against X; the `type` vector requests points
# ("p"), a reference grid ("g"), and a fitted regression line ("r").
xyplot(Y ~ X, data = dta, xlab = "X", ylab = "Y", type = c("p", "g", "r"))

6 linear models

# Fit the simple linear regression of Y on X, then print the coefficient
# table and the overall fit statistics.
model <- lm(Y ~ X, data = dta)
summary(model)
## 
## Call:
## lm(formula = Y ~ X, data = dta)
## 
## Residuals:
##       1       2       3       4       5       6       7 
##  1.8252  0.9612  3.0971 -0.4175  0.8544 -3.5243 -2.7961 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  53.0680     3.2136  16.514 1.49e-05 ***
## X             1.6214     0.3448   4.702  0.00533 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.645 on 5 degrees of freedom
## Multiple R-squared:  0.8156, Adjusted R-squared:  0.7787 
## F-statistic: 22.11 on 1 and 5 DF,  p-value: 0.005327
# Residuals of the fitted model, one per observation.
residuals(model)
##          1          2          3          4          5          6          7 
##  1.8252427  0.9611650  3.0970874 -0.4174757  0.8543689 -3.5242718 -2.7961165

7 Data visualisation

# Scatter plot of Y against X with a fitted linear-model smoother drawn
# underneath the points. Both layers share the same x/y mapping, so it is
# declared once in the top-level aes().
ggplot(dta, aes(x = X, y = Y)) +
  geom_smooth(method = "lm") +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'

8 The end