1 load packages

library('lattice')
library('ggplot2')
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.4
library('Sleuth3')
## Warning: package 'Sleuth3' was built under R version 4.0.4

2 input data

dta <- read.table("C:/Users/pc/Desktop/ch421.txt", h=T)

3 Descriptive Statistics

head(dta) #6 rows(本資料將X平方後匯入原資料第三欄)
##    X   Y Xsqu
## 1 12 130  144
## 2 12 129  144
## 3 16 128  256
## 4 16 140  256
## 5 16 143  256
## 6 20 144  400
summary(dta)
##        X            Y              Xsqu      
##  Min.   :12   Min.   :128.0   Min.   :144.0  
##  1st Qu.:16   1st Qu.:134.0   1st Qu.:256.0  
##  Median :20   Median :143.0   Median :400.0  
##  Mean   :20   Mean   :139.5   Mean   :426.2  
##  3rd Qu.:23   3rd Qu.:145.0   3rd Qu.:529.0  
##  Max.   :28   Max.   :148.0   Max.   :784.0
stat.desc(dta, basic = TRUE, desc=TRUE, norm=FALSE, p=0.95)
##                        X            Y         Xsqu
## nbr.val       13.0000000 1.300000e+01 1.300000e+01
## nbr.null       0.0000000 0.000000e+00 0.000000e+00
## nbr.na         0.0000000 0.000000e+00 0.000000e+00
## min           12.0000000 1.280000e+02 1.440000e+02
## max           28.0000000 1.480000e+02 7.840000e+02
## range         16.0000000 2.000000e+01 6.400000e+02
## sum          260.0000000 1.813000e+03 5.540000e+03
## median        20.0000000 1.430000e+02 4.000000e+02
## mean          20.0000000 1.394615e+02 4.261538e+02
## SE.mean        1.4763086 2.021092e+00 5.915341e+01
## CI.mean.0.95   3.2166002 4.403581e+00 1.288842e+02
## var           28.3333333 5.310256e+01 4.548864e+04
## std.dev        5.3229065 7.287151e+00 2.132807e+02
## coef.var       0.2661453 5.225205e-02 5.004781e-01

4 X,Y Correlation

mydata <- dta[, c(1,2)]#資料的2、3欄求相關係數(X、Y)

head(mydata, 6)#檢視資料前6行
##    X   Y
## 1 12 130
## 2 12 129
## 3 16 128
## 4 16 140
## 5 16 143
## 6 20 144
res <- cor(mydata)

round(res, 4)#保留四位小數
##        X      Y
## X 1.0000 0.4168
## Y 0.4168 1.0000

5 Scatter diagram

5.1 Y ~ X

xyplot(Y ~ X, data=dta,
       ylab="Y", 
       xlab="X",
       type=c("p", "g", "r"))

6 linear models

model <- lm(formula= Y ~ X, data=dta)
summary(model)
## 
## Call:
## lm(formula = Y ~ X, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.026  -5.897   3.827   4.827   6.827 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 128.0498     7.7457  16.532 4.08e-09 ***
## X             0.5706     0.3752   1.521    0.157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.919 on 11 degrees of freedom
## Multiple R-squared:  0.1737, Adjusted R-squared:  0.09859 
## F-statistic: 2.313 on 1 and 11 DF,  p-value: 0.1565
resid(model)
##          1          2          3          4          5          6          7 
##  -4.896833  -5.896833  -9.179186   2.820814   5.820814   4.538462   4.538462 
##          8          9         10         11         12         13 
##   3.826697   5.826697   4.826697   6.826697 -10.026244  -9.026244

7 Data visualisation

ggplot(data = dta, aes(x=X))+
  geom_smooth(aes(y=Y), method = 'lm')+
  geom_point(aes(y=Y))
## `geom_smooth()` using formula 'y ~ x'

8 Multiple Linear Regression

fit <- lm(Y ~ X + Xsqu, data=dta)
summary(fit) # show results
## 
## Call:
## lm(formula = Y ~ X + Xsqu, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4263  -0.7363   0.5737   2.3374   3.5737 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 53.64815   17.48155   3.069 0.011864 *  
## X            8.58805    1.82804   4.698 0.000844 ***
## Xsqu        -0.20168    0.04562  -4.421 0.001293 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.222 on 10 degrees of freedom
## Multiple R-squared:  0.7203, Adjusted R-squared:  0.6644 
## F-statistic: 12.88 on 2 and 10 DF,  p-value: 0.001712
anova(model, fit)
## Analysis of Variance Table
## 
## Model 1: Y ~ X
## Model 2: Y ~ X + Xsqu
##   Res.Df    RSS Df Sum of Sq      F   Pr(>F)   
## 1     11 526.54                                
## 2     10 178.23  1     348.3 19.542 0.001293 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9 Data visualisation

ggplot(dta, aes(X, Y)) +
 stat_smooth(method="glm",
             formula=y ~ x,
             method.args=list(family=poisson),
             size=rel(.5)) +
 geom_point(pch=20, alpha=.5) +
 labs(y="Y", 
      x="X") +
 theme_minimal()+
 theme(legend.position = c(.95, .9))

ggplot(dta, aes(X, Y)) +
 geom_point(pch = 20) +
 stat_smooth(method = "gam", formula = y ~ s(x, k = 3)) +
 labs(x = "X", y = "Y")

10 The end