load packages
library('lattice')
library('ggplot2')
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.4
## Warning: package 'Sleuth3' was built under R version 4.0.4
Descriptive Statistics
head(dta) #6 rows(本資料將X平方後匯入原資料第三欄)
## X Y Xsqu
## 1 12 130 144
## 2 12 129 144
## 3 16 128 256
## 4 16 140 256
## 5 16 143 256
## 6 20 144 400
## X Y Xsqu
## Min. :12 Min. :128.0 Min. :144.0
## 1st Qu.:16 1st Qu.:134.0 1st Qu.:256.0
## Median :20 Median :143.0 Median :400.0
## Mean :20 Mean :139.5 Mean :426.2
## 3rd Qu.:23 3rd Qu.:145.0 3rd Qu.:529.0
## Max. :28 Max. :148.0 Max. :784.0
stat.desc(dta, basic = TRUE, desc=TRUE, norm=FALSE, p=0.95)
## X Y Xsqu
## nbr.val 13.0000000 1.300000e+01 1.300000e+01
## nbr.null 0.0000000 0.000000e+00 0.000000e+00
## nbr.na 0.0000000 0.000000e+00 0.000000e+00
## min 12.0000000 1.280000e+02 1.440000e+02
## max 28.0000000 1.480000e+02 7.840000e+02
## range 16.0000000 2.000000e+01 6.400000e+02
## sum 260.0000000 1.813000e+03 5.540000e+03
## median 20.0000000 1.430000e+02 4.000000e+02
## mean 20.0000000 1.394615e+02 4.261538e+02
## SE.mean 1.4763086 2.021092e+00 5.915341e+01
## CI.mean.0.95 3.2166002 4.403581e+00 1.288842e+02
## var 28.3333333 5.310256e+01 4.548864e+04
## std.dev 5.3229065 7.287151e+00 2.132807e+02
## coef.var 0.2661453 5.225205e-02 5.004781e-01
X,Y Correlation
mydata <- dta[, c(1,2)]#資料的2、3欄求相關係數(X、Y)
head(mydata, 6)#檢視資料前6行
## X Y
## 1 12 130
## 2 12 129
## 3 16 128
## 4 16 140
## 5 16 143
## 6 20 144
res <- cor(mydata)
round(res, 4)#保留四位小數
## X Y
## X 1.0000 0.4168
## Y 0.4168 1.0000
Scatter diagram
Y ~ X
xyplot(Y ~ X, data=dta,
ylab="Y",
xlab="X",
type=c("p", "g", "r"))

linear models
model <- lm(formula= Y ~ X, data=dta)
summary(model)
##
## Call:
## lm(formula = Y ~ X, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.026 -5.897 3.827 4.827 6.827
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 128.0498 7.7457 16.532 4.08e-09 ***
## X 0.5706 0.3752 1.521 0.157
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.919 on 11 degrees of freedom
## Multiple R-squared: 0.1737, Adjusted R-squared: 0.09859
## F-statistic: 2.313 on 1 and 11 DF, p-value: 0.1565
## 1 2 3 4 5 6 7
## -4.896833 -5.896833 -9.179186 2.820814 5.820814 4.538462 4.538462
## 8 9 10 11 12 13
## 3.826697 5.826697 4.826697 6.826697 -10.026244 -9.026244
Data visualisation
ggplot(data = dta, aes(x=X))+
geom_smooth(aes(y=Y), method = 'lm')+
geom_point(aes(y=Y))
## `geom_smooth()` using formula 'y ~ x'

Multiple Linear Regression
fit <- lm(Y ~ X + Xsqu, data=dta)
summary(fit) # show results
##
## Call:
## lm(formula = Y ~ X + Xsqu, data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.4263 -0.7363 0.5737 2.3374 3.5737
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.64815 17.48155 3.069 0.011864 *
## X 8.58805 1.82804 4.698 0.000844 ***
## Xsqu -0.20168 0.04562 -4.421 0.001293 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.222 on 10 degrees of freedom
## Multiple R-squared: 0.7203, Adjusted R-squared: 0.6644
## F-statistic: 12.88 on 2 and 10 DF, p-value: 0.001712
## Analysis of Variance Table
##
## Model 1: Y ~ X
## Model 2: Y ~ X + Xsqu
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 11 526.54
## 2 10 178.23 1 348.3 19.542 0.001293 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Data visualisation
ggplot(dta, aes(X, Y)) +
stat_smooth(method="glm",
formula=y ~ x,
method.args=list(family=poisson),
size=rel(.5)) +
geom_point(pch=20, alpha=.5) +
labs(y="Y",
x="X") +
theme_minimal()+
theme(legend.position = c(.95, .9))

ggplot(dta, aes(X, Y)) +
geom_point(pch = 20) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 3)) +
labs(x = "X", y = "Y")

The end