底下模擬鱷魚的長度與重量資料。
alligator = data.frame(
lnLength = c(3.87, 3.61, 4.33, 3.43, 3.81, 3.83, 3.46, 3.76, 3.50, 3.58, 4.19, 3.78, 3.71, 3.73, 3.78),
lnWeight = c(4.87, 3.93, 6.46, 3.33, 4.38, 4.70, 3.50, 4.50, 3.58, 3.64, 5.90, 4.43, 4.38, 4.42, 4.25)
)
plot(
alligator$lnLength,
alligator$lnWeight,
xlab = "Snout vent length (inches) on log scale",
ylab = "Weight (pounds) on log scale",
main = "Alligators in Central Florida"
)
# the prototype of lm
# subset: 選擇特徵的向量
# weights: 特徵的加權值
# lm(formula, data, subset, weights, na.action, ...)
set.seed(123)
training_idx <- sample(1:nrow(alligator), 10)
training_data <- alligator[training_idx,]
testing_data <- alligator[-training_idx,]
model = lm(lnWeight ~ lnLength, data = training_data)
summary(model)
##
## Call:
## lm(formula = lnWeight ~ lnLength, data = training_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22597 -0.03654 0.05600 0.07890 0.11871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.7292 0.6716 -13.00 1.16e-06 ***
## lnLength 3.4934 0.1741 20.07 3.97e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1299 on 8 degrees of freedom
## Multiple R-squared: 0.9805, Adjusted R-squared: 0.9781
## F-statistic: 402.7 on 1 and 8 DF, p-value: 3.966e-08
透過函式 summary 可以列出線性迴歸的相關資訊,包含 F-test, \(R^2\) 值, 與殘差分析等。
plot(
training_data$lnLength,
training_data$lnWeight,
xlab = "Snout vent length (inches) on log scale",
ylab = "Weight (pounds) on log scale",
main = "Alligators in Central Florida"
)
abline(model)
# 取得迴歸係數
beta = coef(model)
beta
## (Intercept) lnLength
## -8.729234 3.493439
由上可得迴歸方程式為
\[y=3.431098*x-8.476067\]
# 建立迴歸方程式
y1 <- function(x1) {
return(x1 * beta[2] + beta[1])
}
# 建立測試資料
new_length_data <- matrix(c(4.0, 4.1), nrow=1)
new_weight_data <- apply(new_length_data, 1, y1)
# 將新產生資料進行繪圖
plot(
training_data$lnLength,
training_data$lnWeight,
xlab = "Snout vent length (inches) on log scale",
ylab = "Weight (pounds) on log scale",
main = "Alligators in Central Florida"
)
abline(model)
abline(v = 4.0, col="red", lty=2)
abline(v = 4.1, col="red", lty=2)
points(new_length_data, new_weight_data, col="red")
lm.pred <- predict(model, testing_data)
data.frame(true=testing_data$lnWeight, pred=lm.pred)
## true pred
## 2 3.93 3.882081
## 7 3.50 3.358065
## 9 3.58 3.497803
## 10 3.64 3.777278
## 13 4.38 4.231425
# type 為殘差計算方式,有 working, response, deviance, pearson, partial 等
residuals(model, type="working")
## 5 12 6 11 14
## -0.200768996 -0.045965822 0.049362222 -0.008275862 0.118706134
## 1 15 8 4 3
## 0.079624657 -0.225965822 0.093902960 0.076737870 0.062642661