準備資料

底下模擬鱷魚的長度與重量資料。

alligator = data.frame(
  lnLength = c(3.87, 3.61, 4.33, 3.43, 3.81, 3.83, 3.46, 3.76, 3.50, 3.58, 4.19, 3.78, 3.71, 3.73, 3.78),
  lnWeight = c(4.87, 3.93, 6.46, 3.33, 4.38, 4.70, 3.50, 4.50, 3.58, 3.64, 5.90, 4.43, 4.38, 4.42, 4.25)
)

plot(
  alligator$lnLength, 
  alligator$lnWeight,
  xlab = "Snout vent length (inches) on log scale",
  ylab = "Weight (pounds) on log scale",
  main = "Alligators in Central Florida"
)

建立簡單線性迴歸模型

# the prototype of lm
# subset: 選擇特徵的向量
# weights: 特徵的加權值
# lm(formula, data, subset, weights, na.action, ...)
set.seed(123)
training_idx <- sample(1:nrow(alligator), 10)
training_data <- alligator[training_idx,]
testing_data <- alligator[-training_idx,]
model = lm(lnWeight ~ lnLength, data = training_data)
summary(model)
## 
## Call:
## lm(formula = lnWeight ~ lnLength, data = training_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22597 -0.03654  0.05600  0.07890  0.11871 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -8.7292     0.6716  -13.00 1.16e-06 ***
## lnLength      3.4934     0.1741   20.07 3.97e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1299 on 8 degrees of freedom
## Multiple R-squared:  0.9805, Adjusted R-squared:  0.9781 
## F-statistic: 402.7 on 1 and 8 DF,  p-value: 3.966e-08

透過函式 summary 可以列出線性迴歸的相關資訊,包含 F-test, \(R^2\) 值, 與殘差分析等。

繪圖檢視迴歸結果

plot(
  training_data$lnLength, 
  training_data$lnWeight,
  xlab = "Snout vent length (inches) on log scale",
  ylab = "Weight (pounds) on log scale",
  main = "Alligators in Central Florida"
)
abline(model)

自建立方程式並用於迴歸預測

# 取得迴歸係數
beta = coef(model)
beta
## (Intercept)    lnLength 
##   -8.729234    3.493439

由上可得迴歸方程式為

\[y=3.431098*x-8.476067\]

# 建立迴歸方程式
y1 <- function(x1) {
  return(x1 * beta[2] + beta[1])
}

# 建立測試資料
new_length_data <- matrix(c(4.0, 4.1), nrow=1)
new_weight_data <- apply(new_length_data, 1, y1)

# 將新產生資料進行繪圖
plot(
  training_data$lnLength, 
  training_data$lnWeight,
  xlab = "Snout vent length (inches) on log scale",
  ylab = "Weight (pounds) on log scale",
  main = "Alligators in Central Florida"
)
abline(model)
abline(v = 4.0, col="red", lty=2)
abline(v = 4.1, col="red", lty=2)
points(new_length_data, new_weight_data, col="red")

透過內建函式預測結果

lm.pred <- predict(model, testing_data)
data.frame(true=testing_data$lnWeight, pred=lm.pred)
##    true     pred
## 2  3.93 3.882081
## 7  3.50 3.358065
## 9  3.58 3.497803
## 10 3.64 3.777278
## 13 4.38 4.231425

計算殘差

# type 為殘差計算方式,有 working, response, deviance, pearson, partial 等
residuals(model, type="working")
##            5           12            6           11           14 
## -0.200768996 -0.045965822  0.049362222 -0.008275862  0.118706134 
##            1           15            8            4            3 
##  0.079624657 -0.225965822  0.093902960  0.076737870  0.062642661