carsデータ

散布図

plot(cars$speed,cars$dist)

speedを独立変数,distを従属変数とする回帰分析を実行する.

lm_cars<-lm(dist ~ speed, data=cars)
summary(lm_cars)
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## speed         3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12

結果をまとめる.

\[dist=-17.58+3.93*speed,\ \ \ \ \ \ \ \ R^2=0.651\]

回帰直線をプロット.

plot(cars$speed,cars$dist,col="lightgrey")
abline(coef=coef(lm_cars),col="red",lwd=2)

最適化optim関数を使って,自力で最小二乗法を実行する.

sumsq<-function(coef) sum((cars$dist-coef[[1]]-coef[[2]]*cars$speed)^2)
optim(c(0,0),sumsq)
## $par
## [1] -17.571729   3.931832
## 
## $value
## [1] 11353.52
## 
## $counts
## function gradient 
##       91       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

分散共分散から係数を求める.

cov_cars<-cov(cars)
mean_speed<-mean(cars$speed)
mean_dist<-mean(cars$dist)

c(mean_dist-cov_cars[1,2]*mean_speed/cov_cars[1,1],
  cov_cars[1,2]/cov_cars[1,1])
## [1] -17.579095   3.932409

回帰直線が平均の組を通ることの確認.

plot(cars$speed,cars$dist)
lm_cars<-lm(dist ~ speed, data=cars)
abline(coef=coef(lm_cars),col="red",lwd=2)
points(mean_speed,mean_dist,pch=19,col="blue")
segments(mean_speed,0,mean_speed,mean_dist,col="blue",lty=2,lwd=2)
segments(0,mean_dist,mean_speed,mean_dist,col="blue",lty=2,lwd=2)

2乗項を加えた回帰分析.

lm_sq_cars<-lm(dist ~ speed+I(speed^2), data=cars)
summary(lm_sq_cars)
## 
## Call:
## lm(formula = dist ~ speed + I(speed^2), data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.720  -9.184  -3.188   4.628  45.152 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  2.47014   14.81716   0.167    0.868
## speed        0.91329    2.03422   0.449    0.656
## I(speed^2)   0.09996    0.06597   1.515    0.136
## 
## Residual standard error: 15.18 on 47 degrees of freedom
## Multiple R-squared:  0.6673, Adjusted R-squared:  0.6532 
## F-statistic: 47.14 on 2 and 47 DF,  p-value: 5.852e-12
predict_sq_cars<-predict(lm_sq_cars,data.frame(speed=cars$speed),se.fit=FALSE)
plot(cars$speed,cars$dist)
points(cars$speed,predict_sq_cars,type="l",col="red",lwd=2)

\[dist=2.47+0.91*speed+0.10*speed^2,\ \ \ \ \ \ \ \ Adj\ R^2=0.653\]

仮想データ

x<-c(1,1,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,8,8)
y<-c(3,5,4,6,4,5,7,4,6,8,5,6,8,7,8,6,6,9,8,10)

summary(lm(y ~ x))
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.938 -1.097 -0.250  1.221  2.062 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.4438     0.6929   4.970 9.91e-05 ***
## x             0.6236     0.1394   4.473 0.000294 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.315 on 18 degrees of freedom
## Multiple R-squared:  0.5264, Adjusted R-squared:  0.5001 
## F-statistic: 20.01 on 1 and 18 DF,  p-value: 0.0002942
plot(x,y)
abline(coef=coef(lm(y ~ x)),col="red",lwd=2)

\[y=3.44+0.62*x,\ \ \ \ \ \ \ \ R^2=0.526\]