散布図
plot(cars$speed,cars$dist)
speed
を独立変数,dist
を従属変数とする回帰分析を実行する.
lm_cars<-lm(dist ~ speed, data=cars)
summary(lm_cars)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
結果をまとめる.
\[dist=-17.58+3.93*speed,\ \ \ \ \ \ \ \ R^2=0.651\]
回帰直線をプロット.
plot(cars$speed,cars$dist,col="lightgrey")
abline(coef=coef(lm_cars),col="red",lwd=2)
最適化optim
関数を使って,自力で最小二乗法を実行する.
sumsq<-function(coef) sum((cars$dist-coef[[1]]-coef[[2]]*cars$speed)^2)
optim(c(0,0),sumsq)
## $par
## [1] -17.571729 3.931832
##
## $value
## [1] 11353.52
##
## $counts
## function gradient
## 91 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
分散共分散から係数を求める.
cov_cars<-cov(cars)
mean_speed<-mean(cars$speed)
mean_dist<-mean(cars$dist)
c(mean_dist-cov_cars[1,2]*mean_speed/cov_cars[1,1],
cov_cars[1,2]/cov_cars[1,1])
## [1] -17.579095 3.932409
回帰直線が平均の組を通ることの確認.
plot(cars$speed,cars$dist)
lm_cars<-lm(dist ~ speed, data=cars)
abline(coef=coef(lm_cars),col="red",lwd=2)
points(mean_speed,mean_dist,pch=19,col="blue")
segments(mean_speed,0,mean_speed,mean_dist,col="blue",lty=2,lwd=2)
segments(0,mean_dist,mean_speed,mean_dist,col="blue",lty=2,lwd=2)
2乗項を加えた回帰分析.
lm_sq_cars<-lm(dist ~ speed+I(speed^2), data=cars)
summary(lm_sq_cars)
##
## Call:
## lm(formula = dist ~ speed + I(speed^2), data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.720 -9.184 -3.188 4.628 45.152
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.47014 14.81716 0.167 0.868
## speed 0.91329 2.03422 0.449 0.656
## I(speed^2) 0.09996 0.06597 1.515 0.136
##
## Residual standard error: 15.18 on 47 degrees of freedom
## Multiple R-squared: 0.6673, Adjusted R-squared: 0.6532
## F-statistic: 47.14 on 2 and 47 DF, p-value: 5.852e-12
predict_sq_cars<-predict(lm_sq_cars,data.frame(speed=cars$speed),se.fit=FALSE)
plot(cars$speed,cars$dist)
points(cars$speed,predict_sq_cars,type="l",col="red",lwd=2)
\[dist=2.47+0.91*speed+0.10*speed^2,\ \ \ \ \ \ \ \ Adj\ R^2=0.653\]
x<-c(1,1,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,8,8)
y<-c(3,5,4,6,4,5,7,4,6,8,5,6,8,7,8,6,6,9,8,10)
summary(lm(y ~ x))
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.938 -1.097 -0.250 1.221 2.062
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.4438 0.6929 4.970 9.91e-05 ***
## x 0.6236 0.1394 4.473 0.000294 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.315 on 18 degrees of freedom
## Multiple R-squared: 0.5264, Adjusted R-squared: 0.5001
## F-statistic: 20.01 on 1 and 18 DF, p-value: 0.0002942
plot(x,y)
abline(coef=coef(lm(y ~ x)),col="red",lwd=2)
\[y=3.44+0.62*x,\ \ \ \ \ \ \ \ R^2=0.526\]