#chapter9 선형 회귀
#1 단순 선형 회귀(Simple Linear Regression)
#1.1 모델 생성
data(cars)
head(cars)
##   speed dist
## 1     4    2
## 2     4   10
## 3     7    4
## 4     7   22
## 5     8   16
## 6     9   10
m <- lm(dist~speed, cars)
m
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Coefficients:
## (Intercept)        speed  
##     -17.579        3.932
#1.2 선형회귀 결과 추출
coef(m)
## (Intercept)       speed 
##  -17.579095    3.932409
fitted(m)[1:4]
##         1         2         3         4 
## -1.849460 -1.849460  9.947766  9.947766
residuals(m)[1:4]
##         1         2         3         4 
##  3.849460 11.849460 -5.947766 12.052234
fitted(m)[1:4]+residuals(m)[1:4]
##  1  2  3  4 
##  2 10  4 22
confint(m)
##                  2.5 %    97.5 %
## (Intercept) -31.167850 -3.990340
## speed         3.096964  4.767853
deviance(m)
## [1] 11353.52
sum((cars$dist-predict(m,newdata=cars))^2)
## [1] 11353.52
#1.3 예측과 신뢰구간
m
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Coefficients:
## (Intercept)        speed  
##     -17.579        3.932
predict(m, newdata=data.frame(speed=3))
##         1 
## -5.781869
coef(m)
## (Intercept)       speed 
##  -17.579095    3.932409
-17.579095+3.932409*3
## [1] -5.781868
predict(m,newdata=data.frame(speed=c(3)),interval="confidence")
##         fit       lwr      upr
## 1 -5.781869 -17.02659 5.462853
predict(m, newdata=data.frame(speed=c(3)),interval="prediction")
##         fit       lwr      upr
## 1 -5.781869 -38.68565 27.12192
#1.4 모형 평가
summary(m)
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -29.069  -9.525  -2.272   9.215  43.201 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -17.5791     6.7584  -2.601   0.0123 *  
## speed         3.9324     0.4155   9.464 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438 
## F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
#1.5 ANOVA 및 모델간의 비교
anova(m)
## Analysis of Variance Table
## 
## Response: dist
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## speed      1  21186 21185.5  89.567 1.49e-12 ***
## Residuals 48  11354   236.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
full <- lm(dist~speed, data=cars)
reduced <- lm(dist~1, data=cars)
full
## 
## Call:
## lm(formula = dist ~ speed, data = cars)
## 
## Coefficients:
## (Intercept)        speed  
##     -17.579        3.932
reduced
## 
## Call:
## lm(formula = dist ~ 1, data = cars)
## 
## Coefficients:
## (Intercept)  
##       42.98
anova(reduced, full)
## Analysis of Variance Table
## 
## Model 1: dist ~ 1
## Model 2: dist ~ speed
##   Res.Df   RSS Df Sum of Sq      F   Pr(>F)    
## 1     49 32539                                 
## 2     48 11354  1     21186 89.567 1.49e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#1.6 모델 평가 차트
plot(m)

par(mfrow=c(1,2))
plot(m, which=c(4,6))

#1.7 회귀 직선의 시각화
plot(cars$speed, cars$dist)
abline(coef(m))

summary(cars$speed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     4.0    12.0    15.0    15.4    19.0    25.0
predict(m, newdata=data.frame(speed=seq(4.0,25.0,.2)),
        interval="confidence")
##            fit         lwr       upr
## 1   -1.8494599 -12.3295433  8.630624
## 2   -1.0629781 -11.3914503  9.265494
## 3   -0.2764964 -10.4538419  9.900849
## 4    0.5099854  -9.5167401 10.536711
## 5    1.2964672  -8.5801681 11.173102
## 6    2.0829489  -7.6441504 11.810048
## 7    2.8694307  -6.7087129 12.447574
## 8    3.6559124  -5.7738833 13.085708
## 9    4.4423942  -4.8396905 13.724479
## 10   5.2288759  -3.9061655 14.363917
## 11   6.0153577  -2.9733411 15.004056
## 12   6.8018394  -2.0412521 15.644931
## 13   7.5883212  -1.1099353 16.286578
## 14   8.3748029  -0.1794301 16.929036
## 15   9.1612847   0.7502220 17.572347
## 16   9.9477664   1.6789765 18.216556
## 17  10.7342482   2.6067864 18.861710
## 18  11.5207299   3.5336015 19.507858
## 19  12.3072117   4.4593685 20.155055
## 20  13.0936934   5.3840304 20.803356
## 21  13.8801752   6.3075269 21.452823
## 22  14.6666569   7.2297936 22.103520
## 23  15.4531387   8.1507617 22.755516
## 24  16.2396204   9.0703583 23.408883
## 25  17.0261022   9.9885055 24.063699
## 26  17.8125839  10.9051205 24.720047
## 27  18.5990657  11.8201149 25.378016
## 28  19.3855474  12.7333949 26.037700
## 29  20.1720292  13.6448606 26.699198
## 30  20.9585109  14.5544057 27.362616
## 31  21.7449927  15.4619173 28.028068
## 32  22.5314745  16.3672758 28.695673
## 33  23.3179562  17.2703541 29.365558
## 34  24.1044380  18.1710177 30.037858
## 35  24.8909197  19.0691247 30.712715
## 36  25.6774015  19.9645252 31.390278
## 37  26.4638832  20.8570613 32.070705
## 38  27.2503650  21.7465676 32.754162
## 39  28.0368467  22.6328708 33.440823
## 40  28.8233285  23.5157900 34.130867
## 41  29.6098102  24.3951377 34.824483
## 42  30.3962920  25.2707195 35.521864
## 43  31.1827737  26.1423359 36.223212
## 44  31.9692555  27.0097826 36.928728
## 45  32.7557372  27.8728522 37.638622
## 46  33.5422190  28.7313356 38.353102
## 47  34.3287007  29.5850240 39.072377
## 48  35.1151825  30.4337109 39.796654
## 49  35.9016642  31.2771944 40.526134
## 50  36.6881460  32.1152800 41.261012
## 51  37.4746277  32.9477831 42.001472
## 52  38.2611095  33.7745318 42.747687
## 53  39.0475912  34.5953700 43.499812
## 54  39.8340730  35.4101600 44.257986
## 55  40.6205547  36.2187852 45.022324
## 56  41.4070365  37.0211522 45.792921
## 57  42.1935182  37.8171927 46.569844
## 58  42.9800000  38.6068653 47.353135
## 59  43.7664818  39.3901562 48.142807
## 60  44.5529635  40.1670792 48.938848
## 61  45.3394453  40.9376757 49.741215
## 62  46.1259270  41.7020141 50.549840
## 63  46.9124088  42.4601875 51.364630
## 64  47.6988905  43.2123128 52.185468
## 65  48.4853723  43.9585276 53.012217
## 66  49.2718540  44.6989880 53.844720
## 67  50.0583358  45.4338660 54.682806
## 68  50.8448175  46.1633459 55.526289
## 69  51.6312993  46.8876225 56.374976
## 70  52.4177810  47.6068976 57.228664
## 71  53.2042628  48.3213777 58.087148
## 72  53.9907445  49.0312717 58.950217
## 73  54.7772263  49.7367885 59.817664
## 74  55.5637080  50.4381356 60.689280
## 75  56.3501898  51.1355172 61.564862
## 76  57.1366715  51.8291331 62.444210
## 77  57.9231533  52.5191774 63.327129
## 78  58.7096350  53.2058377 64.213432
## 79  59.4961168  53.8892949 65.102939
## 80  60.2825985  54.5697223 65.995475
## 81  61.0690803  55.2472853 66.890875
## 82  61.8555620  55.9221418 67.788982
## 83  62.6420438  56.5944417 68.689646
## 84  63.4285255  57.2643269 69.592724
## 85  64.2150073  57.9319319 70.498083
## 86  65.0014891  58.5973838 71.405594
## 87  65.7879708  59.2608022 72.315139
## 88  66.5744526  59.9223000 73.226605
## 89  67.3609343  60.5819835 74.139885
## 90  68.1474161  61.2399526 75.054880
## 91  68.9338978  61.8963012 75.971494
## 92  69.7203796  62.5511175 76.889642
## 93  70.5068613  63.2044844 77.809238
## 94  71.2933431  63.8564797 78.730206
## 95  72.0798248  64.5071766 79.652473
## 96  72.8663066  65.1566436 80.575970
## 97  73.6527883  65.8049451 81.500632
## 98  74.4392701  66.4521417 82.426398
## 99  75.2257518  67.0982901 83.353214
## 100 76.0122336  67.7434437 84.281023
## 101 76.7987153  68.3876526 85.209778
## 102 77.5851971  69.0309641 86.139430
## 103 78.3716788  69.6734223 87.069935
## 104 79.1581606  70.3150691 88.001252
## 105 79.9446423  70.9559435 88.933341
## 106 80.7311241  71.5960827 89.866166
speed <- seq(min(cars$speed),max(cars$speed), .1)
ys <- predict(m, newdata=data.frame(speed=speed), interval="confidence")
matplot(speed, ys, type='n')
matlines(speed,ys)

#2
#2.1 모델 생성 및 평가
m <- lm(Sepal.Length ~Sepal.Width + Petal.Length + Petal.Width, data=iris)
m
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width, 
##     data = iris)
## 
## Coefficients:
##  (Intercept)   Sepal.Width  Petal.Length   Petal.Width  
##       1.8560        0.6508        0.7091       -0.5565
summary(m)
## 
## Call:
## lm(formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width, 
##     data = iris)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82816 -0.21989  0.01875  0.19709  0.84570 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.85600    0.25078   7.401 9.85e-12 ***
## Sepal.Width   0.65084    0.06665   9.765  < 2e-16 ***
## Petal.Length  0.70913    0.05672  12.502  < 2e-16 ***
## Petal.Width  -0.55648    0.12755  -4.363 2.41e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3145 on 146 degrees of freedom
## Multiple R-squared:  0.8586, Adjusted R-squared:  0.8557 
## F-statistic: 295.5 on 3 and 146 DF,  p-value: < 2.2e-16
#2.3 중선형회귀모형의 시각화
with(iris, plot(Sepal.Width, Sepal.Length, cex=.7, pch=as.numeric(Species)))

as.numeric(iris$Species)
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [71] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3
## [106] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
## [141] 3 3 3 3 3 3 3 3 3 3
legend("topright", levels(iris$Species), pch=1:3, bg="white")

levels(iris$Species)
## [1] "setosa"     "versicolor" "virginica"
m <- lm(Sepal.Length~Sepal.Width+Species, data=iris)
coef(m)
##       (Intercept)       Sepal.Width Speciesversicolor  Speciesvirginica 
##         2.2513932         0.8035609         1.4587431         1.9468166
abline(2.25, 0.80, lty=1)
abline(2.25 + 1.45, 0.80, lty=2)
abline(2.25 + 1.94, 0.80, lty=3)

with(iris, plot(Sepal.Width, Sepal.Length, cex=.7, pch=as.numeric(Species)))
m <- lm(Sepal.Length~Sepal.Width + Species, data=iris)
coef(m)
##       (Intercept)       Sepal.Width Speciesversicolor  Speciesvirginica 
##         2.2513932         0.8035609         1.4587431         1.9468166
abline(2.25, 0.80, lty=1)
abline(2.25 + 1.45, 0.80, lty=2)
abline(2.25 + 1.94, 0.80, lty=3)
legend("topright",levels(iris$Species),pch=1:3,bg="white")

#2.4 표현식을 위한 I()의 사용
x <- 1:1000
y <- x^2 + 3*x + 5 + rnorm(1000)
lm(y~I(x*2)+x)
## 
## Call:
## lm(formula = y ~ I(x * 2) + x)
## 
## Coefficients:
## (Intercept)     I(x * 2)            x  
##     -167162          502           NA
lm(y~x^2)
## 
## Call:
## lm(formula = y ~ x^2)
## 
## Coefficients:
## (Intercept)            x  
##     -167162         1004
x1 <- 1:1000
x2 <- 3 * x1
y <- 3* (x1+x2) + rnorm(1000)
lm(y~I(x1 + x2))
## 
## Call:
## lm(formula = y ~ I(x1 + x2))
## 
## Coefficients:
## (Intercept)   I(x1 + x2)  
##     0.04739      3.00000
lm(y~x1+x2)
## 
## Call:
## lm(formula = y ~ x1 + x2)
## 
## Coefficients:
## (Intercept)           x1           x2  
##     0.04739     12.00000           NA
#2.5 변수의 변환
x <- 101:200
y <- exp(3*x +rnorm(100))
lm(log(y)~x)
## 
## Call:
## lm(formula = log(y) ~ x)
## 
## Coefficients:
## (Intercept)            x  
##     -0.5382       3.0037
x <- 101:200
y <- log(x) + rnorm(100)
lm(y~log(x))
## 
## Call:
## lm(formula = y ~ log(x))
## 
## Coefficients:
## (Intercept)       log(x)  
##      0.5847       0.8880
#2.6 상호 작용
data(Orange)
Orange
##    Tree  age circumference
## 1     1  118            30
## 2     1  484            58
## 3     1  664            87
## 4     1 1004           115
## 5     1 1231           120
## 6     1 1372           142
## 7     1 1582           145
## 8     2  118            33
## 9     2  484            69
## 10    2  664           111
## 11    2 1004           156
## 12    2 1231           172
## 13    2 1372           203
## 14    2 1582           203
## 15    3  118            30
## 16    3  484            51
## 17    3  664            75
## 18    3 1004           108
## 19    3 1231           115
## 20    3 1372           139
## 21    3 1582           140
## 22    4  118            32
## 23    4  484            62
## 24    4  664           112
## 25    4 1004           167
## 26    4 1231           179
## 27    4 1372           209
## 28    4 1582           214
## 29    5  118            30
## 30    5  484            49
## 31    5  664            81
## 32    5 1004           125
## 33    5 1231           142
## 34    5 1372           174
## 35    5 1582           177
with(Orange, plot(Tree, circumference, xlab="tree", ylab="circumference"))

with(Orange, interaction.plot(age, Tree, circumference))

Orange[,"fTree"] <- factor(Orange[,"Tree"],ordered=FALSE)

m <- lm(circumference ~ fTree * age, data=Orange)
anova(m)
## Analysis of Variance Table
## 
## Response: circumference
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## fTree      4  11841    2960  27.2983 8.428e-09 ***
## age        1  93772   93772 864.7348 < 2.2e-16 ***
## fTree:age  4   4043    1011   9.3206 9.402e-05 ***
## Residuals 25   2711     108                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
head(model.matrix(m))
##   (Intercept) fTree1 fTree5 fTree2 fTree4  age fTree1:age fTree5:age
## 1           1      1      0      0      0  118        118          0
## 2           1      1      0      0      0  484        484          0
## 3           1      1      0      0      0  664        664          0
## 4           1      1      0      0      0 1004       1004          0
## 5           1      1      0      0      0 1231       1231          0
## 6           1      1      0      0      0 1372       1372          0
##   fTree2:age fTree4:age
## 1          0          0
## 2          0          0
## 3          0          0
## 4          0          0
## 5          0          0
## 6          0          0
#3 이상치
data(Orange)
m <- lm(circumference~age + I(age), data=Orange)
rstudent(m)
##             1             2             3             4             5 
##  6.372161e-05 -4.735583e-01 -5.474343e-02 -4.051405e-01 -1.250387e+00 
##             6             7             8             9            10 
## -9.461338e-01 -1.884773e+00  1.318100e-01 -3.258711e-03  9.737615e-01 
##            11            12            13            14            15 
##  1.359966e+00  9.960118e-01  1.744963e+00  7.283842e-01  6.372161e-05 
##            16            17            18            19            20 
## -7.773767e-01 -5.647282e-01 -7.042510e-01 -1.480911e+00 -1.080244e+00 
##            21            22            23            24            25 
## -2.143664e+00  8.788132e-02 -3.019190e-01  1.018008e+00  1.881656e+00 
##            26            27            28            29            30 
##  1.311188e+00  2.045052e+00  1.226404e+00  6.372161e-05 -8.652966e-01 
##            31            32            33            34            35 
## -3.087970e-01  1.696690e-02 -2.897485e-01  4.323381e-01 -4.040268e-01
data(Orange)
Orange <- rbind(Orange, data.frame(Tree=as.factor(c(6,6,6)),
                                   age=c(118,484,664),
                                   circumference=c(177,50,30)))
tail(Orange)
##    Tree  age circumference
## 33    5 1231           142
## 34    5 1372           174
## 35    5 1582           177
## 36    6  118           177
## 37    6  484            50
## 38    6  664            30
#4 변수 선택
#4.1 단계적 변수 선택
library(mlbench)
## Warning: package 'mlbench' was built under R version 3.3.3
data(BostonHousing)
m <- lm(medv~., data=BostonHousing)
m2 <- step(m, direction="both")
## Start:  AIC=1589.64
## medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + 
##     tax + ptratio + b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## - age      1      0.06 11079 1587.7
## - indus    1      2.52 11081 1587.8
## <none>                 11079 1589.6
## - chas     1    218.97 11298 1597.5
## - tax      1    242.26 11321 1598.6
## - crim     1    243.22 11322 1598.6
## - zn       1    257.49 11336 1599.3
## - b        1    270.63 11349 1599.8
## - rad      1    479.15 11558 1609.1
## - nox      1    487.16 11566 1609.4
## - ptratio  1   1194.23 12273 1639.4
## - dis      1   1232.41 12311 1641.0
## - rm       1   1871.32 12950 1666.6
## - lstat    1   2410.84 13490 1687.3
## 
## Step:  AIC=1587.65
## medv ~ crim + zn + indus + chas + nox + rm + dis + rad + tax + 
##     ptratio + b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## - indus    1      2.52 11081 1585.8
## <none>                 11079 1587.7
## + age      1      0.06 11079 1589.6
## - chas     1    219.91 11299 1595.6
## - tax      1    242.24 11321 1596.6
## - crim     1    243.20 11322 1596.6
## - zn       1    260.32 11339 1597.4
## - b        1    272.26 11351 1597.9
## - rad      1    481.09 11560 1607.2
## - nox      1    520.87 11600 1608.9
## - ptratio  1   1200.23 12279 1637.7
## - dis      1   1352.26 12431 1643.9
## - rm       1   1959.55 13038 1668.0
## - lstat    1   2718.88 13798 1696.7
## 
## Step:  AIC=1585.76
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio + 
##     b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## <none>                 11081 1585.8
## + indus    1      2.52 11079 1587.7
## + age      1      0.06 11081 1587.8
## - chas     1    227.21 11309 1594.0
## - crim     1    245.37 11327 1594.8
## - zn       1    257.82 11339 1595.4
## - b        1    270.82 11352 1596.0
## - tax      1    273.62 11355 1596.1
## - rad      1    500.92 11582 1606.1
## - nox      1    541.91 11623 1607.9
## - ptratio  1   1206.45 12288 1636.0
## - dis      1   1448.94 12530 1645.9
## - rm       1   1963.66 13045 1666.3
## - lstat    1   2723.48 13805 1695.0
formula(m2)
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio + 
##     b + lstat
step(m,direction="both")
## Start:  AIC=1589.64
## medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + 
##     tax + ptratio + b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## - age      1      0.06 11079 1587.7
## - indus    1      2.52 11081 1587.8
## <none>                 11079 1589.6
## - chas     1    218.97 11298 1597.5
## - tax      1    242.26 11321 1598.6
## - crim     1    243.22 11322 1598.6
## - zn       1    257.49 11336 1599.3
## - b        1    270.63 11349 1599.8
## - rad      1    479.15 11558 1609.1
## - nox      1    487.16 11566 1609.4
## - ptratio  1   1194.23 12273 1639.4
## - dis      1   1232.41 12311 1641.0
## - rm       1   1871.32 12950 1666.6
## - lstat    1   2410.84 13490 1687.3
## 
## Step:  AIC=1587.65
## medv ~ crim + zn + indus + chas + nox + rm + dis + rad + tax + 
##     ptratio + b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## - indus    1      2.52 11081 1585.8
## <none>                 11079 1587.7
## + age      1      0.06 11079 1589.6
## - chas     1    219.91 11299 1595.6
## - tax      1    242.24 11321 1596.6
## - crim     1    243.20 11322 1596.6
## - zn       1    260.32 11339 1597.4
## - b        1    272.26 11351 1597.9
## - rad      1    481.09 11560 1607.2
## - nox      1    520.87 11600 1608.9
## - ptratio  1   1200.23 12279 1637.7
## - dis      1   1352.26 12431 1643.9
## - rm       1   1959.55 13038 1668.0
## - lstat    1   2718.88 13798 1696.7
## 
## Step:  AIC=1585.76
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio + 
##     b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## <none>                 11081 1585.8
## + indus    1      2.52 11079 1587.7
## + age      1      0.06 11081 1587.8
## - chas     1    227.21 11309 1594.0
## - crim     1    245.37 11327 1594.8
## - zn       1    257.82 11339 1595.4
## - b        1    270.82 11352 1596.0
## - tax      1    273.62 11355 1596.1
## - rad      1    500.92 11582 1606.1
## - nox      1    541.91 11623 1607.9
## - ptratio  1   1206.45 12288 1636.0
## - dis      1   1448.94 12530 1645.9
## - rm       1   1963.66 13045 1666.3
## - lstat    1   2723.48 13805 1695.0
## 
## Call:
## lm(formula = medv ~ crim + zn + chas + nox + rm + dis + rad + 
##     tax + ptratio + b + lstat, data = BostonHousing)
## 
## Coefficients:
## (Intercept)         crim           zn        chas1          nox  
##   36.341145    -0.108413     0.045845     2.718716   -17.376023  
##          rm          dis          rad          tax      ptratio  
##    3.801579    -1.492711     0.299608    -0.011778    -0.946525  
##           b        lstat  
##    0.009291    -0.522553
m2 <- step(m, direction="both")
## Start:  AIC=1589.64
## medv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + 
##     tax + ptratio + b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## - age      1      0.06 11079 1587.7
## - indus    1      2.52 11081 1587.8
## <none>                 11079 1589.6
## - chas     1    218.97 11298 1597.5
## - tax      1    242.26 11321 1598.6
## - crim     1    243.22 11322 1598.6
## - zn       1    257.49 11336 1599.3
## - b        1    270.63 11349 1599.8
## - rad      1    479.15 11558 1609.1
## - nox      1    487.16 11566 1609.4
## - ptratio  1   1194.23 12273 1639.4
## - dis      1   1232.41 12311 1641.0
## - rm       1   1871.32 12950 1666.6
## - lstat    1   2410.84 13490 1687.3
## 
## Step:  AIC=1587.65
## medv ~ crim + zn + indus + chas + nox + rm + dis + rad + tax + 
##     ptratio + b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## - indus    1      2.52 11081 1585.8
## <none>                 11079 1587.7
## + age      1      0.06 11079 1589.6
## - chas     1    219.91 11299 1595.6
## - tax      1    242.24 11321 1596.6
## - crim     1    243.20 11322 1596.6
## - zn       1    260.32 11339 1597.4
## - b        1    272.26 11351 1597.9
## - rad      1    481.09 11560 1607.2
## - nox      1    520.87 11600 1608.9
## - ptratio  1   1200.23 12279 1637.7
## - dis      1   1352.26 12431 1643.9
## - rm       1   1959.55 13038 1668.0
## - lstat    1   2718.88 13798 1696.7
## 
## Step:  AIC=1585.76
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio + 
##     b + lstat
## 
##           Df Sum of Sq   RSS    AIC
## <none>                 11081 1585.8
## + indus    1      2.52 11079 1587.7
## + age      1      0.06 11081 1587.8
## - chas     1    227.21 11309 1594.0
## - crim     1    245.37 11327 1594.8
## - zn       1    257.82 11339 1595.4
## - b        1    270.82 11352 1596.0
## - tax      1    273.62 11355 1596.1
## - rad      1    500.92 11582 1606.1
## - nox      1    541.91 11623 1607.9
## - ptratio  1   1206.45 12288 1636.0
## - dis      1   1448.94 12530 1645.9
## - rm       1   1963.66 13045 1666.3
## - lstat    1   2723.48 13805 1695.0
formula(m2)
## medv ~ crim + zn + chas + nox + rm + dis + rad + tax + ptratio + 
##     b + lstat