ISLR Chapter 3: Linear Regression (Saturday, April 15, 2017)

# Read the Advertising data; the CSV has a header row and comma separators.
Advertising <- read.csv(
  file = '/Users/russconte/advertising.csv',
  header = TRUE,
  sep = ','
)
head(Advertising)
##   X    TV Radio Newspaper Sales
## 1 1 230.1  37.8      69.2  22.1
## 2 2  44.5  39.3      45.1  10.4
## 3 3  17.2  45.9      69.3   9.3
## 4 4 151.5  41.3      58.5  18.5
## 5 5 180.8  10.8      58.4  12.9
## 6 6   8.7  48.9      75.0   7.2
# Column 1 (X) mirrors the row number in the rows printed above, so only
# columns 2 through 5 (TV, Radio, Newspaper, Sales) are kept.
Advertising <- Advertising[, 2:5]
head(Advertising)
##      TV Radio Newspaper Sales
## 1 230.1  37.8      69.2  22.1
## 2  44.5  39.3      45.1  10.4
## 3  17.2  45.9      69.3   9.3
## 4 151.5  41.3      58.5  18.5
## 5 180.8  10.8      58.4  12.9
## 6   8.7  48.9      75.0   7.2
# Put the Advertising columns on the search path; the scatter plot further
# down refers to TV and Sales by bare name.
attach(Advertising)
# Simple linear regression of Sales on the TV budget.
sales.lm <- lm(formula = Sales ~ TV, data = Advertising)
summary(sales.lm)
## 
## Call:
## lm(formula = Sales ~ TV, data = Advertising)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.3860 -1.9545 -0.1913  2.0671  7.2124 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 7.032594   0.457843   15.36   <2e-16 ***
## TV          0.047537   0.002691   17.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.259 on 198 degrees of freedom
## Multiple R-squared:  0.6119, Adjusted R-squared:  0.6099 
## F-statistic: 312.1 on 1 and 198 DF,  p-value: < 2.2e-16
# Scatter plot of Sales against the TV budget (columns come from the attached
# Advertising data frame).
plot(x = TV, y = Sales)

# Approximate lower end of a 95% interval for the intercept: estimate minus
# two standard errors. Both numbers are read from the fitted model rather than
# retyped from the printed summary, so the result follows sales.lm if it is
# ever refit.
intercept_row <- coef(summary(sales.lm))["(Intercept)", ]
intercept_row[["Estimate"]] - 2 * intercept_row[["Std. Error"]]
## [1] 6.116908
# Simple linear regression of Sales on the Radio budget.
sales.radio.lm <- lm(formula = Sales ~ Radio, data = Advertising)
summary(sales.radio.lm)
## 
## Call:
## lm(formula = Sales ~ Radio, data = Advertising)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.7305  -2.1324   0.7707   2.7775   8.1810 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.31164    0.56290  16.542   <2e-16 ***
## Radio        0.20250    0.02041   9.921   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.275 on 198 degrees of freedom
## Multiple R-squared:  0.332,  Adjusted R-squared:  0.3287 
## F-statistic: 98.42 on 1 and 198 DF,  p-value: < 2.2e-16
# Simple linear regression of Sales on the Newspaper budget.
sales.newspaper.lm <- lm(formula = Sales ~ Newspaper, data = Advertising)
summary(sales.newspaper.lm)
## 
## Call:
## lm(formula = Sales ~ Newspaper, data = Advertising)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2272  -3.3873  -0.8392   3.5059  12.7751 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.35141    0.62142   19.88  < 2e-16 ***
## Newspaper    0.05469    0.01658    3.30  0.00115 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.092 on 198 degrees of freedom
## Multiple R-squared:  0.05212,    Adjusted R-squared:  0.04733 
## F-statistic: 10.89 on 1 and 198 DF,  p-value: 0.001148
# Multiple regression of Sales on all three advertising budgets at once.
sales.total.lm <- lm(
  formula = Sales ~ TV + Radio + Newspaper,
  data = Advertising
)
summary(sales.total.lm)
## 
## Call:
## lm(formula = Sales ~ TV + Radio + Newspaper, data = Advertising)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.8277 -0.8908  0.2418  1.1893  2.8292 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.938889   0.311908   9.422   <2e-16 ***
## TV           0.045765   0.001395  32.809   <2e-16 ***
## Radio        0.188530   0.008611  21.893   <2e-16 ***
## Newspaper   -0.001037   0.005871  -0.177     0.86    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.686 on 196 degrees of freedom
## Multiple R-squared:  0.8972, Adjusted R-squared:  0.8956 
## F-statistic: 570.3 on 3 and 196 DF,  p-value: < 2.2e-16
# Pairwise correlations between the three budgets and Sales.
cor(Advertising)
##                   TV      Radio  Newspaper     Sales
## TV        1.00000000 0.05480866 0.05664787 0.7822244
## Radio     0.05480866 1.00000000 0.35410375 0.5762226
## Newspaper 0.05664787 0.35410375 1.00000000 0.2282990
## Sales     0.78222442 0.57622257 0.22829903 1.0000000
# Load the Credit data. Text columns (Gender, Student, Married, Ethnicity) are
# read as factors explicitly: since R 4.0 read.csv() leaves them as character
# by default, and pairs(credit) below rejects character columns.
credit <- read.csv(
  file = '/Users/russconte/Credit.csv',
  header = TRUE,
  sep = ',',
  stringsAsFactors = TRUE
)
head(credit)
##   X  Income Limit Rating Cards Age Education Gender Student Married
## 1 1  14.891  3606    283     2  34        11   Male      No     Yes
## 2 2 106.025  6645    483     3  82        15 Female     Yes     Yes
## 3 3 104.593  7075    514     4  71        11   Male      No      No
## 4 4 148.924  9504    681     3  36        11 Female      No      No
## 5 5  55.882  4897    357     2  68        16   Male      No     Yes
## 6 6  80.180  8047    569     4  77        10   Male      No      No
##   Ethnicity Balance
## 1 Caucasian     333
## 2     Asian     903
## 3     Asian     580
## 4     Asian     964
## 5 Caucasian     331
## 6 Caucasian    1151
# 400 rows by 12 columns, the first column being the index column X.
dim(credit)
## [1] 400  12
# Keep columns 2 through 12, i.e. everything except X.
credit <- credit[, 2:12]
head(credit)
##    Income Limit Rating Cards Age Education Gender Student Married
## 1  14.891  3606    283     2  34        11   Male      No     Yes
## 2 106.025  6645    483     3  82        15 Female     Yes     Yes
## 3 104.593  7075    514     4  71        11   Male      No      No
## 4 148.924  9504    681     3  36        11 Female      No      No
## 5  55.882  4897    357     2  68        16   Male      No     Yes
## 6  80.180  8047    569     4  77        10   Male      No      No
##   Ethnicity Balance
## 1 Caucasian     333
## 2     Asian     903
## 3     Asian     580
## 4     Asian     964
## 5 Caucasian     331
## 6 Caucasian    1151
# Scatter-plot matrix across every column of credit.
pairs(credit)

# Balance regressed on the two-level Gender predictor.
balance1.lm <- lm(formula = Balance ~ Gender, data = credit)
summary(balance1.lm)
## 
## Call:
## lm(formula = Balance ~ Gender, data = credit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -529.54 -455.35  -60.17  334.71 1489.20 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    509.80      33.13  15.389   <2e-16 ***
## GenderFemale    19.73      46.05   0.429    0.669    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 460.2 on 398 degrees of freedom
## Multiple R-squared:  0.0004611,  Adjusted R-squared:  -0.00205 
## F-statistic: 0.1836 on 1 and 398 DF,  p-value: 0.6685
# Balance regressed on Ethnicity, a predictor with more than two levels.
balance2.lm <- lm(formula = Balance ~ Ethnicity, data = credit)
summary(balance2.lm)
## 
## Call:
## lm(formula = Balance ~ Ethnicity, data = credit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -531.00 -457.08  -63.25  339.25 1480.50 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          531.00      46.32  11.464   <2e-16 ***
## EthnicityAsian       -18.69      65.02  -0.287    0.774    
## EthnicityCaucasian   -12.50      56.68  -0.221    0.826    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 460.9 on 397 degrees of freedom
## Multiple R-squared:  0.0002188,  Adjusted R-squared:  -0.004818 
## F-statistic: 0.04344 on 2 and 397 DF,  p-value: 0.9575
# Sales with a Radio-by-TV interaction. Radio * TV already expands to both main
# effects plus Radio:TV, so the explicit Radio + TV terms are redundant; they
# are kept so the echoed Call matches the transcript.
sales.lm2 <- lm(
  formula = Sales ~ Radio + TV + Radio * TV,
  data = Advertising
)
summary(sales.lm2)
## 
## Call:
## lm(formula = Sales ~ Radio + TV + Radio * TV, data = Advertising)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3366 -0.4028  0.1831  0.5948  1.5246 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.750e+00  2.479e-01  27.233   <2e-16 ***
## Radio       2.886e-02  8.905e-03   3.241   0.0014 ** 
## TV          1.910e-02  1.504e-03  12.699   <2e-16 ***
## Radio:TV    1.086e-03  5.242e-05  20.727   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9435 on 196 degrees of freedom
## Multiple R-squared:  0.9678, Adjusted R-squared:  0.9673 
## F-statistic:  1963 on 3 and 196 DF,  p-value: < 2.2e-16
# Balance on Income plus the Student indicator (no interaction term).
balance3.lm <- lm(formula = Balance ~ Income + Student, data = credit)
summary(balance3.lm)
## 
## Call:
## lm(formula = Balance ~ Income + Student, data = credit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -762.37 -331.38  -45.04  323.60  818.28 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 211.1430    32.4572   6.505 2.34e-10 ***
## Income        5.9843     0.5566  10.751  < 2e-16 ***
## StudentYes  382.6705    65.3108   5.859 9.78e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 391.8 on 397 degrees of freedom
## Multiple R-squared:  0.2775, Adjusted R-squared:  0.2738 
## F-statistic: 76.22 on 2 and 397 DF,  p-value: < 2.2e-16
# Balance on Age and Limit.
balance4.lm <- lm(formula = Balance ~ Age + Limit, data = credit)
summary(balance4.lm)
## 
## Call:
## lm(formula = Balance ~ Age + Limit, data = credit)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -696.84 -150.78  -13.01  126.68  755.56 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.734e+02  4.383e+01  -3.957 9.01e-05 ***
## Age         -2.291e+00  6.725e-01  -3.407 0.000723 ***
## Limit        1.734e-01  5.026e-03  34.496  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 230.5 on 397 degrees of freedom
## Multiple R-squared:  0.7498, Adjusted R-squared:  0.7486 
## F-statistic:   595 on 2 and 397 DF,  p-value: < 2.2e-16
# Balance on Rating and Limit, for comparison with the Age + Limit fit.
balance5.lm <- lm(formula = Balance ~ Rating + Limit, data = credit)
summary(balance5.lm)
## 
## Call:
## lm(formula = Balance ~ Rating + Limit, data = credit)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -707.8 -135.9   -9.5  124.0  817.6 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -377.53680   45.25418  -8.343 1.21e-15 ***
## Rating         2.20167    0.95229   2.312   0.0213 *  
## Limit          0.02451    0.06383   0.384   0.7012    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 232.3 on 397 degrees of freedom
## Multiple R-squared:  0.7459, Adjusted R-squared:  0.7447 
## F-statistic: 582.8 on 2 and 397 DF,  p-value: < 2.2e-16

Lab!

library(MASS)
library(ISLR)
# Put the Boston columns on the search path; the plotting calls further down
# use lstat and medv by bare name.
attach(Boston)
names(Boston)
##  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"    
##  [8] "dis"     "rad"     "tax"     "ptratio" "black"   "lstat"   "medv"
# Simple linear regression of medv on lstat.
lm.fit <- lm(formula = medv ~ lstat, data = Boston)
summary(lm.fit)
## 
## Call:
## lm(formula = medv ~ lstat, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.168  -3.990  -1.318   2.034  24.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.55384    0.56263   61.41   <2e-16 ***
## lstat       -0.95005    0.03873  -24.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared:  0.5441, Adjusted R-squared:  0.5432 
## F-statistic: 601.6 on 1 and 504 DF,  p-value: < 2.2e-16
# Components stored on the fitted lm object.
names(lm.fit)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"
# Confidence intervals for the coefficients (2.5 % and 97.5 % bounds).
confint(lm.fit)
##                 2.5 %     97.5 %
## (Intercept) 33.448457 35.6592247
## lstat       -1.026148 -0.8739505
# Evaluate the fit at lstat = 5, 10 and 15: first with confidence intervals,
# then with the wider prediction intervals.
new_lstat <- data.frame(lstat = c(5, 10, 15))
predict(lm.fit, newdata = new_lstat, interval = "confidence")
##        fit      lwr      upr
## 1 29.80359 29.00741 30.59978
## 2 25.05335 24.47413 25.63256
## 3 20.30310 19.73159 20.87461
predict(lm.fit, newdata = new_lstat, interval = "prediction")
##        fit       lwr      upr
## 1 29.80359 17.565675 42.04151
## 2 25.05335 12.827626 37.27907
## 3 20.30310  8.077742 32.52846
# Scatter plot of medv against lstat with the fitted line from lm.fit.
plot(lstat, medv)
abline(lm.fit)

# Same plot with a thicker regression line.
plot(lstat, medv)
abline(lm.fit, lwd = 3)

# Filled-dot markers and a red regression line.
plot(lstat, medv, pch = 20)
abline(lm.fit, col = "red")

# pch selects the point symbol, so it is given to plot(); on abline() it had
# no effect on the drawn line. The line keeps its width and colour settings.
plot(lstat, medv, pch = 20)
abline(lm.fit, lwd = 3, col = "red")

plot(lstat, medv, pch = "+")
abline(lm.fit, lwd = 3, col = "red")

# Reference chart: plotting symbols 1 through 20.
plot(1:20, 1:20, pch = 1:20)

# Switch to a 2x2 plotting grid and draw the standard lm diagnostic plots.
# The grid setting is not reset afterwards, so later plots share it too.
par(mfrow = c(2, 2))
plot(lm.fit)

# Residuals against fitted values.
plot(x = predict(lm.fit), y = residuals(lm.fit))

# Studentized residuals against fitted values.
plot(x = predict(lm.fit), y = rstudent(lm.fit))

# Leverage (hat values) by observation index.
plot(hatvalues(lm.fit))

# Index of the observation with the largest leverage.
which.max(hatvalues(lm.fit))
## 375 
## 375
print(lm.fit)
## 
## Call:
## lm(formula = medv ~ lstat, data = Boston)
## 
## Coefficients:
## (Intercept)        lstat  
##       34.55        -0.95
# Inspect row 375, the highest-leverage observation reported above.
Boston[375, ]
##        crim zn indus chas   nox    rm age   dis rad tax ptratio black
## 375 18.4982  0  18.1    0 0.668 4.138 100 1.137  24 666    20.2 396.9
##     lstat medv
## 375 37.97 13.8

3.6.3 Multiple Linear Regression

# Two-predictor model: medv on lstat and age.
lm.fit.boston1 <- lm(formula = medv ~ lstat + age, data = Boston)
summary(lm.fit.boston1)
## 
## Call:
## lm(formula = medv ~ lstat + age, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.981  -3.978  -1.283   1.968  23.158 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.22276    0.73085  45.458  < 2e-16 ***
## lstat       -1.03207    0.04819 -21.416  < 2e-16 ***
## age          0.03454    0.01223   2.826  0.00491 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.173 on 503 degrees of freedom
## Multiple R-squared:  0.5513, Adjusted R-squared:  0.5495 
## F-statistic:   309 on 2 and 503 DF,  p-value: < 2.2e-16
# Penalise scientific notation so p-values print in fixed notation. This is a
# session-wide option and stays in effect for everything that follows.
options(scipen = 999)
# Regress medv on every other column of Boston.
lm.fit.boston2 <- lm(formula = medv ~ ., data = Boston)
summary(lm.fit.boston2)
## 
## Call:
## lm(formula = medv ~ ., data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.595  -2.730  -0.518   1.777  26.199 
## 
## Coefficients:
##                Estimate  Std. Error t value             Pr(>|t|)    
## (Intercept)  36.4594884   5.1034588   7.144    0.000000000003283 ***
## crim         -0.1080114   0.0328650  -3.287             0.001087 ** 
## zn            0.0464205   0.0137275   3.382             0.000778 ***
## indus         0.0205586   0.0614957   0.334             0.738288    
## chas          2.6867338   0.8615798   3.118             0.001925 ** 
## nox         -17.7666112   3.8197437  -4.651    0.000004245643808 ***
## rm            3.8098652   0.4179253   9.116 < 0.0000000000000002 ***
## age           0.0006922   0.0132098   0.052             0.958229    
## dis          -1.4755668   0.1994547  -7.398    0.000000000000601 ***
## rad           0.3060495   0.0663464   4.613    0.000005070529023 ***
## tax          -0.0123346   0.0037605  -3.280             0.001112 ** 
## ptratio      -0.9527472   0.1308268  -7.283    0.000000000001309 ***
## black         0.0093117   0.0026860   3.467             0.000573 ***
## lstat        -0.5247584   0.0507153 -10.347 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.745 on 492 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7338 
## F-statistic: 108.1 on 13 and 492 DF,  p-value: < 0.00000000000000022
library(car)
# Variance inflation factors for the all-predictor model.
vif(lm.fit.boston2)
##     crim       zn    indus     chas      nox       rm      age      dis 
## 1.792192 2.298758 3.991596 1.073995 4.393720 1.933744 3.100826 3.955945 
##      rad      tax  ptratio    black    lstat 
## 7.484496 9.008554 1.799084 1.348521 2.941491
# Refit with every predictor except age. This overwrites the all-predictor
# model stored under the same name.
lm.fit.boston2 <- lm(formula = medv ~ . - age, data = Boston)
summary(lm.fit.boston2)
## 
## Call:
## lm(formula = medv ~ . - age, data = Boston)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.6054  -2.7313  -0.5188   1.7601  26.2243 
## 
## Coefficients:
##               Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)  36.436927   5.080119   7.172   0.0000000000027155 ***
## crim         -0.108006   0.032832  -3.290             0.001075 ** 
## zn            0.046334   0.013613   3.404             0.000719 ***
## indus         0.020562   0.061433   0.335             0.737989    
## chas          2.689026   0.859598   3.128             0.001863 ** 
## nox         -17.713540   3.679308  -4.814   0.0000019671100076 ***
## rm            3.814394   0.408480   9.338 < 0.0000000000000002 ***
## dis          -1.478612   0.190611  -7.757   0.0000000000000503 ***
## rad           0.305786   0.066089   4.627   0.0000047505389684 ***
## tax          -0.012329   0.003755  -3.283             0.001099 ** 
## ptratio      -0.952211   0.130294  -7.308   0.0000000000010992 ***
## black         0.009321   0.002678   3.481             0.000544 ***
## lstat        -0.523852   0.047625 -10.999 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.74 on 493 degrees of freedom
## Multiple R-squared:  0.7406, Adjusted R-squared:  0.7343 
## F-statistic: 117.3 on 12 and 493 DF,  p-value: < 0.00000000000000022
# lstat * age expands to lstat + age + lstat:age (see the coefficient table).
summary(lm(formula = medv ~ lstat * age, data = Boston))
## 
## Call:
## lm(formula = medv ~ lstat * age, data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.806  -4.045  -1.333   2.085  27.552 
## 
## Coefficients:
##               Estimate Std. Error t value             Pr(>|t|)    
## (Intercept) 36.0885359  1.4698355  24.553 < 0.0000000000000002 ***
## lstat       -1.3921168  0.1674555  -8.313 0.000000000000000878 ***
## age         -0.0007209  0.0198792  -0.036               0.9711    
## lstat:age    0.0041560  0.0018518   2.244               0.0252 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.149 on 502 degrees of freedom
## Multiple R-squared:  0.5557, Adjusted R-squared:  0.5531 
## F-statistic: 209.3 on 3 and 502 DF,  p-value: < 0.00000000000000022
# Quadratic fit of medv on lstat; I() protects ^ so it means squaring rather
# than formula crossing. data = Boston is passed explicitly, like every other
# fit in this file, so the model does not depend on attach(Boston) or on
# whatever else is on the search path.
lm.fit.boston3 <- lm(medv ~ lstat + I(lstat^2), data = Boston)
summary(lm.fit.boston3)
## 
## Call:
## lm(formula = medv ~ lstat + I(lstat^2), data = Boston)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.2834  -3.8313  -0.5295   2.3095  25.4148 
## 
## Coefficients:
##              Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) 42.862007   0.872084   49.15 <0.0000000000000002 ***
## lstat       -2.332821   0.123803  -18.84 <0.0000000000000002 ***
## I(lstat^2)   0.043547   0.003745   11.63 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.524 on 503 degrees of freedom
## Multiple R-squared:  0.6407, Adjusted R-squared:  0.6393 
## F-statistic: 448.5 on 2 and 503 DF,  p-value: < 0.00000000000000022
# Linear baseline (same model as lm.fit), fitted with an explicit data frame.
lm.fit.boston4 <- lm(medv ~ lstat, data = Boston)
# F-test of the linear baseline against the quadratic model.
anova(lm.fit.boston4, lm.fit.boston3)
## Analysis of Variance Table
## 
## Model 1: medv ~ lstat
## Model 2: medv ~ lstat + I(lstat^2)
##   Res.Df   RSS Df Sum of Sq     F                Pr(>F)    
## 1    504 19472                                             
## 2    503 15347  1    4125.1 135.2 < 0.00000000000000022 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostics for the quadratic model the anova favours. Plotting the linear
# baseline here would only repeat the earlier plot(lm.fit) panels.
par(mfrow = c(2, 2))
plot(lm.fit.boston3)

# Degree-5 polynomial in lstat using poly()'s default basis. data = Boston is
# passed explicitly so the fit does not rely on attach(Boston).
lm.fit5 <- lm(medv ~ poly(lstat, 5), data = Boston)
summary(lm.fit5)
## 
## Call:
## lm(formula = medv ~ poly(lstat, 5), data = Boston)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5433  -3.1039  -0.7052   2.0844  27.1153 
## 
## Coefficients:
##                  Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)       22.5328     0.2318  97.197 < 0.0000000000000002 ***
## poly(lstat, 5)1 -152.4595     5.2148 -29.236 < 0.0000000000000002 ***
## poly(lstat, 5)2   64.2272     5.2148  12.316 < 0.0000000000000002 ***
## poly(lstat, 5)3  -27.0511     5.2148  -5.187           0.00000031 ***
## poly(lstat, 5)4   25.4517     5.2148   4.881           0.00000142 ***
## poly(lstat, 5)5  -19.2524     5.2148  -3.692             0.000247 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.215 on 500 degrees of freedom
## Multiple R-squared:  0.6817, Adjusted R-squared:  0.6785 
## F-statistic: 214.2 on 5 and 500 DF,  p-value: < 0.00000000000000022
# Log-transformed predictor: medv on log(rm).
summary(lm(formula = medv ~ log(rm), data = Boston))
## 
## Call:
## lm(formula = medv ~ log(rm), data = Boston)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.487  -2.875  -0.104   2.837  39.816 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  -76.488      5.028  -15.21 <0.0000000000000002 ***
## log(rm)       54.055      2.739   19.73 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.915 on 504 degrees of freedom
## Multiple R-squared:  0.4358, Adjusted R-squared:  0.4347 
## F-statistic: 389.3 on 1 and 504 DF,  p-value: < 0.00000000000000022

3.6.6 Qualitative Predictors

# Attach Carseats. The messages below report name clashes: the global
# Advertising data frame hides the Carseats column of that name, and the
# Carseats Sales column hides the one from the attached Advertising data.
attach(Carseats)
## The following object is masked _by_ .GlobalEnv:
## 
##     Advertising
## The following object is masked from Advertising:
## 
##     Sales
names(Carseats)
##  [1] "Sales"       "CompPrice"   "Income"      "Advertising" "Population" 
##  [6] "Price"       "ShelveLoc"   "Age"         "Education"   "Urban"      
## [11] "US"
# All predictors plus two interaction terms; data = Carseats keeps the formula
# bound to the Carseats columns despite the masking reported above.
lm.fit.carseats1 <- lm(
  formula = Sales ~ . + Income:Advertising + Price:Age,
  data = Carseats
)
summary(lm.fit.carseats1)
## 
## Call:
## lm(formula = Sales ~ . + Income:Advertising + Price:Age, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9208 -0.7503  0.0177  0.6754  3.3413 
## 
## Coefficients:
##                      Estimate Std. Error t value             Pr(>|t|)    
## (Intercept)         6.5755654  1.0087470   6.519       0.000000000222 ***
## CompPrice           0.0929371  0.0041183  22.567 < 0.0000000000000002 ***
## Income              0.0108940  0.0026044   4.183       0.000035665275 ***
## Advertising         0.0702462  0.0226091   3.107             0.002030 ** 
## Population          0.0001592  0.0003679   0.433             0.665330    
## Price              -0.1008064  0.0074399 -13.549 < 0.0000000000000002 ***
## ShelveLocGood       4.8486762  0.1528378  31.724 < 0.0000000000000002 ***
## ShelveLocMedium     1.9532620  0.1257682  15.531 < 0.0000000000000002 ***
## Age                -0.0579466  0.0159506  -3.633             0.000318 ***
## Education          -0.0208525  0.0196131  -1.063             0.288361    
## UrbanYes            0.1401597  0.1124019   1.247             0.213171    
## USYes              -0.1575571  0.1489234  -1.058             0.290729    
## Income:Advertising  0.0007510  0.0002784   2.698             0.007290 ** 
## Price:Age           0.0001068  0.0001333   0.801             0.423812    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.011 on 386 degrees of freedom
## Multiple R-squared:  0.8761, Adjusted R-squared:  0.8719 
## F-statistic:   210 on 13 and 386 DF,  p-value: < 0.00000000000000022
# Dummy coding used for ShelveLoc. The column is taken from Carseats directly
# rather than from the search path, where attached names are already masking
# one another.
contrasts(Carseats$ShelveLoc)
##        Good Medium
## Bad       0      0
## Good      1      0
## Medium    0      1