Libraries
library (MASS)
library (ISLR)
Simple Linear Regression
fix(Boston)
names(Boston)
[1] "crim" "zn" "indus" "chas" "nox"
[6] "rm" "age" "dis" "rad" "tax"
[11] "ptratio" "black" "lstat" "medv"
lm.fit<-lm(medv~lstat,data=Boston)
lm.fit
Call:
lm(formula = medv ~ lstat, data = Boston)
Coefficients:
(Intercept) lstat
34.55 -0.95
summary(lm.fit)
Call:
lm(formula = medv ~ lstat, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.168 -3.990 -1.318 2.034 24.500
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 34.55384 0.56263 61.41 <2e-16 ***
lstat -0.95005 0.03873 -24.53 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.216 on 504 degrees of freedom
Multiple R-squared: 0.5441, Adjusted R-squared: 0.5432
F-statistic: 601.6 on 1 and 504 DF, p-value: < 2.2e-16
names(lm.fit)
[1] "coefficients" "residuals"
[3] "effects" "rank"
[5] "fitted.values" "assign"
[7] "qr" "df.residual"
[9] "xlevels" "call"
[11] "terms" "model"
lm.fit$coefficients
(Intercept) lstat
34.5538409 -0.9500494
confint(lm.fit)
2.5 % 97.5 %
(Intercept) 33.448457 35.6592247
lstat -1.026148 -0.8739505
predict (lm.fit ,data.frame(lstat =(c(5 ,10 ,15) )),interval ="confidence")
fit lwr upr
1 29.80359 29.00741 30.59978
2 25.05335 24.47413 25.63256
3 20.30310 19.73159 20.87461
predict (lm.fit ,data.frame(lstat =(c(5 ,10 ,15) )),interval ="prediction")
fit lwr upr
1 29.80359 17.565675 42.04151
2 25.05335 12.827626 37.27907
3 20.30310 8.077742 32.52846
plot(Boston$lstat,Boston$medv,col="blue",pch=20)
abline(lm.fit,col="red",lwd=3)

par(mfrow =c(2,2))
plot(lm.fit)

par(mfrow =c(1,2))
plot(predict (lm.fit), residuals (lm.fit))
plot(predict (lm.fit), rstudent (lm.fit))

plot(hatvalues (lm.fit ))

which.max (hatvalues (lm.fit))
375
375
Multiple Linear Regression
lm.fit =lm(medv~lstat+age,data=Boston)
summary(lm.fit)
Call:
lm(formula = medv ~ lstat + age, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.981 -3.978 -1.283 1.968 23.158
Coefficients:
Estimate Std. Error t value
(Intercept) 33.22276 0.73085 45.458
lstat -1.03207 0.04819 -21.416
age 0.03454 0.01223 2.826
Pr(>|t|)
(Intercept) < 2e-16 ***
lstat < 2e-16 ***
age 0.00491 **
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.173 on 503 degrees of freedom
Multiple R-squared: 0.5513, Adjusted R-squared: 0.5495
F-statistic: 309 on 2 and 503 DF, p-value: < 2.2e-16
lm.fit =lm(medv~.,data=Boston )
summary (lm.fit)
Call:
lm(formula = medv ~ ., data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.595 -2.730 -0.518 1.777 26.199
Coefficients:
Estimate Std. Error t value
(Intercept) 3.646e+01 5.103e+00 7.144
crim -1.080e-01 3.286e-02 -3.287
zn 4.642e-02 1.373e-02 3.382
indus 2.056e-02 6.150e-02 0.334
chas 2.687e+00 8.616e-01 3.118
nox -1.777e+01 3.820e+00 -4.651
rm 3.810e+00 4.179e-01 9.116
age 6.922e-04 1.321e-02 0.052
dis -1.476e+00 1.995e-01 -7.398
rad 3.060e-01 6.635e-02 4.613
tax -1.233e-02 3.760e-03 -3.280
ptratio -9.527e-01 1.308e-01 -7.283
black 9.312e-03 2.686e-03 3.467
lstat -5.248e-01 5.072e-02 -10.347
Pr(>|t|)
(Intercept) 3.28e-12 ***
crim 0.001087 **
zn 0.000778 ***
indus 0.738288
chas 0.001925 **
nox 4.25e-06 ***
rm < 2e-16 ***
age 0.958229
dis 6.01e-13 ***
rad 5.07e-06 ***
tax 0.001112 **
ptratio 1.31e-12 ***
black 0.000573 ***
lstat < 2e-16 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.745 on 492 degrees of freedom
Multiple R-squared: 0.7406, Adjusted R-squared: 0.7338
F-statistic: 108.1 on 13 and 492 DF, p-value: < 2.2e-16
summary(lm.fit)$sigma
[1] 4.745298
library (car)
vif(lm.fit)
crim zn indus chas nox
1.792192 2.298758 3.991596 1.073995 4.393720
rm age dis rad tax
1.933744 3.100826 3.955945 7.484496 9.008554
ptratio black lstat
1.799084 1.348521 2.941491
lm.fit1=lm(medv~.-age,data=Boston )
summary (lm.fit1)
Call:
lm(formula = medv ~ . - age, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.6054 -2.7313 -0.5188 1.7601 26.2243
Coefficients:
Estimate Std. Error t value
(Intercept) 36.436927 5.080119 7.172
crim -0.108006 0.032832 -3.290
zn 0.046334 0.013613 3.404
indus 0.020562 0.061433 0.335
chas 2.689026 0.859598 3.128
nox -17.713540 3.679308 -4.814
rm 3.814394 0.408480 9.338
dis -1.478612 0.190611 -7.757
rad 0.305786 0.066089 4.627
tax -0.012329 0.003755 -3.283
ptratio -0.952211 0.130294 -7.308
black 0.009321 0.002678 3.481
lstat -0.523852 0.047625 -10.999
Pr(>|t|)
(Intercept) 2.72e-12 ***
crim 0.001075 **
zn 0.000719 ***
indus 0.737989
chas 0.001863 **
nox 1.97e-06 ***
rm < 2e-16 ***
dis 5.03e-14 ***
rad 4.75e-06 ***
tax 0.001099 **
ptratio 1.10e-12 ***
black 0.000544 ***
lstat < 2e-16 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.74 on 493 degrees of freedom
Multiple R-squared: 0.7406, Adjusted R-squared: 0.7343
F-statistic: 117.3 on 12 and 493 DF, p-value: < 2.2e-16
lm.fit1=update (lm.fit,~.-age)
summary(lm.fit1)
Call:
lm(formula = medv ~ crim + zn + indus + chas + nox + rm + dis +
rad + tax + ptratio + black + lstat, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.6054 -2.7313 -0.5188 1.7601 26.2243
Coefficients:
Estimate Std. Error t value
(Intercept) 36.436927 5.080119 7.172
crim -0.108006 0.032832 -3.290
zn 0.046334 0.013613 3.404
indus 0.020562 0.061433 0.335
chas 2.689026 0.859598 3.128
nox -17.713540 3.679308 -4.814
rm 3.814394 0.408480 9.338
dis -1.478612 0.190611 -7.757
rad 0.305786 0.066089 4.627
tax -0.012329 0.003755 -3.283
ptratio -0.952211 0.130294 -7.308
black 0.009321 0.002678 3.481
lstat -0.523852 0.047625 -10.999
Pr(>|t|)
(Intercept) 2.72e-12 ***
crim 0.001075 **
zn 0.000719 ***
indus 0.737989
chas 0.001863 **
nox 1.97e-06 ***
rm < 2e-16 ***
dis 5.03e-14 ***
rad 4.75e-06 ***
tax 0.001099 **
ptratio 1.10e-12 ***
black 0.000544 ***
lstat < 2e-16 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.74 on 493 degrees of freedom
Multiple R-squared: 0.7406, Adjusted R-squared: 0.7343
F-statistic: 117.3 on 12 and 493 DF, p-value: < 2.2e-16
Interaction Terms
summary (lm(medv~lstat*age ,data=Boston))
Call:
lm(formula = medv ~ lstat * age, data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.806 -4.045 -1.333 2.085 27.552
Coefficients:
Estimate Std. Error t value
(Intercept) 36.0885359 1.4698355 24.553
lstat -1.3921168 0.1674555 -8.313
age -0.0007209 0.0198792 -0.036
lstat:age 0.0041560 0.0018518 2.244
Pr(>|t|)
(Intercept) < 2e-16 ***
lstat 8.78e-16 ***
age 0.9711
lstat:age 0.0252 *
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.149 on 502 degrees of freedom
Multiple R-squared: 0.5557, Adjusted R-squared: 0.5531
F-statistic: 209.3 on 3 and 502 DF, p-value: < 2.2e-16
Non-Linear Transformations of predictors
lm.fit2<-lm(medv~lstat+I(lstat^2),data=Boston)
summary (lm.fit2)
Call:
lm(formula = medv ~ lstat + I(lstat^2), data = Boston)
Residuals:
Min 1Q Median 3Q Max
-15.2834 -3.8313 -0.5295 2.3095 25.4148
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 42.862007 0.872084 49.15 <2e-16 ***
lstat -2.332821 0.123803 -18.84 <2e-16 ***
I(lstat^2) 0.043547 0.003745 11.63 <2e-16 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.524 on 503 degrees of freedom
Multiple R-squared: 0.6407, Adjusted R-squared: 0.6393
F-statistic: 448.5 on 2 and 503 DF, p-value: < 2.2e-16
lm.fit =lm(medv~lstat,data=Boston)
anova(lm.fit ,lm.fit2)
Analysis of Variance Table
Model 1: medv ~ lstat
Model 2: medv ~ lstat + I(lstat^2)
Res.Df RSS Df Sum of Sq F Pr(>F)
1 504 19472
2 503 15347 1 4125.1 135.2 < 2.2e-16 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(lm.fit2)

lm.fit5=lm(medv~poly(lstat ,5),data=Boston)
summary (lm.fit5)
Call:
lm(formula = medv ~ poly(lstat, 5), data = Boston)
Residuals:
Min 1Q Median 3Q Max
-13.5433 -3.1039 -0.7052 2.0844 27.1153
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 22.5328 0.2318 97.197 < 2e-16
poly(lstat, 5)1 -152.4595 5.2148 -29.236 < 2e-16
poly(lstat, 5)2 64.2272 5.2148 12.316 < 2e-16
poly(lstat, 5)3 -27.0511 5.2148 -5.187 3.10e-07
poly(lstat, 5)4 25.4517 5.2148 4.881 1.42e-06
poly(lstat, 5)5 -19.2524 5.2148 -3.692 0.000247
(Intercept) ***
poly(lstat, 5)1 ***
poly(lstat, 5)2 ***
poly(lstat, 5)3 ***
poly(lstat, 5)4 ***
poly(lstat, 5)5 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 5.215 on 500 degrees of freedom
Multiple R-squared: 0.6817, Adjusted R-squared: 0.6785
F-statistic: 214.2 on 5 and 500 DF, p-value: < 2.2e-16
summary (lm(medv~log(rm),data=Boston))
Call:
lm(formula = medv ~ log(rm), data = Boston)
Residuals:
Min 1Q Median 3Q Max
-19.487 -2.875 -0.104 2.837 39.816
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -76.488 5.028 -15.21 <2e-16 ***
log(rm) 54.055 2.739 19.73 <2e-16 ***
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.915 on 504 degrees of freedom
Multiple R-squared: 0.4358, Adjusted R-squared: 0.4347
F-statistic: 389.3 on 1 and 504 DF, p-value: < 2.2e-16
Qualitative Predictors
fix( Carseats )
names(Carseats )
[1] "Sales" "CompPrice" "Income"
[4] "Advertising" "Population" "Price"
[7] "ShelveLoc" "Age" "Education"
[10] "Urban" "US"
lm.fit =lm(Sales~.+ Income :Advertising +Price :Age ,data=Carseats )
summary (lm.fit)
Call:
lm(formula = Sales ~ . + Income:Advertising + Price:Age, data = Carseats)
Residuals:
Min 1Q Median 3Q Max
-2.9208 -0.7503 0.0177 0.6754 3.3413
Coefficients:
Estimate Std. Error t value
(Intercept) 6.5755654 1.0087470 6.519
CompPrice 0.0929371 0.0041183 22.567
Income 0.0108940 0.0026044 4.183
Advertising 0.0702462 0.0226091 3.107
Population 0.0001592 0.0003679 0.433
Price -0.1008064 0.0074399 -13.549
ShelveLocGood 4.8486762 0.1528378 31.724
ShelveLocMedium 1.9532620 0.1257682 15.531
Age -0.0579466 0.0159506 -3.633
Education -0.0208525 0.0196131 -1.063
UrbanYes 0.1401597 0.1124019 1.247
USYes -0.1575571 0.1489234 -1.058
Income:Advertising 0.0007510 0.0002784 2.698
Price:Age 0.0001068 0.0001333 0.801
Pr(>|t|)
(Intercept) 2.22e-10 ***
CompPrice < 2e-16 ***
Income 3.57e-05 ***
Advertising 0.002030 **
Population 0.665330
Price < 2e-16 ***
ShelveLocGood < 2e-16 ***
ShelveLocMedium < 2e-16 ***
Age 0.000318 ***
Education 0.288361
UrbanYes 0.213171
USYes 0.290729
Income:Advertising 0.007290 **
Price:Age 0.423812
---
Signif. codes:
0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.011 on 386 degrees of freedom
Multiple R-squared: 0.8761, Adjusted R-squared: 0.8719
F-statistic: 210 on 13 and 386 DF, p-value: < 2.2e-16
attach (Carseats )
contrasts (ShelveLoc )
Good Medium
Bad 0 0
Good 1 0
Medium 0 1
LoadLibraries
Error: object 'LoadLibraries' not found
# LoadLibraries: zero-argument helper that attaches the ISLR and MASS
# packages with library() and then prints a confirmation message.
#
# Arguments: none.
# Value: the result of the final print() call, i.e. the confirmation string
#   (print() returns its argument invisibly).
# Side effects: attaches both packages to the search path and writes the
#   message to the console.
#
# Note: the leading space and the space before the final period are part of
# the string literal, so they show up verbatim in the printed message.
LoadLibraries=function (){
library (ISLR)
library (MASS)
print (" The libraries have been loaded .")
}
LoadLibraries
function (){
library (ISLR)
library (MASS)
print (" The libraries have been loaded .")
}
function (){
library (ISLR)
library (MASS)
print ("The libraries have been loaded .")
}
function (){
library (ISLR)
library (MASS)
print ("The libraries have been loaded .")
}
LoadLibraries()
[1] " The libraries have been loaded ."
LS0tDQp0aXRsZTogIkxhYm9yYXRvcmlvIENhcO10dWxvIDMiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIyNMaWJyYXJpZXMNCg0KYGBge3IsIHdhcm5pbmc9RkFMU0UsZXJyb3I9RkFMU0V9DQpsaWJyYXJ5IChNQVNTKQ0KbGlicmFyeSAoSVNMUikNCmBgYA0KDQojIyNTaW1wbGUgTGluZWFyIFJlZ3Jlc3Npb24NCg0KYGBge3J9DQpmaXgoQm9zdG9uKQ0KbmFtZXMoQm9zdG9uKQ0KYGBgDQoNCmBgYHtyfQ0KbG0uZml0PC1sbShtZWR2fmxzdGF0LGRhdGE9Qm9zdG9uKQ0KbG0uZml0DQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5KGxtLmZpdCkNCmBgYA0KDQpgYGB7cn0NCm5hbWVzKGxtLmZpdCkNCmBgYA0KDQpgYGB7cn0NCmxtLmZpdCRjb2VmZmljaWVudHMNCmBgYA0KDQpgYGB7cn0NCmNvbmZpbnQobG0uZml0KQ0KYGBgDQoNCmBgYHtyfQ0KcHJlZGljdCAobG0uZml0ICxkYXRhLmZyYW1lKGxzdGF0ID0oYyg1ICwxMCAsMTUpICkpLGludGVydmFsID0iY29uZmlkZW5jZSIpDQpgYGANCg0KYGBge3J9DQpwcmVkaWN0IChsbS5maXQgLGRhdGEuZnJhbWUobHN0YXQgPShjKDUgLDEwICwxNSkgKSksaW50ZXJ2YWwgPSJwcmVkaWN0aW9uIikNCmBgYA0KDQpgYGB7cn0NCnBsb3QoQm9zdG9uJGxzdGF0LEJvc3RvbiRtZWR2LGNvbD0iYmx1ZSIscGNoPTIwKQ0KYWJsaW5lKGxtLmZpdCxjb2w9InJlZCIsbHdkPTMpDQpgYGANCg0KYGBge3J9DQpwYXIobWZyb3cgPWMoMiwyKSkNCnBsb3QobG0uZml0KQ0KYGBgDQoNCmBgYHtyfQ0KcGFyKG1mcm93ID1jKDEsMikpDQpwbG90KHByZWRpY3QgKGxtLmZpdCksIHJlc2lkdWFscyAobG0uZml0KSkNCnBsb3QocHJlZGljdCAobG0uZml0KSwgcnN0dWRlbnQgKGxtLmZpdCkpDQpgYGANCg0KYGBge3J9DQpwbG90KGhhdHZhbHVlcyAobG0uZml0ICkpDQp3aGljaC5tYXggKGhhdHZhbHVlcyAobG0uZml0KSkNCmBgYA0KDQojIyNNdWx0aXBsZSBMaW5lYXIgUmVncmVzc2lvbg0KDQpgYGB7cn0NCmxtLmZpdCA9bG0obWVkdn5sc3RhdCthZ2UsZGF0YT1Cb3N0b24pDQpzdW1tYXJ5KGxtLmZpdCkNCmBgYA0KDQpgYGB7cn0NCmxtLmZpdCA9bG0obWVkdn4uLGRhdGE9Qm9zdG9uICkNCnN1bW1hcnkgKGxtLmZpdCkNCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkobG0uZml0KSRzaWdtYQ0KYGBgDQoNCmBgYHtyLHdhcm5pbmc9RkFMU0UsZXJyb3I9RkFMU0V9DQpsaWJyYXJ5IChjYXIpDQp2aWYobG0uZml0KQ0KYGBgDQpgYGB7cn0NCmxtLmZpdDE9bG0obWVkdn4uLWFnZSxkYXRhPUJvc3RvbiApDQpzdW1tYXJ5IChsbS5maXQxKQ0KYGBgDQoNCmBgYHtyfQ0KbG0uZml0MT11cGRhdGUgKGxtLmZpdCx+Li1hZ2UpDQpzdW1tYXJ5KGxtLmZpdDEpDQpgYGANCg0KIyMjSW50ZXJhY3Rpb24gVGVybXMNCmBgYHtyfQ0Kc3VtbWFyeSAobG0obWVkdn5sc3RhdCphZ2UgLGRhdGE9Qm9zdG9uKSkNCmBgYA0KDQojIyNOb24tTGluZWFyIFRyYW5zZm9ybWF0aW9ucyBvZiBwcmVkaWN0b3JzIA0KDQpgYGB7cn0NCmxtLmZp
dDI8LWxtKG1lZHZ+bHN0YXQrSShsc3RhdF4yKSxkYXRhPUJvc3RvbikNCnN1bW1hcnkgKGxtLmZpdDIpDQpgYGANCmBgYHtyfQ0KbG0uZml0ID1sbShtZWR2fmxzdGF0LGRhdGE9Qm9zdG9uKQ0KYW5vdmEobG0uZml0ICxsbS5maXQyKQ0KYGBgDQoNCmBgYHtyfQ0KcGFyKG1mcm93PWMoMiwyKSkNCnBsb3QobG0uZml0MikNCmBgYA0KDQpgYGB7cn0NCmxtLmZpdDU9bG0obWVkdn5wb2x5KGxzdGF0ICw1KSxkYXRhPUJvc3RvbikNCnN1bW1hcnkgKGxtLmZpdDUpDQpgYGANCg0KYGBge3J9DQpzdW1tYXJ5IChsbShtZWR2Pz8/bG9nKHJtKSxkYXRhPUJvc3RvbikpDQpgYGANCg0KIyMjUXVhbGl0YXRpdmUgUHJlZGljdG9ycw0KDQpgYGB7cn0NCmZpeCggQ2Fyc2VhdHMgKQ0KbmFtZXMoQ2Fyc2VhdHMgKQ0KYGBgDQoNCmBgYHtyfQ0KbG0uZml0ID1sbShTYWxlcz8/Py4rIEluY29tZSA6QWR2ZXJ0aXNpbmcgK1ByaWNlIDpBZ2UgLGRhdGE9Q2Fyc2VhdHMgKQ0Kc3VtbWFyeSAobG0uZml0KQ0KYGBgDQoNCmBgYHtyfQ0KYXR0YWNoIChDYXJzZWF0cyApDQpjb250cmFzdHMgKFNoZWx2ZUxvYyApDQpgYGANCg0KYGBge3J9DQpMb2FkTGlicmFyaWVzDQpMb2FkTGlicmFyaWVzKCkNCmBgYA0KYGBge3J9DQpMb2FkTGlicmFyaWVzPWZ1bmN0aW9uICgpew0KICBsaWJyYXJ5IChJU0xSKQ0KICBsaWJyYXJ5IChNQVNTKQ0KICBwcmludCAoIiBUaGUgbGlicmFyaWVzIGhhdmUgYmVlbiBsb2FkZWQgLiIpDQp9DQpgYGANCg0KYGBge3J9DQpMb2FkTGlicmFyaWVzDQpmdW5jdGlvbiAoKXsNCmxpYnJhcnkgKElTTFIpDQpsaWJyYXJ5IChNQVNTKQ0KcHJpbnQgKCJUaGUgbGlicmFyaWVzIGhhdmUgYmVlbiBsb2FkZWQgLiIpDQp9DQpgYGANCg0KYGBge3J9DQpMb2FkTGlicmFyaWVzKCkNCmBgYA0KDQo=