library("ISLR")
# Scatterplot matrix of every column in Auto.
pairs(Auto)
# Pairwise correlations of all columns except the non-numeric `name` column.
cor(Auto[, setdiff(names(Auto), "name")])
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
Use the summary() function to print the results. Comment on the output. For instance:
# Regress mpg on every remaining column of Auto except `name`, then print the
# coefficient table and fit statistics.
mod_1 <- lm(formula = mpg ~ . - name, data = Auto)
summary(mod_1)
##
## Call:
## lm(formula = mpg ~ . - name, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
# Split the device into a 2 x 2 grid so the diagnostic plots drawn for mod_1
# appear together.
par(mfrow = c(2, 2))
plot(mod_1)
Do the residual plots suggest any unusually large outliers? ## No outliers found outside the range (-3, 3)
Does the leverage plot identify any observations with unusually high leverage?
# Interaction model: each `a * b` term expands to both main effects plus the
# a:b interaction. Only the first eight columns of Auto are passed as data.
mod_2 <- lm(
  mpg ~ displacement * weight +
    origin * year +
    displacement * origin,
  data = Auto[, 1:8]
)
summary(mod_2)
##
## Call:
## lm(formula = mpg ~ displacement * weight + origin * year + displacement *
## origin, data = Auto[, 1:8])
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4232 -1.5709 -0.0384 1.3298 13.3393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.844e+01 8.195e+00 2.250 0.025045 *
## displacement -7.873e-02 1.399e-02 -5.629 3.51e-08 ***
## weight -1.048e-02 7.425e-04 -14.110 < 2e-16 ***
## origin -1.521e+01 4.273e+00 -3.560 0.000418 ***
## year 4.878e-01 1.019e-01 4.787 2.42e-06 ***
## displacement:weight 2.133e-05 2.420e-06 8.811 < 2e-16 ***
## origin:year 1.966e-01 5.458e-02 3.602 0.000357 ***
## displacement:origin 2.583e-03 7.758e-03 0.333 0.739343
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.973 on 384 degrees of freedom
## Multiple R-squared: 0.8576, Adjusted R-squared: 0.855
## F-statistic: 330.2 on 7 and 384 DF, p-value: < 2.2e-16
# Log-transformed predictors with the full three-way interaction between
# displacement, weight and origin.
mod_log <- lm(
  mpg ~ log(displacement) * log(weight) * log(origin),
  data = Auto[, 1:8]
)
summary(mod_log)
##
## Call:
## lm(formula = mpg ~ log(displacement) * log(weight) * log(origin),
## data = Auto[, 1:8])
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.0334 -2.3867 -0.4177 1.7467 18.4256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 244.746 110.085 2.223 0.0268
## log(displacement) -25.990 19.751 -1.316 0.1890
## log(weight) -23.378 14.114 -1.656 0.0985
## log(origin) 143.021 362.386 0.395 0.6933
## log(displacement):log(weight) 2.395 2.490 0.962 0.3366
## log(displacement):log(origin) -5.578 77.487 -0.072 0.9427
## log(weight):log(origin) -25.732 46.601 -0.552 0.5811
## log(displacement):log(weight):log(origin) 2.275 9.916 0.229 0.8187
##
## (Intercept) *
## log(displacement)
## log(weight) .
## log(origin)
## log(displacement):log(weight)
## log(displacement):log(origin)
## log(weight):log(origin)
## log(displacement):log(weight):log(origin)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.074 on 384 degrees of freedom
## Multiple R-squared: 0.7325, Adjusted R-squared: 0.7276
## F-statistic: 150.2 on 7 and 384 DF, p-value: < 2.2e-16
# Load the Carseats data set and regress Sales on Price plus the Urban and US
# indicators.
data("Carseats")
model_1 <- lm(formula = Sales ~ Price + Urban + US, data = Carseats)
summary(model_1)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
# Reduced model: the same regression as before with the Urban term dropped.
sig_mod <- lm(formula = Sales ~ Price + US, data = Carseats)
summary(sig_mod)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
# 95% confidence intervals for the sig_mod coefficients (0.95 is the default
# level, spelled out here for the reader).
confint(sig_mod, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632
# Split the device into a 2 x 2 grid so the diagnostic plots drawn for
# sig_mod appear together.
par(mfrow = c(2, 2))
plot(sig_mod)
# Report the single largest leverage value, labelled with its observation
# name. The hat values are computed once and indexed with which.max(); this
# also avoids `T`, which is an ordinary variable that can be reassigned
# (unlike the reserved word TRUE).
leverage <- hatvalues(sig_mod)
leverage[which.max(leverage)]
## 43
## 0.04333766
# Fix the RNG seed so the simulated vectors, and every fit based on them, can
# be reproduced from run to run.
# NOTE(review): the outputs recorded in this file came from an unseeded run
# and need to be regenerated to match the seeded values.
set.seed(1)
# Two independent standard-normal samples of length 100.
x <- rnorm(100)
y <- rnorm(100)
# Sum of squares of x.
sum(x^2)
## [1] 85.86661
# Sum of squares of y.
sum(y * y)
## [1] 102.1562
# Regress y on x with an intercept and print the fit summary.
summary(lm(formula = y ~ x))
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4107 -0.5898 0.0080 0.6190 2.5394
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.03658 0.10212 0.358 0.721
## x -0.06057 0.11021 -0.550 0.584
##
## Residual standard error: 1.019 on 98 degrees of freedom
## Multiple R-squared: 0.003073, Adjusted R-squared: -0.007099
## F-statistic: 0.3021 on 1 and 98 DF, p-value: 0.5838
# Regress y on x through the origin (`- 1` removes the intercept).
summary(lm(formula = y ~ x - 1))
##
## Call:
## lm(formula = y ~ x - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3707 -0.5542 0.0436 0.6540 2.5744
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x -0.0634 0.1094 -0.579 0.564
##
## Residual standard error: 1.014 on 99 degrees of freedom
## Multiple R-squared: 0.003378, Adjusted R-squared: -0.006688
## F-statistic: 0.3356 on 1 and 99 DF, p-value: 0.5637
# Reverse regression: x on y, with an intercept.
summary(lm(formula = x ~ y))
##
## Call:
## lm(formula = x ~ y)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7088 -0.6920 -0.1723 0.7280 2.7741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06424 0.09330 -0.689 0.493
## y -0.05074 0.09231 -0.550 0.584
##
## Residual standard error: 0.9322 on 98 degrees of freedom
## Multiple R-squared: 0.003073, Adjusted R-squared: -0.007099
## F-statistic: 0.3021 on 1 and 98 DF, p-value: 0.5838
# Reverse regression through the origin (`- 1` removes the intercept).
summary(lm(formula = x ~ y - 1))
##
## Call:
## lm(formula = x ~ y - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7738 -0.7564 -0.2377 0.6658 2.7099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y -0.05329 0.09199 -0.579 0.564
##
## Residual standard error: 0.9297 on 99 degrees of freedom
## Multiple R-squared: 0.003378, Adjusted R-squared: -0.006688
## F-statistic: 0.3356 on 1 and 99 DF, p-value: 0.5637
# Draw x, then build y as a sign-flipped random permutation of x, so both
# vectors hold the same magnitudes and therefore the same sum of squares.
x <- rnorm(100)
y <- -sample(x)
# Peek at the first values of x. The original ran this call twice in a row;
# the redundant second call and its duplicate output have been dropped.
head(x)
## [1] -0.6788538 -1.2351503 -1.0936383 0.9673890 1.6168616 -0.4077981
# First six values of y (6 is head()'s default, written out explicitly).
head(y, n = 6L)
## [1] 0.2537250 -2.2477557 -0.2200013 -0.1415669 -0.4047063 -0.1539842
# Sum of squares of x.
sum(x * x)
## [1] 115.4475
# Sum of squares of y, for comparison with the sum of squares of x.
sum(y * y)
## [1] 115.4475