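2.) The KNN regression method predicts the value of a quantitative output variable by averaging the responses of the K nearest training observations (a local average). The KNN classification method predicts the class to which a qualitative output variable belongs by estimating the local class probabilities from the K nearest training observations and assigning the most probable class.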
9.)
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.1.3
# Open the data viewer only when a person is at the console; View() needs an
# interactive viewer and has no use in a batch or knitted run.
if (interactive()) {
  View(Auto)
}
# Work on a local copy of the ISLR2 Auto data.
auto <- Auto
#A
# Scatterplot matrix of every column of the data set.
plot(auto)
#B
# Correlation matrix of the numeric columns. The car `name` column is dropped
# by name instead of by position, so the call keeps working if the column
# order ever changes.
cor(auto[, names(auto) != "name"])
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
#C
# Full model: mpg regressed on every other column of `auto` except `name`.
lm1 <- lm(formula = mpg ~ . - name, data = auto)
summary(lm1)
##
## Call:
## lm(formula = mpg ~ . - name, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
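# i. Yes. The F-statistic (252.4, p-value < 2.2e-16) shows there is a relationship between the predictors and mpg, and the adjusted R-squared of 0.82 suggests it is fairly strong.
# ii. Variables with statistically significant relationships are: displacement, weight, year, origin.
# iii. The coefficient for year (0.75) suggests that, holding the other predictors fixed, mpg rises by about 0.75 for each additional model year.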
# Simplified model based on variable significance: weight, year and origin only.
lm2 <- lm(formula = mpg ~ weight + year + origin, data = auto)
summary(lm2)
##
## Call:
## lm(formula = mpg ~ weight + year + origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.9440 -2.0948 -0.0389 1.7255 13.2722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.805e+01 4.001e+00 -4.510 8.60e-06 ***
## weight -5.994e-03 2.541e-04 -23.588 < 2e-16 ***
## year 7.571e-01 4.832e-02 15.668 < 2e-16 ***
## origin 1.150e+00 2.591e-01 4.439 1.18e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.348 on 388 degrees of freedom
## Multiple R-squared: 0.8175, Adjusted R-squared: 0.816
## F-statistic: 579.2 on 3 and 388 DF, p-value: < 2.2e-16
#D
# Lay the four standard lm diagnostic plots out in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(lm1)
# No abline() call here: on a fit with 8 coefficients abline() uses only the
# first two (it warned about exactly that), so the line it drew on top of the
# diagnostic display had no meaning for this model.
#There don't appear to be too many problems with the fit. There are a handful of outliers, including
#observations 323, 327, 326, and a few others. There is one observation, 14, with an unusually high leverage.
# Diagnostic plots for the simplified model (the plotting layout set earlier
# in the script is still in effect).
plot(lm2)
# No abline() call here: on a fit with 4 coefficients abline() uses only the
# first two (it warned about exactly that), so the line it drew on top of the
# diagnostic display had no meaning for this model.
#using the simplified model didn't alter results much, except for the leverage chart which looks much better.
#E
# Back to a single plotting panel.
par(mfrow = c(1, 1))
# Pairwise interaction fits: mpg on cylinders, displacement, and their interaction.
summary(lm(formula = mpg ~ cylinders * displacement, data = auto))
##
## Call:
## lm(formula = mpg ~ cylinders * displacement, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.0432 -2.4308 -0.2263 2.2048 20.9051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.22040 2.34712 20.545 < 2e-16 ***
## cylinders -2.41838 0.53456 -4.524 8.08e-06 ***
## displacement -0.13436 0.01615 -8.321 1.50e-15 ***
## cylinders:displacement 0.01182 0.00207 5.711 2.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.454 on 388 degrees of freedom
## Multiple R-squared: 0.6769, Adjusted R-squared: 0.6744
## F-statistic: 271 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on cylinders, horsepower, and their interaction.
summary(lm(formula = mpg ~ cylinders * horsepower, data = auto))
##
## Call:
## lm(formula = mpg ~ cylinders * horsepower, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5862 -2.1945 -0.5617 1.9541 16.3329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.815097 3.071314 23.708 <2e-16 ***
## cylinders -6.492462 0.510560 -12.716 <2e-16 ***
## horsepower -0.416007 0.034521 -12.051 <2e-16 ***
## cylinders:horsepower 0.047247 0.004732 9.984 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.094 on 388 degrees of freedom
## Multiple R-squared: 0.727, Adjusted R-squared: 0.7249
## F-statistic: 344.4 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on cylinders, weight, and their interaction.
summary(lm(formula = mpg ~ cylinders * weight, data = auto))
##
## Call:
## lm(formula = mpg ~ cylinders * weight, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.4916 -2.6225 -0.3927 1.7794 16.7087
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.3864559 3.7333137 17.514 < 2e-16 ***
## cylinders -4.2097950 0.7238315 -5.816 1.26e-08 ***
## weight -0.0128348 0.0013628 -9.418 < 2e-16 ***
## cylinders:weight 0.0010979 0.0002101 5.226 2.83e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.165 on 388 degrees of freedom
## Multiple R-squared: 0.7174, Adjusted R-squared: 0.7152
## F-statistic: 328.3 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on cylinders, acceleration, and their interaction.
summary(lm(formula = mpg ~ cylinders * acceleration, data = auto))
##
## Call:
## lm(formula = mpg ~ cylinders * acceleration, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2257 -3.1788 -0.7045 2.4031 17.4642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.37192 5.27599 5.946 6.13e-09 ***
## cylinders -1.84692 0.85564 -2.159 0.0315 *
## acceleration 0.73498 0.33724 2.179 0.0299 *
## cylinders:acceleration -0.11179 0.05806 -1.926 0.0549 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.895 on 388 degrees of freedom
## Multiple R-squared: 0.6097, Adjusted R-squared: 0.6067
## F-statistic: 202 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on cylinders, year, and their interaction.
summary(lm(formula = mpg ~ cylinders * year, data = auto))
##
## Call:
## lm(formula = mpg ~ cylinders * year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.2164 -2.5792 -0.1558 2.2569 15.2532
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -61.61775 15.10277 -4.080 5.47e-05 ***
## cylinders 5.51044 2.73705 2.013 0.04478 *
## year 1.34054 0.19909 6.733 5.99e-11 ***
## cylinders:year -0.11350 0.03647 -3.112 0.00199 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.131 on 388 degrees of freedom
## Multiple R-squared: 0.722, Adjusted R-squared: 0.7199
## F-statistic: 335.9 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on cylinders, origin, and their interaction.
summary(lm(formula = mpg ~ cylinders * origin, data = auto))
##
## Call:
## lm(formula = mpg ~ cylinders * origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.1561 -2.8078 -0.5199 2.1001 17.1922
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.6909 2.4851 16.374 < 2e-16 ***
## cylinders -3.8032 0.5042 -7.542 3.31e-13 ***
## origin -1.0195 1.8708 -0.545 0.586
## cylinders:origin 0.6592 0.4354 1.514 0.131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.771 on 388 degrees of freedom
## Multiple R-squared: 0.6292, Adjusted R-squared: 0.6263
## F-statistic: 219.4 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on displacement, horsepower, and their interaction.
summary(lm(formula = mpg ~ displacement * horsepower, data = auto))
##
## Call:
## lm(formula = mpg ~ displacement * horsepower, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.9391 -2.3373 -0.5816 2.1698 17.5771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.305e+01 1.526e+00 34.77 <2e-16 ***
## displacement -9.805e-02 6.682e-03 -14.67 <2e-16 ***
## horsepower -2.343e-01 1.959e-02 -11.96 <2e-16 ***
## displacement:horsepower 5.828e-04 5.193e-05 11.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.944 on 388 degrees of freedom
## Multiple R-squared: 0.7466, Adjusted R-squared: 0.7446
## F-statistic: 381 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on displacement, weight, and their interaction.
summary(lm(formula = mpg ~ displacement * weight, data = auto))
##
## Call:
## lm(formula = mpg ~ displacement * weight, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.8664 -2.4801 -0.3355 1.8071 17.9429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.372e+01 1.940e+00 27.697 < 2e-16 ***
## displacement -7.831e-02 1.131e-02 -6.922 1.85e-11 ***
## weight -8.931e-03 8.474e-04 -10.539 < 2e-16 ***
## displacement:weight 1.744e-05 2.789e-06 6.253 1.06e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.097 on 388 degrees of freedom
## Multiple R-squared: 0.7265, Adjusted R-squared: 0.7244
## F-statistic: 343.6 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on displacement, acceleration, and their interaction.
summary(lm(formula = mpg ~ displacement * acceleration, data = auto))
##
## Call:
## lm(formula = mpg ~ displacement * acceleration, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.1540 -2.2872 -0.2687 2.0308 20.4099
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.0532678 2.9221224 7.889 3.13e-14 ***
## displacement 0.0031393 0.0113352 0.277 0.782
## acceleration 0.8303377 0.1815300 4.574 6.44e-06 ***
## displacement:acceleration -0.0045805 0.0007899 -5.799 1.38e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.456 on 388 degrees of freedom
## Multiple R-squared: 0.6766, Adjusted R-squared: 0.6741
## F-statistic: 270.5 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on displacement, year, and their interaction.
summary(lm(formula = mpg ~ displacement * year, data = auto))
##
## Call:
## lm(formula = mpg ~ displacement * year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.8530 -2.4250 -0.2234 2.0823 16.9933
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.288e+01 8.368e+00 -8.709 < 2e-16 ***
## displacement 2.523e-01 4.059e-02 6.216 1.32e-09 ***
## year 1.408e+00 1.102e-01 12.779 < 2e-16 ***
## displacement:year -4.080e-03 5.453e-04 -7.482 4.96e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.729 on 388 degrees of freedom
## Multiple R-squared: 0.7735, Adjusted R-squared: 0.7718
## F-statistic: 441.7 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on displacement, origin, and their interaction.
summary(lm(formula = mpg ~ displacement * origin, data = auto))
##
## Call:
## lm(formula = mpg ~ displacement * origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.1742 -2.8223 -0.5893 2.2531 18.8420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 28.41854 1.53883 18.468 < 2e-16 ***
## displacement -0.01887 0.01082 -1.745 0.08183 .
## origin 4.79247 1.13249 4.232 2.9e-05 ***
## displacement:origin -0.03476 0.01010 -3.442 0.00064 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.526 on 388 degrees of freedom
## Multiple R-squared: 0.6664, Adjusted R-squared: 0.6638
## F-statistic: 258.3 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on horsepower, weight, and their interaction.
summary(lm(formula = mpg ~ horsepower * weight, data = auto))
##
## Call:
## lm(formula = mpg ~ horsepower * weight, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7725 -2.2074 -0.2708 1.9973 14.7314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.356e+01 2.343e+00 27.127 < 2e-16 ***
## horsepower -2.508e-01 2.728e-02 -9.195 < 2e-16 ***
## weight -1.077e-02 7.738e-04 -13.921 < 2e-16 ***
## horsepower:weight 5.355e-05 6.649e-06 8.054 9.93e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.93 on 388 degrees of freedom
## Multiple R-squared: 0.7484, Adjusted R-squared: 0.7465
## F-statistic: 384.8 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on horsepower, acceleration, and their interaction.
summary(lm(formula = mpg ~ horsepower * acceleration, data = auto))
##
## Call:
## lm(formula = mpg ~ horsepower * acceleration, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.3442 -2.7324 -0.4049 2.4210 15.8840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.512440 3.420187 9.798 < 2e-16 ***
## horsepower 0.017590 0.027425 0.641 0.521664
## acceleration 0.800296 0.211899 3.777 0.000184 ***
## horsepower:acceleration -0.015698 0.002003 -7.838 4.45e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.426 on 388 degrees of freedom
## Multiple R-squared: 0.6809, Adjusted R-squared: 0.6784
## F-statistic: 275.9 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on horsepower, year, and their interaction.
summary(lm(formula = mpg ~ horsepower * year, data = auto))
##
## Call:
## lm(formula = mpg ~ horsepower * year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.3492 -2.4509 -0.4557 2.4056 14.4437
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.266e+02 1.212e+01 -10.449 <2e-16 ***
## horsepower 1.046e+00 1.154e-01 9.063 <2e-16 ***
## year 2.192e+00 1.613e-01 13.585 <2e-16 ***
## horsepower:year -1.596e-02 1.562e-03 -10.217 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.901 on 388 degrees of freedom
## Multiple R-squared: 0.7522, Adjusted R-squared: 0.7503
## F-statistic: 392.5 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on horsepower, origin, and their interaction.
summary(lm(formula = mpg ~ horsepower * origin, data = auto))
##
## Call:
## lm(formula = mpg ~ horsepower * origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.8206 -3.1504 -0.5536 2.3682 15.2386
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.79098 1.69728 15.785 < 2e-16 ***
## horsepower -0.05942 0.01662 -3.574 0.000396 ***
## origin 7.87119 1.13907 6.910 2.00e-11 ***
## horsepower:origin -0.06338 0.01312 -4.832 1.95e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.424 on 388 degrees of freedom
## Multiple R-squared: 0.6812, Adjusted R-squared: 0.6788
## F-statistic: 276.4 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on weight, acceleration, and their interaction.
summary(lm(formula = mpg ~ weight * acceleration, data = auto))
##
## Call:
## lm(formula = mpg ~ weight * acceleration, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5823 -2.6411 -0.3517 2.2611 15.6704
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.814e+01 4.872e+00 5.776 1.57e-08 ***
## weight -3.168e-03 1.461e-03 -2.168 0.03076 *
## acceleration 1.117e+00 3.097e-01 3.608 0.00035 ***
## weight:acceleration -2.787e-04 9.694e-05 -2.875 0.00426 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.249 on 388 degrees of freedom
## Multiple R-squared: 0.706, Adjusted R-squared: 0.7037
## F-statistic: 310.5 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on weight, year, and their interaction.
summary(lm(formula = mpg ~ weight * year, data = auto))
##
## Call:
## lm(formula = mpg ~ weight * year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.0397 -1.9956 -0.0983 1.6525 12.9896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.105e+02 1.295e+01 -8.531 3.30e-16 ***
## weight 2.755e-02 4.413e-03 6.242 1.14e-09 ***
## year 2.040e+00 1.718e-01 11.876 < 2e-16 ***
## weight:year -4.579e-04 5.907e-05 -7.752 8.02e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.193 on 388 degrees of freedom
## Multiple R-squared: 0.8339, Adjusted R-squared: 0.8326
## F-statistic: 649.3 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on weight, origin, and their interaction.
summary(lm(formula = mpg ~ weight * origin, data = auto))
##
## Call:
## lm(formula = mpg ~ weight * origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.4126 -2.8476 -0.4004 2.1815 15.5139
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.8991363 2.2031615 17.656 < 2e-16 ***
## weight -0.0055411 0.0007845 -7.064 7.56e-12 ***
## origin 4.1312744 1.4980510 2.758 0.00609 **
## weight:origin -0.0012729 0.0006248 -2.037 0.04230 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.255 on 388 degrees of freedom
## Multiple R-squared: 0.7051, Adjusted R-squared: 0.7028
## F-statistic: 309.3 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on acceleration, year, and their interaction.
summary(lm(formula = mpg ~ acceleration * year, data = auto))
##
## Call:
## lm(formula = mpg ~ acceleration * year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9341 -4.9339 -0.6187 4.7066 18.0828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -89.41449 34.07514 -2.624 0.00903 **
## acceleration 2.09675 2.17707 0.963 0.33609
## year 1.32728 0.45386 2.924 0.00365 **
## acceleration:year -0.01738 0.02885 -0.602 0.54727
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.026 on 388 degrees of freedom
## Multiple R-squared: 0.4085, Adjusted R-squared: 0.4039
## F-statistic: 89.31 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on acceleration, origin, and their interaction.
summary(lm(formula = mpg ~ acceleration * origin, data = auto))
##
## Call:
## lm(formula = mpg ~ acceleration * origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.220 -4.073 -1.296 3.411 18.082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3620 4.1728 0.806 0.42092
## acceleration 0.8036 0.2673 3.007 0.00281 **
## origin 3.7844 2.6919 1.406 0.16057
## acceleration:origin 0.0652 0.1674 0.390 0.69711
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.988 on 388 degrees of freedom
## Multiple R-squared: 0.4159, Adjusted R-squared: 0.4114
## F-statistic: 92.09 on 3 and 388 DF, p-value: < 2.2e-16
# mpg on year, origin, and their interaction.
summary(lm(formula = mpg ~ year * origin, data = auto))
##
## Call:
## lm(formula = mpg ~ year * origin, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.3141 -3.7120 -0.6513 3.3621 15.5859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83.3809 12.0000 -6.948 1.57e-11 ***
## year 1.3089 0.1576 8.305 1.68e-15 ***
## origin 17.3752 6.8325 2.543 0.0114 *
## year:origin -0.1663 0.0889 -1.871 0.0621 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.199 on 388 degrees of freedom
## Multiple R-squared: 0.5596, Adjusted R-squared: 0.5562
## F-statistic: 164.4 on 3 and 388 DF, p-value: < 2.2e-16
#At the 5% level, all of the interaction terms are significant except for the following:
#cylinders*acceleration (p = 0.055), cylinders*origin (p = 0.131), acceleration*year (p = 0.547), acceleration*origin (p = 0.697), year*origin (p = 0.062)
#F
# Baseline fit of mpg on weight, then the same fit with an added squared
# weight term; anova() compares the two nested models with an F-test.
lm.weight <- lm(formula = mpg ~ weight, data = auto)
lm3 <- lm(formula = mpg ~ weight + I(weight^2), data = auto)
anova(lm.weight, lm3)
## Analysis of Variance Table
##
## Model 1: mpg ~ weight
## Model 2: mpg ~ weight + I(weight^2)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 390 7321.2
## 2 389 6784.9 1 536.34 30.75 5.429e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Model with squared term is superior
# Add a square-root-of-weight term and compare against the weight-only fit.
lm.sqrt <- lm(formula = mpg ~ weight + sqrt(weight), data = auto)
anova(lm.weight, lm.sqrt)
## Analysis of Variance Table
##
## Model 1: mpg ~ weight
## Model 2: mpg ~ weight + sqrt(weight)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 390 7321.2
## 2 389 6800.9 1 520.35 29.764 8.702e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Model with square root is superior
# Add a log-of-weight term and compare against the weight-only fit.
lm.log <- lm(formula = mpg ~ weight + log(weight), data = auto)
anova(lm.weight, lm.log)
## Analysis of Variance Table
##
## Model 1: mpg ~ weight
## Model 2: mpg ~ weight + log(weight)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 390 7321.2
## 2 389 6812.2 1 509.05 29.069 1.214e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Model with log is superior
#It appears altering the variables by squaring, square-rooting, or taking the log can improve model performance
10.)
# Work on a local copy of the ISLR2 Carseats data.
carseats <- Carseats
#a.)
# Sales explained by Price and the two Yes/No predictors Urban and US.
lm.car <- lm(formula = Sales ~ Price + Urban + US, data = carseats)
summary(lm.car)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
#b.) As price goes up, sales go down. If the store is in the US, sales are higher. The Urban variable is
# not significant.
#c.) $Sales_i = \beta_0 + \beta_1 \cdot Price_i + \beta_2 \cdot d_i + \beta_3 \cdot e_i + \epsilon_i$
# where $d_i = 1$ if store $i$ is in an urban location and 0 otherwise,
# and $e_i = 1$ if store $i$ is in the US and 0 otherwise.
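#d.) Price and US are statistically significant (p < 0.001 for both), so the null hypothesis of a zero coefficient is rejected for them. The "Urban" variable is not significant (p = 0.936).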
#e.)
# Smaller model: Sales on Price and US only, leaving Urban out.
lm.car2 <- lm(formula = Sales ~ Price + US, data = carseats)
summary(lm.car2)
##
## Call:
## lm(formula = Sales ~ Price + US, data = carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
#f.) Based on the R-squared values, neither model is great. (roughly 0.24 for both)
#g.)
# 95% confidence intervals for the coefficients of the smaller model.
confint(object = lm.car2, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632
#h.)
# Draw the lm diagnostic plots for the smaller model in a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = lm.car2)
#No, the diagnostic plots show no clear evidence of outliers or high-leverage observations.
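12.) a.) For regression without an intercept, the coefficient estimate for Y onto X is $\sum x_i y_i / \sum x_i^2$ and for X onto Y it is $\sum x_i y_i / \sum y_i^2$. The two are therefore equal when the sum of squares of the observed y values equals the sum of squares of the observed x values: $\sum y_i^2 = \sum x_i^2$.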
#b.)
# This problem is about regression without an intercept, so both fits drop
# the intercept with `+ 0`. Each fit then has a single slope:
# sum(x * y) / sum(predictor^2).
set.seed(1)  # fix the random draw so the printed coefficients are reproducible
x <- rnorm(100)
y <- x^2
# In general sum(y^2) differs from sum(x^2) here, so the two slopes differ.
lm.xy <- lm(x ~ y + 0)  # X onto Y: sum(x * y) / sum(y^2)
coefficients(lm.xy)
lm.yx <- lm(y ~ x + 0)  # Y onto X: sum(x * y) / sum(x^2)
coefficients(lm.yx)
#c.)
# Same no-intercept setup. With y2 identical to x2 the sums of squares are
# equal, so both slopes are sum(x2 * y2) / sum(x2^2) = 1.
x2 <- rnorm(100)
y2 <- x2
lm.xy2 <- lm(x2 ~ y2 + 0)  # X onto Y
coefficients(lm.xy2)
## y2
## 1
lm.yx2 <- lm(y2 ~ x2 + 0)  # Y onto X
coefficients(lm.yx2)
## x2
## 1