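2.) The KNN regression method predicts the value of a quantitative output variable by averaging the responses of the K nearest training observations (a local average). The KNN classification method predicts the class to which a qualitative output variable belongs by estimating the local class probabilities from the K nearest training observations and assigning the most probable class.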

9.)

library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.1.3
# Open the data viewer only when someone is at the console: View() is an
# interactive inspection aid and has no use when the script is run
# non-interactively (Rscript, knitting).
if (interactive()) {
  View(Auto)
}
# Working copy of the ISLR2 Auto data set used throughout exercise 9.
auto <- Auto

#A
# Scatterplot matrix: plot() on a data frame draws every pair of columns
# of auto against each other.
plot(auto)

#B
# Correlation matrix of every column except `name`. The column is dropped by
# name instead of by position (it was hard-coded as column 9), so the call
# keeps working if the column order of the data frame ever changes.
cor(auto[, names(auto) != "name"])
##                     mpg  cylinders displacement horsepower     weight
## mpg           1.0000000 -0.7776175   -0.8051269 -0.7784268 -0.8322442
## cylinders    -0.7776175  1.0000000    0.9508233  0.8429834  0.8975273
## displacement -0.8051269  0.9508233    1.0000000  0.8972570  0.9329944
## horsepower   -0.7784268  0.8429834    0.8972570  1.0000000  0.8645377
## weight       -0.8322442  0.8975273    0.9329944  0.8645377  1.0000000
## acceleration  0.4233285 -0.5046834   -0.5438005 -0.6891955 -0.4168392
## year          0.5805410 -0.3456474   -0.3698552 -0.4163615 -0.3091199
## origin        0.5652088 -0.5689316   -0.6145351 -0.4551715 -0.5850054
##              acceleration       year     origin
## mpg             0.4233285  0.5805410  0.5652088
## cylinders      -0.5046834 -0.3456474 -0.5689316
## displacement   -0.5438005 -0.3698552 -0.6145351
## horsepower     -0.6891955 -0.4163615 -0.4551715
## weight         -0.4168392 -0.3091199 -0.5850054
## acceleration    1.0000000  0.2903161  0.2127458
## year            0.2903161  1.0000000  0.1815277
## origin          0.2127458  0.1815277  1.0000000
#C
# Multiple regression of mpg on every other column of auto except name.
lm1 <- lm(mpg ~ . - name, data = auto)
summary(lm1)
## 
## Call:
## lm(formula = mpg ~ . - name, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5903 -2.1565 -0.1169  1.8690 13.0604 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -17.218435   4.644294  -3.707  0.00024 ***
## cylinders     -0.493376   0.323282  -1.526  0.12780    
## displacement   0.019896   0.007515   2.647  0.00844 ** 
## horsepower    -0.016951   0.013787  -1.230  0.21963    
## weight        -0.006474   0.000652  -9.929  < 2e-16 ***
## acceleration   0.080576   0.098845   0.815  0.41548    
## year           0.750773   0.050973  14.729  < 2e-16 ***
## origin         1.426141   0.278136   5.127 4.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8182 
## F-statistic: 252.4 on 7 and 384 DF,  p-value: < 2.2e-16
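# i. Yes. The overall F-statistic (252.4 on 7 and 384 DF, p-value < 2.2e-16) rejects the hypothesis that all coefficients are zero, and the adjusted R-squared of 0.82 suggests the relationship is fairly strong.
# ii. The predictors with statistically significant relationships to mpg (p < 0.05) are: displacement, weight, year, and origin.
# iii. The coefficient for year (about 0.75) suggests that, holding the other predictors fixed, mpg increases by roughly 0.75 for each additional model year.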

# Simplified model based on variable significance: weight, year and origin only.
lm2 <- lm(mpg ~ weight + year + origin, data = auto)
summary(lm2)
## 
## Call:
## lm(formula = mpg ~ weight + year + origin, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9440 -2.0948 -0.0389  1.7255 13.2722 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.805e+01  4.001e+00  -4.510 8.60e-06 ***
## weight      -5.994e-03  2.541e-04 -23.588  < 2e-16 ***
## year         7.571e-01  4.832e-02  15.668  < 2e-16 ***
## origin       1.150e+00  2.591e-01   4.439 1.18e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.348 on 388 degrees of freedom
## Multiple R-squared:  0.8175, Adjusted R-squared:  0.816 
## F-statistic: 579.2 on 3 and 388 DF,  p-value: < 2.2e-16
#D
# Lay the four standard lm diagnostic plots for the full model out in a 2x2 grid.
par(mfrow = c(2, 2))
plot(lm1)
# No abline(lm1) here: for a fit with 8 coefficients abline() only uses the
# first two (R warns about exactly that), so the line it would add to the
# last diagnostic panel has no meaning.

#There don't appear to be too many problems with the fit. There are a handful of outliers, including 
#observations 323, 327, 326, and a few others. There is one observation, 14, with an unusually high leverage.
# Diagnostic plots for the simplified model.
plot(lm2)
# No abline(lm2) here: for a fit with 4 coefficients abline() only uses the
# first two (R warns about exactly that), so the line it would add to the
# last diagnostic panel has no meaning.

#using the simplified model didn't alter results much, except for the leverage chart which looks much better.


#E
# Back to a single-panel plotting layout after the 2x2 diagnostic grids.
par(mfrow = c(1, 1))
# Pairwise interaction model: cylinders x displacement (`a * b` = a + b + a:b).
summary(lm(formula = mpg ~ cylinders * displacement, data = auto))
## 
## Call:
## lm(formula = mpg ~ cylinders * displacement, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.0432  -2.4308  -0.2263   2.2048  20.9051 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            48.22040    2.34712  20.545  < 2e-16 ***
## cylinders              -2.41838    0.53456  -4.524 8.08e-06 ***
## displacement           -0.13436    0.01615  -8.321 1.50e-15 ***
## cylinders:displacement  0.01182    0.00207   5.711 2.24e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.454 on 388 degrees of freedom
## Multiple R-squared:  0.6769, Adjusted R-squared:  0.6744 
## F-statistic:   271 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: cylinders x horsepower.
summary(lm(formula = mpg ~ cylinders * horsepower, data = auto))
## 
## Call:
## lm(formula = mpg ~ cylinders * horsepower, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.5862  -2.1945  -0.5617   1.9541  16.3329 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          72.815097   3.071314  23.708   <2e-16 ***
## cylinders            -6.492462   0.510560 -12.716   <2e-16 ***
## horsepower           -0.416007   0.034521 -12.051   <2e-16 ***
## cylinders:horsepower  0.047247   0.004732   9.984   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.094 on 388 degrees of freedom
## Multiple R-squared:  0.727,  Adjusted R-squared:  0.7249 
## F-statistic: 344.4 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: cylinders x weight.
summary(lm(formula = mpg ~ cylinders * weight, data = auto))
## 
## Call:
## lm(formula = mpg ~ cylinders * weight, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.4916  -2.6225  -0.3927   1.7794  16.7087 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      65.3864559  3.7333137  17.514  < 2e-16 ***
## cylinders        -4.2097950  0.7238315  -5.816 1.26e-08 ***
## weight           -0.0128348  0.0013628  -9.418  < 2e-16 ***
## cylinders:weight  0.0010979  0.0002101   5.226 2.83e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.165 on 388 degrees of freedom
## Multiple R-squared:  0.7174, Adjusted R-squared:  0.7152 
## F-statistic: 328.3 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: cylinders x acceleration.
summary(lm(formula = mpg ~ cylinders * acceleration, data = auto))
## 
## Call:
## lm(formula = mpg ~ cylinders * acceleration, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2257  -3.1788  -0.7045   2.4031  17.4642 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            31.37192    5.27599   5.946 6.13e-09 ***
## cylinders              -1.84692    0.85564  -2.159   0.0315 *  
## acceleration            0.73498    0.33724   2.179   0.0299 *  
## cylinders:acceleration -0.11179    0.05806  -1.926   0.0549 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.895 on 388 degrees of freedom
## Multiple R-squared:  0.6097, Adjusted R-squared:  0.6067 
## F-statistic:   202 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: cylinders x year.
summary(lm(formula = mpg ~ cylinders * year, data = auto))
## 
## Call:
## lm(formula = mpg ~ cylinders * year, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2164  -2.5792  -0.1558   2.2569  15.2532 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -61.61775   15.10277  -4.080 5.47e-05 ***
## cylinders        5.51044    2.73705   2.013  0.04478 *  
## year             1.34054    0.19909   6.733 5.99e-11 ***
## cylinders:year  -0.11350    0.03647  -3.112  0.00199 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.131 on 388 degrees of freedom
## Multiple R-squared:  0.722,  Adjusted R-squared:  0.7199 
## F-statistic: 335.9 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: cylinders x origin.
summary(lm(formula = mpg ~ cylinders * origin, data = auto))
## 
## Call:
## lm(formula = mpg ~ cylinders * origin, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.1561  -2.8078  -0.5199   2.1001  17.1922 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       40.6909     2.4851  16.374  < 2e-16 ***
## cylinders         -3.8032     0.5042  -7.542 3.31e-13 ***
## origin            -1.0195     1.8708  -0.545    0.586    
## cylinders:origin   0.6592     0.4354   1.514    0.131    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.771 on 388 degrees of freedom
## Multiple R-squared:  0.6292, Adjusted R-squared:  0.6263 
## F-statistic: 219.4 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: displacement x horsepower.
summary(lm(formula = mpg ~ displacement * horsepower, data = auto))
## 
## Call:
## lm(formula = mpg ~ displacement * horsepower, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9391  -2.3373  -0.5816   2.1698  17.5771 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              5.305e+01  1.526e+00   34.77   <2e-16 ***
## displacement            -9.805e-02  6.682e-03  -14.67   <2e-16 ***
## horsepower              -2.343e-01  1.959e-02  -11.96   <2e-16 ***
## displacement:horsepower  5.828e-04  5.193e-05   11.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.944 on 388 degrees of freedom
## Multiple R-squared:  0.7466, Adjusted R-squared:  0.7446 
## F-statistic:   381 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: displacement x weight.
summary(lm(formula = mpg ~ displacement * weight, data = auto))
## 
## Call:
## lm(formula = mpg ~ displacement * weight, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.8664  -2.4801  -0.3355   1.8071  17.9429 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          5.372e+01  1.940e+00  27.697  < 2e-16 ***
## displacement        -7.831e-02  1.131e-02  -6.922 1.85e-11 ***
## weight              -8.931e-03  8.474e-04 -10.539  < 2e-16 ***
## displacement:weight  1.744e-05  2.789e-06   6.253 1.06e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.097 on 388 degrees of freedom
## Multiple R-squared:  0.7265, Adjusted R-squared:  0.7244 
## F-statistic: 343.6 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: displacement x acceleration.
summary(lm(formula = mpg ~ displacement * acceleration, data = auto))
## 
## Call:
## lm(formula = mpg ~ displacement * acceleration, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.1540  -2.2872  -0.2687   2.0308  20.4099 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               23.0532678  2.9221224   7.889 3.13e-14 ***
## displacement               0.0031393  0.0113352   0.277    0.782    
## acceleration               0.8303377  0.1815300   4.574 6.44e-06 ***
## displacement:acceleration -0.0045805  0.0007899  -5.799 1.38e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.456 on 388 degrees of freedom
## Multiple R-squared:  0.6766, Adjusted R-squared:  0.6741 
## F-statistic: 270.5 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: displacement x year.
summary(lm(formula = mpg ~ displacement * year, data = auto))
## 
## Call:
## lm(formula = mpg ~ displacement * year, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.8530  -2.4250  -0.2234   2.0823  16.9933 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -7.288e+01  8.368e+00  -8.709  < 2e-16 ***
## displacement       2.523e-01  4.059e-02   6.216 1.32e-09 ***
## year               1.408e+00  1.102e-01  12.779  < 2e-16 ***
## displacement:year -4.080e-03  5.453e-04  -7.482 4.96e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.729 on 388 degrees of freedom
## Multiple R-squared:  0.7735, Adjusted R-squared:  0.7718 
## F-statistic: 441.7 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: displacement x origin.
summary(lm(formula = mpg ~ displacement * origin, data = auto))
## 
## Call:
## lm(formula = mpg ~ displacement * origin, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.1742  -2.8223  -0.5893   2.2531  18.8420 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         28.41854    1.53883  18.468  < 2e-16 ***
## displacement        -0.01887    0.01082  -1.745  0.08183 .  
## origin               4.79247    1.13249   4.232  2.9e-05 ***
## displacement:origin -0.03476    0.01010  -3.442  0.00064 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.526 on 388 degrees of freedom
## Multiple R-squared:  0.6664, Adjusted R-squared:  0.6638 
## F-statistic: 258.3 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: horsepower x weight.
summary(lm(formula = mpg ~ horsepower * weight, data = auto))
## 
## Call:
## lm(formula = mpg ~ horsepower * weight, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7725  -2.2074  -0.2708   1.9973  14.7314 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.356e+01  2.343e+00  27.127  < 2e-16 ***
## horsepower        -2.508e-01  2.728e-02  -9.195  < 2e-16 ***
## weight            -1.077e-02  7.738e-04 -13.921  < 2e-16 ***
## horsepower:weight  5.355e-05  6.649e-06   8.054 9.93e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.93 on 388 degrees of freedom
## Multiple R-squared:  0.7484, Adjusted R-squared:  0.7465 
## F-statistic: 384.8 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: horsepower x acceleration.
summary(lm(formula = mpg ~ horsepower * acceleration, data = auto))
## 
## Call:
## lm(formula = mpg ~ horsepower * acceleration, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3442  -2.7324  -0.4049   2.4210  15.8840 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             33.512440   3.420187   9.798  < 2e-16 ***
## horsepower               0.017590   0.027425   0.641 0.521664    
## acceleration             0.800296   0.211899   3.777 0.000184 ***
## horsepower:acceleration -0.015698   0.002003  -7.838 4.45e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.426 on 388 degrees of freedom
## Multiple R-squared:  0.6809, Adjusted R-squared:  0.6784 
## F-statistic: 275.9 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: horsepower x year.
summary(lm(formula = mpg ~ horsepower * year, data = auto))
## 
## Call:
## lm(formula = mpg ~ horsepower * year, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.3492  -2.4509  -0.4557   2.4056  14.4437 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.266e+02  1.212e+01 -10.449   <2e-16 ***
## horsepower       1.046e+00  1.154e-01   9.063   <2e-16 ***
## year             2.192e+00  1.613e-01  13.585   <2e-16 ***
## horsepower:year -1.596e-02  1.562e-03 -10.217   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.901 on 388 degrees of freedom
## Multiple R-squared:  0.7522, Adjusted R-squared:  0.7503 
## F-statistic: 392.5 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: horsepower x origin.
summary(lm(formula = mpg ~ horsepower * origin, data = auto))
## 
## Call:
## lm(formula = mpg ~ horsepower * origin, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.8206  -3.1504  -0.5536   2.3682  15.2386 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       26.79098    1.69728  15.785  < 2e-16 ***
## horsepower        -0.05942    0.01662  -3.574 0.000396 ***
## origin             7.87119    1.13907   6.910 2.00e-11 ***
## horsepower:origin -0.06338    0.01312  -4.832 1.95e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.424 on 388 degrees of freedom
## Multiple R-squared:  0.6812, Adjusted R-squared:  0.6788 
## F-statistic: 276.4 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: weight x acceleration.
summary(lm(formula = mpg ~ weight * acceleration, data = auto))
## 
## Call:
## lm(formula = mpg ~ weight * acceleration, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.5823  -2.6411  -0.3517   2.2611  15.6704 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.814e+01  4.872e+00   5.776 1.57e-08 ***
## weight              -3.168e-03  1.461e-03  -2.168  0.03076 *  
## acceleration         1.117e+00  3.097e-01   3.608  0.00035 ***
## weight:acceleration -2.787e-04  9.694e-05  -2.875  0.00426 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.249 on 388 degrees of freedom
## Multiple R-squared:  0.706,  Adjusted R-squared:  0.7037 
## F-statistic: 310.5 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: weight x year.
summary(lm(formula = mpg ~ weight * year, data = auto))
## 
## Call:
## lm(formula = mpg ~ weight * year, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0397 -1.9956 -0.0983  1.6525 12.9896 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.105e+02  1.295e+01  -8.531 3.30e-16 ***
## weight       2.755e-02  4.413e-03   6.242 1.14e-09 ***
## year         2.040e+00  1.718e-01  11.876  < 2e-16 ***
## weight:year -4.579e-04  5.907e-05  -7.752 8.02e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.193 on 388 degrees of freedom
## Multiple R-squared:  0.8339, Adjusted R-squared:  0.8326 
## F-statistic: 649.3 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: weight x origin.
summary(lm(formula = mpg ~ weight * origin, data = auto))
## 
## Call:
## lm(formula = mpg ~ weight * origin, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.4126  -2.8476  -0.4004   2.1815  15.5139 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   38.8991363  2.2031615  17.656  < 2e-16 ***
## weight        -0.0055411  0.0007845  -7.064 7.56e-12 ***
## origin         4.1312744  1.4980510   2.758  0.00609 ** 
## weight:origin -0.0012729  0.0006248  -2.037  0.04230 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.255 on 388 degrees of freedom
## Multiple R-squared:  0.7051, Adjusted R-squared:  0.7028 
## F-statistic: 309.3 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: acceleration x year.
summary(lm(formula = mpg ~ acceleration * year, data = auto))
## 
## Call:
## lm(formula = mpg ~ acceleration * year, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.9341  -4.9339  -0.6187   4.7066  18.0828 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -89.41449   34.07514  -2.624  0.00903 **
## acceleration        2.09675    2.17707   0.963  0.33609   
## year                1.32728    0.45386   2.924  0.00365 **
## acceleration:year  -0.01738    0.02885  -0.602  0.54727   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.026 on 388 degrees of freedom
## Multiple R-squared:  0.4085, Adjusted R-squared:  0.4039 
## F-statistic: 89.31 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: acceleration x origin.
summary(lm(formula = mpg ~ acceleration * origin, data = auto))
## 
## Call:
## lm(formula = mpg ~ acceleration * origin, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.220  -4.073  -1.296   3.411  18.082 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)           3.3620     4.1728   0.806  0.42092   
## acceleration          0.8036     0.2673   3.007  0.00281 **
## origin                3.7844     2.6919   1.406  0.16057   
## acceleration:origin   0.0652     0.1674   0.390  0.69711   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.988 on 388 degrees of freedom
## Multiple R-squared:  0.4159, Adjusted R-squared:  0.4114 
## F-statistic: 92.09 on 3 and 388 DF,  p-value: < 2.2e-16
# Pairwise interaction model: year x origin.
summary(lm(formula = mpg ~ year * origin, data = auto))
## 
## Call:
## lm(formula = mpg ~ year * origin, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.3141  -3.7120  -0.6513   3.3621  15.5859 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -83.3809    12.0000  -6.948 1.57e-11 ***
## year          1.3089     0.1576   8.305 1.68e-15 ***
## origin       17.3752     6.8325   2.543   0.0114 *  
## year:origin  -0.1663     0.0889  -1.871   0.0621 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.199 on 388 degrees of freedom
## Multiple R-squared:  0.5596, Adjusted R-squared:  0.5562 
## F-statistic: 164.4 on 3 and 388 DF,  p-value: < 2.2e-16
#At the 5% significance level, all of the interaction terms appear to be significant except for the following:
#cylinders*acceleration, cylinders*origin, acceleration*year, acceleration*origin, year*origin

#F
# Baseline: mpg as a linear function of weight alone.
lm.weight <- lm(mpg ~ weight, data = auto)
# Same model with an added quadratic term in weight.
lm3 <- lm(mpg ~ weight + I(weight^2), data = auto)
# F-test between the two nested fits: does the quadratic term lower the RSS?
anova(lm.weight, lm3)
## Analysis of Variance Table
## 
## Model 1: mpg ~ weight
## Model 2: mpg ~ weight + I(weight^2)
##   Res.Df    RSS Df Sum of Sq     F    Pr(>F)    
## 1    390 7321.2                                 
## 2    389 6784.9  1    536.34 30.75 5.429e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Model with squared term is superior
# Add a square-root term in weight and compare against the linear-only fit.
lm.sqrt <- lm(mpg ~ weight + sqrt(weight), data = auto)
anova(lm.weight, lm.sqrt)
## Analysis of Variance Table
## 
## Model 1: mpg ~ weight
## Model 2: mpg ~ weight + sqrt(weight)
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    390 7321.2                                  
## 2    389 6800.9  1    520.35 29.764 8.702e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Model with square root is superior
# Add a log term in weight and compare against the linear-only fit.
lm.log <- lm(mpg ~ weight + log(weight), data = auto)
anova(lm.weight, lm.log)
## Analysis of Variance Table
## 
## Model 1: mpg ~ weight
## Model 2: mpg ~ weight + log(weight)
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1    390 7321.2                                  
## 2    389 6812.2  1    509.05 29.069 1.214e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Model with log is superior
#It appears altering the variables by squaring, square-rooting, or taking the log can improve model performance

10.)

# Working copy of the ISLR2 Carseats data set for exercise 10.
carseats <- Carseats
#a.)
# Regress Sales on Price, Urban and US.
lm.car <- lm(Sales ~ Price + Urban + US, data = carseats)
summary(lm.car)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
#b.) As price goes up, sales go down. If the store is in the US, sales are higher. The Urban variable is
# not significant.

#c.) Sales_i = \beta_0 + \beta_1 \cdot Price_i + \beta_2 \cdot d_i + \beta_3 \cdot e_i + \epsilon_i
#    where d_i = 1 if store i is in an urban location and 0 otherwise,
#    and e_i = 1 if store i is in the US and 0 otherwise.

#d.) The "Urban" variable is not significant

#e.)
# Reduced model: drop Urban and keep only Price and US.
lm.car2 <- lm(Sales ~ Price + US, data = carseats)
summary(lm.car2)
## 
## Call:
## lm(formula = Sales ~ Price + US, data = carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16
#f.) Based on the R-squared values, neither model is great. (roughly 0.24 for both)

#g.)
# 95% confidence intervals for the coefficients of the reduced model.
confint(lm.car2, level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) 11.79032020 14.27126531
## Price       -0.06475984 -0.04419543
## USYes        0.69151957  1.70776632
#h.)
# Show the four lm diagnostic plots in a 2x2 grid, then put the previous
# graphics settings back so later plots are not forced into the same layout.
old_par <- par(mfrow = c(2, 2))
plot(lm.car2)
par(old_par)

#No, the diagnostic plots show no clear evidence of outliers or high-leverage observations.

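12.) a.) The coefficient estimate for the regression of X onto Y (without an intercept) equals the coefficient estimate for the regression of Y onto X when the sum of squares of the observed y values equals the sum of squares of the observed x values, i.e. \sum_i y_i^2 = \sum_i x_i^2.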

#b.)
# Part a.) gives a sum-of-squares condition, which is the condition for
# regression WITHOUT an intercept, so both fits drop the intercept with `+ 0`.
# The seed makes the simulated example reproducible.
set.seed(1)
x <- rnorm(100)
y <- x^2

# Without an intercept the slope of x on y is sum(x * y) / sum(y^2) and the
# slope of y on x is sum(x * y) / sum(x^2). For simulated data sum(x^2) and
# sum(y^2) almost surely differ, so the two estimates differ.
fit_x_on_y <- lm(x ~ y + 0)
fit_y_on_x <- lm(y ~ x + 0)
coefficients(fit_x_on_y)
coefficients(fit_y_on_x)
# (Printed values depend on the seed; re-run to regenerate them.)
#c.)
# With y2 identical to x2 the two sums of squares are equal, so the
# no-intercept (`+ 0`) regressions of x2 on y2 and of y2 on x2 share the
# same slope, namely 1.
x2 <- rnorm(100)
y2 <- x2

fit_x2_on_y2 <- lm(x2 ~ y2 + 0)
fit_y2_on_x2 <- lm(y2 ~ x2 + 0)
coefficients(fit_x2_on_y2)
## y2 
##  1
coefficients(fit_y2_on_x2)
## x2 
##  1