Problem 2

KNN Classifier – for a test observation, this identifies the K training observations closest to it and assigns the class that is most common among those neighbors. In the case of K=3, you would look at the 3 closest neighbors and predict the majority class among them.

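KNN Regression Methods – for a test observation, this identifies the K closest training observations and predicts a quantitative response as the average of their responses. It is a non-parametric method, as opposed to linear regression, which assumes a linear functional form.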

Problem 9

# Load the Auto data set: the file has a header row, "?" marks a missing
# value, and character columns (the car name) are read as factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Auto <- read.table(
  "~/1 Data/Auto.data",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
# NOTE(review): attach() is kept so any bare column references elsewhere still
# resolve; the models visible in this file all pass `data =` explicitly, so
# consider dropping it.
attach(Auto)
# Lay out subsequent base-graphics plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
summary(Auto)
##       mpg          cylinders      displacement     horsepower        weight    
##  Min.   : 9.00   Min.   :3.000   Min.   : 68.0   Min.   : 46.0   Min.   :1613  
##  1st Qu.:17.50   1st Qu.:4.000   1st Qu.:104.0   1st Qu.: 75.0   1st Qu.:2223  
##  Median :23.00   Median :4.000   Median :146.0   Median : 93.5   Median :2800  
##  Mean   :23.52   Mean   :5.458   Mean   :193.5   Mean   :104.5   Mean   :2970  
##  3rd Qu.:29.00   3rd Qu.:8.000   3rd Qu.:262.0   3rd Qu.:126.0   3rd Qu.:3609  
##  Max.   :46.60   Max.   :8.000   Max.   :455.0   Max.   :230.0   Max.   :5140  
##                                                  NA's   :5                     
##   acceleration        year           origin                  name    
##  Min.   : 8.00   Min.   :70.00   Min.   :1.000   ford pinto    :  6  
##  1st Qu.:13.80   1st Qu.:73.00   1st Qu.:1.000   amc matador   :  5  
##  Median :15.50   Median :76.00   Median :1.000   ford maverick :  5  
##  Mean   :15.56   Mean   :75.99   Mean   :1.574   toyota corolla:  5  
##  3rd Qu.:17.10   3rd Qu.:79.00   3rd Qu.:2.000   amc gremlin   :  4  
##  Max.   :24.80   Max.   :82.00   Max.   :3.000   amc hornet    :  4  
##                                                  (Other)       :368
# Scatterplot matrix of every pair of columns in Auto.
pairs(Auto)

(b)

# Correlation matrix of the numeric columns. The factor column `name` is
# excluded by name, and each pairwise correlation uses every row where both
# variables are observed (horsepower has missing values).
numeric_cols <- setdiff(names(Auto), "name")
cor(Auto[, numeric_cols], use = "pairwise.complete.obs")
##                     mpg  cylinders displacement horsepower     weight
## mpg           1.0000000 -0.7762599   -0.8044430 -0.7784268 -0.8317389
## cylinders    -0.7762599  1.0000000    0.9509199  0.8429834  0.8970169
## displacement -0.8044430  0.9509199    1.0000000  0.8972570  0.9331044
## horsepower   -0.7784268  0.8429834    0.8972570  1.0000000  0.8645377
## weight       -0.8317389  0.8970169    0.9331044  0.8645377  1.0000000
## acceleration  0.4222974 -0.5040606   -0.5441618 -0.6891955 -0.4195023
## year          0.5814695 -0.3467172   -0.3698041 -0.4163615 -0.3079004
## origin        0.5636979 -0.5649716   -0.6106643 -0.4551715 -0.5812652
##              acceleration       year     origin
## mpg             0.4222974  0.5814695  0.5636979
## cylinders      -0.5040606 -0.3467172 -0.5649716
## displacement   -0.5441618 -0.3698041 -0.6106643
## horsepower     -0.6891955 -0.4163615 -0.4551715
## weight         -0.4195023 -0.3079004 -0.5812652
## acceleration    1.0000000  0.2829009  0.2100836
## year            0.2829009  1.0000000  0.1843141
## origin          0.2100836  0.1843141  1.0000000
# Multiple linear regression of mpg on every other column except `name`
# (column 9 of Auto).
lm.fit <- lm(mpg ~ ., data = Auto[, -9])
summary(lm.fit)
## 
## Call:
## lm(formula = mpg ~ ., data = Auto[, -9])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5903 -2.1565 -0.1169  1.8690 13.0604 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -17.218435   4.644294  -3.707  0.00024 ***
## cylinders     -0.493376   0.323282  -1.526  0.12780    
## displacement   0.019896   0.007515   2.647  0.00844 ** 
## horsepower    -0.016951   0.013787  -1.230  0.21963    
## weight        -0.006474   0.000652  -9.929  < 2e-16 ***
## acceleration   0.080576   0.098845   0.815  0.41548    
## year           0.750773   0.050973  14.729  < 2e-16 ***
## origin         1.426141   0.278136   5.127 4.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.328 on 384 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8182 
## F-statistic: 252.4 on 7 and 384 DF,  p-value: < 2.2e-16

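i.There is a relationship between the predictors and the response: the F-statistic is 252.4 with a p-value < 2.2e-16. ii.Displacement, weight, year, and origin all have a statistically significant relationship with mpg; cylinders, horsepower, and acceleration do not. iii.The year coefficient (0.75) suggests that newer cars have increased MPG: about 0.75 mpg per model year, holding the other predictors fixed. (d)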

# Diagnostic plots for the fitted linear model lm.fit.
plot(lm.fit)

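Residuals vs Fitted: the residuals start off very close to the zero line but fan out toward the right, which suggests non-constant variance. The smoothed trend line is also curved, which suggests some non-linearity in the relationship.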

Normal Q-Q appears to be very linear and normally distributed

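Scale-Location: the standardized residuals look scattered at first, but nearly all of them fall within about 3 standard deviations, so there are no clear outlier issues. The largest residual (13.06, against a residual standard error of 3.328) is the one point worth a closer look.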

Residuals vs Leverage plot shows that even with observation 14 removed, it doesn’t appear it would change the model drastically.

# Part (e): refit the full model (all predictors except `name`) eleven times,
# each time adding one pairwise interaction term. `a * b` expands to
# a + b + a:b, so only the a:b term is new relative to the main-effects model.
interact.fit1 <- lm(mpg ~ . - name + horsepower * displacement, data = Auto)
interact.fit2 <- lm(mpg ~ . - name + horsepower * origin, data = Auto)
interact.fit3 <- lm(mpg ~ . - name + displacement * origin, data = Auto)
interact.fit4 <- lm(mpg ~ . - name + weight * origin, data = Auto)
interact.fit5 <- lm(mpg ~ . - name + weight * displacement, data = Auto)
interact.fit6 <- lm(mpg ~ . - name + year * origin, data = Auto)
interact.fit7 <- lm(mpg ~ . - name + acceleration * origin, data = Auto)
interact.fit8 <- lm(mpg ~ . - name + acceleration * weight, data = Auto)
interact.fit9 <- lm(mpg ~ . - name + acceleration * year, data = Auto)
interact.fit10 <- lm(mpg ~ . - name + acceleration * displacement, data = Auto)
interact.fit11 <- lm(mpg ~ . - name + acceleration * horsepower, data = Auto)
# Model with the horsepower x displacement interaction.
summary(interact.fit1)
## 
## Call:
## lm(formula = mpg ~ . - name + horsepower * displacement, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7010 -1.6009 -0.0967  1.4119 12.6734 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -1.894e+00  4.302e+00  -0.440  0.66007    
## cylinders                6.466e-01  3.017e-01   2.143  0.03275 *  
## displacement            -7.487e-02  1.092e-02  -6.859 2.80e-11 ***
## horsepower              -1.975e-01  2.052e-02  -9.624  < 2e-16 ***
## weight                  -3.147e-03  6.475e-04  -4.861 1.71e-06 ***
## acceleration            -2.131e-01  9.062e-02  -2.351  0.01921 *  
## year                     7.379e-01  4.463e-02  16.534  < 2e-16 ***
## origin                   6.891e-01  2.527e-01   2.727  0.00668 ** 
## displacement:horsepower  5.236e-04  4.813e-05  10.878  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.912 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8636, Adjusted R-squared:  0.8608 
## F-statistic: 303.1 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the horsepower x origin interaction.
summary(interact.fit2)
## 
## Call:
## lm(formula = mpg ~ . - name + horsepower * origin, data = Auto)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.277 -1.875 -0.225  1.570 12.080 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -2.196e+01  4.396e+00  -4.996 8.94e-07 ***
## cylinders         -5.275e-01  3.028e-01  -1.742   0.0823 .  
## displacement      -1.486e-03  7.607e-03  -0.195   0.8452    
## horsepower         8.173e-02  1.856e-02   4.404 1.38e-05 ***
## weight            -4.710e-03  6.555e-04  -7.186 3.52e-12 ***
## acceleration      -1.124e-01  9.617e-02  -1.168   0.2434    
## year               7.327e-01  4.780e-02  15.328  < 2e-16 ***
## origin             7.695e+00  8.858e-01   8.687  < 2e-16 ***
## horsepower:origin -7.955e-02  1.074e-02  -7.405 8.44e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.116 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8438, Adjusted R-squared:  0.8406 
## F-statistic: 258.7 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the displacement x origin interaction.
summary(interact.fit3)
## 
## Call:
## lm(formula = mpg ~ . - name + displacement * origin, data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2312  -2.0927  -0.1295   1.7287  12.2761 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -2.140e+01  4.827e+00  -4.434 1.21e-05 ***
## cylinders           -4.731e-01  3.204e-01  -1.477 0.140577    
## displacement         3.802e-02  9.768e-03   3.892 0.000117 ***
## horsepower          -1.523e-02  1.367e-02  -1.114 0.265920    
## weight              -5.940e-03  6.723e-04  -8.835  < 2e-16 ***
## acceleration         4.578e-02  9.868e-02   0.464 0.642939    
## year                 7.682e-01  5.087e-02  15.103  < 2e-16 ***
## origin               3.810e+00  8.762e-01   4.349 1.76e-05 ***
## displacement:origin -2.262e-02  7.891e-03  -2.866 0.004383 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.297 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8252, Adjusted R-squared:  0.8216 
## F-statistic: 226.1 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the weight x origin interaction.
summary(interact.fit4)
## 
## Call:
## lm(formula = mpg ~ . - name + weight * origin, data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6056  -2.0253  -0.1315   1.6295  12.5217 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -2.482e+01  4.891e+00  -5.075 6.06e-07 ***
## cylinders     -5.799e-01  3.171e-01  -1.829   0.0682 .  
## displacement   8.833e-03  7.810e-03   1.131   0.2588    
## horsepower    -1.068e-02  1.358e-02  -0.787   0.4321    
## weight        -2.782e-03  1.084e-03  -2.566   0.0107 *  
## acceleration   3.766e-02  9.729e-02   0.387   0.6989    
## year           7.632e-01  4.998e-02  15.270  < 2e-16 ***
## origin         6.633e+00  1.265e+00   5.244 2.60e-07 ***
## weight:origin -2.309e-03  5.477e-04  -4.215 3.11e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.257 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8294, Adjusted R-squared:  0.8258 
## F-statistic: 232.7 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the weight x displacement interaction.
summary(interact.fit5)
## 
## Call:
## lm(formula = mpg ~ . - name + weight * displacement, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.9027 -1.8092 -0.0946  1.5549 12.1687 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -5.389e+00  4.301e+00  -1.253   0.2109    
## cylinders            1.175e-01  2.943e-01   0.399   0.6899    
## displacement        -6.837e-02  1.104e-02  -6.193 1.52e-09 ***
## horsepower          -3.280e-02  1.238e-02  -2.649   0.0084 ** 
## weight              -1.064e-02  7.136e-04 -14.915  < 2e-16 ***
## acceleration         6.724e-02  8.805e-02   0.764   0.4455    
## year                 7.852e-01  4.553e-02  17.246  < 2e-16 ***
## origin               5.610e-01  2.622e-01   2.139   0.0331 *  
## displacement:weight  2.269e-05  2.257e-06  10.054  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.964 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8588, Adjusted R-squared:  0.8558 
## F-statistic: 291.1 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the year x origin interaction.
summary(interact.fit6)
## 
## Call:
## lm(formula = mpg ~ . - name + year * origin, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6072 -2.0439 -0.0596  1.7121 12.3368 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.492e+00  9.044e+00   0.939 0.348353    
## cylinders    -5.042e-01  3.192e-01  -1.579 0.115082    
## displacement  1.567e-02  7.530e-03   2.081 0.038060 *  
## horsepower   -1.399e-02  1.364e-02  -1.025 0.305786    
## weight       -6.352e-03  6.449e-04  -9.851  < 2e-16 ***
## acceleration  9.185e-02  9.766e-02   0.941 0.347546    
## year          4.189e-01  1.125e-01   3.723 0.000226 ***
## origin       -1.405e+01  4.699e+00  -2.989 0.002978 ** 
## year:origin   1.989e-01  6.030e-02   3.298 0.001064 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.286 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8264, Adjusted R-squared:  0.8228 
## F-statistic: 227.9 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the acceleration x origin interaction.
summary(interact.fit7)
## 
## Call:
## lm(formula = mpg ~ . - name + acceleration * origin, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4106 -1.8805 -0.2471  1.7891 11.9680 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.3327273  5.0570077  -0.066   0.9476    
## cylinders           -0.5881258  0.3063127  -1.920   0.0556 .  
## displacement         0.0086251  0.0073062   1.181   0.2385    
## horsepower          -0.0250843  0.0131049  -1.914   0.0564 .  
## weight              -0.0052351  0.0006439  -8.131 5.98e-15 ***
## acceleration        -1.0340600  0.1896960  -5.451 8.98e-08 ***
## year                 0.7623813  0.0482774  15.792  < 2e-16 ***
## origin              -9.3089774  1.6109675  -5.779 1.56e-08 ***
## acceleration:origin  0.6546959  0.0969263   6.755 5.34e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.15 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8405, Adjusted R-squared:  0.8371 
## F-statistic: 252.2 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the acceleration x weight interaction.
summary(interact.fit8)
## 
## Call:
## lm(formula = mpg ~ . - name + acceleration * weight, data = Auto)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.247 -2.048 -0.045  1.619 12.193 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -4.364e+01  5.811e+00  -7.511 4.18e-13 ***
## cylinders           -2.141e-01  3.078e-01  -0.696 0.487117    
## displacement         3.138e-03  7.495e-03   0.419 0.675622    
## horsepower          -4.141e-02  1.348e-02  -3.071 0.002287 ** 
## weight               4.027e-03  1.636e-03   2.462 0.014268 *  
## acceleration         1.629e+00  2.422e-01   6.726 6.36e-11 ***
## year                 7.821e-01  4.833e-02  16.184  < 2e-16 ***
## origin               1.033e+00  2.686e-01   3.846 0.000141 ***
## weight:acceleration -5.826e-04  8.408e-05  -6.928 1.81e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.141 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8414, Adjusted R-squared:  0.838 
## F-statistic: 253.9 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the acceleration x year interaction.
summary(interact.fit9)
## 
## Call:
## lm(formula = mpg ~ . - name + acceleration * year, data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2277  -2.1428  -0.0839   1.8216  12.2997 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       97.6199866 20.8291407   4.687 3.86e-06 ***
## cylinders         -0.2163880  0.3148719  -0.687  0.49236    
## displacement       0.0087776  0.0074936   1.171  0.24219    
## horsepower        -0.0269212  0.0133812  -2.012  0.04493 *  
## weight            -0.0058738  0.0006363  -9.232  < 2e-16 ***
## acceleration      -7.1818103  1.2900806  -5.567 4.88e-08 ***
## year              -0.7461986  0.2696927  -2.767  0.00593 ** 
## origin             1.2630333  0.2691459   4.693 3.76e-06 ***
## acceleration:year  0.0945502  0.0167501   5.645 3.22e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.202 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8352, Adjusted R-squared:  0.8317 
## F-statistic: 242.6 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the acceleration x displacement interaction.
summary(interact.fit10)
## 
## Call:
## lm(formula = mpg ~ . - name + acceleration * displacement, data = Auto)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.129 -1.899 -0.135  1.755 12.119 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -3.005e+01  4.737e+00  -6.343 6.36e-10 ***
## cylinders                  2.136e-03  3.125e-01   0.007 0.994550    
## displacement               7.022e-02  1.005e-02   6.989 1.24e-11 ***
## horsepower                -5.515e-02  1.407e-02  -3.920 0.000105 ***
## weight                    -4.211e-03  6.929e-04  -6.077 2.96e-09 ***
## acceleration               7.530e-01  1.332e-01   5.653 3.09e-08 ***
## year                       7.722e-01  4.811e-02  16.051  < 2e-16 ***
## origin                     1.057e+00  2.671e-01   3.958 9.01e-05 ***
## displacement:acceleration -4.855e-03  6.879e-04  -7.058 7.99e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.134 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.842,  Adjusted R-squared:  0.8387 
## F-statistic: 255.2 on 8 and 383 DF,  p-value: < 2.2e-16
# Model with the acceleration x horsepower interaction.
summary(interact.fit11)
## 
## Call:
## lm(formula = mpg ~ . - name + acceleration * horsepower, data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.0329 -1.8177 -0.1183  1.7247 12.4870 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -32.499820   4.923380  -6.601 1.36e-10 ***
## cylinders                 0.083489   0.316913   0.263 0.792350    
## displacement             -0.007649   0.008161  -0.937 0.349244    
## horsepower                0.127188   0.024746   5.140 4.40e-07 ***
## weight                   -0.003976   0.000716  -5.552 5.27e-08 ***
## acceleration              0.983282   0.161513   6.088 2.78e-09 ***
## year                      0.755919   0.048179  15.690  < 2e-16 ***
## origin                    1.035733   0.268962   3.851 0.000138 ***
## horsepower:acceleration  -0.012139   0.001772  -6.851 2.93e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.145 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.841,  Adjusted R-squared:  0.8376 
## F-statistic: 253.2 on 8 and 383 DF,  p-value: < 2.2e-16

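Yes. Every interaction term tested is statistically significant (p < 0.01 in all eleven models). The displacement:horsepower (t = 10.88) and displacement:weight (t = 10.05) interactions have the largest t-statistics.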

# Part (f): full model plus a log(acceleration) term; acceleration itself is
# still included through `.`.
summary(lm(mpg ~ . -name + log(acceleration), data=Auto))
## 
## Call:
## lm(formula = mpg ~ . - name + log(acceleration), data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.7931 -2.0052 -0.1279  1.9299 13.1085 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        4.552e+01  1.479e+01   3.077  0.00224 ** 
## cylinders         -2.796e-01  3.193e-01  -0.876  0.38172    
## displacement       8.042e-03  7.805e-03   1.030  0.30344    
## horsepower        -3.434e-02  1.401e-02  -2.450  0.01473 *  
## weight            -5.343e-03  6.854e-04  -7.795 6.15e-14 ***
## acceleration       2.167e+00  4.782e-01   4.532 7.82e-06 ***
## year               7.560e-01  4.978e-02  15.186  < 2e-16 ***
## origin             1.329e+00  2.724e-01   4.877 1.58e-06 ***
## log(acceleration) -3.513e+01  7.886e+00  -4.455 1.10e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.249 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8303, Adjusted R-squared:  0.8267 
## F-statistic: 234.2 on 8 and 383 DF,  p-value: < 2.2e-16
# Full model plus a sqrt(displacement) term; displacement itself is still
# included through `.`.
summary(lm(mpg ~ . -name + sqrt(displacement), data=Auto))
## 
## Call:
## lm(formula = mpg ~ . - name + sqrt(displacement), data = Auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.6874  -1.8084  -0.0046   1.6599  11.7939 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        13.3425501  5.4548602   2.446 0.014895 *  
## cylinders           0.1791258  0.3043002   0.589 0.556444    
## displacement        0.1628037  0.0174800   9.314  < 2e-16 ***
## horsepower         -0.0479788  0.0130456  -3.678 0.000269 ***
## weight             -0.0044272  0.0006375  -6.944 1.64e-11 ***
## acceleration       -0.0160378  0.0907715  -0.177 0.859851    
## year                0.7781542  0.0465748  16.708  < 2e-16 ***
## origin              0.4130678  0.2780263   1.486 0.138177    
## sqrt(displacement) -4.7444871  0.5338893  -8.887  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.034 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.852,  Adjusted R-squared:  0.8489 
## F-statistic: 275.6 on 8 and 383 DF,  p-value: < 2.2e-16
# Full model plus a squared horsepower term. I() makes `^` mean arithmetic
# squaring rather than formula crossing.
summary(lm(mpg ~ . -name + I(horsepower^2), data=Auto))
## 
## Call:
## lm(formula = mpg ~ . - name + I(horsepower^2), data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.5497 -1.7311 -0.2236  1.5877 11.9955 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.3236564  4.6247696   0.286 0.774872    
## cylinders        0.3489063  0.3048310   1.145 0.253094    
## displacement    -0.0075649  0.0073733  -1.026 0.305550    
## horsepower      -0.3194633  0.0343447  -9.302  < 2e-16 ***
## weight          -0.0032712  0.0006787  -4.820 2.07e-06 ***
## acceleration    -0.3305981  0.0991849  -3.333 0.000942 ***
## year             0.7353414  0.0459918  15.989  < 2e-16 ***
## origin           1.0144130  0.2545545   3.985 8.08e-05 ***
## I(horsepower^2)  0.0010060  0.0001065   9.449  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.001 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8552, Adjusted R-squared:  0.8522 
## F-statistic: 282.8 on 8 and 383 DF,  p-value: < 2.2e-16
# Full model plus a log(weight) term; weight itself is still included
# through `.`.
summary(lm(mpg ~ . -name + log(weight), data=Auto))
## 
## Call:
## lm(formula = mpg ~ . - name + log(weight), data = Auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.6516 -1.6398 -0.1671  1.5973 12.7247 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  269.474171  31.136919   8.654  < 2e-16 ***
## cylinders     -0.498204   0.292415  -1.704  0.08924 .  
## displacement   0.013527   0.006832   1.980  0.04843 *  
## horsepower    -0.022137   0.012483  -1.773  0.07696 .  
## weight         0.007657   0.001631   4.694 3.73e-06 ***
## acceleration   0.045763   0.089486   0.511  0.60936    
## year           0.797808   0.046383  17.200  < 2e-16 ***
## origin         0.719552   0.262819   2.738  0.00647 ** 
## log(weight)  -41.320927   4.446725  -9.292  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.01 on 383 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.8543, Adjusted R-squared:  0.8513 
## F-statistic: 280.8 on 8 and 383 DF,  p-value: < 2.2e-16

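Adding log(weight) alongside weight has a drastic impact: the log(weight) term is highly significant (t = -9.29, p < 2e-16), the weight coefficient changes sign, and R-squared rises from 0.8215 to 0.8543. The sqrt(displacement) and horsepower^2 terms are similarly significant (R-squared 0.852 and 0.8552), while log(acceleration) gives a smaller improvement (R-squared 0.8303).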

Problem 10

# Load the Carseats data set: the file has a header row, "?" marks a missing
# value, and character columns are read as factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Carseats <- read.csv(
  "~/1 Data/Carseats.csv",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
# View() opens a data viewer window, which only makes sense in an interactive
# session; skip it when the script is knitted or run in batch mode.
if (interactive()) {
  View(Carseats)
}
# Lay out subsequent base-graphics plots on a 2 x 2 grid.
par(mfrow = c(2, 2))
summary(Carseats)
##      Sales          CompPrice       Income        Advertising    
##  Min.   : 0.000   Min.   : 77   Min.   : 21.00   Min.   : 0.000  
##  1st Qu.: 5.390   1st Qu.:115   1st Qu.: 42.75   1st Qu.: 0.000  
##  Median : 7.490   Median :125   Median : 69.00   Median : 5.000  
##  Mean   : 7.496   Mean   :125   Mean   : 68.66   Mean   : 6.635  
##  3rd Qu.: 9.320   3rd Qu.:135   3rd Qu.: 91.00   3rd Qu.:12.000  
##  Max.   :16.270   Max.   :175   Max.   :120.00   Max.   :29.000  
##    Population        Price        ShelveLoc        Age          Education   
##  Min.   : 10.0   Min.   : 24.0   Bad   : 96   Min.   :25.00   Min.   :10.0  
##  1st Qu.:139.0   1st Qu.:100.0   Good  : 85   1st Qu.:39.75   1st Qu.:12.0  
##  Median :272.0   Median :117.0   Medium:219   Median :54.50   Median :14.0  
##  Mean   :264.8   Mean   :115.8                Mean   :53.32   Mean   :13.9  
##  3rd Qu.:398.5   3rd Qu.:131.0                3rd Qu.:66.00   3rd Qu.:16.0  
##  Max.   :509.0   Max.   :191.0                Max.   :80.00   Max.   :18.0  
##  Urban       US     
##  No :118   No :142  
##  Yes:282   Yes:258  
##                     
##                     
##                     
## 
# NOTE(review): attach() is kept so any bare column references elsewhere still
# resolve; the models visible in this file all pass `data =` explicitly.
attach(Carseats)
# Part (a): regress Sales on Price and the two Yes/No factors Urban and US.
lm.fit <- lm(Sales ~ Price + Urban + US, data = Carseats)
summary(lm.fit)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
  1. Price – There is an inverse relationship between price and sales: each one-unit increase in Price is associated with a decrease of about 0.054 in Sales (roughly 54 units, since Sales is recorded in thousands), holding Urban and US fixed.

  2. UrbanYes – There is no evidence of a relationship between sales and whether the store is in an urban location: the coefficient is -0.02 with a p-value of 0.936.

  3. USYes – There seems to be a relationship to sales and whether the store is in the U.S. The Estimate shows there are likely to be ~1,200 more units sold for U.S. stores (1.200573*1000)

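  1. Sales = 13.04 - 0.054*Price - 0.022*UrbanYes + 1.20*USYes, where UrbanYes is 1 for an urban store and 0 otherwise, and USYes is 1 for a U.S. store and 0 otherwise.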

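  2. Price and US (the null hypothesis that the coefficient is zero can be rejected for these two predictors, but not for Urban).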

# Part (e): smaller model keeping only the predictors with evidence of an
# association with Sales (Urban is dropped).
lm.fit2 <- lm(Sales ~ Price + US, data = Carseats)
summary(lm.fit2)
## 
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16

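(f)The models fit about equally well, and neither fits the data closely: both have R-squared 0.2393 (about 24% of the variance in Sales explained). The smaller model is marginally better on adjusted R-squared (0.2354 vs 0.2335) and residual standard error (2.469 vs 2.472). (g)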

# 95% confidence intervals (confint's default level) for the coefficients of
# lm.fit2.
confint(lm.fit2)
##                   2.5 %      97.5 %
## (Intercept) 11.79032020 14.27126531
## Price       -0.06475984 -0.04419543
## USYes        0.69151957  1.70776632

(h)There do not appear to be any outliers or high leverage observations

# Diagnostic plots for lm.fit2, used to look for outliers and high-leverage
# observations.
plot(lm.fit2)

Problem 12 (a)Pass (b)

# Part (b): an example where the no-intercept regression of y onto x and of
# x onto y give different coefficients, because sum(x^2) != sum(y^2).
x <- seq_len(100)
sum(x^2)
## [1] 338350
# Fix the RNG seed so the simulated noise, and every number derived from it,
# is the same on each run.
set.seed(1)
y <- 2 * x + rnorm(length(x), sd = 0.1)
sum(y^2)
# (recorded from the original unseeded run; the trailing digits change once
# the document is re-knit with the seed above)
## [1] 1353395
# Regressions through the origin (`+ 0` removes the intercept).
fit.y <- lm(y ~ x + 0)
fit.x <- lm(x ~ y + 0)
summary(fit.y)
## 
## Call:
## lm(formula = y ~ x + 0)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.227533 -0.070909  0.002441  0.078863  0.203463 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## x 1.9999958  0.0001679   11909   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09769 on 99 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.418e+08 on 1 and 99 DF,  p-value: < 2.2e-16
# Regression of x onto y through the origin.
summary(fit.x)
## 
## Call:
## lm(formula = x ~ y + 0)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.101710 -0.039425 -0.001187  0.035474  0.113785 
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)    
## y 5.000e-01  4.199e-05   11909   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04885 on 99 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.418e+08 on 1 and 99 DF,  p-value: < 2.2e-16
# Part (c): an example where the no-intercept regression of y onto x and of
# x onto y give the same coefficient, which requires sum(x^2) == sum(y^2).
x <- 1:100
y <- 1:100
fit.y <- lm(y ~ x + 0)
# Regress x onto y. The original call was `lm(x ~ x + 0)`, which puts the
# response on the right-hand side; R drops that term and fits an empty model,
# so there was no coefficient to compare with fit.y.
fit.x <- lm(x ~ y + 0)
summary(fit.y)
## Warning in summary.lm(fit.y): essentially perfect fit: summary may be unreliable
## 
## Call:
## lm(formula = y ~ x + 0)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.082e-13 -2.094e-15  2.900e-17  2.218e-15  1.294e-14 
## 
## Coefficients:
##    Estimate Std. Error   t value Pr(>|t|)    
## x 1.000e+00  5.379e-17 1.859e+16   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.129e-14 on 99 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 3.457e+32 on 1 and 99 DF,  p-value: < 2.2e-16
# NOTE(review): the recorded output below shows `Call: lm(formula = x ~ x + 0)`
# and "No Coefficients": the response was used as its own predictor and
# dropped, so this is an empty model. The intended comparison is presumably
# the regression of x onto y (`x ~ y + 0`) - confirm and re-knit.
summary(fit.x)
## 
## Call:
## lm(formula = x ~ x + 0)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##   1.00  25.75  50.50  75.25 100.00 
## 
## No Coefficients
## 
## Residual standard error: 58.17 on 100 degrees of freedom