# Question 2 ----

# Download the Auto data set, reading "?" entries as NA, and keep only the
# rows that have no missing value in any column.
Auto <- na.omit(
  read.csv(
    file = "https://www.statlearning.com/s/Auto.csv",
    header = TRUE,
    na.strings = "?"
  )
)
# Plot the whole data frame (a pairwise scatterplot matrix of its columns).
plot(x = Auto)

# Keep the first eight columns only (mpg through origin in the matrix printed
# below); presumably this drops the car `name` column, as the variable name
# suggests.
Auto_no_name <- Auto[, seq_len(8)]
# Pairwise correlations between the retained columns.
cor(x = Auto_no_name)
##                     mpg  cylinders displacement horsepower     weight
## mpg           1.0000000 -0.7776175   -0.8051269 -0.7784268 -0.8322442
## cylinders    -0.7776175  1.0000000    0.9508233  0.8429834  0.8975273
## displacement -0.8051269  0.9508233    1.0000000  0.8972570  0.9329944
## horsepower   -0.7784268  0.8429834    0.8972570  1.0000000  0.8645377
## weight       -0.8322442  0.8975273    0.9329944  0.8645377  1.0000000
## acceleration  0.4233285 -0.5046834   -0.5438005 -0.6891955 -0.4168392
## year          0.5805410 -0.3456474   -0.3698552 -0.4163615 -0.3091199
## origin        0.5652088 -0.5689316   -0.6145351 -0.4551715 -0.5850054
##              acceleration       year     origin
## mpg             0.4233285  0.5805410  0.5652088
## cylinders      -0.5046834 -0.3456474 -0.5689316
## displacement   -0.5438005 -0.3698552 -0.6145351
## horsepower     -0.6891955 -0.4163615 -0.4551715
## weight         -0.4168392 -0.3091199 -0.5850054
## acceleration    1.0000000  0.2903161  0.2127458
## year            0.2903161  1.0000000  0.1815277
## origin          0.2127458  0.1815277  1.0000000
# Additive linear model: mpg regressed on the seven other retained columns.
mpg_regression <- lm(
  formula = mpg ~ cylinders + displacement + horsepower + weight +
    acceleration + year + origin,
  data = Auto_no_name
)
summary(object = mpg_regression)
## 
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight + 
##     acceleration + year + origin, data = Auto_no_name)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5903 -2.1565 -0.1169  1.8690 13.0604 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -17.218435   4.644294  -3.707  0.00024 ***
## cylinders     -0.493376   0.323282  -1.526  0.12780    
## displacement   0.019896   0.007515   2.647  0.00844 ** 
## horsepower    -0.016951   0.013787  -1.230  0.21963    
## weight        -0.006474   0.000652  -9.929  < 2e-16 ***
## acceleration   0.080576   0.098845   0.815  0.41548    
## year           0.750773   0.050973  14.729  < 2e-16 ***
## origin         1.426141   0.278136   5.127 4.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8182 
## F-statistic: 252.4 on 7 and 384 DF,  p-value: < 2.2e-16
# Diagnostic plots for the additive fit.
plot(x = mpg_regression)

# Additive model plus a hand-picked set of pairwise interactions: cylinders
# with every other predictor, and displacement with horsepower, weight,
# acceleration and year. `a * b` expands to a + b + a:b; the main effects are
# already listed, so each product only contributes its a:b term.
mpg_regression_interactions <- lm(
  formula = mpg ~ cylinders + displacement + horsepower + weight +
    acceleration + year + origin +
    cylinders * displacement +
    cylinders * horsepower +
    cylinders * weight +
    cylinders * acceleration +
    cylinders * year +
    cylinders * origin +
    displacement * horsepower +
    displacement * weight +
    displacement * acceleration +
    displacement * year,
  data = Auto_no_name
)
summary(object = mpg_regression_interactions)
## 
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight + 
##     acceleration + year + origin + cylinders * displacement + 
##     cylinders * horsepower + cylinders * weight + cylinders * 
##     acceleration + cylinders * year + cylinders * origin + displacement * 
##     horsepower + displacement * weight + displacement * acceleration + 
##     displacement * year, data = Auto_no_name)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3915 -1.3790 -0.0618  1.1901 12.6516 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -3.772e+01  2.242e+01  -1.682   0.0934 .  
## cylinders                  4.843e+00  8.399e+00   0.577   0.5646    
## displacement               6.672e-03  1.413e-01   0.047   0.9624    
## horsepower                -1.734e-01  6.898e-02  -2.514   0.0123 *  
## weight                    -7.246e-03  3.129e-03  -2.316   0.0211 *  
## acceleration              -6.182e-01  4.343e-01  -1.423   0.1554    
## year                       1.415e+00  2.497e-01   5.667 2.91e-08 ***
## origin                    -1.253e+00  1.412e+00  -0.888   0.3751    
## cylinders:displacement    -1.572e-03  3.734e-03  -0.421   0.6740    
## cylinders:horsepower       2.249e-02  1.944e-02   1.157   0.2481    
## cylinders:weight          -1.017e-04  8.260e-04  -0.123   0.9021    
## cylinders:acceleration     2.972e-01  1.606e-01   1.851   0.0649 .  
## cylinders:year            -1.495e-01  9.694e-02  -1.542   0.1239    
## cylinders:origin           4.544e-01  3.307e-01   1.374   0.1703    
## displacement:horsepower   -1.124e-04  2.729e-04  -0.412   0.6808    
## displacement:weight        1.540e-05  1.049e-05   1.468   0.1429    
## displacement:acceleration -5.566e-03  2.733e-03  -2.037   0.0424 *  
## displacement:year          6.069e-04  1.636e-03   0.371   0.7108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.827 on 374 degrees of freedom
## Multiple R-squared:  0.8746, Adjusted R-squared:  0.8689 
## F-statistic: 153.4 on 17 and 374 DF,  p-value: < 2.2e-16
# Regress mpg on the SQUARE of each predictor.
#
# Inside a model formula `^` is the term-crossing operator, not arithmetic
# power, so `(cylinders)^2` reduces to plain `cylinders`. Arithmetic has to be
# protected with I(). The earlier version of this call omitted I() and so
# refitted the plain linear model: the summary captured below predates this
# fix and is identical to the mpg_regression summary. Re-run to refresh it.
mpg_regression_squared <- lm(
  formula = mpg ~ I(cylinders^2) + I(displacement^2) + I(horsepower^2) +
    I(weight^2) + I(acceleration^2) + I(year^2) + I(origin^2),
  data = Auto_no_name
)
summary(mpg_regression_squared)
## 
## Call:
## lm(formula = mpg ~ (cylinders)^2 + (displacement)^2 + (horsepower)^2 + 
##     (weight)^2 + (acceleration)^2 + (year)^2 + (origin)^2, data = Auto_no_name)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5903 -2.1565 -0.1169  1.8690 13.0604 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -17.218435   4.644294  -3.707  0.00024 ***
## cylinders     -0.493376   0.323282  -1.526  0.12780    
## displacement   0.019896   0.007515   2.647  0.00844 ** 
## horsepower    -0.016951   0.013787  -1.230  0.21963    
## weight        -0.006474   0.000652  -9.929  < 2e-16 ***
## acceleration   0.080576   0.098845   0.815  0.41548    
## year           0.750773   0.050973  14.729  < 2e-16 ***
## origin         1.426141   0.278136   5.127 4.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared:  0.8215, Adjusted R-squared:  0.8182 
## F-statistic: 252.4 on 7 and 384 DF,  p-value: < 2.2e-16
# Same seven predictors as the additive model, each entered on the natural-log
# scale; mpg itself is left untransformed.
mpg_regression_log <- lm(
  formula = mpg ~ log(cylinders) + log(displacement) + log(horsepower) +
    log(weight) + log(acceleration) + log(year) + log(origin),
  data = Auto_no_name
)
summary(object = mpg_regression_log)
## 
## Call:
## lm(formula = mpg ~ log(cylinders) + log(displacement) + log(horsepower) + 
##     log(weight) + log(acceleration) + log(year) + log(origin), 
##     data = Auto_no_name)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5987 -1.8172 -0.0181  1.5906 12.8132 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -66.5643    17.5053  -3.803 0.000167 ***
## log(cylinders)      1.4818     1.6589   0.893 0.372273    
## log(displacement)  -1.0551     1.5385  -0.686 0.493230    
## log(horsepower)    -6.9657     1.5569  -4.474 1.01e-05 ***
## log(weight)       -12.5728     2.2251  -5.650 3.12e-08 ***
## log(acceleration)  -4.9831     1.6078  -3.099 0.002082 ** 
## log(year)          54.9857     3.5555  15.465  < 2e-16 ***
## log(origin)         1.5822     0.5083   3.113 0.001991 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.069 on 384 degrees of freedom
## Multiple R-squared:  0.8482, Adjusted R-squared:  0.8454 
## F-statistic: 306.5 on 7 and 384 DF,  p-value: < 2.2e-16
# Same seven predictors as the additive model, each entered as its square
# root; mpg itself is left untransformed.
mpg_regression_sqrt <- lm(
  formula = mpg ~ sqrt(cylinders) + sqrt(displacement) + sqrt(horsepower) +
    sqrt(weight) + sqrt(acceleration) + sqrt(year) + sqrt(origin),
  data = Auto_no_name
)
summary(object = mpg_regression_sqrt)
## 
## Call:
## lm(formula = mpg ~ sqrt(cylinders) + sqrt(displacement) + sqrt(horsepower) + 
##     sqrt(weight) + sqrt(acceleration) + sqrt(year) + sqrt(origin), 
##     data = Auto_no_name)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5250 -1.9822 -0.1111  1.7347 13.0681 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -49.79814    9.17832  -5.426 1.02e-07 ***
## sqrt(cylinders)     -0.23699    1.53753  -0.154   0.8776    
## sqrt(displacement)   0.22580    0.22940   0.984   0.3256    
## sqrt(horsepower)    -0.77976    0.30788  -2.533   0.0117 *  
## sqrt(weight)        -0.62172    0.07898  -7.872 3.59e-14 ***
## sqrt(acceleration)  -0.82529    0.83443  -0.989   0.3233    
## sqrt(year)          12.79030    0.85891  14.891  < 2e-16 ***
## sqrt(origin)         3.26036    0.76767   4.247 2.72e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.21 on 384 degrees of freedom
## Multiple R-squared:  0.8338, Adjusted R-squared:  0.8308 
## F-statistic: 275.3 on 7 and 384 DF,  p-value: < 2.2e-16

# Question 3 ----

carseats <- ISLR::Carseats

# Build 0/1 indicator columns for the two Yes/No columns, then drop the
# originals. Written in base R so the block does not rely on dplyr/magrittr
# being attached. Columns are added in the same order as before, so
# carseats_new has the same layout.
carseats_new <- carseats
carseats_new$US_yes <- ifelse(carseats$US == "No", 0, 1)
carseats_new$Urban_yes <- ifelse(carseats$Urban == "No", 0, 1)
carseats_new$Urban_no <- ifelse(carseats$Urban == "Yes", 0, 1)
carseats_new$US_no <- ifelse(carseats$US == "Yes", 0, 1)
carseats_new$Urban <- NULL
carseats_new$US <- NULL

# Only ONE indicator per two-level variable goes into the model. Urban_no is
# exactly 1 - Urban_yes (and US_no is 1 - US_yes), so including both alongside
# the intercept makes the design matrix singular. The earlier five-term
# formula did that, and lm() reported the two redundant columns as NA
# ("2 not defined because of singularities" in the summary captured below,
# which predates this change). The estimates for Price, Urban_yes and US_yes
# are unaffected, because lm() was already dropping the aliased columns.
sales_regression <- lm(
  formula = Sales ~ Price + Urban_yes + US_yes,
  data = carseats_new
)
summary(sales_regression)
## 
## Call:
## lm(formula = Sales ~ Price + Urban_yes + Urban_no + US_yes + 
##     US_no, data = carseats_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients: (2 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## Urban_yes   -0.021916   0.271650  -0.081    0.936    
## Urban_no           NA         NA      NA       NA    
## US_yes       1.200573   0.259042   4.635 4.86e-06 ***
## US_no              NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
# Reduced model: keep Price and the US indicator, leave out the Urban one.
sales_regression_smaller <- lm(
  formula = Sales ~ Price + US_yes,
  data = carseats_new
)
summary(object = sales_regression_smaller)
## 
## Call:
## lm(formula = Sales ~ Price + US_yes, data = carseats_new)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## US_yes       1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16
# 95% confidence intervals for sales_regression, one coefficient at a time.
# (Not run) the intercept row would be requested by its coefficient name:
# confint(sales_regression, parm = "(Intercept)", level = 0.95)
confint(object = sales_regression, parm = "Price", level = 0.95)
##             2.5 %      97.5 %
## Price -0.06476419 -0.04415351
confint(object = sales_regression, parm = "Urban_yes", level = 0.95)
##                2.5 %    97.5 %
## Urban_yes -0.5559732 0.5121409
confint(object = sales_regression, parm = "US_yes", level = 0.95)
##            2.5 %   97.5 %
## US_yes 0.6913042 1.709841
# Diagnostic plots for the fitted sales model.
plot(x = sales_regression)

# Question 4 ----

# Example 1: x and y are two separately drawn standard-normal samples of
# size 100, so their sums of squares differ, and so do the slope of y on x
# and the slope of x on y in the output captured below.
x <- rnorm(n = 100)
y <- rnorm(n = 100)
sum(x^2)
## [1] 92.97771
sum(y^2)
## [1] 95.77641
lm(formula = y ~ x)
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##    -0.04572      0.07586
lm(formula = x ~ y)
## 
## Call:
## lm(formula = x ~ y)
## 
## Coefficients:
## (Intercept)            y  
##     0.08322      0.07325
# Example 2: y is a sign-flipped random permutation of x, so both vectors
# hold the same magnitudes and sum(x^2) equals sum(y^2). In the output
# captured below the slope of y on x and the slope of x on y coincide.
x <- rnorm(n = 100)
y <- -sample(x)
sum(x^2)
## [1] 99.52325
sum(y^2)
## [1] 99.52325
lm(formula = y ~ x)
## 
## Call:
## lm(formula = y ~ x)
## 
## Coefficients:
## (Intercept)            x  
##    -0.13059     -0.07878
lm(formula = x ~ y)
## 
## Call:
## lm(formula = x ~ y)
## 
## Coefficients:
## (Intercept)            y  
##     0.13059     -0.07878