Problem Set 5

Problem 1

a.

# Load the Auto data set; "?" in the raw file is read as NA.
auto <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
  header = TRUE,
  na.strings = "?"
)

# Keep only complete rows, then drop columns 8 and 9 by position
# (presumably the origin and name columns -- confirm against the CSV header).
auto <- na.omit(auto)
auto <- auto[, -(8:9)]

### Cylinders / displacement interaction
# `:` adds only the product term; no main effects for cylinders or
# displacement are included in this model.
mod_mpg1 <- lm(
  formula = mpg ~ cylinders:displacement,
  data = auto
)
summary(mod_mpg1)

## 
## Call:
## lm(formula = mpg ~ cylinders:displacement, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.705  -3.426  -0.450   2.704  17.715 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            30.9896203  0.3905111   79.36   <2e-16 ***
## cylinders:displacement -0.0061177  0.0002462  -24.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.863 on 390 degrees of freedom
## Multiple R-squared:  0.6128, Adjusted R-squared:  0.6119 
## F-statistic: 617.4 on 1 and 390 DF,  p-value: < 2.2e-16

### Cylinders / horsepower full model
# `*` expands to both main effects plus the cylinders:horsepower product term.
mod_mpg2 <- lm(
  formula = mpg ~ cylinders * horsepower,
  data = auto
)
summary(mod_mpg2)

## 
## Call:
## lm(formula = mpg ~ cylinders * horsepower, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.5862  -2.1945  -0.5617   1.9541  16.3329 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          72.815097   3.071314  23.708   <2e-16 ***
## cylinders            -6.492462   0.510560 -12.716   <2e-16 ***
## horsepower           -0.416007   0.034521 -12.051   <2e-16 ***
## cylinders:horsepower  0.047247   0.004732   9.984   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.094 on 388 degrees of freedom
## Multiple R-squared:  0.727,  Adjusted R-squared:  0.7249 
## F-statistic: 344.4 on 3 and 388 DF,  p-value: < 2.2e-16

### Weight / acceleration interaction
# Interaction-only model: the weight:acceleration product is the sole predictor.
mod_mpg3 <- lm(
  formula = mpg ~ weight:acceleration,
  data = auto
)
summary(mod_mpg3)

## 
## Call:
## lm(formula = mpg ~ weight:acceleration, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.6424  -4.1342  -0.5959   3.8714  23.7401 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4.053e+01  1.245e+00   32.55   <2e-16 ***
## weight:acceleration -3.772e-04  2.656e-05  -14.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.345 on 390 degrees of freedom
## Multiple R-squared:  0.3407, Adjusted R-squared:  0.3391 
## F-statistic: 201.6 on 1 and 390 DF,  p-value: < 2.2e-16

### Horsepower / weight full model
# Main effects for horsepower and weight plus their product term.
mod_mpg4 <- lm(
  formula = mpg ~ horsepower * weight,
  data = auto
)
summary(mod_mpg4)

## 
## Call:
## lm(formula = mpg ~ horsepower * weight, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7725  -2.2074  -0.2708   1.9973  14.7314 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.356e+01  2.343e+00  27.127  < 2e-16 ***
## horsepower        -2.508e-01  2.728e-02  -9.195  < 2e-16 ***
## weight            -1.077e-02  7.738e-04 -13.921  < 2e-16 ***
## horsepower:weight  5.355e-05  6.649e-06   8.054 9.93e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.93 on 388 degrees of freedom
## Multiple R-squared:  0.7484, Adjusted R-squared:  0.7465 
## F-statistic: 384.8 on 3 and 388 DF,  p-value: < 2.2e-16

####b.

### Quadratic transformation
# I() protects `^` so it is evaluated arithmetically (horsepower squared)
# instead of being read as a formula operator.
modQ <- lm(
  formula = mpg ~ horsepower + I(horsepower^2),
  data = auto
)
summary(modQ)

## 
## Call:
## lm(formula = mpg ~ horsepower + I(horsepower^2), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.7135  -2.5943  -0.0859   2.2868  15.8961 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     56.9000997  1.8004268   31.60   <2e-16 ***
## horsepower      -0.4661896  0.0311246  -14.98   <2e-16 ***
## I(horsepower^2)  0.0012305  0.0001221   10.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared:  0.6876, Adjusted R-squared:  0.686 
## F-statistic:   428 on 2 and 389 DF,  p-value: < 2.2e-16

### Square-root transformation
# horsepower enters both linearly and as its square root.
# I() is kept so the coefficient label stays I(sqrt(horsepower)).
modR <- lm(
  formula = mpg ~ horsepower + I(sqrt(horsepower)),
  data = auto
)
summary(modR)

## 
## Call:
## lm(formula = mpg ~ horsepower + I(sqrt(horsepower)), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.5479  -2.5677  -0.2663   2.2998  15.5098 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         105.31581    6.64657  15.845  < 2e-16 ***
## horsepower            0.41913    0.05867   7.144 4.49e-12 ***
## I(sqrt(horsepower)) -12.48574    1.26337  -9.883  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.392 on 389 degrees of freedom
## Multiple R-squared:  0.685,  Adjusted R-squared:  0.6834 
## F-statistic:   423 on 2 and 389 DF,  p-value: < 2.2e-16

### Log transformation
# horsepower enters both linearly and as its natural logarithm.
# I() is kept so the coefficient label stays I(log(horsepower)).
modL <- lm(
  formula = mpg ~ horsepower + I(log(horsepower)),
  data = auto
)
summary(modL)

## 
## Call:
## lm(formula = mpg ~ horsepower + I(log(horsepower)), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.5118  -2.5018  -0.2533   2.4446  15.3102 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        156.04057   12.08267  12.914  < 2e-16 ***
## horsepower           0.11846    0.02929   4.044 6.34e-05 ***
## I(log(horsepower)) -31.59815    3.28363  -9.623  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.415 on 389 degrees of freedom
## Multiple R-squared:  0.6817, Adjusted R-squared:  0.6801 
## F-statistic: 416.6 on 2 and 389 DF,  p-value: < 2.2e-16

Problem 2

a.

# Copy the Carseats data into the workspace.
# NOTE(review): this only works if a package that provides Carseats
# (presumably ISLR) is already attached -- confirm it is loaded earlier.
Carseats <- Carseats

# Additive model: Sales explained by Price and the Urban and US factors.
carMod2 <- lm(
  formula = Sales ~ Price + Urban + US,
  data = Carseats
)
summary(carMod2)

## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16

# Scatter plot of Sales against Price, coloured by the Urban:US combination,
# overlaid with fitted lines from the additive model carMod2.
#
# Sales ~ Price + Urban + US has no interaction terms, so every fitted line
# shares the Price slope; the UrbanYes / USYes coefficients only shift the
# intercept. Coefficients are looked up by name so a slope can never be used
# as an intercept offset (or the reverse) by mistake.
carCoef <- coef(carMod2)

ggplot(Carseats, aes(x = Price, y = Sales, color = Urban:US)) +
  geom_point() +
  # Urban = No, US = No (baseline)
  geom_abline(
    intercept = carCoef[["(Intercept)"]],
    slope = carCoef[["Price"]],
    color = "red", lwd = 1
  ) +
  # Urban = No, US = Yes
  geom_abline(
    intercept = carCoef[["(Intercept)"]] + carCoef[["USYes"]],
    slope = carCoef[["Price"]],
    color = "forestgreen", lwd = 1
  ) +
  # Urban = Yes, US = No
  geom_abline(
    intercept = carCoef[["(Intercept)"]] + carCoef[["UrbanYes"]],
    slope = carCoef[["Price"]],
    color = "blue", lwd = 1
  )

#### b. #### The intercept is the baseline prediction of Sales (a non-urban, non-US store at a Price of zero), to which the other terms are added. Price and UrbanYes have negative coefficients, so they lower predicted Sales, while USYes has a positive coefficient and raises it. Because Urban and US are qualitative variables, their coefficients act as constant 'stepwise' shifts added to the intercept coefficient rather than changes to the slope. #### c. #### d.

# Sequential ANOVA table for the additive Carseats model.
anova(object = carMod2)

## Analysis of Variance Table
## 
## Response: Sales
##            Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Price       1  630.03  630.03 103.0603 < 2.2e-16 ***
## Urban       1    0.10    0.10   0.0158    0.9001    
## US          1  131.31  131.31  21.4802  4.86e-06 ***
## Residuals 396 2420.83    6.11                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Based on the model, the null hypothesis can be rejected for Price and US (both p < 0.001), but not for Urban (p ≈ 0.90), because Urban is not significant to the model. #### e.

# Scatter plot of Sales against Price, coloured by US, overlaid with fitted
# lines from the additive model carMod2.
#
# Sales ~ Price + Urban + US has no interaction terms, so every fitted line
# shares the Price slope; the UrbanYes / USYes coefficients only shift the
# intercept. Coefficients are looked up by name so a slope can never be used
# as an intercept offset (or the reverse) by mistake.
carCoef <- coef(carMod2)

ggplot(Carseats, aes(x = Price, y = Sales, color = US)) +
  geom_point() +
  # Urban = No, US = No (baseline)
  geom_abline(
    intercept = carCoef[["(Intercept)"]],
    slope = carCoef[["Price"]],
    color = "red", lwd = 1
  ) +
  # Urban = No, US = Yes
  geom_abline(
    intercept = carCoef[["(Intercept)"]] + carCoef[["USYes"]],
    slope = carCoef[["Price"]],
    color = "forestgreen", lwd = 1
  ) +
  # Urban = Yes, US = No
  geom_abline(
    intercept = carCoef[["(Intercept)"]] + carCoef[["UrbanYes"]],
    slope = carCoef[["Price"]],
    color = "blue", lwd = 1
  )

#### f. Both models fit the data relatively well, but there is a lot of noise around the line of best fit, which shows some interference and interaction of the variables. #### g.

# 95% confidence intervals for every coefficient of the additive model.
confint(carMod2, level = 0.95)

##                   2.5 %      97.5 %
## (Intercept) 11.76359670 14.32334118
## Price       -0.06476419 -0.04415351
## UrbanYes    -0.55597316  0.51214085
## USYes        0.69130419  1.70984121

Problem Set 5

Noah Snizik

10/12/2019

Problem 1

a.

Problem 2

a.