# Load the Auto data set from the course web site. In the raw CSV a "?"
# marks a missing value, so it is mapped to NA on read.
auto <- read.csv(
  file = "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
  header = TRUE,
  na.strings = "?"
)
# Keep only complete rows, then drop the 8th and 9th columns by position
# (presumably non-numeric / identifier columns -- confirm against the CSV).
auto <- na.omit(auto)
auto <- auto[, -(8:9)]
### Cylinders / displacement interaction
# Interaction-only model: mpg regressed on the cylinders x displacement
# product term (no main effects).
mod_mpg1 <- lm(
  formula = mpg ~ cylinders:displacement,
  data = auto
)
summary(mod_mpg1)
##
## Call:
## lm(formula = mpg ~ cylinders:displacement, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.705 -3.426 -0.450 2.704 17.715
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.9896203 0.3905111 79.36 <2e-16 ***
## cylinders:displacement -0.0061177 0.0002462 -24.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.863 on 390 degrees of freedom
## Multiple R-squared: 0.6128, Adjusted R-squared: 0.6119
## F-statistic: 617.4 on 1 and 390 DF, p-value: < 2.2e-16
### Cylinders / horsepower full model
# `*` expands to both main effects plus their interaction.
mod_mpg2 <- lm(
  formula = mpg ~ cylinders * horsepower,
  data = auto
)
summary(mod_mpg2)
##
## Call:
## lm(formula = mpg ~ cylinders * horsepower, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.5862 -2.1945 -0.5617 1.9541 16.3329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.815097 3.071314 23.708 <2e-16 ***
## cylinders -6.492462 0.510560 -12.716 <2e-16 ***
## horsepower -0.416007 0.034521 -12.051 <2e-16 ***
## cylinders:horsepower 0.047247 0.004732 9.984 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.094 on 388 degrees of freedom
## Multiple R-squared: 0.727, Adjusted R-squared: 0.7249
## F-statistic: 344.4 on 3 and 388 DF, p-value: < 2.2e-16
### Weight / acceleration interaction
# Interaction-only model: mpg regressed on the weight x acceleration
# product term (no main effects).
mod_mpg3 <- lm(
  formula = mpg ~ weight:acceleration,
  data = auto
)
summary(mod_mpg3)
##
## Call:
## lm(formula = mpg ~ weight:acceleration, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.6424 -4.1342 -0.5959 3.8714 23.7401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.053e+01 1.245e+00 32.55 <2e-16 ***
## weight:acceleration -3.772e-04 2.656e-05 -14.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.345 on 390 degrees of freedom
## Multiple R-squared: 0.3407, Adjusted R-squared: 0.3391
## F-statistic: 201.6 on 1 and 390 DF, p-value: < 2.2e-16
### Horsepower / weight full model
# `*` expands to both main effects plus their interaction.
mod_mpg4 <- lm(
  formula = mpg ~ horsepower * weight,
  data = auto
)
summary(mod_mpg4)
##
## Call:
## lm(formula = mpg ~ horsepower * weight, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7725 -2.2074 -0.2708 1.9973 14.7314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.356e+01 2.343e+00 27.127 < 2e-16 ***
## horsepower -2.508e-01 2.728e-02 -9.195 < 2e-16 ***
## weight -1.077e-02 7.738e-04 -13.921 < 2e-16 ***
## horsepower:weight 5.355e-05 6.649e-06 8.054 9.93e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.93 on 388 degrees of freedom
## Multiple R-squared: 0.7484, Adjusted R-squared: 0.7465
## F-statistic: 384.8 on 3 and 388 DF, p-value: < 2.2e-16
####b.
### Quadratic transformation
# I() protects `^` so it is treated as arithmetic squaring rather than
# formula crossing.
modQ <- lm(
  formula = mpg ~ horsepower + I(horsepower^2),
  data = auto
)
summary(modQ)
##
## Call:
## lm(formula = mpg ~ horsepower + I(horsepower^2), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.7135 -2.5943 -0.0859 2.2868 15.8961
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.9000997 1.8004268 31.60 <2e-16 ***
## horsepower -0.4661896 0.0311246 -14.98 <2e-16 ***
## I(horsepower^2) 0.0012305 0.0001221 10.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.374 on 389 degrees of freedom
## Multiple R-squared: 0.6876, Adjusted R-squared: 0.686
## F-statistic: 428 on 2 and 389 DF, p-value: < 2.2e-16
### Square-root transformation
# Linear term plus sqrt(horsepower); I() is kept so the coefficient label
# stays "I(sqrt(horsepower))".
modR <- lm(
  formula = mpg ~ horsepower + I(sqrt(horsepower)),
  data = auto
)
summary(modR)
##
## Call:
## lm(formula = mpg ~ horsepower + I(sqrt(horsepower)), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.5479 -2.5677 -0.2663 2.2998 15.5098
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 105.31581 6.64657 15.845 < 2e-16 ***
## horsepower 0.41913 0.05867 7.144 4.49e-12 ***
## I(sqrt(horsepower)) -12.48574 1.26337 -9.883 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.392 on 389 degrees of freedom
## Multiple R-squared: 0.685, Adjusted R-squared: 0.6834
## F-statistic: 423 on 2 and 389 DF, p-value: < 2.2e-16
### Log transformation
# Linear term plus the natural log of horsepower; I() is kept so the
# coefficient label stays "I(log(horsepower))".
modL <- lm(
  formula = mpg ~ horsepower + I(log(horsepower)),
  data = auto
)
summary(modL)
##
## Call:
## lm(formula = mpg ~ horsepower + I(log(horsepower)), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.5118 -2.5018 -0.2533 2.4446 15.3102
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 156.04057 12.08267 12.914 < 2e-16 ***
## horsepower 0.11846 0.02929 4.044 6.34e-05 ***
## I(log(horsepower)) -31.59815 3.28363 -9.623 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.415 on 389 degrees of freedom
## Multiple R-squared: 0.6817, Adjusted R-squared: 0.6801
## F-statistic: 416.6 on 2 and 389 DF, p-value: < 2.2e-16
# Copy the Carseats data set into the workspace (assumes the package that
# provides it, e.g. ISLR, is already attached -- TODO confirm).
Carseats <- Carseats
# Additive model: Sales explained by Price plus the Urban and US factors.
carMod2 <- lm(
  formula = Sales ~ Price + Urban + US,
  data = Carseats
)
summary(carMod2)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
# Scatter of Sales against Price, coloured by the Urban x US combination,
# with fitted lines from the additive model carMod2 overlaid.
# In an additive model every group shares the Price slope; the UrbanYes and
# USYes coefficients only shift the intercept. Coefficients are looked up by
# name so a slope can never be mistaken for an intercept shift.
# Only three of the four group lines are drawn (Urban = Yes & US = Yes is
# omitted).
ggplot(Carseats, aes(x = Price, y = Sales, color = Urban:US)) +
  geom_point() +
  # Baseline group: Urban = No, US = No.
  geom_abline(
    intercept = coef(carMod2)[["(Intercept)"]],
    slope = coef(carMod2)[["Price"]],
    color = "red", lwd = 1
  ) +
  # US = Yes (Urban = No): intercept shifted by the USYes coefficient.
  geom_abline(
    intercept = coef(carMod2)[["(Intercept)"]] + coef(carMod2)[["USYes"]],
    slope = coef(carMod2)[["Price"]],
    color = "forestgreen", lwd = 1
  ) +
  # Urban = Yes (US = No): intercept shifted by the UrbanYes coefficient.
  geom_abline(
    intercept = coef(carMod2)[["(Intercept)"]] + coef(carMod2)[["UrbanYes"]],
    slope = coef(carMod2)[["Price"]],
    color = "blue", lwd = 1
  )
#### b. #### The intercept coefficient gives the baseline that the other terms adjust. The Price and Urban coefficients are negative, so they lower predicted Sales, while the US coefficient is positive, so it raises predicted Sales. Since this is a prediction model, the qualitative variables provide ‘stepwise’ shifts that are added to the intercept coefficient. #### c. #### d.
# Sequential ANOVA table for the additive Carseats model (terms are tested
# in the order Price, Urban, US).
anova(object = carMod2)
## Analysis of Variance Table
##
## Response: Sales
## Df Sum Sq Mean Sq F value Pr(>F)
## Price 1 630.03 630.03 103.0603 < 2.2e-16 ***
## Urban 1 0.10 0.10 0.0158 0.9001
## US 1 131.31 131.31 21.4802 4.86e-06 ***
## Residuals 396 2420.83 6.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Based on the model, the null hypothesis can be rejected for Price and US, which are both highly significant, but it cannot be rejected for Urban (p = 0.9001) because it is not that significant to the model. #### e.
# Scatter of Sales against Price, coloured by US, with fitted lines from the
# additive model carMod2 overlaid.
# In an additive model every group shares the Price slope; the UrbanYes and
# USYes coefficients only shift the intercept. Coefficients are looked up by
# name so a slope can never be mistaken for an intercept shift.
ggplot(Carseats, aes(x = Price, y = Sales, color = US)) +
  geom_point() +
  # Baseline group: Urban = No, US = No.
  geom_abline(
    intercept = coef(carMod2)[["(Intercept)"]],
    slope = coef(carMod2)[["Price"]],
    color = "red", lwd = 1
  ) +
  # US = Yes (Urban = No): intercept shifted by the USYes coefficient.
  geom_abline(
    intercept = coef(carMod2)[["(Intercept)"]] + coef(carMod2)[["USYes"]],
    slope = coef(carMod2)[["Price"]],
    color = "forestgreen", lwd = 1
  ) +
  # Urban = Yes (US = No): intercept shifted by the UrbanYes coefficient.
  geom_abline(
    intercept = coef(carMod2)[["(Intercept)"]] + coef(carMod2)[["UrbanYes"]],
    slope = coef(carMod2)[["Price"]],
    color = "blue", lwd = 1
  )
#### f. Both models fit the data relatively well, but there is a lot of noise around the line of best fit, which suggests some interference and interaction among the variables. #### g.
# 95% confidence intervals for every coefficient of the Carseats model.
confint(object = carMod2, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.76359670 14.32334118
## Price -0.06476419 -0.04415351
## UrbanYes -0.55597316 0.51214085
## USYes 0.69130419 1.70984121