# Read the Auto CSV; the raw file encodes missing values as "?".
auto <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
  header = TRUE,
  na.strings = "?"
)
# Drop incomplete rows first, then remove columns 8 and 9 by position,
# leaving the seven columns listed below.
auto <- na.omit(auto)[, -(8:9)]
names(auto)
## [1] "mpg" "cylinders" "displacement" "horsepower"
## [5] "weight" "acceleration" "year"
# attach() is kept because code further down may refer to these columns
# without a `data =` argument.
attach(auto)
#1a
# Regress mpg on the cylinders-by-displacement product only: `:` adds the
# interaction term without the two main effects.
modint1 <- lm(mpg ~ cylinders:displacement, data = auto)
summary(modint1)
##
## Call:
## lm(formula = mpg ~ cylinders:displacement, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.705 -3.426 -0.450 2.704 17.715
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.9896203 0.3905111 79.36 <2e-16 ***
## cylinders:displacement -0.0061177 0.0002462 -24.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.863 on 390 degrees of freedom
## Multiple R-squared: 0.6128, Adjusted R-squared: 0.6119
## F-statistic: 617.4 on 1 and 390 DF, p-value: < 2.2e-16
# Regress mpg on weight, horsepower and their interaction (`*` expands to
# both main effects plus the product term). `data = auto` is passed
# explicitly so the fit does not depend on what is attached to the search
# path, matching how modint1 is fitted.
modint2 <- lm(mpg ~ weight * horsepower, data = auto)
summary(modint2)
##
## Call:
## lm(formula = mpg ~ weight * horsepower, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.7725 -2.2074 -0.2708 1.9973 14.7314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.356e+01 2.343e+00 27.127 < 2e-16 ***
## weight -1.077e-02 7.738e-04 -13.921 < 2e-16 ***
## horsepower -2.508e-01 2.728e-02 -9.195 < 2e-16 ***
## weight:horsepower 5.355e-05 6.649e-06 8.054 9.93e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.93 on 388 degrees of freedom
## Multiple R-squared: 0.7484, Adjusted R-squared: 0.7465
## F-statistic: 384.8 on 3 and 388 DF, p-value: < 2.2e-16
# The interactions between cylinders and displacement, and between weight and
# horsepower, both appear to have a significant effect on mpg (the interaction
# terms have p-values far below 0.05).
#1b
# Quadratic transformation: mpg on cylinders plus cylinders squared. I()
# protects `^` so it is evaluated arithmetically rather than as a formula
# operator. `data = auto` is passed explicitly so the fit does not depend on
# what is attached to the search path.
squared <- lm(mpg ~ cylinders + I(cylinders^2), data = auto)
summary(squared)
##
## Call:
## lm(formula = mpg ~ cylinders + I(cylinders^2), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.5191 -2.8303 -0.7641 2.2992 17.7891
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.5154 4.6675 10.823 < 2e-16 ***
## cylinders -6.3833 1.7135 -3.725 0.000224 ***
## I(cylinders^2) 0.2393 0.1446 1.655 0.098766 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.903 on 389 degrees of freedom
## Multiple R-squared: 0.6075, Adjusted R-squared: 0.6054
## F-statistic: 301 on 2 and 389 DF, p-value: < 2.2e-16
# Square-root transformation: mpg on cylinders plus sqrt(cylinders).
# `data = auto` is passed explicitly so the fit does not depend on what is
# attached to the search path.
root <- lm(mpg ~ cylinders + I(sqrt(cylinders)), data = auto)
summary(root)
##
## Call:
## lm(formula = mpg ~ cylinders + I(sqrt(cylinders)), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3015 -3.0362 -0.6476 2.3615 17.8441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.47659 18.44014 3.388 0.000776 ***
## cylinders -0.06007 3.29739 -0.018 0.985474
## I(sqrt(cylinders)) -16.74019 15.76472 -1.062 0.288950
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.913 on 389 degrees of freedom
## Multiple R-squared: 0.6058, Adjusted R-squared: 0.6038
## F-statistic: 298.9 on 2 and 389 DF, p-value: < 2.2e-16
# Log transformation: mpg on cylinders plus log(cylinders).
# `data = auto` is passed explicitly so the fit does not depend on what is
# attached to the search path.
# NOTE(review): the object name `log` shadows base::log for plain variable
# lookups. Calls such as log(cylinders) still reach the base function, but a
# more descriptive name would be safer; it is kept here so existing
# references to `log` keep working.
log <- lm(mpg ~ cylinders + I(log(cylinders)), data = auto)
summary(log)
##
## Call:
## lm(formula = mpg ~ cylinders + I(log(cylinders)), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.1215 -3.1530 -0.6131 2.4018 17.8649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.035 6.412 7.492 4.61e-13 ***
## cylinders -2.243 1.639 -1.369 0.172
## I(log(cylinders)) -7.449 9.249 -0.805 0.421
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.916 on 389 degrees of freedom
## Multiple R-squared: 0.6053, Adjusted R-squared: 0.6033
## F-statistic: 298.3 on 2 and 389 DF, p-value: < 2.2e-16
# None of the transformations appears to improve the model of mpg on
# cylinders: the added squared, square-root and log terms all have p-values
# above 0.05.
#2a
# Install ISLR only when it is not already available. `repos` must be the
# root URL of a CRAN mirror; the previous value was the package's landing
# page, which is not a repository, so the package index could not be read
# and the install failed.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR", repos = "https://cloud.r-project.org")
}
# Load ISLR, then load the Carseats data set and list its columns.
library(ISLR)
data("Carseats")
names(Carseats)
## [1] "Sales" "CompPrice" "Income" "Advertising" "Population"
## [6] "Price" "ShelveLoc" "Age" "Education" "Urban"
## [11] "US"
# attach() is kept because code further down may refer to these columns
# without a `data =` argument.
attach(Carseats)
# Multiple regression of Sales on Price and the two qualitative predictors
# Urban and US. `data = Carseats` is passed explicitly so the fit does not
# depend on what is attached to the search path.
mlr <- lm(Sales ~ Price + Urban + US, data = Carseats)
summary(mlr)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
#2b Price and US are significant; Urban is not (p = 0.936). Urban and US are
#   qualitative, so each enters the model as a dummy variable (UrbanYes,
#   USYes) that is either 0 or 1.
#2c y = B0 + B1*x1 + B2*x2 + B3*x3, where x1 is Price and x2 (Urban) and
#   x3 (US) are dummy variables that can only be 0 or 1
#2d We can reject the null hypothesis (coefficient = 0) for Price and US
#2e
# Reduced model: Sales on Price and US only, with Urban left out.
# `data = Carseats` is passed explicitly so the fit does not depend on what
# is attached to the search path.
mlr2 <- lm(Sales ~ Price + US, data = Carseats)
summary(mlr2)
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
#2f The second model fits slightly better (adjusted R^2 0.2354 vs 0.2335,
#   residual standard error 2.469 vs 2.472), but neither is particularly
#   good: the multiple R^2 is only about 0.24 for both.
#2g
# 95% confidence intervals for the coefficients of the reduced model
# (0.95 is confint()'s default level, stated here for clarity).
confint(mlr2, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632