# Read the Auto data set, treating "?" as a missing value.
auto <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
  header = TRUE,
  na.strings = "?"
)
# Keep complete rows only, then drop the 8th and 9th columns.
auto <- na.omit(auto)
auto <- auto[, -(8:9)]

names(auto)
## [1] "mpg"          "cylinders"    "displacement" "horsepower"  
## [5] "weight"       "acceleration" "year"
# Put the columns of `auto` on the search path so bare column names resolve.
attach(auto)

#1a

# Interaction-only model: mpg regressed on the cylinders-by-displacement term.
modint1 <- lm(mpg ~ cylinders:displacement, data = auto)
summary(modint1)
## 
## Call:
## lm(formula = mpg ~ cylinders:displacement, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.705  -3.426  -0.450   2.704  17.715 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            30.9896203  0.3905111   79.36   <2e-16 ***
## cylinders:displacement -0.0061177  0.0002462  -24.85   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.863 on 390 degrees of freedom
## Multiple R-squared:  0.6128, Adjusted R-squared:  0.6119 
## F-statistic: 617.4 on 1 and 390 DF,  p-value: < 2.2e-16
# Main effects of weight and horsepower plus their interaction. The data frame
# is passed explicitly so the fit does not depend on attach(auto) and matches
# how modint1 is specified.
modint2 <- lm(mpg ~ weight * horsepower, data = auto)
summary(modint2)
## 
## Call:
## lm(formula = mpg ~ weight * horsepower, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.7725  -2.2074  -0.2708   1.9973  14.7314 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.356e+01  2.343e+00  27.127  < 2e-16 ***
## weight            -1.077e-02  7.738e-04 -13.921  < 2e-16 ***
## horsepower        -2.508e-01  2.728e-02  -9.195  < 2e-16 ***
## weight:horsepower  5.355e-05  6.649e-06   8.054 9.93e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.93 on 388 degrees of freedom
## Multiple R-squared:  0.7484, Adjusted R-squared:  0.7465 
## F-statistic: 384.8 on 3 and 388 DF,  p-value: < 2.2e-16

The interaction between cylinders and displacement, and the interaction between weight and horsepower, both appear to have a statistically significant effect on mpg (p < 0.001 in each model).

#1b

# Add a quadratic term in cylinders. `data = auto` is given explicitly so the
# fit does not depend on attach(auto).
squared <- lm(mpg ~ cylinders + I(cylinders^2), data = auto)
summary(squared)
## 
## Call:
## lm(formula = mpg ~ cylinders + I(cylinders^2), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.5191  -2.8303  -0.7641   2.2992  17.7891 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     50.5154     4.6675  10.823  < 2e-16 ***
## cylinders       -6.3833     1.7135  -3.725 0.000224 ***
## I(cylinders^2)   0.2393     0.1446   1.655 0.098766 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.903 on 389 degrees of freedom
## Multiple R-squared:  0.6075, Adjusted R-squared:  0.6054 
## F-statistic:   301 on 2 and 389 DF,  p-value: < 2.2e-16
# Add a square-root term in cylinders. `data = auto` is given explicitly so the
# fit does not depend on attach(auto).
root <- lm(mpg ~ cylinders + I(sqrt(cylinders)), data = auto)
summary(root)
## 
## Call:
## lm(formula = mpg ~ cylinders + I(sqrt(cylinders)), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3015  -3.0362  -0.6476   2.3615  17.8441 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         62.47659   18.44014   3.388 0.000776 ***
## cylinders           -0.06007    3.29739  -0.018 0.985474    
## I(sqrt(cylinders)) -16.74019   15.76472  -1.062 0.288950    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.913 on 389 degrees of freedom
## Multiple R-squared:  0.6058, Adjusted R-squared:  0.6038 
## F-statistic: 298.9 on 2 and 389 DF,  p-value: < 2.2e-16
# Add a log term in cylinders. `data = auto` is given explicitly so the fit
# does not depend on attach(auto).
# NOTE: the fitted model is stored under the name `log`, the same name as the
# base function. Calls such as log(x) still reach the function, because R
# skips non-function bindings when evaluating a call; the name is kept so
# existing references to this object keep working.
log <- lm(mpg ~ cylinders + I(log(cylinders)), data = auto)
summary(log)
## 
## Call:
## lm(formula = mpg ~ cylinders + I(log(cylinders)), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.1215  -3.1530  -0.6131   2.4018  17.8649 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         48.035      6.412   7.492 4.61e-13 ***
## cylinders           -2.243      1.639  -1.369    0.172    
## I(log(cylinders))   -7.449      9.249  -0.805    0.421    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.916 on 389 degrees of freedom
## Multiple R-squared:  0.6053, Adjusted R-squared:  0.6033 
## F-statistic: 298.3 on 2 and 389 DF,  p-value: < 2.2e-16

None of the transformations appears to improve the model of mpg on cylinders: the p-value of each transformed term (0.099 for the square, 0.289 for the square root, 0.421 for the log) is above 0.05.

#2a

# Install ISLR only when it is not already available. `repos` must be the root
# of a CRAN mirror; pointing it at the package's landing page makes the package
# index lookup fail and nothing gets installed.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR", repos = "https://cloud.r-project.org")
}
library(ISLR)
data(Carseats)
names(Carseats)
##  [1] "Sales"       "CompPrice"   "Income"      "Advertising" "Population" 
##  [6] "Price"       "ShelveLoc"   "Age"         "Education"   "Urban"      
## [11] "US"
# Put the columns of `Carseats` on the search path so bare names resolve.
attach(Carseats)



# Multiple regression of Sales on Price and the two qualitative predictors
# Urban and US. The data frame is passed explicitly so the fit does not depend
# on attach(Carseats).
mlr <- lm(Sales ~ Price + Urban + US, data = Carseats)
summary(mlr)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16

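#2b Price and US are significant; Urban is not. Urban and US are qualitative, so each enters the model as a 0/1 dummy variable (UrbanYes, USYes); its coefficient is the estimated difference in Sales between the Yes and No groups, holding the other predictors fixed.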

#2c Sales = B0 + B1*Price + B2*UrbanYes + B3*USYes, where UrbanYes and USYes are dummy variables that equal 1 for "Yes" and 0 for "No"

#2d We can reject the null hypothesis (coefficient = 0) for Price and US, but not for Urban

#2e

# Reduced model that keeps only the significant predictors. The data frame is
# passed explicitly so the fit does not depend on attach(Carseats).
mlr2 <- lm(Sales ~ Price + US, data = Carseats)
summary(mlr2)
## 
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16

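#2f The second model fits slightly better (adjusted R^2 0.2354 vs. 0.2335; residual standard error 2.469 vs. 2.472), but neither fits particularly well: both explain only about 24% of the variance in Sales (multiple R^2 = 0.2393).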

#2g

# 95% confidence intervals for the coefficients of the reduced model.
confint(mlr2, level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) 11.79032020 14.27126531
## Price       -0.06475984 -0.04419543
## USYes        0.69151957  1.70776632