#Problem 1

# Download the Auto data set. Cells containing "?" are read as NA so that
# they can be removed in the next step.
auto <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
  header = TRUE,
  na.strings = "?"
)

# Drop every row that has at least one NA.
auto <- na.omit(auto)

# Remove the origin and name columns. They are selected by name rather than
# by position (previously columns 8:9) so a change in the CSV column order
# cannot silently drop the wrong variables.
auto <- auto[, !(names(auto) %in% c("origin", "name"))]

# Copy the seven retained variables into a fresh data frame used for the
# scatterplot matrix below.
autodf <- with(
  auto,
  data.frame(
    mpg = mpg,
    cylinders = cylinders,
    displacement = displacement,
    horsepower = horsepower,
    weight = weight,
    acceleration = acceleration,
    year = year
  )
)

# NOTE(review): attach() puts auto's columns on the search path. It is kept
# because a matching detach(auto) appears later in the script.
attach(auto)

# Quick look at the column names and the first rows.
names(auto)
## [1] "mpg"          "cylinders"    "displacement" "horsepower"  
## [5] "weight"       "acceleration" "year"
head(auto)
##   mpg cylinders displacement horsepower weight acceleration year
## 1  18         8          307        130   3504         12.0   70
## 2  15         8          350        165   3693         11.5   70
## 3  18         8          318        150   3436         11.0   70
## 4  16         8          304        150   3433         12.0   70
## 5  17         8          302        140   3449         10.5   70
## 6  15         8          429        198   4341         10.0   70

# Scatterplot matrix of every pair of variables.
pairs(autodf)

# Additive baseline: regress mpg on the six other variables in auto.
mlr_mod <- lm(
  formula = mpg ~ cylinders + displacement + horsepower + weight +
    acceleration + year,
  data = auto
)
summary(mlr_mod)
## 
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight + 
##     acceleration + year, data = auto)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6927 -2.3864 -0.0801  2.0291 14.3607 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.454e+01  4.764e+00  -3.051  0.00244 ** 
## cylinders    -3.299e-01  3.321e-01  -0.993  0.32122    
## displacement  7.678e-03  7.358e-03   1.044  0.29733    
## horsepower   -3.914e-04  1.384e-02  -0.028  0.97745    
## weight       -6.795e-03  6.700e-04 -10.141  < 2e-16 ***
## acceleration  8.527e-02  1.020e-01   0.836  0.40383    
## year          7.534e-01  5.262e-02  14.318  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.435 on 385 degrees of freedom
## Multiple R-squared:  0.8093, Adjusted R-squared:  0.8063 
## F-statistic: 272.2 on 6 and 385 DF,  p-value: < 2.2e-16
# Interaction-only model: three pairwise products and no main effects.
# `data = auto` is passed explicitly so the fit no longer depends on
# attach(auto) and cannot pick up same-named objects from the workspace.
autoMod <- lm(
  mpg ~ cylinders:displacement + horsepower:weight + acceleration:year,
  data = auto
)
summary(autoMod)
## 
## Call:
## lm(formula = mpg ~ cylinders:displacement + horsepower:weight + 
##     acceleration:year, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1672  -3.3748  -0.3619   2.6899  17.2209 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             2.980e+01  1.897e+00  15.710  < 2e-16 ***
## cylinders:displacement -2.838e-03  6.915e-04  -4.104 4.94e-05 ***
## horsepower:weight      -1.467e-05  3.231e-06  -4.541 7.50e-06 ***
## acceleration:year       1.791e-03  1.313e-03   1.364    0.173    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.714 on 388 degrees of freedom
## Multiple R-squared:  0.6381, Adjusted R-squared:  0.6353 
## F-statistic:   228 on 3 and 388 DF,  p-value: < 2.2e-16
# Main effects of cylinders and displacement plus their interaction
# (`a * b` in a formula expands to a + b + a:b).
auto_Mod <- lm(formula = mpg ~ cylinders * displacement, data = auto)
summary(auto_Mod)
## 
## Call:
## lm(formula = mpg ~ cylinders * displacement, data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -16.0432  -2.4308  -0.2263   2.2048  20.9051 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            48.22040    2.34712  20.545  < 2e-16 ***
## cylinders              -2.41838    0.53456  -4.524 8.08e-06 ***
## displacement           -0.13436    0.01615  -8.321 1.50e-15 ***
## cylinders:displacement  0.01182    0.00207   5.711 2.24e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.454 on 388 degrees of freedom
## Multiple R-squared:  0.6769, Adjusted R-squared:  0.6744 
## F-statistic:   271 on 3 and 388 DF,  p-value: < 2.2e-16
# Models with a quadratic term.
# I() makes `^` plain arithmetic instead of formula syntax.
modwq <- lm(formula = mpg ~ weight + I(weight^2), data = auto)
summary(modwq)
## 
## Call:
## lm(formula = mpg ~ weight + I(weight^2), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.6246  -2.7134  -0.3485   1.8267  16.0866 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.226e+01  2.993e+00  20.800  < 2e-16 ***
## weight      -1.850e-02  1.972e-03  -9.379  < 2e-16 ***
## I(weight^2)  1.697e-06  3.059e-07   5.545 5.43e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.176 on 389 degrees of freedom
## Multiple R-squared:  0.7151, Adjusted R-squared:  0.7137 
## F-statistic: 488.3 on 2 and 389 DF,  p-value: < 2.2e-16
# Same quadratic form, using year as the predictor.
modyq <- lm(formula = mpg ~ year + I(year^2), data = auto)
summary(modyq)
## 
## Call:
## lm(formula = mpg ~ year + I(year^2), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2681  -5.0887  -0.8619   4.6922  18.2275 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 565.72999  146.93208   3.850 0.000138 ***
## year        -15.53584    3.87206  -4.012 7.21e-05 ***
## I(year^2)     0.11028    0.02546   4.331 1.89e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.223 on 389 degrees of freedom
## Multiple R-squared:  0.3675, Adjusted R-squared:  0.3643 
## F-statistic:   113 on 2 and 389 DF,  p-value: < 2.2e-16
# Models with a log-transformed predictor (natural log).
modwlog <- lm(formula = mpg ~ log(weight), data = auto)
summary(modwlog)
## 
## Call:
## lm(formula = mpg ~ log(weight), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.4315  -2.6752  -0.2888   1.9429  16.0136 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 209.9433     6.0002   34.99   <2e-16 ***
## log(weight) -23.4317     0.7534  -31.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.189 on 390 degrees of freedom
## Multiple R-squared:  0.7127, Adjusted R-squared:  0.7119 
## F-statistic: 967.3 on 1 and 390 DF,  p-value: < 2.2e-16
# Same log form, using year as the predictor.
modylog <- lm(formula = mpg ~ log(year), data = auto)
summary(modylog)
## 
## Call:
## lm(formula = mpg ~ log(year), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.8860  -5.5076  -0.4014   4.8925  18.2655 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -377.873     28.803  -13.12   <2e-16 ***
## log(year)     92.699      6.653   13.93   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.386 on 390 degrees of freedom
## Multiple R-squared:  0.3324, Adjusted R-squared:  0.3307 
## F-statistic: 194.2 on 1 and 390 DF,  p-value: < 2.2e-16
# Models with a square-root term: the predictor enters both linearly and
# raised to the power 1/2.
modws <- lm(formula = mpg ~ weight + I(weight^(1/2)), data = auto)
summary(modws)
## 
## Call:
## lm(formula = mpg ~ weight + I(weight^(1/2)), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.5660  -2.6552  -0.4161   1.7373  16.1001 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     109.218284  11.573797   9.437  < 2e-16 ***
## weight            0.013191   0.003828   3.446 0.000631 ***
## I(weight^(1/2))  -2.314535   0.424250  -5.456  8.7e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.181 on 389 degrees of freedom
## Multiple R-squared:  0.7145, Adjusted R-squared:  0.713 
## F-statistic: 486.7 on 2 and 389 DF,  p-value: < 2.2e-16
# Same square-root form, using year as the predictor.
modys <- lm(formula = mpg ~ year + I(year^(1/2)), data = auto)
summary(modys)
## 
## Call:
## lm(formula = mpg ~ year + I(year^(1/2)), data = auto)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2719  -5.0997  -0.8405   4.7305  18.2006 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2480.086    586.528   4.228 2.94e-05 ***
## year            34.833      7.729   4.507 8.71e-06 ***
## I(year^(1/2)) -585.630    134.688  -4.348 1.76e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.222 on 389 degrees of freedom
## Multiple R-squared:  0.3678, Adjusted R-squared:  0.3645 
## F-statistic: 113.1 on 2 and 389 DF,  p-value: < 2.2e-16
# Undo the earlier attach(auto) so its columns leave the search path
# before the next problem starts.
detach(name = auto)

#Problem 2

# install.packages("ISLR")  # one-time setup if the package is not installed
library("ISLR")

# Load the Carseats data set and list its columns.
data("Carseats")
names(Carseats)
##  [1] "Sales"       "CompPrice"   "Income"      "Advertising" "Population" 
##  [6] "Price"       "ShelveLoc"   "Age"         "Education"   "Urban"      
## [11] "US"
# Keep only the response (Sales) and the three predictors used below.
carseats <- with(
  Carseats,
  data.frame(
    Sales = Sales,
    Price = Price,
    Urban = Urban,
    US = US
  )
)

# Scatterplot matrix of the four retained variables.
pairs(carseats)

# Multiple regression predicting Sales from Price, Urban and US.
mlr_modCar <- lm(formula = Sales ~ Price + Urban + US, data = carseats)
summary(mlr_modCar)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
# Smaller multiple regression: the same fit with the Urban term left out.
mlr_modsell <- lm(formula = Sales ~ Price + US, data = carseats)
summary(mlr_modsell)
## 
## Call:
## lm(formula = Sales ~ Price + US, data = carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16
# 95% confidence intervals for the coefficients of the smaller model.
confint(mlr_modsell, level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) 11.79032020 14.27126531
## Price       -0.06475984 -0.04419543
## USYes        0.69151957  1.70776632