# Problem 1
# Read the Auto data set; the raw CSV encodes missing values as "?".
auto <- read.csv(
  "http://faculty.marshall.usc.edu/gareth-james/ISL/Auto.csv",
  header = TRUE,
  na.strings = "?"
)
# Omit rows containing NAs.
auto <- na.omit(auto)
# Take out the origin and name columns. They are dropped by name instead of
# by position (8:9) so a reordered CSV cannot cause the wrong columns to go.
auto <- auto[, setdiff(names(auto), c("origin", "name")), drop = FALSE]
# Copy of the seven remaining variables, used for the scatterplot matrix.
autodf <- data.frame(
  mpg = auto$mpg,
  cylinders = auto$cylinders,
  displacement = auto$displacement,
  horsepower = auto$horsepower,
  weight = auto$weight,
  acceleration = auto$acceleration,
  year = auto$year
)
# NOTE(review): attach() is discouraged because it lets bare column names
# resolve through the search path. It is retained here only because the
# script later calls detach(auto) and code further down may still refer to
# columns without data =; prefer passing data = auto explicitly.
attach(auto)
names(auto)
## [1] "mpg" "cylinders" "displacement" "horsepower"
## [5] "weight" "acceleration" "year"
# Preview the first six rows of the cleaned data.
head(auto, n = 6L)
## mpg cylinders displacement horsepower weight acceleration year
## 1 18 8 307 130 3504 12.0 70
## 2 15 8 350 165 3693 11.5 70
## 3 18 8 318 150 3436 11.0 70
## 4 16 8 304 150 3433 12.0 70
## 5 17 8 302 140 3449 10.5 70
## 6 15 8 429 198 4341 10.0 70
# Scatterplot matrix of every pair of variables in autodf.
pairs(x = autodf)
# Multiple linear regression of mpg on all six remaining predictors.
mlr_mod <- lm(
  mpg ~ cylinders + displacement + horsepower + weight + acceleration + year,
  data = auto
)
summary(mlr_mod)
##
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight +
## acceleration + year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6927 -2.3864 -0.0801 2.0291 14.3607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.454e+01 4.764e+00 -3.051 0.00244 **
## cylinders -3.299e-01 3.321e-01 -0.993 0.32122
## displacement 7.678e-03 7.358e-03 1.044 0.29733
## horsepower -3.914e-04 1.384e-02 -0.028 0.97745
## weight -6.795e-03 6.700e-04 -10.141 < 2e-16 ***
## acceleration 8.527e-02 1.020e-01 0.836 0.40383
## year 7.534e-01 5.262e-02 14.318 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.435 on 385 degrees of freedom
## Multiple R-squared: 0.8093, Adjusted R-squared: 0.8063
## F-statistic: 272.2 on 6 and 385 DF, p-value: < 2.2e-16
# Interaction-only model: each predictor enters solely through a pairwise
# product (":" adds the interaction without the main effects).
# data = auto is passed explicitly so the fit no longer depends on the
# columns being reachable through attach(auto).
autoMod <- lm(
  mpg ~ cylinders:displacement + horsepower:weight + acceleration:year,
  data = auto
)
summary(autoMod)
##
## Call:
## lm(formula = mpg ~ cylinders:displacement + horsepower:weight +
## acceleration:year, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1672 -3.3748 -0.3619 2.6899 17.2209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.980e+01 1.897e+00 15.710 < 2e-16 ***
## cylinders:displacement -2.838e-03 6.915e-04 -4.104 4.94e-05 ***
## horsepower:weight -1.467e-05 3.231e-06 -4.541 7.50e-06 ***
## acceleration:year 1.791e-03 1.313e-03 1.364 0.173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.714 on 388 degrees of freedom
## Multiple R-squared: 0.6381, Adjusted R-squared: 0.6353
## F-statistic: 228 on 3 and 388 DF, p-value: < 2.2e-16
# Main effects of cylinders and displacement plus their interaction
# ("*" expands to a + b + a:b).
auto_Mod <- lm(
  mpg ~ cylinders * displacement,
  data = auto
)
summary(auto_Mod)
##
## Call:
## lm(formula = mpg ~ cylinders * displacement, data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.0432 -2.4308 -0.2263 2.2048 20.9051
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.22040 2.34712 20.545 < 2e-16 ***
## cylinders -2.41838 0.53456 -4.524 8.08e-06 ***
## displacement -0.13436 0.01615 -8.321 1.50e-15 ***
## cylinders:displacement 0.01182 0.00207 5.711 2.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.454 on 388 degrees of freedom
## Multiple R-squared: 0.6769, Adjusted R-squared: 0.6744
## F-statistic: 271 on 3 and 388 DF, p-value: < 2.2e-16
# Models with a quadratic term
# mpg as a second-degree polynomial in weight; I() protects ^ so it is
# treated as arithmetic rather than formula syntax.
modwq <- lm(
  mpg ~ weight + I(weight^2),
  data = auto
)
summary(modwq)
##
## Call:
## lm(formula = mpg ~ weight + I(weight^2), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.6246 -2.7134 -0.3485 1.8267 16.0866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.226e+01 2.993e+00 20.800 < 2e-16 ***
## weight -1.850e-02 1.972e-03 -9.379 < 2e-16 ***
## I(weight^2) 1.697e-06 3.059e-07 5.545 5.43e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.176 on 389 degrees of freedom
## Multiple R-squared: 0.7151, Adjusted R-squared: 0.7137
## F-statistic: 488.3 on 2 and 389 DF, p-value: < 2.2e-16
# mpg as a second-degree polynomial in year.
modyq <- lm(
  mpg ~ year + I(year^2),
  data = auto
)
summary(modyq)
##
## Call:
## lm(formula = mpg ~ year + I(year^2), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2681 -5.0887 -0.8619 4.6922 18.2275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 565.72999 146.93208 3.850 0.000138 ***
## year -15.53584 3.87206 -4.012 7.21e-05 ***
## I(year^2) 0.11028 0.02546 4.331 1.89e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.223 on 389 degrees of freedom
## Multiple R-squared: 0.3675, Adjusted R-squared: 0.3643
## F-statistic: 113 on 2 and 389 DF, p-value: < 2.2e-16
# Models with a log term
# mpg regressed on the natural log of weight.
modwlog <- lm(
  mpg ~ log(weight),
  data = auto
)
summary(modwlog)
##
## Call:
## lm(formula = mpg ~ log(weight), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.4315 -2.6752 -0.2888 1.9429 16.0136
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 209.9433 6.0002 34.99 <2e-16 ***
## log(weight) -23.4317 0.7534 -31.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.189 on 390 degrees of freedom
## Multiple R-squared: 0.7127, Adjusted R-squared: 0.7119
## F-statistic: 967.3 on 1 and 390 DF, p-value: < 2.2e-16
# mpg regressed on the natural log of year.
modylog <- lm(
  mpg ~ log(year),
  data = auto
)
summary(modylog)
##
## Call:
## lm(formula = mpg ~ log(year), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.8860 -5.5076 -0.4014 4.8925 18.2655
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -377.873 28.803 -13.12 <2e-16 ***
## log(year) 92.699 6.653 13.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.386 on 390 degrees of freedom
## Multiple R-squared: 0.3324, Adjusted R-squared: 0.3307
## F-statistic: 194.2 on 1 and 390 DF, p-value: < 2.2e-16
# Models with a square-root term
# mpg regressed on weight and the square root of weight (weight^(1/2)).
modws <- lm(
  mpg ~ weight + I(weight^(1/2)),
  data = auto
)
summary(modws)
##
## Call:
## lm(formula = mpg ~ weight + I(weight^(1/2)), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5660 -2.6552 -0.4161 1.7373 16.1001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 109.218284 11.573797 9.437 < 2e-16 ***
## weight 0.013191 0.003828 3.446 0.000631 ***
## I(weight^(1/2)) -2.314535 0.424250 -5.456 8.7e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.181 on 389 degrees of freedom
## Multiple R-squared: 0.7145, Adjusted R-squared: 0.713
## F-statistic: 486.7 on 2 and 389 DF, p-value: < 2.2e-16
# mpg regressed on year and the square root of year (year^(1/2)).
modys <- lm(
  mpg ~ year + I(year^(1/2)),
  data = auto
)
summary(modys)
##
## Call:
## lm(formula = mpg ~ year + I(year^(1/2)), data = auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2719 -5.0997 -0.8405 4.7305 18.2006
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2480.086 586.528 4.228 2.94e-05 ***
## year 34.833 7.729 4.507 8.71e-06 ***
## I(year^(1/2)) -585.630 134.688 -4.348 1.76e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.222 on 389 degrees of freedom
## Multiple R-squared: 0.3678, Adjusted R-squared: 0.3645
## F-statistic: 113.1 on 2 and 389 DF, p-value: < 2.2e-16
# Remove the attached Auto data frame from the search path.
detach("auto")

# Problem 2
# install.packages("ISLR")  # uncomment if the package is not installed
library(ISLR)
# Load the Carseats data set and list its columns.
data("Carseats")
colnames(Carseats)
## [1] "Sales" "CompPrice" "Income" "Advertising" "Population"
## [6] "Price" "ShelveLoc" "Age" "Education" "Urban"
## [11] "US"
# Keep only the response (Sales) and the three predictors used below.
carseats <- with(
  Carseats,
  data.frame(Sales = Sales, Price = Price, Urban = Urban, US = US)
)
# Scatterplot matrix of the four retained variables.
pairs(x = carseats)
# Multiple regression model to predict Sales from Price, Urban and US.
mlr_modCar <- lm(
  Sales ~ Price + Urban + US,
  data = carseats
)
summary(mlr_modCar)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
# Smaller multiple regression model: same response, Urban left out.
mlr_modsell <- lm(
  Sales ~ Price + US,
  data = carseats
)
summary(mlr_modsell)
##
## Call:
## lm(formula = Sales ~ Price + US, data = carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
# 95% confidence intervals for the coefficients of the smaller model.
confint(mlr_modsell, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## Price -0.06475984 -0.04419543
## USYes 0.69151957 1.70776632