a)

library("MASS")
library('ISLR')
## Warning: package 'ISLR' was built under R version 3.5.2
# Build the binary response mpg01: 1 when a car's mpg exceeds the sample
# median of mpg, 0 otherwise (stored as numeric, not logical).
mpg <- Auto[["mpg"]]
mpg.med <- median(mpg)
mpg01 <- as.numeric(mpg - mpg.med > 0)

# Work on a copy of Auto so the new column is added to newAuto only.
newAuto <- Auto
# NOTE(review): attach() is generally discouraged; it is kept here because
# later code may rely on the attached columns. The attach happens before
# mpg01 is added, so the attached copy has no mpg01 column.
attach(newAuto)
## The following object is masked _by_ .GlobalEnv:
## 
##     mpg
newAuto[["mpg01"]] <- mpg01

b)

correlation

# Absolute correlation of each remaining column with mpg01, weakest first.
# Column 9 is dropped before calling cor(); mpg01 is then the 9th column of
# the correlation matrix, selected here by name.
# cor(newAuto[, -9])
sort(abs(cor(newAuto[, -9])[, "mpg01"]))
## acceleration         year       origin   horsepower displacement 
##    0.3468215    0.4299042    0.5136984    0.6670526    0.7534766 
##       weight    cylinders          mpg        mpg01 
##    0.7577566    0.7591939    0.8369392    1.0000000
# Signed correlations with mpg01 (same matrix as above, without abs/sort).
cor(newAuto[, -9])[, "mpg01"]
##          mpg    cylinders displacement   horsepower       weight 
##    0.8369392   -0.7591939   -0.7534766   -0.6670526   -0.7577566 
## acceleration         year       origin        mpg01 
##    0.3468215    0.4299042    0.5136984    1.0000000

The most significant correlations with mpg01 (excepting mpg) are with horsepower, displacement, weight, and cylinders. Each of these is negatively correlated with mpg01.

## Paired plots

# Scatterplot matrix of every column of newAuto except column 9.
pairs(newAuto[, -9])

The plots appear to indicate a relationship between mpg01 and mpg, displacement, horsepower, weight, and acceleration.

## Boxplots

# Draw one boxplot per column 1..8 of newAuto, split by mpg01 and titled
# with the column's name.
for (i in seq_len(8)) {
  boxplot(newAuto[, i] ~ mpg01, data = newAuto, main = names(newAuto)[i])
}

Analysis of the boxplots suggests a relationship between mpg01 and cylinders, displacement, horsepower, and weight.

c)

# Random 75% / 25% train/test split of the rows of newAuto.
#
# Fix the RNG seed so the split (and every downstream error rate) is
# reproducible. The outputs recorded in this document came from an earlier
# run and will not match a seeded re-run exactly.
set.seed(1)

# Row indices of the training set. `replace` is spelled out in full rather
# than relying on partial argument matching, and the size is derived from
# the same data frame that is being indexed.
train <- sample(
  seq_len(nrow(newAuto)),
  size = floor(0.75 * nrow(newAuto)),
  replace = FALSE
)
# Negative indices select every row that is not in the training set.
test <- -train

trainingData <- newAuto[train, ]
testingData <- newAuto[test, ]
# True class labels for the held-out rows, used to score each classifier.
mpg01test <- newAuto$mpg01[test]

d)

# NOTE(review): attach() and the global copies below are kept as-is for
# compatibility; the model calls visible in this file pass
# data = trainingData and so do not depend on them.
attach(trainingData)
## The following objects are masked _by_ .GlobalEnv:
## 
##     mpg, mpg01
## The following objects are masked from newAuto:
## 
##     acceleration, cylinders, displacement, horsepower, mpg, name,
##     origin, weight, year
# Global copies of the training-set columns; these take precedence over the
# attached data frames when the names are looked up.
mpg01 <- trainingData[["mpg01"]]
cylinders <- trainingData[["cylinders"]]
displacement <- trainingData[["displacement"]]
horsepower <- trainingData[["horsepower"]]


# Linear discriminant analysis of mpg01 on the four predictors singled out
# in the exploratory analysis, fitted on the training rows only.
lda_model <- lda(
  mpg01 ~ cylinders + weight + displacement + horsepower,
  data = trainingData
)
# Print the fitted model.
lda_model
## Call:
## lda(mpg01 ~ cylinders + weight + displacement + horsepower, data = trainingData)
## 
## Prior probabilities of groups:
##         0         1 
## 0.4897959 0.5102041 
## 
## Group means:
##   cylinders   weight displacement horsepower
## 0  6.729167 3602.451     272.5000  129.47917
## 1  4.220000 2352.140     117.4033   79.34667
## 
## Coefficients of linear discriminants:
##                        LD1
## cylinders    -0.3914875442
## weight       -0.0009897414
## displacement -0.0021990324
## horsepower    0.0031799656
# Classify the held-out rows with the LDA fit, then cross-tabulate the
# predicted class (rows) against the true label (columns).
pred.lda <- predict(lda_model, newdata = testingData)
table(pred.lda[["class"]], mpg01test)
##    mpg01test
##      0  1
##   0 44  0
##   1  8 46

Our test error is:

# Share of held-out rows whose LDA-predicted class differs from the truth.
mean(pred.lda[["class"]] != mpg01test)
## [1] 0.08163265

e)

# Quadratic discriminant analysis of mpg01 on five predictors, fitted on the
# training rows only.
qdaModel <- qda(
  mpg01 ~ displacement + cylinders + horsepower + weight + acceleration,
  data = trainingData
)
# Print the fitted model.
qdaModel
## Call:
## qda(mpg01 ~ displacement + cylinders + horsepower + weight + 
##     acceleration, data = trainingData)
## 
## Prior probabilities of groups:
##         0         1 
## 0.4897959 0.5102041 
## 
## Group means:
##   displacement cylinders horsepower   weight acceleration
## 0     272.5000  6.729167  129.47917 3602.451     14.66597
## 1     117.4033  4.220000   79.34667 2352.140     16.44400

# Evaluate the QDA fit on the held-out rows, in the same way the LDA and
# logistic-regression sections are evaluated: confusion table of predicted
# class (rows) against true label (columns), then the test error rate.
pred.qda <- predict(qdaModel, testingData)
table(pred.qda$class, mpg01test)
mean(pred.qda$class != mpg01test)

f)

# Logistic regression of mpg01 on five predictors, fitted on the training
# rows only, followed by the coefficient summary.
mpg.logit <- glm(
  mpg01 ~ displacement + cylinders + horsepower + weight + acceleration,
  family = "binomial",
  data = trainingData
)
summary(mpg.logit)
## 
## Call:
## glm(formula = mpg01 ~ displacement + cylinders + horsepower + 
##     weight + acceleration, family = "binomial", data = trainingData)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1843  -0.2368   0.1333   0.3756   3.3195  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  12.3082205  3.0274078   4.066 4.79e-05 ***
## displacement -0.0163097  0.0088984  -1.833   0.0668 .  
## cylinders     0.1391592  0.3741415   0.372   0.7099    
## horsepower   -0.0503703  0.0225528  -2.233   0.0255 *  
## weight       -0.0013818  0.0009793  -1.411   0.1582    
## acceleration -0.0881034  0.1410185  -0.625   0.5321    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 407.45  on 293  degrees of freedom
## Residual deviance: 164.49  on 288  degrees of freedom
## AIC: 176.49
## 
## Number of Fisher Scoring iterations: 7
# Fitted response-scale predictions for the held-out rows, rounded to 0/1
# class labels, then cross-tabulated against the true labels.
pred <- round(predict(mpg.logit, newdata = testingData, type = "response"))
table(pred, mpg01test)
##     mpg01test
## pred  0  1
##    0 46  2
##    1  6 44

Testing error:

# Share of held-out rows whose predicted label differs from the truth.
mean(mpg01test != pred)
## [1] 0.08163265