library("MASS")
library('ISLR')
## Warning: package 'ISLR' was built under R version 3.5.2
# Build the binary response `mpg01`: 1 when a car's mpg is above the median
# mpg of the Auto data, 0 otherwise. It is stored as numeric 0/1.
mpg <- Auto$mpg
mpg.med <- median(mpg)
mpg01 <- as.numeric(mpg > mpg.med)

# Work on a copy of Auto that also carries the new response column.
# attach() is intentionally not used here: it was called before `mpg01` was
# added (so the attached copy lacked that column), `mpg` was masked by the
# global of the same name, and the plotting/modelling code below passes its
# data frame explicitly.
newAuto <- Auto
newAuto$mpg01 <- mpg01
# Correlation of every numeric column with mpg01. The non-numeric `name`
# column is dropped by name before calling cor(); the full matrix is
# available as cor(numeric_cols) if needed.
numeric_cols <- newAuto[, names(newAuto) != "name"]
mpg01_cor <- cor(numeric_cols)[, "mpg01"]

# Absolute correlations, weakest to strongest.
sort(abs(mpg01_cor))
## acceleration year origin horsepower displacement
## 0.3468215 0.4299042 0.5136984 0.6670526 0.7534766
## weight cylinders mpg mpg01
## 0.7577566 0.7591939 0.8369392 1.0000000

# Signed correlations, in column order.
mpg01_cor
## mpg cylinders displacement horsepower weight
## 0.8369392 -0.7591939 -0.7534766 -0.6670526 -0.7577566
## acceleration year origin mpg01
## 0.3468215 0.4299042 0.5136984 1.0000000
The most significant correlations with mpg01 (excluding mpg itself) are with cylinders, weight, displacement, and horsepower. Each of these is negatively correlated with mpg01.

## Paired plots
# Scatterplot matrix of all numeric columns (the `name` column is excluded).
pairs(newAuto[, names(newAuto) != "name"])
The plots appear to indicate a relationship between mpg01 and mpg, displacement, horsepower, weight, and acceleration.

## Boxplots
# One boxplot per numeric column, split by mpg01 class.
# Columns are chosen by name (everything except the car `name` and the
# response itself) instead of a hard-coded 1:8, and the axes are labelled
# explicitly so the y axis shows the variable name rather than the indexing
# expression used to extract it.
boxplot_vars <- setdiff(colnames(newAuto), c("name", "mpg01"))
for (var_name in boxplot_vars) {
  boxplot(
    newAuto[[var_name]] ~ newAuto$mpg01,
    main = var_name,
    xlab = "mpg01",
    ylab = var_name
  )
}
Analysis of the boxplots suggests a relationship between mpg01 and cylinders, displacement, horsepower, and weight.
# Random 75% / 25% train/test split of newAuto.
# NOTE: no RNG seed is set, so the split (and every number reported below)
# changes from run to run; call set.seed() before this point to reproduce a
# particular run.
n_obs <- nrow(newAuto)
train <- sample(seq_len(n_obs), size = floor(0.75 * n_obs), replace = FALSE)
test <- -train  # negative indices select the rows not drawn for training
trainingData <- newAuto[train, ]
testingData <- newAuto[test, ]
mpg01test <- newAuto$mpg01[test]  # true labels of the held-out rows

# trainingData is intentionally not attach()ed, and the globals `mpg01`,
# `cylinders`, `displacement` and `horsepower` are no longer overwritten with
# training-set copies: the model fits below all pass `data = trainingData`,
# and overwriting `mpg01` would silently replace the full-length response
# with its training subset.
# Linear discriminant analysis of mpg01 on the four predictors most strongly
# associated with it, fitted on the training rows only.
lda_model <- lda(
  mpg01 ~ cylinders + weight + displacement + horsepower,
  data = trainingData
)
print(lda_model)
## Call:
## lda(mpg01 ~ cylinders + weight + displacement + horsepower, data = trainingData)
##
## Prior probabilities of groups:
## 0 1
## 0.4897959 0.5102041
##
## Group means:
## cylinders weight displacement horsepower
## 0 6.729167 3602.451 272.5000 129.47917
## 1 4.220000 2352.140 117.4033 79.34667
##
## Coefficients of linear discriminants:
## LD1
## cylinders -0.3914875442
## weight -0.0009897414
## displacement -0.0021990324
## horsepower 0.0031799656
# Classify the held-out rows with the fitted LDA model, then cross-tabulate
# the predicted class (rows) against the true mpg01 labels (columns).
pred.lda <- predict(lda_model, newdata = testingData)
table(pred.lda[["class"]], mpg01test)
## mpg01test
## 0 1
## 0 44 0
## 1 8 46
The LDA test error is:
# Share of held-out rows whose LDA-predicted class differs from the truth.
lda_misclassified <- pred.lda$class != mpg01test
mean(lda_misclassified)
## [1] 0.08163265
# Quadratic discriminant analysis of mpg01, fitted on the training rows.
# Unlike the LDA fit, this model also includes acceleration as a predictor.
qdaModel <- qda(
  mpg01 ~ displacement + cylinders + horsepower + weight + acceleration,
  data = trainingData
)
print(qdaModel)
## Call:
## qda(mpg01 ~ displacement + cylinders + horsepower + weight +
## acceleration, data = trainingData)
##
## Prior probabilities of groups:
## 0 1
## 0.4897959 0.5102041
##
## Group means:
## displacement cylinders horsepower weight acceleration
## 0 272.5000 6.729167 129.47917 3602.451 14.66597
## 1 117.4033 4.220000 79.34667 2352.140 16.44400
# Logistic regression of mpg01 on the same five predictors as the QDA fit,
# estimated on the training rows only.
mpg.logit <- glm(
  mpg01 ~ displacement + cylinders + horsepower + weight + acceleration,
  data = trainingData,
  family = "binomial"
)
print(summary(mpg.logit))
##
## Call:
## glm(formula = mpg01 ~ displacement + cylinders + horsepower +
## weight + acceleration, family = "binomial", data = trainingData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1843 -0.2368 0.1333 0.3756 3.3195
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 12.3082205 3.0274078 4.066 4.79e-05 ***
## displacement -0.0163097 0.0088984 -1.833 0.0668 .
## cylinders 0.1391592 0.3741415 0.372 0.7099
## horsepower -0.0503703 0.0225528 -2.233 0.0255 *
## weight -0.0013818 0.0009793 -1.411 0.1582
## acceleration -0.0881034 0.1410185 -0.625 0.5321
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 407.45 on 293 degrees of freedom
## Residual deviance: 164.49 on 288 degrees of freedom
## AIC: 176.49
##
## Number of Fisher Scoring iterations: 7
# Turn the fitted probabilities for the held-out rows into 0/1 class labels:
# a probability strictly above 0.5 is classified as 1, anything else as 0.
# Then cross-tabulate predictions (rows) against the true labels (columns).
pred <- ifelse(
  predict(mpg.logit, newdata = testingData, type = "response") > 0.5,
  1,
  0
)
table(pred, mpg01test)
## mpg01test
## pred 0 1
## 0 46 2
## 1 6 44
The logistic regression test error is:
# Share of held-out rows misclassified by the logistic regression.
logit_misclassified <- pred != mpg01test
mean(logit_misclassified)
## [1] 0.08163265