Chap 9 - Applied

Including Plots

You can also embed plots, for example:

Dat <- data.frame(x1,x2, fc)
svmfit <- svm(fc~., data = Dat, kernel = "polynomial", degree = 3,
              cost = 0.1, scale = FALSE)
plot(svmfit, Dat)

We have seen that we can ﬁt an SVM with a non-linear kernel in order to perform classiﬁcation using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.

Generate a data set with n = 500 and p = 2, such that the observations belong to two classes with a quadratic decision boundary between them. For instance, you can do this as follows:

x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- 1*(x1^2-x2^2 > 0) 
yf <- as.factor(y)
Data.2 <- data.frame(x1,x2,y,yf)

Plot the observations, colored according to their class labels. Your plot should display X 1 on the x-axis, and X 2 on the y-axis.

plot(x1,x2,col = yf)

Fit a logistic regression model to the data, using X 1 and X 2 as predictors.

Logit.model <- glm(formula = y~. -yf, data = Data.2, family = 'binomial')

Apply this model to the training data in order to obtain a predicted class label for each training observation. Plot the observations, colored according to the predicted class labels. The decision boundary should be linear.

Particion the Data

Data.3 <- Data.2[, -4]
library(caret)

## Loading required package: lattice

i.Data.3 <- createDataPartition(Data.3$y, p = 0.8, list = FALSE)
Train.Data.3 <- Data.3[i.Data.3,]
Test.Data.3 <- Data.3[-i.Data.3,]

Predict the value with Training Data

Logit.model <- glm(formula = y~. , data = Train.Data.3, family = 'binomial')
pred.logit <- predict(Logit.model, type = 'response')
pred.logit.Tb <- rep(0,400)
pred.logit.Tb[pred.logit > 0.5] <- 1
Tb <- table(Pred = pred.logit.Tb, Truth = Train.Data.3$y)
Tb

##     Truth
## Pred   0   1
##    0  63  18
##    1 124 195

(Tb[1,1] + Tb[2,2])/sum(Tb)

## [1] 0.645

plot(Train.Data.3$x1,Train.Data.3$x2, col = as.factor(pred.logit.Tb))

Now ﬁt a logistic regression model to the data using non-linear functions of X 1 and X 2 as predictors

Logit.model <- glm(formula = y~ I(x1^2) + I(x2^2), data = Train.Data.3, family = 'binomial')

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

pred.logit <- predict(Logit.model, type = 'response')
pred.logit.Tb <- rep(0,400)
pred.logit.Tb[pred.logit > 0.5] <- 1
Tb <- table(Pred = pred.logit.Tb, Truth = Train.Data.3$y)
Tb

##     Truth
## Pred   0   1
##    0 187   0
##    1   0 213

Table

(Tb[1,1] + Tb[2,2])/sum(Tb)

## [1] 1

Plot

plot(Train.Data.3$x1,Train.Data.3$x2, col = as.factor(pred.logit.Tb))

Fit a support vector classiﬁer to the data with X 1 and X 2 as predictors. Obtain a class prediction for each training observation. Plot the observations, colored according to the predicted class labels.

radial

Train.Data.3$y <- as.factor(Train.Data.3$y)
tune.out <- tune(svm, y~., data = Train.Data.3,
                 kernel = "radial", 
                 ranges = list(cost = c(0.1,1,10,100,1000)),
                 gamma = c(0.5,1,2,3,4))
pred.tune.out <- predict(tune.out$best.model,Train.Data.3)
plot(Train.Data.3$x1,Train.Data.3$x2, col = pred.tune.out)

Table

table(Pred = pred.tune.out, Truth = Train.Data.3$y)

##     Truth
## Pred   0   1
##    0 185   0
##    1   2 213

polynomial

tune.out <- tune(svm, y~., data = Train.Data.3,
                 kernel = "polynomial", 
                 ranges = list(cost = c(0.1,1,10,100,1000)),
                 gamma = c(0.5,1,2,3,4))
pred.tune.out <- predict(tune.out$best.model,Train.Data.3)
plot(Train.Data.3$x1,Train.Data.3$x2, col = pred.tune.out)

Table

table(Pred = pred.tune.out, Truth = Train.Data.3$y)

##     Truth
## Pred   0   1
##    0   0   0
##    1 187 213

In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.

Create a binary variable that takes on a 1 for cars with gas mileage above the median, and a 0 for cars with gas mileage below the median.

auto <- Auto[,-c(9)]
head(auto)

##   mpg cylinders displacement horsepower weight acceleration year origin
## 1  18         8          307        130   3504         12.0   70      1
## 2  15         8          350        165   3693         11.5   70      1
## 3  18         8          318        150   3436         11.0   70      1
## 4  16         8          304        150   3433         12.0   70      1
## 5  17         8          302        140   3449         10.5   70      1
## 6  15         8          429        198   4341         10.0   70      1

auto <- auto %>%
        mutate(mpgbn = ifelse(mpg > median(mpg), 1,0))
auto$mpgbn <- as.factor(auto$mpgbn)
auto <- auto[, -1]

Fit a support vector classiﬁer to the data with various values of cost, in order to predict whether a car gets high or low gas mileage. Report the cross-validation errors associated with different values of this parameter. Comment on your results.

library(caret)
set.seed(30)
i.F.Auto <- createFolds(y = auto$mpgbn, k = 10, list = TRUE)
poly.error.rate <- matrix(NA ,10, 100)
cost <- seq(0.01,10, length.out = 100)
degree <- seq(0.01,10, length.out = 100)
gamma <- seq(0.01,10, length.out = 100)

for(i in 1:10) {
        train.dat <- auto[i.F.Auto[[i]],]
        cv.dat <- auto[-i.F.Auto[[i]],]    
        for(j in 1:100)
        {svm.fit <-svm(mpgbn~., data = train.dat, kernel = "polynomial", degree = degree[j],
              cost = 0.01, scale = TRUE)
        pred <- predict(svm.fit, cv.dat)
        tb <- table(pred, cv.dat$mpgbn)
        poly.error.rate[i,j] <- 1-((tb[1,1]+tb[1,2])/sum(tb))}
}
cv.poly.error <- apply(poly.error.rate, 2, mean)

rd.error.rate <- matrix(NA ,10, 100)

tune.out <- tune(svm, mpgbn~., data = auto,
                 kernel = "radial",
                 range = list(cost = cost),
                 gamma = gamma)

for(i in 1:10) {
        train.dat <- auto[i.F.Auto[[i]],]
        cv.dat <- auto[-i.F.Auto[[i]],]    
        for(j in 1:100)
        {svm.fit <-svm(mpgbn~., data = train.dat, kernel = "radial", gamma = gamma[j],
              cost = 0.6154545, scale = TRUE)
        pred <- predict(svm.fit, cv.dat)
        tb <- table(pred, cv.dat$mpgbn)
        rd.error.rate[i,j] <- 1-((tb[1,1]+tb[1,2])/sum(tb))}
}

rd.cv.Error <- apply(rd.error.rate,2,mean)

round(rd.cv.Error[which.min(rd.cv.Error)],digits = 3); gamma[which.min(rd.cv.Error)]

## [1] 0.524

## [1] 1.523636

table

	Error	Deg/Gam
Radial	0.524	1.523636
Poly	0.625	1.019091

poly.svm.best <- svm(mpgbn~., data = auto, kernel="polynomial", cost = 0.01, degree = 1.019091, scale = TRUE)
rd.svm.best <- svm(mpgbn~., data = auto, kernel="radial", cost = 0.01, gamma = 10, scale = TRUE)
par(mfrow = c(1,2))
plot(rd.svm.best, auto, displacement~weight)

This problem involves the OJ data set which is part of the ISLR package.

Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations.

oj <- OJ
i.Train.o <- createDataPartition(y = oj$Purchase, p = 800/nrow(oj), list = FALSE)   
Train.o <- oj[i.Train.o, ]
Test.o <- oj[-i.Train.o, ]

Fit a support vector classiﬁer to the training data using cost=0.01, with Purchase as the response and the other variables as predictors. Use the summary() function to produce summary statistics, and describe the results obtained.

oj.svm.fit <- svm(Purchase ~. , data = Train.o, cost = 0.01,
                  kernel = 'linear')
summary(oj.svm.fit)

## 
## Call:
## svm(formula = Purchase ~ ., data = Train.o, cost = 0.01, kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
##       gamma:  0.05555556 
## 
## Number of Support Vectors:  448
## 
##  ( 224 224 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

tr.pred <- predict(oj.svm.fit)
ts.pred <- predict(oj.svm.fit, newdata = Test.o)
Tr.table <- table(Predict = tr.pred, Truth = Train.o$Purchase)
Ts.table <- table(Predict = ts.pred, Truth = Test.o$Purchase)

What are the training and test error rates?

(Tr.table[1,2]+Tr.table[2,1])/sum(Tr.table); (Ts.table[1,2]+Ts.table[2,1])/sum(Ts.table)

## [1] 0.1735331

## [1] 0.1375465

Use the tune() function to select an optimal cost. Consider values in the range 0.01 to 10.

cost <- seq(0.01,10,100)
tune.oj.svm.fit <- tune(svm, Purchase~., data = oj,
                        kernel = 'linear',
                        ranges = list(cost = cost))
tune.oj.svm.fit$best.model

## 
## Call:
## best.tune(method = svm, train.x = Purchase ~ ., data = oj, ranges = list(cost = cost), 
##     kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
##       gamma:  0.05555556 
## 
## Number of Support Vectors:  560

The optimal cost is 0.01, so no need to calculate

Repeat parts (b) through (e) using a support vector machine with a radial kernel. Use the default value for gamma.

cost <- seq(0.01,10,100)
tune.oj.rd.svm.fit <- tune(svm, Purchase~., data = Train.o,
                        kernel = 'radial',
                        ranges = list(cost = cost))

tr.rd.pred <- predict(tune.oj.rd.svm.fit$best.model, Train.o)
ts.rd.pred <- predict(tune.oj.rd.svm.fit$best.model, Test.o)
table(pred = tr.rd.pred, truth = Train.o$Purchase)

##     truth
## pred  CH  MM
##   CH 489 312
##   MM   0   0

table(pred = ts.rd.pred, truth = Test.o$Purchase)

##     truth
## pred  CH  MM
##   CH 164 105
##   MM   0   0

radial is not fit.

Repeat parts (b) through (e) using a support vector machine with a polynomial kernel. Set degree=2.

tune.oj.polu.svm.fit <- tune(svm, Purchase~., data = Train.o,
                        kernel = 'polynomial',
                        ranges = list(cost = cost),
                        degree = 2)
tr.rd.pred <- predict(tune.oj.polu.svm.fit$best.model, Train.o)
ts.rd.pred <- predict(tune.oj.polu.svm.fit$best.model, Test.o)
table(pred = tr.rd.pred, truth = Train.o$Purchase)

##     truth
## pred  CH  MM
##   CH 489 312
##   MM   0   0

table(pred = ts.rd.pred, truth = Test.o$Purchase)

##     truth
## pred  CH  MM
##   CH 164 105
##   MM   0   0

Linear approach is rock!

Chap 9 - Applied

Bruce Kim

October 6, 2016

Applied

Including Plots