STA 6543 Chapter 9 HW Assignment #8

Problem 5

Part A

Generate a data set with n = 500 and p = 2.

set.seed(12345)
x1 <- runif (500) -0.5
x2 <- runif (500) -0.5
y <- 1*(x1^2 - x2^2 > 0)

Part B

Plot the observations.

plot(x1[y == 0], x2[y == 0], col = "green", xlab = "x1", ylab = "x2", pch = "+")
points(x1[y == 1], x2[y == 1], col = "blue", pch = 4)

Part C

**Fit logistic regression model using x1 and x2 as predictors.

log.fit <- glm(y ~ x1 + x2, family = binomial)
summary(log.fit)

## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.385  -1.207   1.011   1.130   1.328  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.07563    0.09032   0.837    0.402
## x1           0.46494    0.31237   1.488    0.137
## x2           0.41377    0.32373   1.278    0.201
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 692.18  on 499  degrees of freedom
## Residual deviance: 688.04  on 497  degrees of freedom
## AIC: 694.04
## 
## Number of Fisher Scoring iterations: 4

###Part D Apply model to training data and plot observations.

data <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
lm.prob <- predict(log.fit, data, type = "response")
lm.pred <- ifelse(lm.prob > 0.5, 1, 0)
data.pos <- data[lm.pred == 1, ]
data.neg <- data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "green", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "blue", pch = 4)

Part E

Fit a logistics model to the data using non-linear functions of x1 and x2 as predictors.

lm.fit <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = data, family = binomial)

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Part F

Apply the model to the training data and plot observations.

lm.prob <- predict(lm.fit, data, type = "response")
lm.pred <- ifelse(lm.prob > 0.5, 1, 0)
data.pos <- data[lm.pred == 1, ]
data.neg <- data[lm.pred == 0, ]
plot(data.pos$x1, data.pos$x2, col = "green", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg$x1, data.neg$x2, col = "blue", pch = 4)

Part G

Fit a support vector classifier to the data with x1 and x2 as predictors. Plot the observations.

svm.fit <- svm(y ~ x1 + x2, data, kernel = "linear", cost = 0.01)
svm.pred2 <- predict(svm.fit, data)
data.pos1 <- data[svm.pred2 == 1, ]
data.neg1 <- data[svm.pred2 == 0, ]
plot(data.pos1$x1, data.pos1$x2, col = "green", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg1$x1, data.neg1$x2, col = "blue", pch = 4)

Part H

Fit an SVM using non-linear kernel to the data & plot the observations.

svm.fit2 <- svm(as.factor(y) ~ x1 + x2, data, gamma = 1)
svm.pred2 <- predict(svm.fit2, data)
data.pos2 <- data[svm.pred2 == 1, ]
data.neg2 <- data[svm.pred2 == 0, ]
plot(data.pos2$x1, data.pos2$x2, col = "green", xlab = "x1", ylab = "x2", pch = "+")
points(data.neg2$x1, data.neg2$x2, col = "blue", pch = 4)

Problem 7

Use Auto datset.

attach(Auto)

Part A

Create binary variable that takes on a 1 for cars with gas mileage above the median, and a 0 for cars below.

median.auto <- median(Auto$mpg)
median.auto

## [1] 22.75

Auto$med <- ifelse(Auto$mpg > median.auto, 1, 0)
Auto$med <- as.factor(Auto$med)

Part B

Fit a support vector classifier & report the cross-validation errors.

set.seed(1)
svm.fita <- svm(med ~ ., data = Auto, kernel= "linear", cost = 0.1, scale = FALSE)
tune.out <- tune(svm, med~., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.01025641 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1 1e-02 0.07653846 0.03617137
## 2 1e-01 0.04596154 0.03378238
## 3 1e+00 0.01025641 0.01792836
## 4 5e+00 0.02051282 0.02648194
## 5 1e+01 0.02051282 0.02648194
## 6 1e+02 0.03076923 0.03151981

Interpretation

Cost = 1 which means it is the lowest cross-validation error.

Part C

Repeat with radial and polynomial basis kernels.

set.seed(1)
tune.out.poly <- tune(svm, med ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune.out.poly)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 0.5130128 
## 
## - Detailed performance results:
##    cost degree     error dispersion
## 1   0.1      2 0.5511538 0.04366593
## 2   1.0      2 0.5511538 0.04366593
## 3   5.0      2 0.5511538 0.04366593
## 4  10.0      2 0.5130128 0.08963366
## 5   0.1      3 0.5511538 0.04366593
## 6   1.0      3 0.5511538 0.04366593
## 7   5.0      3 0.5511538 0.04366593
## 8  10.0      3 0.5511538 0.04366593
## 9   0.1      4 0.5511538 0.04366593
## 10  1.0      4 0.5511538 0.04366593
## 11  5.0      4 0.5511538 0.04366593
## 12 10.0      4 0.5511538 0.04366593

set.seed(1)
tune.out.rad <- tune(svm, med ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out.rad)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##    10  0.01
## 
## - best performance: 0.02557692 
## 
## - Detailed performance results:
##    cost gamma      error dispersion
## 1   0.1 1e-02 0.08929487 0.04382379
## 2   1.0 1e-02 0.07403846 0.03522110
## 3   5.0 1e-02 0.04852564 0.03303346
## 4  10.0 1e-02 0.02557692 0.02093679
## 5   0.1 1e-01 0.07903846 0.03874545
## 6   1.0 1e-01 0.05371795 0.03525162
## 7   5.0 1e-01 0.02820513 0.03299190
## 8  10.0 1e-01 0.03076923 0.03375798
## 9   0.1 1e+00 0.55115385 0.04366593
## 10  1.0 1e+00 0.06384615 0.04375618
## 11  5.0 1e+00 0.05884615 0.04020934
## 12 10.0 1e+00 0.05884615 0.04020934
## 13  0.1 5e+00 0.55115385 0.04366593
## 14  1.0 5e+00 0.49493590 0.04724924
## 15  5.0 5e+00 0.48217949 0.05470903
## 16 10.0 5e+00 0.48217949 0.05470903
## 17  0.1 1e+01 0.55115385 0.04366593
## 18  1.0 1e+01 0.51794872 0.05063697
## 19  5.0 1e+01 0.51794872 0.04917316
## 20 10.0 1e+01 0.51794872 0.04917316
## 21  0.1 1e+02 0.55115385 0.04366593
## 22  1.0 1e+02 0.55115385 0.04366593
## 23  5.0 1e+02 0.55115385 0.04366593
## 24 10.0 1e+02 0.55115385 0.04366593

Part D

Make plots.

svm.linear <- svm(med ~ ., data = Auto, kernel = "linear", cost = 1)
svm.poly <- svm(med ~ ., data = Auto, kernel = "polynomial", cost = 10, degree = 2)
svm.rad <- svm(med ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)

plotpairs = function(fit) {
  for (name in names(Auto)[!(names(Auto) %in% c("mpg", "med", "name"))]) {
    plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
  }
}

plotpairs(svm.linear)

detach(Auto)

Problem 8

Use OJ dataset.

attach(OJ)

Part A

Create a training set with a random sample of 800 observations.

set.seed(1)
train <- sample(1070, 800)

training <- OJ[train, ]
testing <- OJ[-train, ]

Part B

Fit a support vector classifier to the training data using cost = 0.01, with Purchase as the response and the other variables as predictors.

svm.fita <- svm(Purchase ~ ., kernel = "linear", data = training, cost = 0.01)
summary(svm.fita)

## 
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "linear", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

Part C

Training and testing error rates.

ypreds <- predict(svm.fita, training)
table(predict = ypreds, truth = training$Purchase)

##        truth
## predict  CH  MM
##      CH 420  75
##      MM  65 240

(65 + 75)/(420 + 75 + 65 + 240)

## [1] 0.175

ypreds <- predict(svm.fita, testing)
table(predict = ypreds, truth = testing$Purchase)

##        truth
## predict  CH  MM
##      CH 153  33
##      MM  15  69

(15 + 33)/(153 + 33 + 15 + 69)

## [1] 0.1777778

Training Error Rate: 17.5% Testing Error Rate: 17.78%

Part D

Use tune() to select the optimal cost.

set.seed(1)
tune.out <- tune(svm, Purchase ~ ., data = OJ, kernel = "linear", ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10)))
summary(tune.out)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.1626168 
## 
## - Detailed performance results:
##    cost     error dispersion
## 1 1e-03 0.2373832 0.04561497
## 2 1e-02 0.1691589 0.04024604
## 3 1e-01 0.1663551 0.03984617
## 4 1e+00 0.1626168 0.03945456
## 5 5e+00 0.1654206 0.03917066
## 6 1e+01 0.1682243 0.03865942

bestmod <- tune.out$best.model
summary(bestmod)

## 
## Call:
## best.tune(method = svm, train.x = Purchase ~ ., data = OJ, ranges = list(cost = c(0.001, 
##     0.01, 0.1, 1, 5, 10)), kernel = "linear")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  1 
## 
## Number of Support Vectors:  442
## 
##  ( 221 221 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

Part E

Compute training and testing error rates.

ypred.best <- predict(bestmod, training)
table(predict = ypred.best, truth = training$Purchase)

##        truth
## predict  CH  MM
##      CH 424  69
##      MM  61 246

(61 + 69)/(424 + 69 + 61 + 246)

## [1] 0.1625

ypred.best <- predict(bestmod, testing)
table(predict = ypred.best, truth = testing$Purchase)

##        truth
## predict  CH  MM
##      CH 155  29
##      MM  13  73

(13 + 29)/(155 + 29 + 13 + 73)

## [1] 0.1555556

Training Error Rate: 16.25% Testing Error Rate: 15.56%

Part F

Repeat using support vector machine with a radial kernel.

set.seed(1)
svm.rad <- svm(Purchase ~ ., data = training, kernel = "radial")
summary(svm.rad)

## 
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  373
## 
##  ( 188 185 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

pred.rad <- predict(svm.rad, training)
table(predict = pred.rad, truth = training$Purchase)

##        truth
## predict  CH  MM
##      CH 441  77
##      MM  44 238

(44 + 77)/(441 + 77 + 44 + 238)

## [1] 0.15125

pred.rad <- predict(svm.rad, testing)
table(predict = pred.rad, truth = testing$Purchase)

##        truth
## predict  CH  MM
##      CH 151  33
##      MM  17  69

(17 + 33)/(151 + 33 + 17 + 69)

## [1] 0.1851852

Training Error Rate: 15.13% Testing Error Rate: 18.52%

Part G

Repeat using support vector machine with a polynomial kernel (degree = 2).

set.seed(1)
svm.poly <- svm(Purchase ~ ., data = training, kernel = "polynomial", degree = 2)
summary(svm.poly)

## 
## Call:
## svm(formula = Purchase ~ ., data = training, kernel = "polynomial", 
##     degree = 2)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  447
## 
##  ( 225 222 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

pred.poly <- predict(svm.poly, training)
table(predict = pred.poly, truth = training$Purchase)

##        truth
## predict  CH  MM
##      CH 449 110
##      MM  36 205

(36 + 110)/(449 + 110 + 36 + 205)

## [1] 0.1825

pred.poly <- predict(svm.poly, testing)
table(predict = pred.poly, truth = testing$Purchase)

##        truth
## predict  CH  MM
##      CH 153  45
##      MM  15  57

(15 + 45)/(153 + 45 + 15 + 57)

## [1] 0.2222222

Training Error Rate: 18.25% Testing Error Rate: 22.22%

Part H

Which approach gives the best results?

Radial seems to be the best option because it has the lowest training and testing errors.

STA 6543 Chapter 9 HW Assignment #8

Lacy Burke

5/6/2022

Problem 5

Part A

Part B

Part C

Part E

Part F

Part G

Part H

Problem 7

Part A

Part B

Part C

Part D

Problem 8

Part A

Part B

Part C

Part D

Part E

Part F

Part G

Part H