hw 8 dm

Chapter 9: 5, 7, 8

5. a)

x1=runif (500) -0.5
x2=runif (500) -0.5
y=1*(x1^2-x2^2 > 0)

plot(x1,x2, col= (6 -y))

c<- glm(y ~ x1 + x2, family = "binomial")
summary(c)

## 
## Call:
## glm(formula = y ~ x1 + x2, family = "binomial")
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.246  -1.162  -1.097   1.172   1.282  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.03206    0.08968  -0.358    0.721
## x1           0.35145    0.31683   1.109    0.267
## x2           0.09408    0.31036   0.303    0.762
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 693.02  on 499  degrees of freedom
## Residual deviance: 691.70  on 497  degrees of freedom
## AIC: 697.7
## 
## Number of Fisher Scoring iterations: 3

data <- data.frame(x1 = x1, x2 = x2, y = y)
probs <- predict(c, data, type = "response")
preds <- rep(0, 500)
preds[probs > 0.47] <- 1
plot(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (4 - 1), pch = (3 - 1), xlab = "X1", ylab = "X2")
points(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (4 - 0), pch = (3 - 0))

e <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial")

## Warning: glm.fit: algorithm did not converge

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

summary(e)

## 
## Call:
## glm(formula = y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), family = "binomial")
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -1.213e-03  -2.000e-08  -2.000e-08   2.000e-08   1.291e-03  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)
## (Intercept)     -39.69   32913.43  -0.001    0.999
## poly(x1, 2)1    481.05  116802.77   0.004    0.997
## poly(x1, 2)2  32460.15  989545.80   0.033    0.974
## poly(x2, 2)1  -1434.51  123305.81  -0.012    0.991
## poly(x2, 2)2 -31954.11  929756.98  -0.034    0.973
## I(x1 * x2)     -262.78  396724.37  -0.001    0.999
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 6.9302e+02  on 499  degrees of freedom
## Residual deviance: 4.8228e-06  on 494  degrees of freedom
## AIC: 12
## 
## Number of Fisher Scoring iterations: 25

probs <- predict(e, data, type = "response")
preds <- rep(0, 500)
preds[probs > 0.47] <- 1
plot(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (7 - 1), xlab = "X1", ylab = "X2")
points(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (4 - 0))

library(e1071)
data$y <- as.factor(data$y)
g <- svm(y ~ x1 + x2, data, kernel = "linear", cost = 0.01)
preds <- predict(g, data)
plot(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (4 - 0), xlab = "X1", ylab = "X2")
points(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (4 - 1))

data$y <- as.factor(data$y)
h <- svm(y ~ x1 + x2, data, kernel = "radial", gamma = 1)
preds <- predict(h, data)
plot(data[preds == 0, ]$x1, data[preds == 0, ]$x2, col = (6 - 0), xlab = "X1", ylab = "X2")
points(data[preds == 1, ]$x1, data[preds == 1, ]$x2, col = (4 - 1))

i) THe svm with the non-linear kernel seems to be better at classifying the data.

library(ISLR)
a <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto$mpglevel <- as.factor(a)

set.seed(2021)
b <- tune(svm, mpglevel ~ ., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(b)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.01262821 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1  0.01 0.07384615 0.05674660
## 2  0.10 0.05333333 0.05252135
## 3  1.00 0.01262821 0.01778017
## 4  5.00 0.01775641 0.02086164
## 5 10.00 0.02025641 0.02319375

The lowest error is 0.02 for the radial kernel.

c <- tune(svm, mpglevel ~ ., data = Auto, kernel = "radial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), gamma = c(0.01, 0.1, 1, 5, 10))
summary(c)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.02551282 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1  0.01 0.57660256 0.05049905
## 2  0.10 0.08935897 0.04404841
## 3  1.00 0.07141026 0.03359556
## 4  5.00 0.05096154 0.03591683
## 5 10.00 0.02551282 0.02402645

d<- tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.01, 0.1, 1, 5, 10)), gamma = c(0.01, 0.1, 1, 5, 10))
summary(d)

## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.2730128 
## 
## - Detailed performance results:
##    cost     error dispersion
## 1  0.01 0.5332051 0.02282387
## 2  0.10 0.5332051 0.02282387
## 3  1.00 0.5126923 0.04311879
## 4  5.00 0.2908974 0.04599609
## 5 10.00 0.2730128 0.05012627

d_svm <- svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 100, gamma = 0.01)
plot(d_svm, Auto, mpg~cylinders)

plot(d_svm, Auto, mpg~year)

plot(d_svm, Auto, weight~displacement)

set.seed(2021)
index <- sample(nrow(OJ), 0.7*nrow(OJ))
train <- OJ[index, ]
test <- OJ[-index, ]

g<- svm(Purchase~., train, kernel = "linear", cost = 0.01)
summary(g)

## 
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  407
## 
##  ( 204 203 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

The training accuracy is 84% and the test accuracy is ~81%

pred<- predict(g, train)
table(train$Purchase, pred)

##     pred
##       CH  MM
##   CH 397  53
##   MM  67 232

(416 + 214)/(416+214+52+67)

## [1] 0.8411215

pred<- predict(g, test)
table(test$Purchase, pred)

##     pred
##       CH  MM
##   CH 171  32
##   MM  28  90

(162+97)/(162+97+23+39)

## [1] 0.8068536

d_out <- tune(svm, Purchase ~ ., data = train, kernel = "linear", ranges = list(cost = 0.01,0.1,1,10))
summary(d_out)

## 
## Error estimation of 'svm' using 10-fold cross validation: 0.178991

e) The training accuracy is 84% (error = ~16%) and the test accuracy is ~81%. The error/accuracy rates are about the same when using the best cost parameter.

e <- svm(Purchase ~ ., kernel = "linear", data = train, cost = d_out$best.parameter$cost)
pred_train <- predict(e, train)
table(train$Purchase, pred_train)

##     pred_train
##       CH  MM
##   CH 397  53
##   MM  67 232

(416+214)/(416+214+52+67)

## [1] 0.8411215

pred_test <- predict(e, test)
table(test$Purchase, pred_test)

##     pred_test
##       CH  MM
##   CH 171  32
##   MM  28  90

(162+97)/(162+97+23+39)

## [1] 0.8068536

f) With a radial kernel and cost = 0.01, all of the MM are misclassified in both the train and test sets. The same goes for the tuned cost parameter svm.

set.seed(2021)
g<- svm(Purchase~., train, kernel = "radial", cost = 0.01)
summary(g)

## 
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "radial", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  0.01 
## 
## Number of Support Vectors:  602
## 
##  ( 303 299 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

pred<- predict(g, train)
table(train$Purchase, pred)

##     pred
##       CH  MM
##   CH 450   0
##   MM 299   0

pred<- predict(g, test)
table(test$Purchase, pred)

##     pred
##       CH  MM
##   CH 203   0
##   MM 118   0

d_out <- tune(svm, Purchase ~ ., data = train, kernel = "radial", ranges = list(cost = 0.01,0.1,1,10))
summary(d_out)

## 
## Error estimation of 'svm' using 10-fold cross validation: 0.3991892

e <- svm(Purchase ~ ., kernel = "radial", data = train, cost = d_out$best.parameter$cost)
pred_train <- predict(e, train)
table(train$Purchase, pred_train)

##     pred_train
##       CH  MM
##   CH 450   0
##   MM 299   0

pred_test <- predict(e, test)
table(test$Purchase, pred_test)

##     pred_test
##       CH  MM
##   CH 203   0
##   MM 118   0

g) Accuracy improves with the polynomial kernel, achieving between a 59% and a 63% accuracy on the train and test sets.

set.seed(2021)
g<- svm(Purchase~., train, kernel = "polynomial", degree=2, cost = 0.01)
summary(g)

## 
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "polynomial", 
##     degree = 2, cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  0.01 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  603
## 
##  ( 304 299 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM

pred<- predict(g, train)
table(train$Purchase, pred)

##     pred
##       CH  MM
##   CH 450   0
##   MM 291   8

# accuracy = 63%
(467+5)/(467+5+276+1)

## [1] 0.6301736

pred<- predict(g, test)
table(test$Purchase, pred)

##     pred
##       CH  MM
##   CH 202   1
##   MM 117   1

# accuracy = 59%
(185+6)/(185+6+130)

## [1] 0.5950156

d_out <- tune(svm, Purchase ~ ., data = train, kernel = "polynomial", degree= 2, ranges = list(cost = 0.01,0.1,1,10))
summary(d_out)

## 
## Error estimation of 'svm' using 10-fold cross validation: 0.3978559

e <- svm(Purchase ~ ., kernel = "polynomial", degree=2, data = train, cost = d_out$best.parameter$cost)
pred_train <- predict(e, train)
table(train$Purchase, pred_train)

##     pred_train
##       CH  MM
##   CH 450   0
##   MM 291   8

# accuracy = 63%
(467+5)/(467+5+277)

## [1] 0.6301736

pred_test <- predict(e, test)
table(test$Purchase, pred_test)

##     pred_test
##       CH  MM
##   CH 202   1
##   MM 117   1

# accuracy = 59.5%
(185+6)/(185+6+130)

## [1] 0.5950156

h) The linear kernel gives the best results accuracy-wise with ranges of 81-84%.

hw 8 dm

Amber Chin

5/6/2021