Problem 5

library(ISLR)     # Auto and OJ data sets
library(ggplot2)  # plotting
library(dplyr)    # data manipulation
library(e1071)    # svm() and tune()

set.seed(1)  # for reproducibility
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- ifelse(x1^2 - x2^2 > 0, 1, 0)  # true boundary is the quadratic curve x1^2 = x2^2
df <- data.frame(x1, x2, y = as.factor(y))
ggplot(df, aes(x1, x2, color = y)) + geom_point() +
  ggtitle("Original Class Labels")

# Logistic regression with linear terms only
glm.linear <- glm(y ~ x1 + x2, data = df, family = binomial)
probs.linear <- predict(glm.linear, type = "response")
pred.linear <- ifelse(probs.linear > 0.5, 1, 0)
ggplot(df, aes(x1, x2, color = as.factor(pred.linear))) + geom_point() +
  ggtitle("Logistic Regression (Linear) – Predicted Classes")

# Logistic regression with quadratic and interaction terms
glm.nonlin <- glm(y ~ poly(x1, 2) + poly(x2, 2) + I(x1 * x2), data = df, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
probs.nonlin <- predict(glm.nonlin, type = "response")
pred.nonlin <- ifelse(probs.nonlin > 0.5, 1, 0)
ggplot(df, aes(x1, x2, color = as.factor(pred.nonlin))) + geom_point() +
  ggtitle("Logistic Regression (Nonlinear) – Predicted Classes")

# Support vector classifier (linear kernel)
svc.linear <- svm(y ~ x1 + x2, data = df, kernel = "linear", cost = 1)
pred.svc <- predict(svc.linear, df)
ggplot(df, aes(x1, x2, color = pred.svc)) + geom_point() +
  ggtitle("SVM Linear Kernel – Predicted Classes")

# Support vector machine with radial (RBF) kernel
svc.rbf <- svm(y ~ x1 + x2, data = df, kernel = "radial", cost = 1)
pred.rbf <- predict(svc.rbf, df)
ggplot(df, aes(x1, x2, color = pred.rbf)) + geom_point() +
  ggtitle("SVM RBF Kernel – Predicted Classes")

- The linear logistic model and the linear SVC both produce linear decision boundaries, so neither can capture the true quadratic boundary.

- The nonlinear logistic regression improves the boundary substantially, though not perfectly; the glm.fit warnings reflect fitted probabilities near 0 or 1, i.e. quasi-complete separation once quadratic terms are included.

- The RBF kernel SVM captures the true nonlinear decision boundary very well (the training error comparison below quantifies this).
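
To put numbers behind these plots, here is a minimal sketch comparing training misclassification rates across the four fits; it reuses only the prediction objects defined above.

# Training misclassification rate for each of the four models
errs <- c(
  logistic.linear    = mean(pred.linear != df$y),
  logistic.nonlinear = mean(pred.nonlin != df$y),
  svc.linear         = mean(pred.svc != df$y),
  svm.rbf            = mean(pred.rbf != df$y)
)
round(errs, 3)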

Problem 7

data(Auto)
Auto <- Auto %>%
  mutate(mpg01 = as.factor(ifelse(mpg > median(mpg), 1, 0))) %>%  # factor response so svm() does classification
  select(-mpg, -name)

set.seed(1)
# Linear kernel: tune cost via 10-fold CV
tune.out <- tune(svm, mpg01 ~ ., data = Auto, kernel = "linear",
                 ranges = list(cost = c(0.001, 0.01, 0.1, 1, 10)))
summary(tune.out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##  0.01
## 
## - best performance: 0.1053223 
## 
## - Detailed performance results:
##    cost     error dispersion
## 1 1e-03 0.1088870 0.02541796
## 2 1e-02 0.1053223 0.03162078
## 3 1e-01 0.1083165 0.03461157
## 4 1e+00 0.1100350 0.03552713
## 5 1e+01 0.1101804 0.03557982
# Radial kernel: tune cost and gamma
tune.rbf <- tune(svm, mpg01 ~ ., data = Auto, kernel = "radial",
                 ranges = list(cost = c(0.1, 1, 10), gamma = c(0.5, 1, 2)))

# Polynomial kernel: tune cost and degree
tune.poly <- tune(svm, mpg01 ~ ., data = Auto, kernel = "polynomial",
                  ranges = list(cost = c(0.1, 1, 10), degree = c(2, 3)))

list(radial = summary(tune.rbf), poly = summary(tune.poly))
## $radial
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##     1   0.5
## 
## - best performance: 0.05955388 
## 
## - Detailed performance results:
##   cost gamma      error dispersion
## 1  0.1   0.5 0.06970597 0.03349539
## 2  1.0   0.5 0.05955388 0.03307096
## 3 10.0   0.5 0.06724373 0.03548038
## 4  0.1   1.0 0.08114160 0.02662778
## 5  1.0   1.0 0.06031574 0.02879835
## 6 10.0   1.0 0.06986628 0.02813494
## 7  0.1   2.0 0.12026254 0.01759096
## 8  1.0   2.0 0.07073989 0.02240443
## 9 10.0   2.0 0.08673891 0.02796514
## 
## 
## $poly
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      3
## 
## - best performance: 0.1113738 
## 
## - Detailed performance results:
##   cost degree     error dispersion
## 1  0.1      2 0.2269920 0.03627644
## 2  1.0      2 0.1651022 0.02910030
## 3 10.0      2 0.1592405 0.04433953
## 4  0.1      3 0.1376712 0.03070863
## 5  1.0      3 0.1228948 0.03188548
## 6 10.0      3 0.1113738 0.02617738
# Decision regions of the best radial model in the horsepower/weight plane
best.rbf <- tune.rbf$best.model
plot(best.rbf, Auto, horsepower ~ weight)

# Same view for the best polynomial model
best.poly <- tune.poly$best.model
plot(best.poly, Auto, horsepower ~ weight)
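
For a side-by-side summary, the best cross-validated error of each kernel can be read off the tune objects via their best.performance field:

# Best 10-fold CV error for each kernel
c(linear     = tune.out$best.performance,
  radial     = tune.rbf$best.performance,
  polynomial = tune.poly$best.performance)

Per the summaries above, the radial kernel wins clearly here: about 0.060 CV error, versus roughly 0.105 for linear and 0.111 for polynomial.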

Problem 8

data(OJ)
set.seed(1)
train_ind <- sample(1:nrow(OJ), 800)
OJ_train <- OJ[train_ind, ]
OJ_test <- OJ[-train_ind, ]

svm.linear <- svm(Purchase ~ ., data = OJ_train, kernel = "linear", cost = 0.01)
summary(svm.linear)
## 
## Call:
## svm(formula = Purchase ~ ., data = OJ_train, kernel = "linear", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
train.pred <- predict(svm.linear, OJ_train)
test.pred <- predict(svm.linear, OJ_test)
mean(train.pred != OJ_train$Purchase)
## [1] 0.175
mean(test.pred != OJ_test$Purchase)
## [1] 0.1777778
tune.linear <- tune(svm, Purchase ~ ., data = OJ_train, kernel = "linear",
                    ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(tune.linear)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.17125 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.17375 0.03884174
## 2  0.10 0.17875 0.03064696
## 3  1.00 0.17500 0.03061862
## 4 10.00 0.17125 0.03488573
best.linear <- tune.linear$best.model
mean(predict(best.linear, OJ_train) != OJ_train$Purchase)
## [1] 0.16375
mean(predict(best.linear, OJ_test) != OJ_test$Purchase)
## [1] 0.1481481
# Radial kernel: tune cost (gamma left at its default)
svm.radial <- tune(svm, Purchase ~ ., data = OJ_train, kernel = "radial",
                   ranges = list(cost = c(0.01, 0.1, 1, 10)))
summary(svm.radial)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.17625 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.39375 0.06568284
## 2  0.10 0.18250 0.05470883
## 3  1.00 0.17625 0.03793727
## 4 10.00 0.18125 0.04340139
# Polynomial kernel with degree fixed at 2; tune cost only
svm.poly <- tune(svm, Purchase ~ ., data = OJ_train, kernel = "polynomial",
                 ranges = list(cost = c(0.01, 0.1, 1, 10)), degree = 2)
summary(svm.poly)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.18625 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.39000 0.08287373
## 2  0.10 0.32375 0.06730166
## 3  1.00 0.20000 0.05137012
## 4 10.00 0.18625 0.05185785

- The radial kernel is often the most flexible, but on this data the tuned linear SVM achieved the lowest CV error (0.171, versus 0.176 for radial and 0.186 for polynomial) and a test error of about 0.148.

- Polynomial and linear kernels are simpler, but may underfit strongly nonlinear class boundaries.

- Optimal cost values vary by kernel, so tuning is essential; the sketch below collects test errors for all three tuned models.
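
A minimal sketch to collect those test error rates, reusing the tuned objects defined above (svm.radial and svm.poly are tune() results, so the fitted models live in their best.model components):

# Test misclassification rate for each tuned model
test.err <- function(fit) mean(predict(fit, OJ_test) != OJ_test$Purchase)
c(linear     = test.err(best.linear),
  radial     = test.err(svm.radial$best.model),
  polynomial = test.err(svm.poly$best.model))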