set.seed(42)
n <- 500
x1 <- runif(n) - 0.5
x2 <- runif(n) - 0.5
y <- as.factor(1 * (x1^2 - x2^2 > 0))
data <- data.frame(x1, x2, y)
library(ggplot2)
ggplot(data, aes(x = x1, y = x2, color = y)) +
geom_point(alpha = 0.6) +
labs(title = "True Classes", color = "Class") +
theme_minimal()
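Because class 1 corresponds to x1^2 - x2^2 > 0, the true boundary is the pair of diagonal lines x2 = x1 and x2 = -x1. As a quick visual check (a minimal sketch reusing the plot above), we can overlay those lines:
# Overlay the true boundary lines x2 = x1 and x2 = -x1
ggplot(data, aes(x = x1, y = x2, color = y)) +
  geom_point(alpha = 0.6) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  geom_abline(slope = -1, intercept = 0, linetype = "dashed") +
  labs(title = "True Classes with True Boundary", color = "Class") +
  theme_minimal()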
glm_lin <- glm(y ~ x1 + x2, data = data, family = binomial)
summary(glm_lin)
##
## Call:
## glm(formula = y ~ x1 + x2, family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.09978 0.08976 1.112 0.266
## x1 -0.17659 0.30658 -0.576 0.565
## x2 -0.20067 0.30978 -0.648 0.517
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 691.79 on 499 degrees of freedom
## Residual deviance: 691.08 on 497 degrees of freedom
## AIC: 697.08
##
## Number of Fisher Scoring iterations: 3
data$pred_lin <- predict(glm_lin, type = "response") > 0.5
data$pred_lin <- factor(data$pred_lin, levels = c(FALSE, TRUE), labels = c("0","1"))
ggplot(data, aes(x = x1, y = x2, color = pred_lin)) +
geom_point(alpha = 0.6) +
labs(title = "Linear Logistic Regression Prediction",
color = "Predicted\nClass") +
theme_minimal()
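For a logistic fit, the predicted boundary is the line where the linear predictor equals zero, i.e. x2 = -(b0 + b1*x1)/b2. A sketch overlaying that line on the plot above (using coef(glm_lin)):
# Draw the fitted decision boundary b0 + b1*x1 + b2*x2 = 0
b <- coef(glm_lin)
ggplot(data, aes(x = x1, y = x2, color = pred_lin)) +
  geom_point(alpha = 0.6) +
  geom_abline(intercept = -b[1] / b[3], slope = -b[2] / b[3],
              linetype = "dashed") +
  labs(title = "Linear Logistic Decision Boundary",
       color = "Predicted\nClass") +
  theme_minimal()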
glm_nl <- glm(y ~ poly(x1, 2, raw = TRUE) + poly(x2, 2, raw = TRUE) +
I(x1 * x2),
data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
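These warnings are informative rather than incidental: with the quadratic terms included, the classes become (almost) perfectly separable, so the maximum-likelihood coefficients diverge and the fitted probabilities are pushed to 0 or 1. That is why the estimates and standard errors in the summary below are enormous while no term is significant. One quick way to see it (a sketch reusing glm_nl):
# Under (near-)separation the fitted probabilities pile up at 0 and 1
summary(predict(glm_nl, type = "response"))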
summary(glm_nl)
##
## Call:
## glm(formula = y ~ poly(x1, 2, raw = TRUE) + poly(x2, 2, raw = TRUE) +
## I(x1 * x2), family = binomial, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 10.41 138.85 0.075 0.940
## poly(x1, 2, raw = TRUE)1 183.96 3263.04 0.056 0.955
## poly(x1, 2, raw = TRUE)2 88648.36 844122.80 0.105 0.916
## poly(x2, 2, raw = TRUE)1 -331.24 4128.28 -0.080 0.936
## poly(x2, 2, raw = TRUE)2 -86338.24 820611.62 -0.105 0.916
## I(x1 * x2) -1396.21 15698.89 -0.089 0.929
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6.9179e+02 on 499 degrees of freedom
## Residual deviance: 8.2483e-05 on 494 degrees of freedom
## AIC: 12
##
## Number of Fisher Scoring iterations: 25
data$pred_nl <- predict(glm_nl, type = "response") > 0.5
data$pred_nl <- factor(data$pred_nl, levels = c(FALSE, TRUE), labels = c("0","1"))
ggplot(data, aes(x = x1, y = x2, color = pred_nl)) +
geom_point(alpha = 0.6) +
labs(title = "Non-linear Logistic Regression Prediction",
color = "Predicted\nClass") +
theme_minimal()
library(e1071)
svm_lin <- svm(y ~ x1 + x2, data = data, kernel = "linear", cost = 1, scale = FALSE)
data$pred_svm_lin <- predict(svm_lin, data)
ggplot(data, aes(x = x1, y = x2, color = pred_svm_lin)) +
geom_point(alpha = 0.6) +
labs(title = "Linear SVM Prediction",
color = "Predicted\nClass") +
theme_minimal()
svm_rbf <- svm(y ~ x1 + x2, data = data, kernel = "radial", gamma = 1, cost = 1, scale = FALSE)
data$pred_svm_rbf <- predict(svm_rbf, data)
ggplot(data, aes(x = x1, y = x2, color = pred_svm_rbf)) +
geom_point(alpha = 0.6) +
labs(title = "Non-linear (RBF) SVM Prediction",
color = "Predicted\nClass") +
theme_minimal()
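To visualize the decision regions themselves rather than just the predictions at the training points, one option (a minimal sketch using only objects defined above) is to classify a fine grid of points:
# Predict over a grid to reveal the RBF decision regions
grid <- expand.grid(x1 = seq(-0.5, 0.5, length.out = 200),
                    x2 = seq(-0.5, 0.5, length.out = 200))
grid$pred <- predict(svm_rbf, grid)
ggplot(grid, aes(x = x1, y = x2)) +
  geom_tile(aes(fill = pred), alpha = 0.3) +
  geom_point(data = data, aes(color = y), alpha = 0.6) +
  labs(title = "RBF SVM Decision Regions",
       fill = "Region", color = "Class") +
  theme_minimal()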
From these results we can see that the two linear methods (linear logistic regression and the linear SVM) can only produce a single straight decision boundary, which fails here because the true boundary is the pair of diagonal lines x2 = x1 and x2 = -x1. In contrast, the quadratic feature expansion and the RBF kernel both produce highly flexible decision boundaries that match the data's shape.
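To put numbers behind the visual impression, a quick sketch comparing the training misclassification rates of the four fits (using the prediction columns stored above):
# Training error rates; the flexible fits should be far lower
c(logit_linear    = mean(data$pred_lin != data$y),
  logit_quadratic = mean(data$pred_nl != data$y),
  svm_linear      = mean(data$pred_svm_lin != data$y),
  svm_rbf         = mean(data$pred_svm_rbf != data$y))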
library(ISLR)
library(e1071)
data(Auto)
Auto$mpg01 <- factor(ifelse(Auto$mpg > median(Auto$mpg), "High", "Low"))
set.seed(42)
tune_lin <- tune(svm,
mpg01 ~ . - mpg,
data = Auto,
kernel = "linear",
ranges = list(cost = c(0.01, 0.1, 1, 10, 100)),
tunecontrol = tune.control(cross = 5))
summary(tune_lin)
##
## Parameter tuning of 'svm':
##
## - sampling method: 5-fold cross validation
##
## - best parameters:
## cost
## 0.1
##
## - best performance: 0.08932165
##
## - Detailed performance results:
## cost error dispersion
## 1 1e-02 0.08935411 0.03618129
## 2 1e-01 0.08932165 0.04691167
## 3 1e+00 0.09948069 0.02287240
## 4 1e+01 0.11213892 0.02859924
## 5 1e+02 0.13008763 0.02081952
cv_results_lin <- data.frame(
cost = tune_lin$performances$cost,
error = tune_lin$performances$error
)
print(cv_results_lin)
## cost error
## 1 1e-02 0.08935411
## 2 1e-01 0.08932165
## 3 1e+00 0.09948069
## 4 1e+01 0.11213892
## 5 1e+02 0.13008763
Cross-validation shows that the smallest error (8.93%) occurs at very low cost values (0.01 and 0.1), so a soft margin appears optimal for these data. As the cost rises, the error rises as well, suggesting that a hard margin overfits here.
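The pattern is easier to see in a plot of CV error against cost on a log scale (a sketch reusing tune_lin$performances):
# CV error as a function of cost, log10 x-axis
ggplot(tune_lin$performances, aes(x = cost, y = error)) +
  geom_line() +
  geom_point() +
  scale_x_log10() +
  labs(title = "Linear SVM: 5-fold CV Error vs Cost",
       x = "Cost (log scale)", y = "CV error") +
  theme_minimal()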
set.seed(42)
tune_radial <- tune(svm,
mpg01 ~ . - mpg,
data = Auto,
kernel = "radial",
ranges = list(cost = c(0.1, 1, 10, 100),
gamma = c(0.5, 1, 2)))
summary(tune_radial)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost gamma
## 1 1
##
## - best performance: 0.07891026
##
## - Detailed performance results:
## cost gamma error dispersion
## 1 0.1 0.5 0.09166667 0.05222075
## 2 1.0 0.5 0.08660256 0.04333178
## 3 10.0 0.5 0.08397436 0.04088294
## 4 100.0 0.5 0.08910256 0.03762626
## 5 0.1 1.0 0.59679487 0.05312225
## 6 1.0 1.0 0.07891026 0.03633038
## 7 10.0 1.0 0.08910256 0.04132724
## 8 100.0 1.0 0.08910256 0.04132724
## 9 0.1 2.0 0.59679487 0.05312225
## 10 1.0 2.0 0.16544872 0.07914205
## 11 10.0 2.0 0.15525641 0.06985108
## 12 100.0 2.0 0.15525641 0.06985108
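The radial grid tells a clear story: the best combination is cost = 1 and gamma = 1 (CV error 7.89%), while a small cost paired with a large gamma sends the error to roughly 60%. A heatmap of the grid (a sketch reusing tune_radial$performances) makes this easier to scan:
# Heatmap of CV error over the (cost, gamma) grid
ggplot(tune_radial$performances,
       aes(x = factor(cost), y = factor(gamma), fill = error)) +
  geom_tile() +
  labs(title = "Radial SVM: 10-fold CV Error",
       x = "Cost", y = "Gamma", fill = "CV error") +
  theme_minimal()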
tune_poly <- tune(svm,
mpg01 ~ . - mpg,
data = Auto,
kernel = "polynomial",
ranges = list(cost = c(0.1, 1, 10),
degree = c(2, 3)))
summary(tune_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost degree
## 10 2
##
## - best performance: 0.5612179
##
## - Detailed performance results:
## cost degree error dispersion
## 1 0.1 2 0.5842949 0.04703306
## 2 1.0 2 0.5842949 0.04703306
## 3 10.0 2 0.5612179 0.06909693
## 4 0.1 3 0.5842949 0.04703306
## 5 1.0 3 0.5842949 0.04703306
## 6 10.0 3 0.5842949 0.04703306
The error stays high across every combination of cost and polynomial degree, meaning the polynomial kernel performs worse than random guessing on this problem. Increasing the cost from 0.1 or 1 to 10 at degree 2 helps slightly (the error drops from 58.4% to 56.1%), but that is still unacceptable. Degree 3 does not benefit from any cost value; its error stays at 58.4%. Overall, these results show that the polynomial kernel is a poor choice for these data.
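To make the "worse than random guessing" claim concrete: mpg01 is defined by a median split, so the two classes are roughly balanced and always predicting a single class would already give about 50% error. A quick check (sketch):
# No-information baseline: class proportions after the median split
prop.table(table(Auto$mpg01))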
library(ISLR2)
##
## Attaching package: 'ISLR2'
## The following object is masked _by_ '.GlobalEnv':
##
## Auto
## The following objects are masked from 'package:ISLR':
##
## Auto, Credit
library(e1071)
set.seed(42)
data(OJ)
n <- nrow(OJ)
train_idx <- sample(seq_len(n), size = 800)
OJ_train <- OJ[train_idx, ]
OJ_test <- OJ[-train_idx, ]
svc_lin01 <- svm(Purchase ~ ., data = OJ_train,
kernel = "linear", cost = 0.01, scale = TRUE)
summary(svc_lin01)
##
## Call:
## svm(formula = Purchase ~ ., data = OJ_train, kernel = "linear", cost = 0.01,
## scale = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: linear
## cost: 0.01
##
## Number of Support Vectors: 432
##
## ( 215 217 )
##
##
## Number of Classes: 2
##
## Levels:
## CH MM
Such a low cost combined with 432 support vectors, more than half of the 800 training observations, indicates a very soft margin. The model is likely underfitting, so we should expect relatively high training and test errors.
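One way to quantify this (a sketch using the index component of the fitted e1071 object) is the fraction of training observations that serve as support vectors:
# More than half the training points lie on or inside the margin
length(svc_lin01$index) / nrow(OJ_train)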
pred_train_lin01 <- predict(svc_lin01, OJ_train)
pred_test_lin01 <- predict(svc_lin01, OJ_test)
train_err_lin01 <- mean(pred_train_lin01 != OJ_train$Purchase)
test_err_lin01 <- mean(pred_test_lin01 != OJ_test$Purchase)
cat("Linear SVC (cost=0.01):\n",
" Training error =", round(train_err_lin01, 4), "\n",
" Test error =", round(test_err_lin01, 4), "\n")
## Linear SVC (cost=0.01):
## Training error = 0.1713
## Test error = 0.163
The training error rate is 17.13% and the test error rate is 16.3%.
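Beyond the overall error rates, a confusion matrix shows where the mistakes fall (a sketch reusing the test-set predictions):
# Test-set confusion matrix for the cost = 0.01 linear SVC
table(Predicted = pred_test_lin01, Actual = OJ_test$Purchase)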
set.seed(42)
tune_lin <- tune(svm,
Purchase ~ .,
data = OJ_train,
kernel = "linear",
ranges = list(cost = c(0.01, 0.1, 1, 5, 10)),
tunecontrol = tune.control(cross = 10))
summary(tune_lin)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.175
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.17750 0.02415229
## 2 0.10 0.17625 0.03356689
## 3 1.00 0.17500 0.02886751
## 4 5.00 0.18375 0.02703521
## 5 10.00 0.18625 0.02729087
best_lin <- tune_lin$best.model
pred_train_lin <- predict(best_lin, OJ_train)
pred_test_lin <- predict(best_lin, OJ_test)
train_err_lin <- mean(pred_train_lin != OJ_train$Purchase)
test_err_lin <- mean(pred_test_lin != OJ_test$Purchase)
cat("Linear SVC (cost=", best_lin$cost, "):\n",
" Training error =", round(train_err_lin, 4), "\n",
" Test error =", round(test_err_lin, 4), "\n", sep = "")
## Linear SVC (cost=1):
## Training error =0.1675
## Test error =0.163
set.seed(42)
tune_rad <- tune(svm,
Purchase ~ .,
data = OJ_train,
kernel = "radial",
ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune_rad)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 1
##
## - best performance: 0.18
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.38500 0.04199868
## 2 0.10 0.18125 0.03784563
## 3 1.00 0.18000 0.03343734
## 4 5.00 0.18625 0.03701070
## 5 10.00 0.19375 0.03738408
best_rad <- tune_rad$best.model
pred_train_rad <- predict(best_rad, OJ_train)
pred_test_rad <- predict(best_rad, OJ_test)
train_err_rad <- mean(pred_train_rad != OJ_train$Purchase)
test_err_rad <- mean(pred_test_rad != OJ_test$Purchase)
cat("RBF SVM (cost=", best_rad$cost, "):\n",
" Training error =", round(train_err_rad, 4), "\n",
" Test error =", round(test_err_rad, 4), "\n", sep = "")
## RBF SVM (cost=1):
## Training error =0.15
## Test error =0.1593
set.seed(42)
tune_poly <- tune(svm,
Purchase ~ .,
data = OJ_train,
kernel = "polynomial",
degree = 2,
ranges = list(cost = c(0.01, 0.1, 1, 5, 10)))
summary(tune_poly)
##
## Parameter tuning of 'svm':
##
## - sampling method: 10-fold cross validation
##
## - best parameters:
## cost
## 5
##
## - best performance: 0.18375
##
## - Detailed performance results:
## cost error dispersion
## 1 0.01 0.38625 0.04308019
## 2 0.10 0.31625 0.05529278
## 3 1.00 0.19250 0.04216370
## 4 5.00 0.18375 0.04041881
## 5 10.00 0.19000 0.03425801
best_poly <- tune_poly$best.model
pred_train_poly <- predict(best_poly, OJ_train)
pred_test_poly <- predict(best_poly, OJ_test)
train_err_poly <- mean(pred_train_poly != OJ_train$Purchase)
test_err_poly <- mean(pred_test_poly != OJ_test$Purchase)
cat("Poly SVM (deg=2, cost=", best_poly$cost, "):\n",
" Training error =", round(train_err_poly, 4), "\n",
" Test error =", round(test_err_poly, 4), "\n", sep = "")
## Poly SVM (deg=2, cost=5):
## Training error =0.1475
## Test error =0.1667
results <- data.frame(
Method = c("Linear SVC (tuned)", "RBF SVM (tuned)", "Poly SVM deg2"),
Cost = c(best_lin$cost, best_rad$cost, best_poly$cost),
TrainErr = c(train_err_lin, train_err_rad, train_err_poly),
TestErr = c(test_err_lin, test_err_rad, test_err_poly)
)
print(results)
## Method Cost TrainErr TestErr
## 1 Linear SVC (tuned) 1 0.1675 0.1629630
## 2 RBF SVM (tuned) 1 0.1500 0.1592593
## 3 Poly SVM deg2 5 0.1475 0.1666667
We can see here that the tuned RBF SVM offers the best performance, with the lowest test error (15.93%). Although the degree-2 polynomial SVM has the lowest training error, it has the highest test error (16.67%), a sign of mild overfitting.
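As a final check on the winner, we can inspect the tuned RBF model's test-set confusion matrix (a sketch reusing pred_test_rad):
# Where the tuned RBF SVM's remaining test errors occur
table(Predicted = pred_test_rad, Actual = OJ_test$Purchase)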