library(ggplot2)
library(e1071)
library(ISLR2)

#5 a.

set.seed(1)
x1 <- runif(500) - 0.5
x2 <- runif(500) - 0.5
y <- ifelse(x1^2 - x2^2 > 0, 1, 0)  # class 1 where x1^2 > x2^2; the true boundary is the pair of lines x2 = ±x1

data <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))
ggplot(data, aes(x = x1, y = x2, color = y)) +
  geom_point(alpha = 0.7) +
  labs(title = "Quadratic Decision Boundary", color = "Class") +
  theme_minimal()

logit_model <- glm(y ~ x1 + x2, data = data, family = binomial)
summary(logit_model)
## 
## Call:
## glm(formula = y ~ x1 + x2, family = binomial, data = data)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.087260   0.089579  -0.974    0.330
## x1           0.196199   0.316864   0.619    0.536
## x2          -0.002854   0.305712  -0.009    0.993
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 692.18  on 499  degrees of freedom
## Residual deviance: 691.79  on 497  degrees of freedom
## AIC: 697.79
## 
## Number of Fisher Scoring iterations: 3
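
None of the coefficients is statistically significant, which is expected: the classes depend on X1 and X2 only through their squares, so no linear function of X1 and X2 can separate them.
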
data$y_hat_linear <- ifelse(predict(logit_model, type = "response") > 0.5, 1, 0)

ggplot(data, aes(x = x1, y = x2, color = as.factor(y_hat_linear))) +
  geom_point(alpha = 0.7) +
  labs(title = "Logistic Regression (Linear)", color = "Predicted Class") +
  theme_minimal()

data$x1_sq <- data$x1^2
data$x2_sq <- data$x2^2
data$x1_x2 <- data$x1 * data$x2

logit_nonlin <- glm(y ~ x1 + x2 + x1_sq + x2_sq + x1_x2, data = data, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
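
These warnings are expected: once the squared terms are included, the two classes are perfectly separated (the true boundary is exactly x1_sq - x2_sq = 0), so the maximum-likelihood estimates diverge and the fitted probabilities are pushed to 0 or 1.
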
data$y_hat_nonlin <- ifelse(predict(logit_nonlin, type = "response") > 0.5, 1, 0)

ggplot(data, aes(x = x1, y = x2, color = as.factor(y_hat_nonlin))) +
  geom_point(alpha = 0.7) +
  labs(title = "Logistic Regression (Non-linear Features)", color = "Predicted Class") +
  theme_minimal()

svm_linear <- svm(y ~ x1 + x2, data = data, kernel = "linear", cost = 0.01)

data$y_hat_svm_linear <- predict(svm_linear)

ggplot(data, aes(x = x1, y = x2, color = y_hat_svm_linear)) +
  geom_point(alpha = 0.7) +
  labs(title = "SVM with Linear Kernel", color = "Predicted Class") +
  theme_minimal()

svm_rbf <- svm(y ~ x1 + x2, data = data, kernel = "radial", gamma = 1, cost = 1)

data$y_hat_svm_rbf <- predict(svm_rbf)

ggplot(data, aes(x = x1, y = x2, color = y_hat_svm_rbf)) +
  geom_point(alpha = 0.7) +
  labs(title = "SVM with RBF Kernel", color = "Predicted Class") +
  theme_minimal()

The radial-kernel SVM, like the logistic regression with quadratic terms, recovers the non-linear decision boundary, while plain logistic regression and the linear-kernel SVM are restricted to a linear boundary and fail to separate the classes.
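
As a quick numerical check (a short sketch using the predictions already stored in data; no new models are fit), the training error rates tell the same story:

# Training misclassification rate of each fit (true label vs. stored prediction)
mean(data$y_hat_linear != y)            # logistic regression, linear terms
mean(data$y_hat_nonlin != y)            # logistic regression, quadratic terms
mean(data$y_hat_svm_linear != data$y)   # SVM, linear kernel
mean(data$y_hat_svm_rbf != data$y)      # SVM, radial kernel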

#7 a.

data(Auto)
Auto <- na.omit(Auto)

Auto$mpg_binary <- ifelse(Auto$mpg > median(Auto$mpg), 1, 0)
Auto$mpg_binary <- as.factor(Auto$mpg_binary)
Auto_svm <- subset(Auto, select = -c(mpg))

set.seed(1)
tune_out_linear <- tune(svm, mpg_binary ~ ., data = Auto_svm,
                        kernel = "linear",
                        ranges = list(cost = c(0.001, 0.01, 0.1, 1, 10, 100)))

summary(tune_out_linear)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##   0.1
## 
## - best performance: 0.08673077 
## 
## - Detailed performance results:
##    cost      error dispersion
## 1 1e-03 0.13525641 0.05661708
## 2 1e-02 0.08923077 0.04698309
## 3 1e-01 0.08673077 0.04040897
## 4 1e+00 0.09961538 0.04923181
## 5 1e+01 0.11237179 0.05701890
## 6 1e+02 0.11750000 0.06208951
best_model_linear <- tune_out_linear$best.model
set.seed(1)
tune_out_radial <- tune(svm, mpg_binary ~ ., data = Auto_svm,
                        kernel = "radial",
                        ranges = list(cost = c(0.1, 1, 10), gamma = c(0.5, 1, 2)))

summary(tune_out_radial)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost gamma
##    10     1
## 
## - best performance: 0.07897436 
## 
## - Detailed performance results:
##   cost gamma      error dispersion
## 1  0.1   0.5 0.08410256 0.04164179
## 2  1.0   0.5 0.08673077 0.04708817
## 3 10.0   0.5 0.09173077 0.04008042
## 4  0.1   1.0 0.55115385 0.04366593
## 5  1.0   1.0 0.07903846 0.04891067
## 6 10.0   1.0 0.07897436 0.04869339
## 7  0.1   2.0 0.55115385 0.04366593
## 8  1.0   2.0 0.13769231 0.06926822
## 9 10.0   2.0 0.13512821 0.06692968
set.seed(1)
tune_out_poly <- tune(svm, mpg_binary ~ ., data = Auto_svm,
                      kernel = "polynomial",
                      ranges = list(cost = c(0.1, 1, 10), degree = c(2, 3, 4)))

summary(tune_out_poly)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 0.520641 
## 
## - Detailed performance results:
##   cost degree     error dispersion
## 1  0.1      2 0.5511538 0.04366593
## 2  1.0      2 0.5511538 0.04366593
## 3 10.0      2 0.5206410 0.08505283
## 4  0.1      3 0.5511538 0.04366593
## 5  1.0      3 0.5511538 0.04366593
## 6 10.0      3 0.5511538 0.04366593
## 7  0.1      4 0.5511538 0.04366593
## 8  1.0      4 0.5511538 0.04366593
## 9 10.0      4 0.5511538 0.04366593
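
Comparing the kernels directly (a quick summary pulled from the tune objects above): the radial kernel achieves the lowest cross-validated error (about 0.079, versus 0.087 for the best linear fit), while the polynomial kernel's error of roughly 0.52 is close to the no-information rate, since the median split makes the two classes balanced.

# Best 10-fold CV error for each kernel, taken from the tuning runs above
c(linear     = tune_out_linear$best.performance,
  radial     = tune_out_radial$best.performance,
  polynomial = tune_out_poly$best.performance)
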
# plot() for an svm fit can only display two predictors at a time, so choose a pair of features to visualize
plot(best_model_linear, Auto_svm, horsepower ~ weight)

# The same for another feature pair, and for the tuned radial-kernel model:
plot(best_model_linear, Auto_svm, weight ~ displacement)

plot(tune_out_radial$best.model, Auto_svm, acceleration ~ weight)

#8 a.

set.seed(1)
train_idx <- sample(1:nrow(OJ), 800)
train <- OJ[train_idx, ]
test <- OJ[-train_idx, ]
svm_linear <- svm(Purchase ~ ., data = train, kernel = "linear", cost = 0.01, scale = TRUE)
summary(svm_linear)
## 
## Call:
## svm(formula = Purchase ~ ., data = train, kernel = "linear", cost = 0.01, 
##     scale = TRUE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
train_pred <- predict(svm_linear, train)
train_error <- mean(train_pred != train$Purchase)

test_pred <- predict(svm_linear, test)
test_error <- mean(test_pred != test$Purchase)

train_error
## [1] 0.175
test_error
## [1] 0.1777778
set.seed(2)
tuned_linear <- tune(svm, Purchase ~ ., data = train, kernel = "linear",
                     ranges = list(cost = c(0.01, 0.1, 1, 10, 100)))

summary(tuned_linear)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##   100
## 
## - best performance: 0.16875 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1 1e-02 0.17625 0.04059026
## 2 1e-01 0.17125 0.04168749
## 3 1e+00 0.17000 0.04090979
## 4 1e+01 0.17000 0.03736085
## 5 1e+02 0.16875 0.03919768
best_linear <- tuned_linear$best.model

train_error_best <- mean(predict(best_linear, train) != train$Purchase)
test_error_best <- mean(predict(best_linear, test) != test$Purchase)

train_error_best
## [1] 0.165
test_error_best
## [1] 0.1481481
set.seed(3)
tuned_radial <- tune(svm, Purchase ~ ., data = train, kernel = "radial",
                     ranges = list(cost = c(0.01, 0.1, 1, 10)))

summary(tuned_radial)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##     1
## 
## - best performance: 0.17875 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.39375 0.05344065
## 2  0.10 0.18875 0.04427267
## 3  1.00 0.17875 0.03729108
## 4 10.00 0.19250 0.04090979
best_radial <- tuned_radial$best.model
mean(predict(best_radial, train) != train$Purchase)
## [1] 0.15125
mean(predict(best_radial, test) != test$Purchase)
## [1] 0.1851852
set.seed(4)
tuned_poly <- tune(svm, Purchase ~ ., data = train, kernel = "polynomial",
                   degree = 2,   # degree fixed at 2; only cost is tuned
                   ranges = list(cost = c(0.01, 0.1, 1, 10)))

summary(tuned_poly)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##    10
## 
## - best performance: 0.185 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.39125 0.06096732
## 2  0.10 0.32250 0.07139483
## 3  1.00 0.19875 0.05152197
## 4 10.00 0.18500 0.02415229
best_poly <- tuned_poly$best.model
mean(predict(best_poly, train) != train$Purchase)
## [1] 0.15
mean(predict(best_poly, test) != test$Purchase)
## [1] 0.1888889
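
Pulling the test errors together (computed from the tuned models above), the linear kernel performs best on this particular split, with a test error of about 0.148, compared with roughly 0.185 for the radial kernel and 0.189 for the degree-2 polynomial.

# Test misclassification rate of each tuned SVM on the held-out observations
c(linear     = mean(predict(best_linear, test) != test$Purchase),
  radial     = mean(predict(best_radial, test) != test$Purchase),
  polynomial = mean(predict(best_poly, test) != test$Purchase))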