### Question 5) We have seen that we can fit an SVM with a non-linear kernel in order to perform classification using a non-linear decision boundary. We will now see that we can also obtain a non-linear decision boundary by performing logistic regression using non-linear transformations of the features.

#### (a) Generate a data set with n = 500 and p = 2, such that the observations belong to two classes with a quadratic decision boundary between them.

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
set.seed(1)
x1 = runif(500) - 0.5
x2 = runif(500) - 0.5
y = 1 * (x1^2 - x2^2 > 0)  # quadratic decision boundary: class 1 where x1^2 > x2^2
plot(x1[y == 0], x2[y == 0], col = "red", xlab = "X1", ylab = "X2")
points(x1[y == 1], x2[y == 1], col = "blue")

df = data.frame(x1 = x1, x2 = x2, y = as.factor(y))
glm_fit = glm(y ~ ., data = df, family = "binomial")  # logistic regression on the raw features
glm_prob = predict(glm_fit, newdata = df, type = "response")
glm_pred = ifelse(glm_prob > 0.5, 1, 0)
ggplot(df, aes(x1, x2)) +
  geom_point(aes(colour = as.factor(glm_pred)))  # predicted classes: the boundary is linear

glm_fit_2 = glm(y ~ poly(x1, 2) + poly(x2, 2), data = df, family = "binomial")  # quadratic transformations of the features
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
glm_prob_2 = predict(glm_fit_2, newdata = df, type = "response")
glm_pred_2 = ifelse(glm_prob_2 > 0.5, 1, 0)
ggplot(df, aes(x1, x2)) +
  geom_point(aes(colour = as.factor(glm_pred_2)))  # the non-linear boundary is recovered
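
As a quick numerical check (a small sketch using the objects defined above), we can compare the training accuracy of the two logistic fits; only the quadratic fit can match the true boundary:

mean(glm_pred == y)    # linear features: accuracy limited by a linear boundary
mean(glm_pred_2 == y)  # quadratic features: essentially perfect separation (hence the convergence warnings)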

library(e1071)
## Warning: package 'e1071' was built under R version 4.5.1
svm_lin = svm(y ~ ., data = df, kernel = "linear", cost = 0.01)  # support vector classifier
plot(svm_lin, df)

svm_rad = svm(y ~ ., data = df, kernel = "radial", gamma = 1)  # radial kernel for a non-linear boundary
plot(svm_rad, df)
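
To see the same contrast numerically (a sketch using the fits above), tabulate each SVM's training predictions against the true classes:

table(predict(svm_lin, df), df$y)  # the linear kernel misses the quadratic boundary
table(predict(svm_rad, df), df$y)  # the radial kernel tracks it closely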

### Question 7) In this problem, you will use support vector approaches in order to predict whether a given car gets high or low gas mileage based on the Auto data set.

(a) Create a binary variable that takes on a 1 for cars with gas mileage above the median, and a 0 for cars with gas mileage below the median.

library(ISLR)
data(Auto)
mpg_med = median(Auto$mpg)
bin.var = ifelse(Auto$mpg > mpg_med, 1, 0)  # 1 = mileage above the median
Auto$mpglevel = as.factor(bin.var)
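
Because the split is at the median, the two classes should be nearly balanced; a quick check:

table(Auto$mpglevel)  # expect a roughly even 0/1 split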
library(e1071)
set.seed(1)
tune_out = tune(svm, mpglevel ~ ., data = Auto, kernel = "linear",
    ranges = list(cost = c(0.01, 0.1, 1, 5, 10, 100)))  # CV over cost; the response is the binary mpglevel
summary(tune_out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##   0.1
## 
## - best performance: 8.981009 
## 
## - Detailed performance results:
##    cost     error dispersion
## 1 1e-02 10.305990   5.295587
## 2 1e-01  8.981009   4.750742
## 3 1e+00  9.647184   4.313908
## 4 5e+00 10.149220   4.755080
## 5 1e+01 10.306219   4.953047
## 6 1e+02 10.684083   5.080506
set.seed(2)
tune_out = tune(svm, mpglevel ~ ., data = Auto, kernel = "polynomial",
    ranges = list(cost = c(0.1, 1, 5, 10), degree = c(2, 3, 4)))
summary(tune_out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 50.95606 
## 
## - Detailed performance results:
##    cost degree    error dispersion
## 1   0.1      2 61.59446   13.60292
## 2   1.0      2 60.15304   13.79293
## 3   5.0      2 55.06386   15.19391
## 4  10.0      2 50.95606   15.72388
## 5   0.1      3 61.71831   13.56940
## 6   1.0      3 61.39833   13.54758
## 7   5.0      3 59.99304   13.43208
## 8  10.0      3 58.28857   13.27760
## 9   0.1      4 61.75343   13.57197
## 10  1.0      4 61.74822   13.57317
## 11  5.0      4 61.72510   13.57851
## 12 10.0      4 61.69626   13.58520
set.seed(33)
tune_out = tune(svm, mpglevel ~ ., data = Auto, kernel = "radial",
    ranges = list(cost = c(0.1, 1, 5, 10), gamma = c(0.01, 0.1, 1)))  # the radial kernel is tuned over gamma, not degree
summary(tune_out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 7.294024 
## 
## - Detailed performance results:
##    cost degree     error dispersion
## 1   0.1      2 22.211472   8.440215
## 2   1.0      2 10.118616   4.286046
## 3   5.0      2  7.649506   2.993534
## 4  10.0      2  7.294024   2.818545
## 5   0.1      3 22.211472   8.440215
## 6   1.0      3 10.118616   4.286046
## 7   5.0      3  7.649506   2.993534
## 8  10.0      3  7.294024   2.818545
## 9   0.1      4 22.211472   8.440215
## 10  1.0      4 10.118616   4.286046
## 11  5.0      4  7.649506   2.993534
## 12 10.0      4  7.294024   2.818545
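
Rather than re-fitting by hand at the selected values, note that tune() also stores the model re-fit on the full data at the best parameter combination (a small sketch using tune_out from above):

best_radial = tune_out$best.model  # svm re-fit at the CV-selected cost and gamma
summary(best_radial)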
svm_lin = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm_poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, 
    degree = 2)
svm_radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
plotpairs = function(fit) {
    # plot.svm needs a formula picking the two variables to display; loop over
    # every predictor and plot it against mpg, skipping the responses and car name
    for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
        plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
    }
}
plotpairs(svm_lin)

plotpairs(svm_poly)

plotpairs(svm_radial)

### Question 8) This problem involves the OJ data set which is part of the ISLR2 package.

(a) Create a training set containing a random sample of 800 observations, and a test set containing the remaining observations.

set.seed(1)
data_Train = sample(nrow(OJ), 800)  # indices of the 800 training observations
oj_train = OJ[data_Train, ]
oj_test = OJ[-data_Train, ]
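
As a sanity check on the split (OJ has 1070 rows, so 270 observations remain for testing):

nrow(oj_train)  # 800
nrow(oj_test)   # 270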

(b) Fit a support vector classifier to the training data using cost = 0.01, with Purchase as the response and the other variables as predictors. Use the summary() function to produce summary statistics, and describe the results obtained.

svc = svm(Purchase ~ ., data = oj_train, kernel = "linear", cost = 0.01)
summary(svc)
## 
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "linear", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
pred_train = predict(svc, oj_train)
(t<-table(oj_train$Purchase, pred_train))
##     pred_train
##       CH  MM
##   CH 420  65
##   MM  75 240
pred_test = predict(svc, oj_test)
table(oj_test$Purchase, pred_test)
##     pred_test
##       CH  MM
##   CH 153  15
##   MM  33  69
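
From the two confusion matrices, the training error rate is (65 + 75)/800 = 17.5% and the test error rate is (15 + 33)/270 ≈ 17.8%. The same rates can be computed directly:

mean(pred_train != oj_train$Purchase)  # training error rate, 0.175
mean(pred_test != oj_test$Purchase)    # test error rate, about 0.178
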
set.seed(1)
tune_svc = tune(svm, Purchase ~ ., data = oj_train, kernel = "linear", ranges = list(cost = c(0.01,0.1,1,10)))
summary(tune_svc)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##   0.1
## 
## - best performance: 0.1725 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.17625 0.02853482
## 2  0.10 0.17250 0.03162278
## 3  1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
svm_lin_1 = svm(Purchase ~ ., kernel = "linear", data = oj_train, cost = tune_svc$best.parameters$cost)
pred_train_1 = predict(svm_lin_1, oj_train)
table(oj_train$Purchase, pred_train_1)
##     pred_train_1
##       CH  MM
##   CH 423  62
##   MM  69 246
test_pred_1 = predict(svm_lin_1, oj_test)
(t<-table(oj_test$Purchase, test_pred_1))
##     test_pred_1
##       CH  MM
##   CH 156  12
##   MM  28  74
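
With the tuned cost, the training error rate improves to (62 + 69)/800 ≈ 16.4% and the test error rate to (12 + 28)/270 ≈ 14.8%:

mean(pred_train_1 != oj_train$Purchase)  # about 0.164
mean(test_pred_1 != oj_test$Purchase)    # about 0.148
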
set.seed(1)
svm_rad_1 = svm(Purchase ~ ., data = oj_train, kernel = "radial")
summary(svm_rad_1)
## 
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  373
## 
##  ( 188 185 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
pred_train_2 = predict(svm_rad_1, oj_train)
table(oj_train$Purchase, pred_train_2)
##     pred_train_2
##       CH  MM
##   CH 441  44
##   MM  77 238
test_pred_2 = predict(svm_rad_1, oj_test)
table(oj_test$Purchase, test_pred_2)
##     test_pred_2
##       CH  MM
##   CH 151  17
##   MM  33  69
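
For the radial kernel at the default cost of 1, the training error rate is (44 + 77)/800 ≈ 15.1% and the test error rate is (17 + 33)/270 ≈ 18.5%:

mean(pred_train_2 != oj_train$Purchase)  # about 0.151
mean(test_pred_2 != oj_test$Purchase)    # about 0.185
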
svm_rad_1 = svm(Purchase ~ ., data = oj_train, kernel = "radial", cost = tune_svc$best.parameters$cost)  # reuse the cost chosen for the linear kernel
pred_train_3 = predict(svm_rad_1, oj_train)
table(oj_train$Purchase, pred_train_3)
test_pred_3 = predict(svm_rad_1, oj_test)
(t<-table(oj_test$Purchase, test_pred_3))
##     test_pred_3
##       CH  MM
##   CH 150  18
##   MM  37  65
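
Re-fitting the radial SVM at the cost chosen for the linear kernel actually hurts here: the test error rate rises to (18 + 37)/270 ≈ 20.4%:

mean(test_pred_3 != oj_test$Purchase)  # about 0.204
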
svm_pol_1 = svm(Purchase ~ ., kernel = "poly", data = oj_train, degree = 2)
summary(svm_pol_1)
## 
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "poly", degree = 2)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  447
## 
##  ( 225 222 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
pred_train_2 = predict(svm_pol_1, oj_train)
(t<-table(oj_train$Purchase, pred_train_2))
##     pred_train_2
##       CH  MM
##   CH 449  36
##   MM 110 205
test_pred_3 = predict(svm_pol_1, oj_test)
(t<-table(oj_test$Purchase, test_pred_3))
##     test_pred_3
##       CH  MM
##   CH 153  15
##   MM  45  57
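
The degree-2 polynomial kernel does worst so far, with a training error rate of (36 + 110)/800 ≈ 18.3% and a test error rate of (15 + 45)/270 ≈ 22.2%:

mean(pred_train_2 != oj_train$Purchase)  # about 0.183 (pred_train_2 now holds the polynomial predictions)
mean(test_pred_3 != oj_test$Purchase)    # about 0.222
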
set.seed(1)
tune_svc = tune(svm, Purchase ~ ., data = oj_train, kernel = "poly", degree = 2,
    ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune_svc)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##      cost
##  3.162278
## 
## - best performance: 0.1775 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.39125 0.04210189
## 2   0.01778279 0.37125 0.03537988
## 3   0.03162278 0.36500 0.03476109
## 4   0.05623413 0.33750 0.04714045
## 5   0.10000000 0.32125 0.05001736
## 6   0.17782794 0.24500 0.04758034
## 7   0.31622777 0.19875 0.03972562
## 8   0.56234133 0.20500 0.03961621
## 9   1.00000000 0.20250 0.04116363
## 10  1.77827941 0.18500 0.04199868
## 11  3.16227766 0.17750 0.03670453
## 12  5.62341325 0.18375 0.03064696
## 13 10.00000000 0.18125 0.02779513
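
Tuning selects cost ≈ 3.16, but the polynomial model is never re-fit with it above. A minimal sketch of that final step (svm_pol_best is a name introduced here):

svm_pol_best = svm(Purchase ~ ., data = oj_train, kernel = "poly", degree = 2,
    cost = tune_svc$best.parameters$cost)
mean(predict(svm_pol_best, oj_test) != oj_test$Purchase)  # test error at the tuned cost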

Overall, the support vector classifier with a linear kernel (cost = 0.1) gives the best results on this data: its test error rate of about 14.8% beats both the radial kernel (≈ 18.5%) and the degree-2 polynomial kernel (≈ 22.2%).