#QUESTION 5 PART A)
set.seed(1)
x1 = runif(500) - 0.5 
x2 = runif(500) - 0.5
y=1*(x1^2-x2^2>0)
#QUESTION 5 PART B)
plot(x1[y==0],x2[y==0],col="red",xlab="X1",ylab="X2")
points(x1[y==1],x2[y==1],col="blue")

#QUESTION 5 PART C)
df=data.frame(x1 = x1, x2 = x2, y = as.factor(y))
glm_fit=glm(y~.,data=df, family='binomial')
library(ggplot2)
#QUESTION 5 PART D)
glm_prob=predict(glm_fit,newdata=df,type='response')
glm_pred=ifelse(glm_prob>0.5,1,0)
ggplot(df, aes(x1, x2)) +
  geom_point(aes(colour = factor(glm_pred)))

#QUESTION 5 PART E)
glm_fit_2=glm(y~poly(x1,2)+poly(x2,2),data=df,family='binomial')
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
#QUESTION 5 PART F)
library(ggplot2)
glm_prob_2=predict(glm_fit_2,newdata=df,type='response')
glm_pred_2=ifelse(glm_prob_2>0.5,1,0)
ggplot(df, aes(x1, x2)) +
  geom_point(aes(colour = factor(glm_pred_2)))

#QUESTION 5 PART G)
# Load necessary library
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.2
library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Fit a support vector classifier with a linear kernel
svm_fit <- svm(y ~ x1 + x2, data = df, kernel = "linear", cost = 1)

# Add predicted classes to the dataframe
df <- df %>%
  mutate(svm_pred = predict(svm_fit, newdata = df))

# Plot the predictions
ggplot(df, aes(x = x1, y = x2, color = factor(svm_pred))) +
  geom_point() +
  labs(title = "SVM Predictions (Linear Kernel)",
       color = "Predicted Class") +
  theme_minimal()

#QUESTION 5 PART H)
# Load required libraries (if not already)
library(e1071)
library(ggplot2)
library(dplyr)

# Fit SVM with radial basis function (non-linear kernel)
svm_rbf <- svm(y ~ x1 + x2, data = df, kernel = "radial", cost = 1, gamma = 1)

# Predict class labels
df <- df %>%
  mutate(svm_rbf_pred = predict(svm_rbf, newdata = df))

# Plot the predictions
ggplot(df, aes(x = x1, y = x2, color = factor(svm_rbf_pred))) +
  geom_point() +
  labs(title = "SVM Predictions (RBF Kernel)",
       color = "Predicted Class") +
  theme_minimal()

#QUESTION 5 PART I)
#The results show that an SVM with a radial kernel captures the non-linear decision boundary well, whereas the linear fits (logistic regression on x1 and x2, and the linear-kernel SVM) do not. The radial kernel's gamma (together with cost) should be chosen by cross-validation: it controls how flexible the boundary is, so cross-validation guards against both under- and overfitting by selecting the values with the best generalization performance.
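#A minimal sketch of that tuning step (the grid of cost/gamma values below is an assumption, not taken from the original run):
set.seed(1)
tune_rbf = tune(svm, y ~ x1 + x2, data = df, kernel = "radial",
                ranges = list(cost = c(0.1, 1, 10), gamma = c(0.5, 1, 2)))
summary(tune_rbf)          # cross-validation error for each (cost, gamma) pair
tune_rbf$best.parameters   # combination with the lowest CV error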
#QUESTION 7 PART A)
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.4.2
mpg_med = median(Auto$mpg)
bin.var = ifelse(Auto$mpg > mpg_med, 1, 0)
Auto$mpglevel = as.factor(bin.var)
#QUESTION 7 PART B)
library(e1071)
set.seed(1)
tune_out = tune(svm, mpg~., data = Auto, kernel = "linear", ranges = list(cost = c(0.01, 
    0.1, 1, 5, 10, 100)))
summary(tune_out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##   0.1
## 
## - best performance: 8.981009 
## 
## - Detailed performance results:
##    cost     error dispersion
## 1 1e-02 10.305990   5.295587
## 2 1e-01  8.981009   4.750742
## 3 1e+00  9.647184   4.313908
## 4 5e+00 10.149220   4.755080
## 5 1e+01 10.306219   4.953047
## 6 1e+02 10.684083   5.080506
#The results show that a cost of 0.1 gives the lowest cross-validation error.
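#For reference, the model refit at the selected cost can be pulled straight from the tuning object (a sketch using e1071's tune interface):
best_lin = tune_out$best.model   # svm refit on the full data at cost = 0.1
summary(best_lin)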
#QUESTION 7 PART C)
set.seed(2)
tune_out = tune(svm, mpg ~ ., data = Auto, kernel = "polynomial", ranges = list(cost = c(0.1, 
    1, 5, 10), degree = c(2, 3, 4)))
summary(tune_out)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost degree
##    10      2
## 
## - best performance: 50.95606 
## 
## - Detailed performance results:
##    cost degree    error dispersion
## 1   0.1      2 61.59446   13.60292
## 2   1.0      2 60.15304   13.79293
## 3   5.0      2 55.06386   15.19391
## 4  10.0      2 50.95606   15.72388
## 5   0.1      3 61.71831   13.56940
## 6   1.0      3 61.39833   13.54758
## 7   5.0      3 59.99304   13.43208
## 8  10.0      3 58.28857   13.27760
## 9   0.1      4 61.75343   13.57197
## 10  1.0      4 61.74822   13.57317
## 11  5.0      4 61.72510   13.57851
## 12 10.0      4 61.69626   13.58520
#With the polynomial kernel, the lowest cross-validation error is obtained at cost = 10 and degree = 2.
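#The selected settings can also be read off programmatically (sketch):
tune_out$best.parameters   # cost = 10, degree = 2
tune_out$best.performance  # corresponding CV error (~50.96)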
#QUESTION 7 PART D)
svm_lin = svm(mpglevel ~ ., data = Auto, kernel = "linear", cost = 1)
svm_poly = svm(mpglevel ~ ., data = Auto, kernel = "polynomial", cost = 10, 
    degree = 2)
svm_radial = svm(mpglevel ~ ., data = Auto, kernel = "radial", cost = 10, gamma = 0.01)
plotpairs = function(fit) {
    for (name in names(Auto)[!(names(Auto) %in% c("mpg", "mpglevel", "name"))]) {
        plot(fit, Auto, as.formula(paste("mpg~", name, sep = "")))
    }
}
plotpairs(svm_lin)

#QUESTION 8 PART A)
attach(OJ)
set.seed(1)
data_Train = sample(nrow(OJ), 800)
oj_train = OJ[data_Train,]
oj_test = OJ[-data_Train,]
#QUESTION 8 PART B)
svc=svm(Purchase~.,data=oj_train,kernel='linear',cost=0.01)
summary(svc)
## 
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "linear", cost = 0.01)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  linear 
##        cost:  0.01 
## 
## Number of Support Vectors:  435
## 
##  ( 219 216 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
#The summary shows 435 support vectors in total: 219 belong to class CH and 216 to class MM.
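#These counts can also be read from the fitted object directly (sketch):
svc$tot.nSV  # total number of support vectors (435)
svc$nSV      # per-class counts (219 CH, 216 MM)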
#QUESTION 8 PART C)
pred_train = predict(svc, oj_train)
(t<-table(oj_train$Purchase, pred_train))
##     pred_train
##       CH  MM
##   CH 420  65
##   MM  75 240
pred_test = predict(svc, oj_test)
table(oj_test$Purchase, pred_test)
##     pred_test
##       CH  MM
##   CH 153  15
##   MM  33  69
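#The training and test error rates follow from these tables; a quick way to compute them from the predictions (sketch):
mean(pred_train != oj_train$Purchase)  # (65 + 75) / 800 = 0.175
mean(pred_test != oj_test$Purchase)    # (15 + 33) / 270 ~ 0.178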
#QUESTION 8 PART D)
set.seed(1)
tune_svc = tune(svm, Purchase ~ ., data = oj_train, kernel = "linear", ranges = list(cost = c(0.01,0.1,1,10)))
summary(tune_svc)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##  cost
##   0.1
## 
## - best performance: 0.1725 
## 
## - Detailed performance results:
##    cost   error dispersion
## 1  0.01 0.17625 0.02853482
## 2  0.10 0.17250 0.03162278
## 3  1.00 0.17500 0.02946278
## 4 10.00 0.17375 0.03197764
#QUESTION 8 PART E)
svm_lin_1 = svm(Purchase ~ ., kernel = "linear", data = oj_train, cost = tune_svc$best.parameters$cost)
pred_train_1 = predict(svm_lin_1, oj_train)
table(oj_train$Purchase, pred_train_1)
##     pred_train_1
##       CH  MM
##   CH 423  62
##   MM  69 246
test_pred_1 = predict(svm_lin_1, oj_test)
(t<-table(oj_test$Purchase, test_pred_1))
##     test_pred_1
##       CH  MM
##   CH 156  12
##   MM  28  74
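#Error rates for the tuned linear classifier, computed the same way (sketch):
mean(pred_train_1 != oj_train$Purchase)  # (62 + 69) / 800 ~ 0.164
mean(test_pred_1 != oj_test$Purchase)    # (12 + 28) / 270 ~ 0.148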
#QUESTION 8 PART F)
set.seed(1)
svm_radial1 = svm(Purchase ~ ., data = oj_train, kernel = "radial")
summary(svm_radial1)
## 
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "radial")
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  373
## 
##  ( 188 185 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
pred_train_2 = predict(svm_radial1, oj_train)
table(oj_train$Purchase, pred_train_2)
##     pred_train_2
##       CH  MM
##   CH 441  44
##   MM  77 238
test_pred_2 = predict(svm_radial1, oj_test)
table(oj_test$Purchase, test_pred_2)
##     test_pred_2
##       CH  MM
##   CH 151  17
##   MM  33  69
svm_radial1 = svm(Purchase ~ ., data = oj_train, kernel = "radial", cost = tune_svc$best.parameters$cost)
pred_train = predict(svm_radial1, oj_train)
table(oj_train$Purchase, pred_train)
##     pred_train_1
##       CH  MM
##   CH 423  62
##   MM  69 246
test_pred_3 = predict(svm_radial1, oj_test)
(t<-table(oj_test$Purchase, test_pred_3))
##     test_pred_3
##       CH  MM
##   CH 150  18
##   MM  37  65
#QUESTION 8 PART G)
svm_polynomial1 = svm(Purchase ~ ., kernel = "poly", data = oj_train, degree=2)
summary(svm_polynomial1)
## 
## Call:
## svm(formula = Purchase ~ ., data = oj_train, kernel = "poly", degree = 2)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  polynomial 
##        cost:  1 
##      degree:  2 
##      coef.0:  0 
## 
## Number of Support Vectors:  447
## 
##  ( 225 222 )
## 
## 
## Number of Classes:  2 
## 
## Levels: 
##  CH MM
pred_train_2 = predict(svm_polynomial1, oj_train)
(t<-table(oj_train$Purchase, pred_train_2))
##     pred_train_2
##       CH  MM
##   CH 449  36
##   MM 110 205
test_pred_3 = predict(svm_polynomial1, oj_test)
(t<-table(oj_test$Purchase, test_pred_3))
##     test_pred_3
##       CH  MM
##   CH 153  15
##   MM  45  57
set.seed(1)
tune_svc = tune(svm, Purchase ~ ., data = oj_train, kernel = "poly", degree = 2, ranges = list(cost = 10^seq(-2, 1, by = 0.25)))
summary(tune_svc)
## 
## Parameter tuning of 'svm':
## 
## - sampling method: 10-fold cross validation 
## 
## - best parameters:
##      cost
##  3.162278
## 
## - best performance: 0.1775 
## 
## - Detailed performance results:
##           cost   error dispersion
## 1   0.01000000 0.39125 0.04210189
## 2   0.01778279 0.37125 0.03537988
## 3   0.03162278 0.36500 0.03476109
## 4   0.05623413 0.33750 0.04714045
## 5   0.10000000 0.32125 0.05001736
## 6   0.17782794 0.24500 0.04758034
## 7   0.31622777 0.19875 0.03972562
## 8   0.56234133 0.20500 0.03961621
## 9   1.00000000 0.20250 0.04116363
## 10  1.77827941 0.18500 0.04199868
## 11  3.16227766 0.17750 0.03670453
## 12  5.62341325 0.18375 0.03064696
## 13 10.00000000 0.18125 0.02779513
#QUESTION 8 PART H)
#Comparing test error rates from the confusion matrices above (~0.148 for the tuned linear kernel versus ~0.185-0.204 for the radial fits and ~0.222 for the polynomial kernel), the linear approach gives the best results on this data.
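#A quick verification sketch using the last fitted model of each kernel from above:
sapply(list(linear = svm_lin_1, radial = svm_radial1, polynomial = svm_polynomial1),
       function(m) mean(predict(m, oj_test) != oj_test$Purchase))  # test error rate per kernel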