# Support Vector Machine

##Link:  https://www.geeksforgeeks.org/r-language/classifying-data-using-support-vector-machinessvms-in-r/
# The global workspace is deliberately left untouched: wiping it with
# rm(list = ls()) would delete unrelated objects in the caller's session and
# still would not reset loaded packages or options. Run the script in a fresh
# R session when a clean state is needed.
#
# One-time package installation (uncomment a line if a package is missing):
# install.packages("e1071")
# install.packages("caTools")
# install.packages("ggplot2")
# install.packages("caret")
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(e1071) 
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
## 
##     element
library(caTools)
library(ggplot2)
# Load the data set and show a preview plus per-column summary statistics.
#
# The file location can be overridden without editing the script by setting
# the SOCIAL_CSV environment variable; when it is unset or empty, the path
# originally hard-coded here is used.
# Alternate location used previously:
#data = read.csv("~/Desktop/CAU/AI4OPT/Data Engineering and Mining II/social.csv")
social_csv_path <- Sys.getenv("SOCIAL_CSV", unset = "")
if (!nzchar(social_csv_path)) {
  social_csv_path <- "/Users/maxineharlemon/AIOpt/social.csv"
}
# Fail early with an explicit message instead of read.csv()'s generic
# connection error.
if (!file.exists(social_csv_path)) {
  stop("Data file not found: ", social_csv_path, call. = FALSE)
}
data <- read.csv(social_csv_path)
head(data)
summary(data)
##     User.ID            Gender               Age        EstimatedSalary 
##  Min.   :15566689   Length:400         Min.   :18.00   Min.   : 15000  
##  1st Qu.:15626764   Class :character   1st Qu.:29.75   1st Qu.: 43000  
##  Median :15694342   Mode  :character   Median :37.00   Median : 70000  
##  Mean   :15691540                      Mean   :37.66   Mean   : 69742  
##  3rd Qu.:15750363                      3rd Qu.:46.00   3rd Qu.: 88000  
##  Max.   :15815236                      Max.   :60.00   Max.   :150000  
##    Purchased     
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.3575  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
# Seed the RNG so the random train/test partition below is reproducible.
set.seed(123)

# Recode Gender as a number. as.numeric() on a factor returns the level
# index, so the resulting codes are Male = 1 and Female = 2 (factor labels
# would not change these codes); values outside the two listed levels
# become NA.
data$Gender <- as.numeric(factor(data$Gender, levels = c("Male", "Female")))

# Standardise the two continuous predictors (centre and scale each column).
# NOTE(review): the centring/scaling statistics are computed on the full data
# set before the split, so the held-out rows influence them. Confirm whether
# this is acceptable; changing it would alter the recorded results.
numeric_features <- c("Age", "EstimatedSalary")
data[, numeric_features] <- scale(data[, numeric_features])

# Partition on the outcome column: 75% of rows for training, the remainder
# for testing. `split` is a logical mask over the rows of `data`.
split <- sample.split(data$Purchased, SplitRatio = 0.75)
training_set <- data[split, ]
test_set <- data[!split, ]

# Count missing cells in the training partition.
sum(is.na(training_set))
## [1] 0
# Copy of the training partition with incomplete rows dropped.
training_set_cleaned <- na.omit(training_set)
# Fit a C-classification SVM with a radial kernel on the training partition,
# predicting Purchased from Age, EstimatedSalary and Gender.
# The kernel argument must be spelled `kernel`: a misspelled name is absorbed
# by svm()'s `...` and silently ignored, leaving the kernel at its default.
classifier <- svm(
  Purchased ~ Age + EstimatedSalary + Gender,
  data = training_set,
  type = "C-classification",
  kernel = "radial",
  gamma = 0.1
)

# Predicted class for every row of the held-out partition.
y_pred <- predict(classifier, newdata = test_set)

# Cross-tabulate actual labels (rows) against predicted labels (columns).
# The table is computed once and reused for the accuracy calculation.
actual_vs_pred <- table(test_set$Purchased, y_pred)
actual_vs_pred
##    y_pred
##      0  1
##   0 59  5
##   1  4 32

# Overall accuracy: correctly classified rows (the diagonal) over all rows.
accuracy <- sum(diag(actual_vs_pred)) / sum(actual_vs_pred)
cat("Accuracy: ", accuracy)
## Accuracy:  0.91

# Detailed statistics. caret expects predictions as `data` and the true
# labels as `reference` (for a table: predictions on rows). Passing the
# actual-by-predicted table above would swap Sensitivity/Specificity with the
# predictive values, so both vectors are passed explicitly instead. The
# reference is converted to a factor sharing the levels of the predictions.
confusionMatrix(
  data = y_pred,
  reference = factor(test_set$Purchased, levels = levels(y_pred))
)
## Statistics below were re-derived by hand from the cross-tab above for the
## corrected orientation; re-run the call to refresh the full printed output
## (the "P-Value [Acc > NIR]" line is not reproduced here).
##
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 59  4
##          1  5 32
##                                         
##                Accuracy : 0.91          
##                  95% CI : (0.836, 0.958)
##     No Information Rate : 0.64          
##                                         
##                   Kappa : 0.8059        
##                                         
##  Mcnemar's Test P-Value : 1             
##                                         
##             Sensitivity : 0.9219        
##             Specificity : 0.8889        
##          Pos Pred Value : 0.9365        
##          Neg Pred Value : 0.8649        
##              Prevalence : 0.6400        
##          Detection Rate : 0.5900        
##    Detection Prevalence : 0.6300        
##       Balanced Accuracy : 0.9054        
##                                         
##        'Positive' Class : 0             
## 
# Build a lattice over the Age x EstimatedSalary plane of the training data,
# extended by 1 unit beyond the observed range on each side and sampled
# every 0.5 units.
age_span <- range(training_set$Age)
salary_span <- range(training_set$EstimatedSalary)
X1 <- seq(age_span[1] - 1, age_span[2] + 1, by = 0.5)
X2 <- seq(salary_span[1] - 1, salary_span[2] + 1, by = 0.5)

# Every (Age, EstimatedSalary) combination, with Gender held constant at 1.5
# for all grid rows (median(training_set$Gender) was the alternative noted
# by the author).
grid_set <- expand.grid(X1, X2)
grid_set[["Gender"]] <- 1.5
names(grid_set) <- c("Age", "EstimatedSalary", "Gender")

# Predicted class at each grid node.
y_grid <- predict(classifier, newdata = grid_set)

# Draw the predicted class as a text label at every grid node, then overlay
# the training observations as open circles coloured by their actual class.
# NOTE(review): no layer maps a `fill` aesthetic, so scale_fill_manual()
# appears to have no visible effect here — confirm whether a filled layer
# was intended.
ggplot() +
  geom_text(
    data = grid_set,
    mapping = aes(x = Age, y = EstimatedSalary, label = as.factor(y_grid))
  ) +
  geom_point(
    data = training_set,
    mapping = aes(x = Age, y = EstimatedSalary, color = as.factor(Purchased)),
    shape = 21,
    size = 3
  ) +
  scale_color_manual(values = c("green4", "red3")) +
  scale_fill_manual(values = c("coral1", "aquamarine")) +
  labs(
    title = "SVM Decision Boundary (Training set)",
    x = "Age",
    y = "Estimated Salary"
  ) +
  theme_minimal() +
  theme(legend.position = "none")