Working on SVM

# Working on SVM
# Library which is used to perform the algorithms

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(caret)

## Loading required package: lattice

## Loading required package: ggplot2

getwd()

## [1] "C:/Users/KAUSHIK/Desktop"

# Read the file, understand the dataset.

heart <- read.csv('heart.csv', na.strings=c("","","NA"))
View(heart)
str(heart)

## 'data.frame':    303 obs. of  14 variables:
##  $ ï..age  : int  63 37 41 56 57 57 56 44 52 57 ...
##  $ sex     : int  1 1 0 1 0 1 0 1 1 1 ...
##  $ cp      : int  3 2 1 1 0 0 1 1 2 2 ...
##  $ trestbps: int  145 130 130 120 120 140 140 120 172 150 ...
##  $ chol    : int  233 250 204 236 354 192 294 263 199 168 ...
##  $ fbs     : int  1 0 0 0 0 0 0 0 1 0 ...
##  $ restecg : int  0 1 0 1 1 1 0 1 1 1 ...
##  $ thalach : int  150 187 172 178 163 148 153 173 162 174 ...
##  $ exang   : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ oldpeak : num  2.3 3.5 1.4 0.8 0.6 0.4 1.3 0 0.5 1.6 ...
##  $ slope   : int  0 0 2 2 2 1 1 2 2 2 ...
##  $ ca      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ thal    : int  1 2 2 2 2 1 2 3 3 2 ...
##  $ target  : int  1 1 1 1 1 1 1 1 1 1 ...

dim(heart)

## [1] 303  14

head(heart)

##   ï..age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
## 1     63   1  3      145  233   1       0     150     0     2.3     0  0
## 2     37   1  2      130  250   0       1     187     0     3.5     0  0
## 3     41   0  1      130  204   0       0     172     0     1.4     2  0
## 4     56   1  1      120  236   0       1     178     0     0.8     2  0
## 5     57   0  0      120  354   0       1     163     1     0.6     2  0
## 6     57   1  0      140  192   0       1     148     0     0.4     1  0
##   thal target
## 1    1      1
## 2    2      1
## 3    2      1
## 4    2      1
## 5    2      1
## 6    1      1

# Spliting the data into training and testing part. to train and test the variables in dataset.

set.seed(3033)
train <- createDataPartition(y = heart$target, p = 0.7, list = FALSE)
training <- heart[train,]
testing <- heart[-train,]

# Check the dim value for training and testing 
dim(training)

## [1] 213  14

dim(testing)

## [1] 90 14

# To check the Null Values in dataset
anyNA(heart)

## [1] FALSE

summary(heart)

##      ï..age           sex               cp           trestbps    
##  Min.   :29.00   Min.   :0.0000   Min.   :0.000   Min.   : 94.0  
##  1st Qu.:47.50   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:120.0  
##  Median :55.00   Median :1.0000   Median :1.000   Median :130.0  
##  Mean   :54.37   Mean   :0.6832   Mean   :0.967   Mean   :131.6  
##  3rd Qu.:61.00   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:140.0  
##  Max.   :77.00   Max.   :1.0000   Max.   :3.000   Max.   :200.0  
##       chol            fbs            restecg          thalach     
##  Min.   :126.0   Min.   :0.0000   Min.   :0.0000   Min.   : 71.0  
##  1st Qu.:211.0   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:133.5  
##  Median :240.0   Median :0.0000   Median :1.0000   Median :153.0  
##  Mean   :246.3   Mean   :0.1485   Mean   :0.5281   Mean   :149.6  
##  3rd Qu.:274.5   3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:166.0  
##  Max.   :564.0   Max.   :1.0000   Max.   :2.0000   Max.   :202.0  
##      exang           oldpeak         slope             ca        
##  Min.   :0.0000   Min.   :0.00   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.00   1st Qu.:1.000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.80   Median :1.000   Median :0.0000  
##  Mean   :0.3267   Mean   :1.04   Mean   :1.399   Mean   :0.7294  
##  3rd Qu.:1.0000   3rd Qu.:1.60   3rd Qu.:2.000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :6.20   Max.   :2.000   Max.   :4.0000  
##       thal           target      
##  Min.   :0.000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.:0.0000  
##  Median :2.000   Median :1.0000  
##  Mean   :2.314   Mean   :0.5446  
##  3rd Qu.:3.000   3rd Qu.:1.0000  
##  Max.   :3.000   Max.   :1.0000

# To convert into categorical varibale we convert target variable into factorial variable
# Below line convert target variable into factorial variable.

training[['target']] = factor(training[['target']])

# "trainControl Method" will control all the computaion overheads so that we can used the train function 
# provided by the caret package.

trainctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# "train function" provied by the carate package which can extract different algorithms from carate package
# "~." indicates target variable is our traget varibale.

svm_Linear <- train(target~., data = training, method = "svmLinear", trControl = trainctrl, 
                    preProcess = c("center","scale"), tuneLength = 10)
svm_Linear

## Support Vector Machines with Linear Kernel 
## 
## 213 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## Pre-processing: centered (13), scaled (13) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 192, 191, 191, 192, 191, 192, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.8498701  0.6951338
## 
## Tuning parameter 'C' was held constant at a value of 1

# To test the data we use predict method and passing the testing part init
testpred <- predict(svm_Linear, newdata = testing)
testpred

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1
## [36] 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0
## [71] 1 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1
## Levels: 0 1

# Now we will go for test the accuarcy of the model by using testing set 
# So we are going with confusion matrix

confusionMatrix(table(testpred, testing$target))

## Confusion Matrix and Statistics
## 
##         
## testpred  0  1
##        0 26  7
##        1 13 44
##                                           
##                Accuracy : 0.7778          
##                  95% CI : (0.6779, 0.8587)
##     No Information Rate : 0.5667          
##     P-Value [Acc > NIR] : 2.334e-05       
##                                           
##                   Kappa : 0.5392          
##  Mcnemar's Test P-Value : 0.2636          
##                                           
##             Sensitivity : 0.6667          
##             Specificity : 0.8627          
##          Pos Pred Value : 0.7879          
##          Neg Pred Value : 0.7719          
##              Prevalence : 0.4333          
##          Detection Rate : 0.2889          
##    Detection Prevalence : 0.3667          
##       Balanced Accuracy : 0.7647          
##                                           
##        'Positive' Class : 0               
##

# To improve the perfomance we are going to used gride search. 

grid <- expand.grid(C = c(0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 5))
svm_Linear_Grid <- train(target~., data = training, method = "svmLinear", trControl = trainctrl, 
                         preProcess = c("center","scale"), tuneGrid = grid, tuneLength = 10)

## Warning: model fit failed for Fold01.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold02.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold03.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold04.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold05.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold06.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold07.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold08.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold09.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold10.Rep1: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold01.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold02.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold03.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold04.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold05.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold06.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold07.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold08.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold09.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold10.Rep2: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold01.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold02.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold03.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold04.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold05.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold06.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold07.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold08.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold09.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning: model fit failed for Fold10.Rep3: C=0.00 Error in .local(x, ...) : 
##   No Support Vectors found. You may want to change your parameters

## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.

## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results

svm_Linear_Grid

## Support Vector Machines with Linear Kernel 
## 
## 213 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## Pre-processing: centered (13), scaled (13) 
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 191, 191, 191, 193, 192, 192, ... 
## Resampling results across tuning parameters:
## 
##   C     Accuracy   Kappa    
##   0.00        NaN        NaN
##   0.01  0.8564646  0.7063399
##   0.05  0.8533550  0.7009723
##   0.10  0.8548773  0.7045789
##   0.25  0.8564574  0.7086782
##   0.50  0.8610750  0.7182457
##   0.75  0.8641775  0.7246806
##   1.00  0.8657648  0.7278472
##   1.25  0.8610029  0.7183752
##   1.50  0.8626696  0.7216416
##   1.75  0.8626696  0.7216416
##   2.00  0.8611544  0.7185081
##   5.00  0.8642569  0.7249166
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 1.

# Plot the graph for the svm Linear Gride variable

plot(svm_Linear_Grid)

# again repeat the steps of predict functions

testpred_grid <- predict(svm_Linear_Grid, newdata = testing)
testpred_grid

##  [1] 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1
## [36] 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0
## [71] 1 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1
## Levels: 0 1

# again check the confusions matrix which will used in perfect prediction of accuracy

confusionMatrix(table(testpred_grid, testing$target))

## Confusion Matrix and Statistics
## 
##              
## testpred_grid  0  1
##             0 26  7
##             1 13 44
##                                           
##                Accuracy : 0.7778          
##                  95% CI : (0.6779, 0.8587)
##     No Information Rate : 0.5667          
##     P-Value [Acc > NIR] : 2.334e-05       
##                                           
##                   Kappa : 0.5392          
##  Mcnemar's Test P-Value : 0.2636          
##                                           
##             Sensitivity : 0.6667          
##             Specificity : 0.8627          
##          Pos Pred Value : 0.7879          
##          Neg Pred Value : 0.7719          
##              Prevalence : 0.4333          
##          Detection Rate : 0.2889          
##    Detection Prevalence : 0.3667          
##       Balanced Accuracy : 0.7647          
##                                           
##        'Positive' Class : 0               
##

# After training function for the First time we got Accuarcy of 85.23
# After Confusion Matrix we got Accuarcy of 77.78
# We will apply expand.grid function
# The final value used for the model was C = 1.
# Again we will apply Confusion Matrix we got same Accuarcy prediction that is 77.78

Working on SVM

Kaushik Kanakdande

13 February 2019