# Working on SVM
# Library which is used to perform the algorithms
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
getwd()
## [1] "C:/Users/KAUSHIK/Desktop"
# Read the file, understand the dataset.
heart <- read.csv('heart.csv', na.strings=c("","","NA"))
View(heart)
str(heart)
## 'data.frame': 303 obs. of 14 variables:
## $ ï..age : int 63 37 41 56 57 57 56 44 52 57 ...
## $ sex : int 1 1 0 1 0 1 0 1 1 1 ...
## $ cp : int 3 2 1 1 0 0 1 1 2 2 ...
## $ trestbps: int 145 130 130 120 120 140 140 120 172 150 ...
## $ chol : int 233 250 204 236 354 192 294 263 199 168 ...
## $ fbs : int 1 0 0 0 0 0 0 0 1 0 ...
## $ restecg : int 0 1 0 1 1 1 0 1 1 1 ...
## $ thalach : int 150 187 172 178 163 148 153 173 162 174 ...
## $ exang : int 0 0 0 0 1 0 0 0 0 0 ...
## $ oldpeak : num 2.3 3.5 1.4 0.8 0.6 0.4 1.3 0 0.5 1.6 ...
## $ slope : int 0 0 2 2 2 1 1 2 2 2 ...
## $ ca : int 0 0 0 0 0 0 0 0 0 0 ...
## $ thal : int 1 2 2 2 2 1 2 3 3 2 ...
## $ target : int 1 1 1 1 1 1 1 1 1 1 ...
dim(heart)
## [1] 303 14
head(heart)
## ï..age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
## 1 63 1 3 145 233 1 0 150 0 2.3 0 0
## 2 37 1 2 130 250 0 1 187 0 3.5 0 0
## 3 41 0 1 130 204 0 0 172 0 1.4 2 0
## 4 56 1 1 120 236 0 1 178 0 0.8 2 0
## 5 57 0 0 120 354 0 1 163 1 0.6 2 0
## 6 57 1 0 140 192 0 1 148 0 0.4 1 0
## thal target
## 1 1 1
## 2 2 1
## 3 2 1
## 4 2 1
## 5 2 1
## 6 1 1
# Spliting the data into training and testing part. to train and test the variables in dataset.
set.seed(3033)
train <- createDataPartition(y = heart$target, p = 0.7, list = FALSE)
training <- heart[train,]
testing <- heart[-train,]
# Check the dim value for training and testing
dim(training)
## [1] 213 14
dim(testing)
## [1] 90 14
# To check the Null Values in dataset
anyNA(heart)
## [1] FALSE
summary(heart)
## ï..age sex cp trestbps
## Min. :29.00 Min. :0.0000 Min. :0.000 Min. : 94.0
## 1st Qu.:47.50 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:120.0
## Median :55.00 Median :1.0000 Median :1.000 Median :130.0
## Mean :54.37 Mean :0.6832 Mean :0.967 Mean :131.6
## 3rd Qu.:61.00 3rd Qu.:1.0000 3rd Qu.:2.000 3rd Qu.:140.0
## Max. :77.00 Max. :1.0000 Max. :3.000 Max. :200.0
## chol fbs restecg thalach
## Min. :126.0 Min. :0.0000 Min. :0.0000 Min. : 71.0
## 1st Qu.:211.0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:133.5
## Median :240.0 Median :0.0000 Median :1.0000 Median :153.0
## Mean :246.3 Mean :0.1485 Mean :0.5281 Mean :149.6
## 3rd Qu.:274.5 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:166.0
## Max. :564.0 Max. :1.0000 Max. :2.0000 Max. :202.0
## exang oldpeak slope ca
## Min. :0.0000 Min. :0.00 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:1.000 1st Qu.:0.0000
## Median :0.0000 Median :0.80 Median :1.000 Median :0.0000
## Mean :0.3267 Mean :1.04 Mean :1.399 Mean :0.7294
## 3rd Qu.:1.0000 3rd Qu.:1.60 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :6.20 Max. :2.000 Max. :4.0000
## thal target
## Min. :0.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:0.0000
## Median :2.000 Median :1.0000
## Mean :2.314 Mean :0.5446
## 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :3.000 Max. :1.0000
# To convert into categorical varibale we convert target variable into factorial variable
# Below line convert target variable into factorial variable.
training[['target']] = factor(training[['target']])
# "trainControl Method" will control all the computaion overheads so that we can used the train function
# provided by the caret package.
trainctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
# "train function" provied by the carate package which can extract different algorithms from carate package
# "~." indicates target variable is our traget varibale.
svm_Linear <- train(target~., data = training, method = "svmLinear", trControl = trainctrl,
preProcess = c("center","scale"), tuneLength = 10)
svm_Linear
## Support Vector Machines with Linear Kernel
##
## 213 samples
## 13 predictor
## 2 classes: '0', '1'
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 192, 191, 191, 192, 191, 192, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8498701 0.6951338
##
## Tuning parameter 'C' was held constant at a value of 1
# To test the data we use predict method and passing the testing part init
testpred <- predict(svm_Linear, newdata = testing)
testpred
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1
## [36] 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0
## [71] 1 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1
## Levels: 0 1
# Now we will go for test the accuarcy of the model by using testing set
# So we are going with confusion matrix
confusionMatrix(table(testpred, testing$target))
## Confusion Matrix and Statistics
##
##
## testpred 0 1
## 0 26 7
## 1 13 44
##
## Accuracy : 0.7778
## 95% CI : (0.6779, 0.8587)
## No Information Rate : 0.5667
## P-Value [Acc > NIR] : 2.334e-05
##
## Kappa : 0.5392
## Mcnemar's Test P-Value : 0.2636
##
## Sensitivity : 0.6667
## Specificity : 0.8627
## Pos Pred Value : 0.7879
## Neg Pred Value : 0.7719
## Prevalence : 0.4333
## Detection Rate : 0.2889
## Detection Prevalence : 0.3667
## Balanced Accuracy : 0.7647
##
## 'Positive' Class : 0
##
# To improve the perfomance we are going to used gride search.
grid <- expand.grid(C = c(0, 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 5))
svm_Linear_Grid <- train(target~., data = training, method = "svmLinear", trControl = trainctrl,
preProcess = c("center","scale"), tuneGrid = grid, tuneLength = 10)
## Warning: model fit failed for Fold01.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold02.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold03.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold04.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold05.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold06.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold07.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold08.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold09.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold10.Rep1: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold01.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold02.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold03.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold04.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold05.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold06.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold07.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold08.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold09.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold10.Rep2: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold01.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold02.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold03.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold04.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold05.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold06.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold07.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold08.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold09.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning: model fit failed for Fold10.Rep3: C=0.00 Error in .local(x, ...) :
## No Support Vectors found. You may want to change your parameters
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info =
## trainInfo, : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
svm_Linear_Grid
## Support Vector Machines with Linear Kernel
##
## 213 samples
## 13 predictor
## 2 classes: '0', '1'
##
## Pre-processing: centered (13), scaled (13)
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 191, 191, 191, 193, 192, 192, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa
## 0.00 NaN NaN
## 0.01 0.8564646 0.7063399
## 0.05 0.8533550 0.7009723
## 0.10 0.8548773 0.7045789
## 0.25 0.8564574 0.7086782
## 0.50 0.8610750 0.7182457
## 0.75 0.8641775 0.7246806
## 1.00 0.8657648 0.7278472
## 1.25 0.8610029 0.7183752
## 1.50 0.8626696 0.7216416
## 1.75 0.8626696 0.7216416
## 2.00 0.8611544 0.7185081
## 5.00 0.8642569 0.7249166
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was C = 1.
# Plot the graph for the svm Linear Gride variable
plot(svm_Linear_Grid)

# again repeat the steps of predict functions
testpred_grid <- predict(svm_Linear_Grid, newdata = testing)
testpred_grid
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1
## [36] 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0
## [71] 1 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1
## Levels: 0 1
# again check the confusions matrix which will used in perfect prediction of accuracy
confusionMatrix(table(testpred_grid, testing$target))
## Confusion Matrix and Statistics
##
##
## testpred_grid 0 1
## 0 26 7
## 1 13 44
##
## Accuracy : 0.7778
## 95% CI : (0.6779, 0.8587)
## No Information Rate : 0.5667
## P-Value [Acc > NIR] : 2.334e-05
##
## Kappa : 0.5392
## Mcnemar's Test P-Value : 0.2636
##
## Sensitivity : 0.6667
## Specificity : 0.8627
## Pos Pred Value : 0.7879
## Neg Pred Value : 0.7719
## Prevalence : 0.4333
## Detection Rate : 0.2889
## Detection Prevalence : 0.3667
## Balanced Accuracy : 0.7647
##
## 'Positive' Class : 0
##
# After training function for the First time we got Accuarcy of 85.23
# After Confusion Matrix we got Accuarcy of 77.78
# We will apply expand.grid function
# The final value used for the model was C = 1.
# Again we will apply Confusion Matrix we got same Accuarcy prediction that is 77.78