# 3. Support vector machine
# Remove the Severe responses from both the training and test set to create a binary classification problem.
# Fit several support vector machines to the data. Compare the test set accuracy for different kernel functions
# and cost parameters. You don’t have to do anything fancy here. Just try a few different combinations of
# parameters and print the test set accuracy.
library(AppliedPredictiveModeling)
data(hepatic)
## Remove the Severe responses so that only two classes remain.
## A logical mask is used instead of negative which() positions: if no row
## matched "Severe", `-integer(0)` would select zero rows and silently empty
## both objects. `%in%` never yields NA, so rows with a missing label are kept,
## as they were before.
is_severe <- injury %in% "Severe"
## Positions of the dropped rows (variable kept in case later code reads it).
ind1 <- which(is_severe)
bio <- bio[!is_severe, , drop = FALSE]
injury <- injury[!is_severe]
## Rebuild the factor from its labels: this drops the now-unused "Severe"
## level and orders the remaining levels alphabetically.
injury <- factor(as.character(injury))
## a. Split the data at random into a training set (75%) and a test set (25%).
## Both the training-set size and the sampling range are derived from the
## number of rows that actually remain in `bio`. They were previously
## hard-coded (251 for the size, 1..281 for the range -- presumably the row
## count before the Severe rows were dropped). Any sampled position beyond
## nrow(bio) produces an all-NA row in the training data and is ignored by the
## negative index that builds the test set.
## NOTE: output recorded in comments further down this file was produced with
## the old split and will differ once the script is re-run.
nobs <- nrow(bio)
stopifnot(nobs == length(injury))
## Number of training rows (the name `nfolds` is kept for compatibility).
nfolds <- round(nobs * 0.75)
nfolds
set.seed(12334)
ind <- sample(seq_len(nobs), nfolds)
## Training set
biotrain <- bio[ind, ]
injurytrain <- injury[ind]
## Test set
biotest <- bio[-ind, ]
injurytest <- injury[-ind]
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
library(e1071)
## Radial kernel, cost = 1: fit on the training set without rescaling the
## predictors, predict the test set, and tabulate predictions against the
## true labels.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  kernel = "radial",
  cost = 1,
  scale = FALSE
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 44 36
## None 0 2
##
## Accuracy : 0.561
## 95% CI : (0.447, 0.6704)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.3709
##
## Kappa : 0.0563
## Mcnemar's Test P-Value : 5.433e-09
##
## Sensitivity : 1.00000
## Specificity : 0.05263
## Pos Pred Value : 0.55000
## Neg Pred Value : 1.00000
## Prevalence : 0.53659
## Detection Rate : 0.53659
## Detection Prevalence : 0.97561
## Balanced Accuracy : 0.52632
##
## 'Positive' Class : Mild
##
## Radial kernel, cost = 5: same fit / predict / tabulate sequence with a
## larger cost.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  kernel = "radial",
  cost = 5,
  scale = FALSE
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 35 36
## None 9 2
##
## Accuracy : 0.4512
## 95% CI : (0.341, 0.5651)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9514561
##
## Kappa : -0.1596
## Mcnemar's Test P-Value : 0.0001063
##
## Sensitivity : 0.79545
## Specificity : 0.05263
## Pos Pred Value : 0.49296
## Neg Pred Value : 0.18182
## Prevalence : 0.53659
## Detection Rate : 0.42683
## Detection Prevalence : 0.86585
## Balanced Accuracy : 0.42404
##
## 'Positive' Class : Mild
##
## Radial kernel, cost = 10: fit on the training set, then score the test set.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  kernel = "radial",
  cost = 10,
  scale = FALSE
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 34 32
## None 10 6
##
## Accuracy : 0.4878
## 95% CI : (0.3758, 0.6008)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.840513
##
## Kappa : -0.0722
## Mcnemar's Test P-Value : 0.001194
##
## Sensitivity : 0.7727
## Specificity : 0.1579
## Pos Pred Value : 0.5152
## Neg Pred Value : 0.3750
## Prevalence : 0.5366
## Detection Rate : 0.4146
## Detection Prevalence : 0.8049
## Balanced Accuracy : 0.4653
##
## 'Positive' Class : Mild
##
## Polynomial kernel, cost = 1: fit on the training set without rescaling the
## predictors, then compare test-set predictions with the true labels.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  kernel = "polynomial",
  cost = 1,
  scale = FALSE
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## Polynomial kernel, cost = 5: same fit / predict / tabulate sequence with a
## larger cost.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  kernel = "polynomial",
  cost = 5,
  scale = FALSE
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## Polynomial kernel, cost = 10: fit on the training set, then score the test
## set.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  kernel = "polynomial",
  cost = 10,
  scale = FALSE
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## Try every kernel / cost combination: announce the pair, fit on the training
## set without rescaling, and print the test-set confusion matrix.
kernels <- c("polynomial", "radial", "sigmoid")
costs <- c(1, 5, 10, 15)
## expand.grid varies its first column fastest, so listing the cost first
## runs through all costs for one kernel before moving to the next kernel.
combos <- expand.grid(co = costs, kk = kernels, stringsAsFactors = FALSE)
for (i in seq_len(nrow(combos))) {
  kk <- combos$kk[i]
  co <- combos$co[i]
  print(kk)
  print(co)
  svm1 <- svm(
    x = biotrain,
    y = injurytrain,
    kernel = kk,
    cost = co,
    scale = FALSE
  )
  pre <- predict(svm1, newdata = biotest)
  print(confusionMatrix(data = pre, reference = injurytest))
}
## [1] "polynomial"
## [1] 1
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## [1] "polynomial"
## [1] 5
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## [1] "polynomial"
## [1] 10
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## [1] "polynomial"
## [1] 15
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 20 24
## None 24 14
##
## Accuracy : 0.4146
## 95% CI : (0.3068, 0.5288)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9899
##
## Kappa : -0.177
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.4545
## Specificity : 0.3684
## Pos Pred Value : 0.4545
## Neg Pred Value : 0.3684
## Prevalence : 0.5366
## Detection Rate : 0.2439
## Detection Prevalence : 0.5366
## Balanced Accuracy : 0.4115
##
## 'Positive' Class : Mild
##
## [1] "radial"
## [1] 1
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 44 36
## None 0 2
##
## Accuracy : 0.561
## 95% CI : (0.447, 0.6704)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.3709
##
## Kappa : 0.0563
## Mcnemar's Test P-Value : 5.433e-09
##
## Sensitivity : 1.00000
## Specificity : 0.05263
## Pos Pred Value : 0.55000
## Neg Pred Value : 1.00000
## Prevalence : 0.53659
## Detection Rate : 0.53659
## Detection Prevalence : 0.97561
## Balanced Accuracy : 0.52632
##
## 'Positive' Class : Mild
##
## [1] "radial"
## [1] 5
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 35 36
## None 9 2
##
## Accuracy : 0.4512
## 95% CI : (0.341, 0.5651)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.9514561
##
## Kappa : -0.1596
## Mcnemar's Test P-Value : 0.0001063
##
## Sensitivity : 0.79545
## Specificity : 0.05263
## Pos Pred Value : 0.49296
## Neg Pred Value : 0.18182
## Prevalence : 0.53659
## Detection Rate : 0.42683
## Detection Prevalence : 0.86585
## Balanced Accuracy : 0.42404
##
## 'Positive' Class : Mild
##
## [1] "radial"
## [1] 10
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 34 32
## None 10 6
##
## Accuracy : 0.4878
## 95% CI : (0.3758, 0.6008)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.840513
##
## Kappa : -0.0722
## Mcnemar's Test P-Value : 0.001194
##
## Sensitivity : 0.7727
## Specificity : 0.1579
## Pos Pred Value : 0.5152
## Neg Pred Value : 0.3750
## Prevalence : 0.5366
## Detection Rate : 0.4146
## Detection Prevalence : 0.8049
## Balanced Accuracy : 0.4653
##
## 'Positive' Class : Mild
##
## [1] "radial"
## [1] 15
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 32 32
## None 12 6
##
## Accuracy : 0.4634
## 95% CI : (0.3525, 0.577)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.924803
##
## Kappa : -0.1191
## Mcnemar's Test P-Value : 0.004179
##
## Sensitivity : 0.7273
## Specificity : 0.1579
## Pos Pred Value : 0.5000
## Neg Pred Value : 0.3333
## Prevalence : 0.5366
## Detection Rate : 0.3902
## Detection Prevalence : 0.7805
## Balanced Accuracy : 0.4426
##
## 'Positive' Class : Mild
##
## [1] "sigmoid"
## [1] 1
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 44 38
## None 0 0
##
## Accuracy : 0.5366
## 95% CI : (0.423, 0.6475)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.5451
##
## Kappa : 0
## Mcnemar's Test P-Value : 1.947e-09
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.5366
## Neg Pred Value : NaN
## Prevalence : 0.5366
## Detection Rate : 0.5366
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Mild
##
## [1] "sigmoid"
## [1] 5
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 44 38
## None 0 0
##
## Accuracy : 0.5366
## 95% CI : (0.423, 0.6475)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.5451
##
## Kappa : 0
## Mcnemar's Test P-Value : 1.947e-09
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.5366
## Neg Pred Value : NaN
## Prevalence : 0.5366
## Detection Rate : 0.5366
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Mild
##
## [1] "sigmoid"
## [1] 10
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 44 38
## None 0 0
##
## Accuracy : 0.5366
## 95% CI : (0.423, 0.6475)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.5451
##
## Kappa : 0
## Mcnemar's Test P-Value : 1.947e-09
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.5366
## Neg Pred Value : NaN
## Prevalence : 0.5366
## Detection Rate : 0.5366
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Mild
##
## [1] "sigmoid"
## [1] 15
## Confusion Matrix and Statistics
##
## Reference
## Prediction Mild None
## Mild 44 38
## None 0 0
##
## Accuracy : 0.5366
## 95% CI : (0.423, 0.6475)
## No Information Rate : 0.5366
## P-Value [Acc > NIR] : 0.5451
##
## Kappa : 0
## Mcnemar's Test P-Value : 1.947e-09
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.5366
## Neg Pred Value : NaN
## Prevalence : 0.5366
## Detection Rate : 0.5366
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Mild
##