# 3. Support vector machine
# Remove the Severe responses from both the training and test set to create a binary classification problem.
# Fit several support vector machines to the data. Compare the test set accuracy for different kernel functions
# and cost parameters. You don’t have to do anything fancy here. Just try a few different combinations of
# parameters and print the test set accuracy.


library(AppliedPredictiveModeling)
data(hepatic)

## Remove the Severe responses so that only two classes remain.
# Positions of the Severe cases (kept for reference / later inspection).
ind1 <- which(injury == "Severe")
# Use a logical keep-mask rather than negative indexing with `-ind1`:
# if no Severe rows were present, `x[-integer(0), ]` would drop EVERY row.
# `%in%` never returns NA, so rows with a missing label are retained,
# exactly as the which()-based version retained them.
keep <- !(injury %in% "Severe")
bio <- bio[keep, , drop = FALSE]
injury <- injury[keep]
# Rebuild the factor from its character values to discard the now-empty
# "Severe" level; this also re-sorts the remaining levels alphabetically.
injury <- as.factor(as.character(injury))

##  a. Split the data into a training and test set using a random 75% / 25% split.

# Derive the sizes from the data that is actually in memory. The Severe rows
# have already been removed at this point, so the row count must not be
# hard-coded to the size of the unfiltered data set.
n_obs <- nrow(bio)
nfolds <- round(n_obs * 0.75)
nfolds
## [1] 188
set.seed(12334)
# Draw the training rows from 1..n_obs only. Sampling from a larger range
# (e.g. the pre-filter row count) yields out-of-range indices, which produce
# all-NA training rows / NA labels and distort the intended 75/25 split.
ind <- sample(seq_len(n_obs), nfolds)
# NOTE: the confusion matrices recorded in the comments further down were
# generated with an earlier split and must be regenerated after this change.
## Training set
biotrain <- bio[ind, ]
injurytrain <- injury[ind]
## Test set
biotest <- bio[-ind, ]
injurytest <- injury[-ind]

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
library(e1071)


## Radial kernel, cost = 1: fit on the training split (predictors left
## unscaled), predict the held-out rows, and report the confusion matrix.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  scale = FALSE,
  kernel = "radial",
  cost = 1
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   44   36
##       None    0    2
##                                          
##                Accuracy : 0.561          
##                  95% CI : (0.447, 0.6704)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.3709         
##                                          
##                   Kappa : 0.0563         
##  Mcnemar's Test P-Value : 5.433e-09      
##                                          
##             Sensitivity : 1.00000        
##             Specificity : 0.05263        
##          Pos Pred Value : 0.55000        
##          Neg Pred Value : 1.00000        
##              Prevalence : 0.53659        
##          Detection Rate : 0.53659        
##    Detection Prevalence : 0.97561        
##       Balanced Accuracy : 0.52632        
##                                          
##        'Positive' Class : Mild           
## 
## Radial kernel, cost = 5: same fit / predict / evaluate sequence with a
## larger misclassification penalty.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  scale = FALSE,
  kernel = "radial",
  cost = 5
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   35   36
##       None    9    2
##                                          
##                Accuracy : 0.4512         
##                  95% CI : (0.341, 0.5651)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.9514561      
##                                          
##                   Kappa : -0.1596        
##  Mcnemar's Test P-Value : 0.0001063      
##                                          
##             Sensitivity : 0.79545        
##             Specificity : 0.05263        
##          Pos Pred Value : 0.49296        
##          Neg Pred Value : 0.18182        
##              Prevalence : 0.53659        
##          Detection Rate : 0.42683        
##    Detection Prevalence : 0.86585        
##       Balanced Accuracy : 0.42404        
##                                          
##        'Positive' Class : Mild           
## 
## Radial kernel, cost = 10: fit on the training split, predict the test
## split, and report the confusion matrix.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  scale = FALSE,
  kernel = "radial",
  cost = 10
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   34   32
##       None   10    6
##                                           
##                Accuracy : 0.4878          
##                  95% CI : (0.3758, 0.6008)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.840513        
##                                           
##                   Kappa : -0.0722         
##  Mcnemar's Test P-Value : 0.001194        
##                                           
##             Sensitivity : 0.7727          
##             Specificity : 0.1579          
##          Pos Pred Value : 0.5152          
##          Neg Pred Value : 0.3750          
##              Prevalence : 0.5366          
##          Detection Rate : 0.4146          
##    Detection Prevalence : 0.8049          
##       Balanced Accuracy : 0.4653          
##                                           
##        'Positive' Class : Mild            
## 
## Polynomial kernel, cost = 1: fit on the training split (predictors left
## unscaled), predict the held-out rows, and report the confusion matrix.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  scale = FALSE,
  kernel = "polynomial",
  cost = 1
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
## 
## Polynomial kernel, cost = 5: same fit / predict / evaluate sequence with a
## larger misclassification penalty.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  scale = FALSE,
  kernel = "polynomial",
  cost = 5
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
## 
## Polynomial kernel, cost = 10: fit on the training split, predict the test
## split, and report the confusion matrix.
svm1 <- svm(
  x = biotrain,
  y = injurytrain,
  scale = FALSE,
  kernel = "polynomial",
  cost = 10
)
pre <- predict(svm1, newdata = biotest)
confusionMatrix(data = pre, reference = injurytest)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
## 
## Try every kernel with every cost value and print the test-set confusion
## matrix for each combination.
kernels <- c("polynomial", "radial", "sigmoid")
costs <- c(1, 5, 10, 15)

# expand.grid() varies its first column fastest, so putting `cost` first
# walks through all costs for one kernel before moving to the next kernel.
settings <- expand.grid(cost = costs, kernel = kernels, stringsAsFactors = FALSE)

for (idx in seq_len(nrow(settings))) {
  kk <- settings$kernel[idx]
  co <- settings$cost[idx]
  # Label the output that follows with the current kernel and cost.
  print(kk)
  print(co)

  svm1 <- svm(
    x = biotrain,
    y = injurytrain,
    scale = FALSE,
    kernel = kk,
    cost = co
  )
  pre <- predict(svm1, newdata = biotest)
  print(confusionMatrix(data = pre, reference = injurytest))
}
## [1] "polynomial"
## [1] 1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
##                                           
## [1] "polynomial"
## [1] 5
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
##                                           
## [1] "polynomial"
## [1] 10
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
##                                           
## [1] "polynomial"
## [1] 15
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   20   24
##       None   24   14
##                                           
##                Accuracy : 0.4146          
##                  95% CI : (0.3068, 0.5288)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.9899          
##                                           
##                   Kappa : -0.177          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.4545          
##             Specificity : 0.3684          
##          Pos Pred Value : 0.4545          
##          Neg Pred Value : 0.3684          
##              Prevalence : 0.5366          
##          Detection Rate : 0.2439          
##    Detection Prevalence : 0.5366          
##       Balanced Accuracy : 0.4115          
##                                           
##        'Positive' Class : Mild            
##                                           
## [1] "radial"
## [1] 1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   44   36
##       None    0    2
##                                          
##                Accuracy : 0.561          
##                  95% CI : (0.447, 0.6704)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.3709         
##                                          
##                   Kappa : 0.0563         
##  Mcnemar's Test P-Value : 5.433e-09      
##                                          
##             Sensitivity : 1.00000        
##             Specificity : 0.05263        
##          Pos Pred Value : 0.55000        
##          Neg Pred Value : 1.00000        
##              Prevalence : 0.53659        
##          Detection Rate : 0.53659        
##    Detection Prevalence : 0.97561        
##       Balanced Accuracy : 0.52632        
##                                          
##        'Positive' Class : Mild           
##                                          
## [1] "radial"
## [1] 5
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   35   36
##       None    9    2
##                                          
##                Accuracy : 0.4512         
##                  95% CI : (0.341, 0.5651)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.9514561      
##                                          
##                   Kappa : -0.1596        
##  Mcnemar's Test P-Value : 0.0001063      
##                                          
##             Sensitivity : 0.79545        
##             Specificity : 0.05263        
##          Pos Pred Value : 0.49296        
##          Neg Pred Value : 0.18182        
##              Prevalence : 0.53659        
##          Detection Rate : 0.42683        
##    Detection Prevalence : 0.86585        
##       Balanced Accuracy : 0.42404        
##                                          
##        'Positive' Class : Mild           
##                                          
## [1] "radial"
## [1] 10
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   34   32
##       None   10    6
##                                           
##                Accuracy : 0.4878          
##                  95% CI : (0.3758, 0.6008)
##     No Information Rate : 0.5366          
##     P-Value [Acc > NIR] : 0.840513        
##                                           
##                   Kappa : -0.0722         
##  Mcnemar's Test P-Value : 0.001194        
##                                           
##             Sensitivity : 0.7727          
##             Specificity : 0.1579          
##          Pos Pred Value : 0.5152          
##          Neg Pred Value : 0.3750          
##              Prevalence : 0.5366          
##          Detection Rate : 0.4146          
##    Detection Prevalence : 0.8049          
##       Balanced Accuracy : 0.4653          
##                                           
##        'Positive' Class : Mild            
##                                           
## [1] "radial"
## [1] 15
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   32   32
##       None   12    6
##                                          
##                Accuracy : 0.4634         
##                  95% CI : (0.3525, 0.577)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.924803       
##                                          
##                   Kappa : -0.1191        
##  Mcnemar's Test P-Value : 0.004179       
##                                          
##             Sensitivity : 0.7273         
##             Specificity : 0.1579         
##          Pos Pred Value : 0.5000         
##          Neg Pred Value : 0.3333         
##              Prevalence : 0.5366         
##          Detection Rate : 0.3902         
##    Detection Prevalence : 0.7805         
##       Balanced Accuracy : 0.4426         
##                                          
##        'Positive' Class : Mild           
##                                          
## [1] "sigmoid"
## [1] 1
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   44   38
##       None    0    0
##                                          
##                Accuracy : 0.5366         
##                  95% CI : (0.423, 0.6475)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.5451         
##                                          
##                   Kappa : 0              
##  Mcnemar's Test P-Value : 1.947e-09      
##                                          
##             Sensitivity : 1.0000         
##             Specificity : 0.0000         
##          Pos Pred Value : 0.5366         
##          Neg Pred Value :    NaN         
##              Prevalence : 0.5366         
##          Detection Rate : 0.5366         
##    Detection Prevalence : 1.0000         
##       Balanced Accuracy : 0.5000         
##                                          
##        'Positive' Class : Mild           
##                                          
## [1] "sigmoid"
## [1] 5
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   44   38
##       None    0    0
##                                          
##                Accuracy : 0.5366         
##                  95% CI : (0.423, 0.6475)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.5451         
##                                          
##                   Kappa : 0              
##  Mcnemar's Test P-Value : 1.947e-09      
##                                          
##             Sensitivity : 1.0000         
##             Specificity : 0.0000         
##          Pos Pred Value : 0.5366         
##          Neg Pred Value :    NaN         
##              Prevalence : 0.5366         
##          Detection Rate : 0.5366         
##    Detection Prevalence : 1.0000         
##       Balanced Accuracy : 0.5000         
##                                          
##        'Positive' Class : Mild           
##                                          
## [1] "sigmoid"
## [1] 10
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   44   38
##       None    0    0
##                                          
##                Accuracy : 0.5366         
##                  95% CI : (0.423, 0.6475)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.5451         
##                                          
##                   Kappa : 0              
##  Mcnemar's Test P-Value : 1.947e-09      
##                                          
##             Sensitivity : 1.0000         
##             Specificity : 0.0000         
##          Pos Pred Value : 0.5366         
##          Neg Pred Value :    NaN         
##              Prevalence : 0.5366         
##          Detection Rate : 0.5366         
##    Detection Prevalence : 1.0000         
##       Balanced Accuracy : 0.5000         
##                                          
##        'Positive' Class : Mild           
##                                          
## [1] "sigmoid"
## [1] 15
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Mild None
##       Mild   44   38
##       None    0    0
##                                          
##                Accuracy : 0.5366         
##                  95% CI : (0.423, 0.6475)
##     No Information Rate : 0.5366         
##     P-Value [Acc > NIR] : 0.5451         
##                                          
##                   Kappa : 0              
##  Mcnemar's Test P-Value : 1.947e-09      
##                                          
##             Sensitivity : 1.0000         
##             Specificity : 0.0000         
##          Pos Pred Value : 0.5366         
##          Neg Pred Value :    NaN         
##              Prevalence : 0.5366         
##          Detection Rate : 0.5366         
##    Detection Prevalence : 1.0000         
##       Balanced Accuracy : 0.5000         
##                                          
##        'Positive' Class : Mild           
##