1 Reading in the Data

# Read the two source tables from disk (paths are under the user's home dir).
ArcLakeGroupSummary <- read_excel("~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx")
dundeedata <- read_csv("~/Desktop/EPSRC Project /dundeedata.csv.xls")

# Rename the first column of dundeedata (the GloboLID column) to GloboLakes_ID
# so that both tables share the same merge key.
colnames(dundeedata)[1] <- "GloboLakes_ID"

# Full outer join on the lake ID; rows present in only one table are kept
# and padded with NA.
Data <- merge(ArcLakeGroupSummary, dundeedata, by = "GloboLakes_ID", all = TRUE)

# Drop rows that have no group label. Missing labels are tested explicitly
# with is.na() instead of relying on `Group != "NA"` evaluating to NA (which
# subset() happens to treat as FALSE); a literal "NA" string is still
# excluded, exactly as before.
# The data set is back to the original 732 rows, just with extra columns of
# information.
Data <- subset(Data, !is.na(Group) & Group != "NA")

Data$Group <- as.factor(Data$Group)

2 For PC1 + PC2

In order to use each method, I first prepare a suitable data frame - splitting it into training and test sets and then splitting the training set into 5 folds.

# Keep only the response (Group) and the two predictors used in this section.
Data1 <- data.frame(Data[, c("Group", "PC1", "PC2")])

# Split the full data set into training (80%) and test (20%) sets,
# partitioning on Group.

set.seed(234)

library(caret)
train.index <- createDataPartition(Data1$Group, p = 0.8, list = FALSE)
train.set <- Data1[train.index, ]
test.set <- Data1[-train.index, ]

# Assign every training row to one of 5 cross-validation folds, again
# partitioning on Group; the fold number is stored in a new `fold` column.

folds <- createFolds(y = factor(train.set$Group), k = 5, list = FALSE)
train.set[["fold"]] <- folds

3 Bayesian Optimization

Apart from trying every possible combination, or manually sweeping a self-selected set of values that we think would perform well, the three main ways of choosing hyperparameters are Grid Search, Random Search and Bayesian Optimization.

The upper confidence bound was used as the acquisition function.

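The optimization was carried out over the logarithms of cost and gamma rather than the raw values, because both parameters range over many orders of magnitude.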

3.1 SVM Linear Kernel

#linear

# Objective function for Bayesian optimisation of a linear-kernel SVM.
#
# Args:
#   logCost: natural log of the SVM cost; exp(logCost) is passed to svm().
#
# Returns:
#   A list with `Score`, the negated cross-validated misclassification rate
#   (negated because BayesianOptimization() maximises the score), and `pred`,
#   a placeholder 0.
#   NOTE(review): the rBayesianOptimization documentation names the second
#   component `Pred`; confirm whether lowercase `pred` is intentional.
#
# Reads the global `train.set`, which must contain the columns Group, PC1,
# PC2 and `fold`. The folds are taken from the distinct values of
# `train.set$fold` rather than being hard-coded as 1:5.
svm_fit_bayes <- function(logCost) {
  fold.ids <- sort(unique(train.set$fold))
  if (length(fold.ids) < 2) {
    stop("train.set$fold must define at least two folds", call. = FALSE)
  }
  n.total <- nrow(train.set)

  # One weighted error per fold: the fold's misclassification rate times the
  # fraction of the training rows that fall in that fold.
  weighted.errors <- vapply(fold.ids, function(fold.id) {
    in.fold <- train.set$fold == fold.id
    valid.data <- train.set[which(in.fold), , drop = FALSE]
    train.data <- train.set[which(!in.fold), , drop = FALSE]

    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "linear",
                  cost = exp(logCost), scale = FALSE)
    svm.predy <- predict(svmfit, valid.data)

    fold.error <- mean(valid.data$Group != svm.predy)
    (nrow(valid.data) / n.total) * fold.error
  }, numeric(1))

  list(Score = -sum(weighted.errors), pred = 0)
}

# Fix the RNG seed so the optimisation run is reproducible.
set.seed(234)

# Optimise logCost over [-5, 20] for the linear-kernel SVM: 50 initial points
# followed by 20 optimisation rounds, using the upper confidence bound
# ("ucb") acquisition function with kappa = 2.576.
OPT_Res <- BayesianOptimization(
  FUN = svm_fit_bayes,
  bounds = list(logCost = c(-5, 20)),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 5.13   Round = 1   logCost = 13.6405   Value = -0.0186 
## elapsed = 5.50   Round = 2   logCost = 14.5428   Value = -0.0186 
## elapsed = 0.04   Round = 3   logCost = -4.4991   Value = -0.0254 
## elapsed = 5.47   Round = 4   logCost = 14.4021   Value = -0.0186 
## elapsed = 0.04   Round = 5   logCost = -3.3272   Value = -0.0153 
## elapsed = 2.41   Round = 6   logCost = 11.1199   Value = -0.0203 
## elapsed = 6.55   Round = 7   logCost = 18.2346   Value = -0.1034 
## elapsed = 4.53   Round = 8   logCost = 12.9411   Value = -0.0186 
## elapsed = 6.62   Round = 9   logCost = 18.1934   Value = -0.1034 
## elapsed = 0.05   Round = 10  logCost = 2.1058    Value = -0.0203 
## elapsed = 0.45   Round = 11  logCost = 8.8931    Value = -0.0203 
## elapsed = 0.42   Round = 12  logCost = 8.6925    Value = -0.0203 
## elapsed = 1.24   Round = 13  logCost = 9.5712    Value = -0.0220 
## elapsed = 1.24   Round = 14  logCost = 9.5747    Value = -0.0220 
## elapsed = 0.04   Round = 15  logCost = -4.9700   Value = -0.0220 
## elapsed = 0.36   Round = 16  logCost = 6.0279    Value = -0.0203 
## elapsed = 0.06   Round = 17  logCost = 2.8288    Value = -0.0203 
## elapsed = 5.10   Round = 18  logCost = 13.5004   Value = -0.0186 
## elapsed = 0.05   Round = 19  logCost = -1.5418   Value = -0.0186 
## elapsed = 6.34   Round = 20  logCost = 16.7944   Value = -0.1034 
## elapsed = 0.41   Round = 21  logCost = 8.0767    Value = -0.0203 
## elapsed = 1.41   Round = 22  logCost = 9.4776    Value = -0.0220 
## elapsed = 6.32   Round = 23  logCost = 16.6300   Value = -0.1034 
## elapsed = 1.64   Round = 24  logCost = 10.4356   Value = -0.0203 
## elapsed = 0.39   Round = 25  logCost = 7.2445    Value = -0.0203 
## elapsed = 0.08   Round = 26  logCost = 4.3700    Value = -0.0203 
## elapsed = 4.82   Round = 27  logCost = 12.4167   Value = -0.0186 
## elapsed = 0.04   Round = 28  logCost = -0.2072   Value = -0.0186 
## elapsed = 6.13   Round = 29  logCost = 15.9732   Value = -0.0186 
## elapsed = 6.48   Round = 30  logCost = 17.0627   Value = -0.1034 
## elapsed = 1.90   Round = 31  logCost = 10.4807   Value = -0.0203 
## elapsed = 0.04   Round = 32  logCost = 1.3078    Value = -0.0203 
## elapsed = 0.04   Round = 33  logCost = -0.5958   Value = -0.0186 
## elapsed = 4.74   Round = 34  logCost = 12.6207   Value = -0.0186 
## elapsed = 0.43   Round = 35  logCost = 8.3371    Value = -0.0203 
## elapsed = 4.31   Round = 36  logCost = 12.2033   Value = -0.0186 
## elapsed = 4.71   Round = 37  logCost = 12.5207   Value = -0.0186 
## elapsed = 0.05   Round = 38  logCost = -1.1632   Value = -0.0169 
## elapsed = 0.36   Round = 39  logCost = 7.6583    Value = -0.0203 
## elapsed = 0.07   Round = 40  logCost = 3.8680    Value = -0.0203 
## elapsed = 1.47   Round = 41  logCost = 9.5338    Value = -0.0220 
## elapsed = 6.56   Round = 42  logCost = 17.6858   Value = -0.1034 
## elapsed = 6.11   Round = 43  logCost = 16.1313   Value = -0.0186 
## elapsed = 0.04   Round = 44  logCost = 0.8071    Value = -0.0203 
## elapsed = 2.46   Round = 45  logCost = 11.4682   Value = -0.0186 
## elapsed = 5.34   Round = 46  logCost = 14.4283   Value = -0.0186 
## elapsed = 0.04   Round = 47  logCost = 1.0083    Value = -0.0203 
## elapsed = 2.65   Round = 48  logCost = 10.7092   Value = -0.0203 
## elapsed = 1.14   Round = 49  logCost = 9.8284    Value = -0.0220 
## elapsed = 0.04   Round = 50  logCost = -1.8423   Value = -0.0186 
## elapsed = 1.76   Round = 51  logCost = 10.4467   Value = -0.0203 
## elapsed = 2.44   Round = 52  logCost = 11.3556   Value = -0.0186 
## elapsed = 4.87   Round = 53  logCost = 12.5493   Value = -0.0186 
## elapsed = 0.04   Round = 54  logCost = -2.0199   Value = -0.0186 
## elapsed = 2.94   Round = 55  logCost = 11.3426   Value = -0.0186 
## elapsed = 2.59   Round = 56  logCost = 11.5541   Value = -0.0203 
## elapsed = 4.85   Round = 57  logCost = 13.1535   Value = -0.0186 
## elapsed = 0.04   Round = 58  logCost = -2.8344   Value = -0.0169 
## elapsed = 0.04   Round = 59  logCost = -2.3143   Value = -0.0169 
## elapsed = 2.76   Round = 60  logCost = 11.1460   Value = -0.0203 
## elapsed = 3.36   Round = 61  logCost = 11.9674   Value = -0.0186 
## elapsed = 0.04   Round = 62  logCost = -1.8601   Value = -0.0186 
## elapsed = 1.86   Round = 63  logCost = 10.2196   Value = -0.0203 
## elapsed = 2.41   Round = 64  logCost = 10.8603   Value = -0.0203 
## elapsed = 3.15   Round = 65  logCost = 11.7292   Value = -0.0186 
## elapsed = 2.54   Round = 66  logCost = 11.3742   Value = -0.0186 
## elapsed = 0.04   Round = 67  logCost = -2.1433   Value = -0.0169 
## elapsed = 0.04   Round = 68  logCost = -3.6650   Value = -0.0220 
## elapsed = 0.04   Round = 69  logCost = -1.0259   Value = -0.0186 
## elapsed = 2.40   Round = 70  logCost = 11.1258   Value = -0.0203 
## 
##  Best Parameters Found: 
## Round = 5    logCost = -3.3272   Value = -0.0153
  OPT_Res$Best_Par
##   logCost 
## -3.327248
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 0.03589176
  # Re-run the cross-validation at the best cost found, as a check on the
  # score reported by the optimiser. The cost is read from OPT_Res instead of
  # being copied by hand from the printed output, so it cannot go stale when
  # the optimisation is re-run.
  best.cost <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))

  # Folds come from the data rather than a hard-coded 1:5, and the result
  # vector is preallocated instead of being grown inside the loop.
  fold.ids <- sort(unique(train.set$fold))
  CV.error <- numeric(length(fold.ids))

  for (k in seq_along(fold.ids)) {
    i <- fold.ids[k]
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)

    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "linear",
                  cost = best.cost, scale = FALSE)

    svm.y <- valid.data$Group

    svm.predy <- predict(svmfit, valid.data)

    ith.test.error <- mean(svm.y != svm.predy)

    # Weight each fold's error by its share of the training rows.
    CV.error[k] <- (nrow(valid.data) / nrow(train.set)) * ith.test.error
  }

  sum(CV.error)
## [1] 0.01525424

3.2 SVM polynomial kernel

# Objective function for Bayesian optimisation of a polynomial-kernel SVM.
#
# Args:
#   logCost:  natural log of the SVM cost; exp(logCost) is passed to svm().
#   logGamma: natural log of the kernel gamma; exp(logGamma) is passed to svm().
#   Degree:   polynomial degree, passed to svm() unchanged.
#
# Returns:
#   A list with `Score`, the negated cross-validated misclassification rate
#   (negated because BayesianOptimization() maximises the score), and `pred`,
#   a placeholder 0.
#   NOTE(review): the rBayesianOptimization documentation names the second
#   component `Pred`; confirm whether lowercase `pred` is intentional.
#
# Reads the global `train.set`, which must contain the columns Group, PC1,
# PC2 and `fold`. The folds are taken from the distinct values of
# `train.set$fold` rather than being hard-coded as 1:5.
svm_fit_bayes <- function(logCost, logGamma, Degree) {
  fold.ids <- sort(unique(train.set$fold))
  if (length(fold.ids) < 2) {
    stop("train.set$fold must define at least two folds", call. = FALSE)
  }
  n.total <- nrow(train.set)

  # One weighted error per fold: the fold's misclassification rate times the
  # fraction of the training rows that fall in that fold.
  weighted.errors <- vapply(fold.ids, function(fold.id) {
    in.fold <- train.set$fold == fold.id
    valid.data <- train.set[which(in.fold), , drop = FALSE]
    train.data <- train.set[which(!in.fold), , drop = FALSE]

    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "polynomial",
                  cost = exp(logCost), gamma = exp(logGamma), degree = Degree)
    svm.predy <- predict(svmfit, valid.data)

    fold.error <- mean(valid.data$Group != svm.predy)
    (nrow(valid.data) / n.total) * fold.error
  }, numeric(1))

  list(Score = -sum(weighted.errors), pred = 0)
}

# Fix the RNG seed so the optimisation run is reproducible.
set.seed(234)

# Optimise logCost, logGamma and the (integer) Degree for the polynomial-kernel
# SVM: 50 initial points followed by 20 optimisation rounds, using the upper
# confidence bound ("ucb") acquisition function with kappa = 2.576.
OPT_Res <- BayesianOptimization(
  FUN = svm_fit_bayes,
  bounds = list(
    logCost = c(-5, 20),
    logGamma = c(-9, -0.75),
    Degree = c(1L, 5L)
  ),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 0.07   Round = 1   logCost = 13.6405   logGamma = -3.7612  Degree = 4.0000 Value = -0.1627 
## elapsed = 0.12   Round = 2   logCost = 14.5428   logGamma = -5.8652  Degree = 4.0000 Value = -0.5305 
## elapsed = 0.11   Round = 3   logCost = -4.4991   logGamma = -7.7302  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.06   Round = 4   logCost = 14.4021   logGamma = -2.3362  Degree = 3.0000 Value = -0.0153 
## elapsed = 0.11   Round = 5   logCost = -3.3272   logGamma = -8.3482  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.12   Round = 6   logCost = 11.1199   logGamma = -7.8096  Degree = 5.0000 Value = -0.6678 
## elapsed = 0.06   Round = 7   logCost = 18.2346   logGamma = -8.2880  Degree = 2.0000 Value = -0.0915 
## elapsed = 0.06   Round = 8   logCost = 12.9411   logGamma = -5.1917  Degree = 2.0000 Value = -0.0831 
## elapsed = 15.71  Round = 9   logCost = 18.1934   logGamma = -2.3282  Degree = 2.0000 Value = -0.0729 
## elapsed = 0.10   Round = 10  logCost = 2.1058    logGamma = -3.3696  Degree = 3.0000 Value = -0.5712 
## elapsed = 0.11   Round = 11  logCost = 8.8931    logGamma = -5.9625  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.08   Round = 12  logCost = 8.6925    logGamma = -4.9629  Degree = 2.0000 Value = -0.1407 
## elapsed = 0.10   Round = 13  logCost = 9.5712    logGamma = -6.8974  Degree = 3.0000 Value = -0.6678 
## elapsed = 0.05   Round = 14  logCost = 9.5747    logGamma = -1.8662  Degree = 3.0000 Value = -0.0271 
## elapsed = 0.11   Round = 15  logCost = -4.9700   logGamma = -5.2061  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.06   Round = 16  logCost = 6.0279    logGamma = -5.7480  Degree = 1.0000 Value = -0.0542 
## elapsed = 0.09   Round = 17  logCost = 2.8288    logGamma = -2.7361  Degree = 2.0000 Value = -0.2186 
## elapsed = 0.07   Round = 18  logCost = 13.5004   logGamma = -4.8409  Degree = 2.0000 Value = -0.0763 
## elapsed = 0.12   Round = 19  logCost = -1.5418   logGamma = -3.4781  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.05   Round = 20  logCost = 16.7944   logGamma = -4.0321  Degree = 3.0000 Value = -0.0271 
## elapsed = 0.08   Round = 21  logCost = 8.0767    logGamma = -1.3042  Degree = 4.0000 Value = -0.0814 
## elapsed = 0.11   Round = 22  logCost = 9.4776    logGamma = -6.1408  Degree = 3.0000 Value = -0.6678 
## elapsed = 0.05   Round = 23  logCost = 16.6300   logGamma = -4.1688  Degree = 3.0000 Value = -0.0271 
## elapsed = 0.10   Round = 24  logCost = 10.4356   logGamma = -6.4218  Degree = 3.0000 Value = -0.6678 
## elapsed = 0.12   Round = 25  logCost = 7.2445    logGamma = -8.4438  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.11   Round = 26  logCost = 4.3700    logGamma = -5.8644  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.10   Round = 27  logCost = 12.4167   logGamma = -4.4647  Degree = 4.0000 Value = -0.3356 
## elapsed = 0.11   Round = 28  logCost = -0.2072   logGamma = -5.7881  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.07   Round = 29  logCost = 15.9732   logGamma = -7.7657  Degree = 2.0000 Value = -0.0932 
## elapsed = 0.06   Round = 30  logCost = 17.0627   logGamma = -7.9927  Degree = 2.0000 Value = -0.0966 
## elapsed = 0.06   Round = 31  logCost = 10.4807   logGamma = -4.1554  Degree = 3.0000 Value = -0.1153 
## elapsed = 0.12   Round = 32  logCost = 1.3078    logGamma = -4.5781  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.11   Round = 33  logCost = -0.5958   logGamma = -6.2720  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.06   Round = 34  logCost = 12.6207   logGamma = -2.0301  Degree = 5.0000 Value = -0.0508 
## elapsed = 0.05   Round = 35  logCost = 8.3371    logGamma = -1.3244  Degree = 3.0000 Value = -0.0271 
## elapsed = 0.09   Round = 36  logCost = 12.2033   logGamma = -3.6795  Degree = 5.0000 Value = -0.3102 
## elapsed = 0.11   Round = 37  logCost = 12.5207   logGamma = -6.6158  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.09   Round = 38  logCost = -1.1632   logGamma = -1.1124  Degree = 2.0000 Value = -0.2542 
## elapsed = 0.11   Round = 39  logCost = 7.6583    logGamma = -6.2656  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.07   Round = 40  logCost = 3.8680    logGamma = -2.2080  Degree = 2.0000 Value = -0.1102 
## elapsed = 0.07   Round = 41  logCost = 9.5338    logGamma = -1.9580  Degree = 4.0000 Value = -0.0932 
## elapsed = 0.08   Round = 42  logCost = 17.6858   logGamma = -1.9475  Degree = 5.0000 Value = -0.0356 
## elapsed = 0.11   Round = 43  logCost = 16.1313   logGamma = -7.7448  Degree = 3.0000 Value = -0.5034 
## elapsed = 0.11   Round = 44  logCost = 0.8071    logGamma = -7.7905  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.12   Round = 45  logCost = 11.4682   logGamma = -4.7437  Degree = 5.0000 Value = -0.6593 
## elapsed = 0.18   Round = 46  logCost = 14.4283   logGamma = -0.8029  Degree = 5.0000 Value = -0.0322 
## elapsed = 0.10   Round = 47  logCost = 1.0083    logGamma = -5.9347  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.07   Round = 48  logCost = 10.7092   logGamma = -5.9287  Degree = 2.0000 Value = -0.1373 
## elapsed = 0.06   Round = 49  logCost = 9.8284    logGamma = -3.3159  Degree = 2.0000 Value = -0.0746 
## elapsed = 0.11   Round = 50  logCost = -1.8423   logGamma = -4.7731  Degree = 3.0000 Value = -0.6678 
## elapsed = 0.05   Round = 51  logCost = 11.7216   logGamma = -7.0827  Degree = 1.0000 Value = -0.0237 
## elapsed = 0.05   Round = 52  logCost = 9.8926    logGamma = -4.7655  Degree = 1.0000 Value = -0.0153 
## elapsed = 0.11   Round = 53  logCost = 11.9598   logGamma = -9.0000  Degree = 2.0000 Value = -0.5576 
## elapsed = 0.19   Round = 54  logCost = 20.0000   logGamma = -7.7977  Degree = 1.0000 Value = -0.0203 
## elapsed = 0.27   Round = 55  logCost = 20.0000   logGamma = -6.0442  Degree = 2.0000 Value = -0.0780 
## elapsed = 22.35  Round = 56  logCost = 16.7543   logGamma = -0.7500  Degree = 4.0000 Value = -0.1051 
## elapsed = 11.51  Round = 57  logCost = 14.3533   logGamma = -0.7500  Degree = 2.0000 Value = -0.0712 
## elapsed = 0.06   Round = 58  logCost = 16.3753   logGamma = -6.5310  Degree = 1.0000 Value = -0.0203 
## elapsed = 6.50   Round = 59  logCost = 19.9506   logGamma = -0.7500  Degree = 1.0000 Value = -0.0203 
## elapsed = 62.46  Round = 60  logCost = 20.0000   logGamma = -0.7500  Degree = 2.0000 Value = -0.1220 
## elapsed = 4.98   Round = 61  logCost = 20.0000   logGamma = -2.9799  Degree = 1.0000 Value = -0.0424 
## elapsed = 0.44   Round = 62  logCost = 16.0386   logGamma = -1.6832  Degree = 1.0000 Value = -0.0186 
## elapsed = 7.56   Round = 63  logCost = 11.8412   logGamma = -0.7500  Degree = 4.0000 Value = -0.0746 
## elapsed = 0.06   Round = 64  logCost = 3.2181    logGamma = -1.6990  Degree = 1.0000 Value = -0.0441 
## elapsed = 0.06   Round = 65  logCost = 8.6984    logGamma = -0.7500  Degree = 1.0000 Value = -0.0186 
## elapsed = 0.97   Round = 66  logCost = 20.0000   logGamma = -0.7500  Degree = 5.0000 Value = -0.0271 
## elapsed = 0.06   Round = 67  logCost = 5.9144    logGamma = -3.3240  Degree = 1.0000 Value = -0.0288 
## elapsed = 0.10   Round = 68  logCost = 20.0000   logGamma = -9.0000  Degree = 1.0000 Value = -0.0203 
## elapsed = 0.06   Round = 69  logCost = 2.4833    logGamma = -0.7500  Degree = 5.0000 Value = -0.1220 
## elapsed = 0.13   Round = 70  logCost = -5.0000   logGamma = -0.7500  Degree = 5.0000 Value = -0.4661 
## 
##  Best Parameters Found: 
## Round = 4    logCost = 14.4021   logGamma = -2.3362  Degree = 3.0000 Value = -0.0153
  OPT_Res$Best_Par
##   logCost  logGamma    Degree 
## 14.402135 -2.336219  3.000000
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 1797909
  as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.09669255
  # Re-run the cross-validation at the best parameters found, as a check on
  # the score reported by the optimiser. The parameters are read from OPT_Res
  # instead of being copied by hand from the printed output, so they cannot go
  # stale when the optimisation is re-run.
  best.cost <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))
  best.gamma <- as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
  best.degree <- as.numeric(OPT_Res$Best_Par["Degree"])

  # Folds come from the data rather than a hard-coded 1:5, and the result
  # vector is preallocated instead of being grown inside the loop.
  fold.ids <- sort(unique(train.set$fold))
  CV.error <- numeric(length(fold.ids))

  for (k in seq_along(fold.ids)) {
    i <- fold.ids[k]
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)

    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "polynomial",
                  cost = best.cost,
                  gamma = best.gamma,
                  degree = best.degree)

    svm.y <- valid.data$Group

    svm.predy <- predict(svmfit, valid.data)

    ith.test.error <- mean(svm.y != svm.predy)

    # Weight each fold's error by its share of the training rows.
    CV.error[k] <- (nrow(valid.data) / nrow(train.set)) * ith.test.error
  }

  sum(CV.error)
## [1] 0.01525424

3.3 SVM Radial Kernel

# Objective function for Bayesian optimisation of a radial-kernel SVM.
#
# Args:
#   logCost:  natural log of the SVM cost; exp(logCost) is passed to svm().
#   logGamma: natural log of the kernel gamma; exp(logGamma) is passed to svm().
#
# Returns:
#   A list with `Score`, the negated cross-validated misclassification rate
#   (negated because BayesianOptimization() maximises the score), and `pred`,
#   a placeholder 0.
#   NOTE(review): the rBayesianOptimization documentation names the second
#   component `Pred`; confirm whether lowercase `pred` is intentional.
#
# Reads the global `train.set`, which must contain the columns Group, PC1,
# PC2 and `fold`. The folds are taken from the distinct values of
# `train.set$fold` rather than being hard-coded as 1:5.
svm_fit_bayes <- function(logCost, logGamma) {
  fold.ids <- sort(unique(train.set$fold))
  if (length(fold.ids) < 2) {
    stop("train.set$fold must define at least two folds", call. = FALSE)
  }
  n.total <- nrow(train.set)

  # One weighted error per fold: the fold's misclassification rate times the
  # fraction of the training rows that fall in that fold.
  weighted.errors <- vapply(fold.ids, function(fold.id) {
    in.fold <- train.set$fold == fold.id
    valid.data <- train.set[which(in.fold), , drop = FALSE]
    train.data <- train.set[which(!in.fold), , drop = FALSE]

    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "radial",
                  cost = exp(logCost), gamma = exp(logGamma))
    svm.predy <- predict(svmfit, valid.data)

    fold.error <- mean(valid.data$Group != svm.predy)
    (nrow(valid.data) / n.total) * fold.error
  }, numeric(1))

  list(Score = -sum(weighted.errors), pred = 0)
}

# Fix the RNG seed so the optimisation run is reproducible.
set.seed(234)

# Optimise logCost and logGamma for the radial-kernel SVM: 50 initial points
# followed by 20 optimisation rounds, using the upper confidence bound
# ("ucb") acquisition function with kappa = 2.576.
OPT_Res <- BayesianOptimization(
  FUN = svm_fit_bayes,
  bounds = list(
    logCost = c(-5, 20),
    logGamma = c(-9, -0.75)
  ),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 0.09   Round = 1   logCost = 13.6405   logGamma = -3.7612  Value = -0.0186 
## elapsed = 0.09   Round = 2   logCost = 14.5428   logGamma = -5.8652  Value = -0.0203 
## elapsed = 0.14   Round = 3   logCost = -4.4991   logGamma = -7.7302  Value = -0.6678 
## elapsed = 0.10   Round = 4   logCost = 14.4021   logGamma = -2.3362  Value = -0.0203 
## elapsed = 0.17   Round = 5   logCost = -3.3272   logGamma = -8.3482  Value = -0.6678 
## elapsed = 0.08   Round = 6   logCost = 11.1199   logGamma = -7.8096  Value = -0.0254 
## elapsed = 0.08   Round = 7   logCost = 18.2346   logGamma = -8.2880  Value = -0.0220 
## elapsed = 0.11   Round = 8   logCost = 12.9411   logGamma = -5.1917  Value = -0.0186 
## elapsed = 0.57   Round = 9   logCost = 18.1934   logGamma = -2.3282  Value = -0.0237 
## elapsed = 0.11   Round = 10  logCost = 2.1058    logGamma = -3.3696  Value = -0.0644 
## elapsed = 0.09   Round = 11  logCost = 8.8931    logGamma = -5.9625  Value = -0.0254 
## elapsed = 0.05   Round = 12  logCost = 8.6925    logGamma = -4.9629  Value = -0.0237 
## elapsed = 0.06   Round = 13  logCost = 9.5712    logGamma = -6.8974  Value = -0.0288 
## elapsed = 0.06   Round = 14  logCost = 9.5747    logGamma = -1.8662  Value = -0.0186 
## elapsed = 0.13   Round = 15  logCost = -4.9700   logGamma = -5.2061  Value = -0.6678 
## elapsed = 0.07   Round = 16  logCost = 6.0279    logGamma = -5.7480  Value = -0.0475 
## elapsed = 0.07   Round = 17  logCost = 2.8288    logGamma = -2.7361  Value = -0.0525 
## elapsed = 0.06   Round = 18  logCost = 13.5004   logGamma = -4.8409  Value = -0.0186 
## elapsed = 0.13   Round = 19  logCost = -1.5418   logGamma = -3.4781  Value = -0.4729 
## elapsed = 0.11   Round = 20  logCost = 16.7944   logGamma = -4.0321  Value = -0.0203 
## elapsed = 0.05   Round = 21  logCost = 8.0767    logGamma = -1.3042  Value = -0.0186 
## elapsed = 0.05   Round = 22  logCost = 9.4776    logGamma = -6.1408  Value = -0.0254 
## elapsed = 0.11   Round = 23  logCost = 16.6300   logGamma = -4.1688  Value = -0.0169 
## elapsed = 0.06   Round = 24  logCost = 10.4356   logGamma = -6.4218  Value = -0.0237 
## elapsed = 0.08   Round = 25  logCost = 7.2445    logGamma = -8.4438  Value = -0.0593 
## elapsed = 0.08   Round = 26  logCost = 4.3700    logGamma = -5.8644  Value = -0.0746 
## elapsed = 0.05   Round = 27  logCost = 12.4167   logGamma = -4.4647  Value = -0.0203 
## elapsed = 0.13   Round = 28  logCost = -0.2072   logGamma = -5.7881  Value = -0.5593 
## elapsed = 0.06   Round = 29  logCost = 15.9732   logGamma = -7.7657  Value = -0.0220 
## elapsed = 0.06   Round = 30  logCost = 17.0627   logGamma = -7.9927  Value = -0.0153 
## elapsed = 0.06   Round = 31  logCost = 10.4807   logGamma = -4.1554  Value = -0.0186 
## elapsed = 0.10   Round = 32  logCost = 1.3078    logGamma = -4.5781  Value = -0.1898 
## elapsed = 0.13   Round = 33  logCost = -0.5958   logGamma = -6.2720  Value = -0.6678 
## elapsed = 0.07   Round = 34  logCost = 12.6207   logGamma = -2.0301  Value = -0.0186 
## elapsed = 0.05   Round = 35  logCost = 8.3371    logGamma = -1.3244  Value = -0.0186 
## elapsed = 0.06   Round = 36  logCost = 12.2033   logGamma = -3.6795  Value = -0.0186 
## elapsed = 0.06   Round = 37  logCost = 12.5207   logGamma = -6.6158  Value = -0.0186 
## elapsed = 0.09   Round = 38  logCost = -1.1632   logGamma = -1.1124  Value = -0.0949 
## elapsed = 0.06   Round = 39  logCost = 7.6583    logGamma = -6.2656  Value = -0.0390 
## elapsed = 0.06   Round = 40  logCost = 3.8680    logGamma = -2.2080  Value = -0.0390 
## elapsed = 0.06   Round = 41  logCost = 9.5338    logGamma = -1.9580  Value = -0.0186 
## elapsed = 0.33   Round = 42  logCost = 17.6858   logGamma = -1.9475  Value = -0.0271 
## elapsed = 0.06   Round = 43  logCost = 16.1313   logGamma = -7.7448  Value = -0.0203 
## elapsed = 0.13   Round = 44  logCost = 0.8071    logGamma = -7.7905  Value = -0.6678 
## elapsed = 0.05   Round = 45  logCost = 11.4682   logGamma = -4.7437  Value = -0.0186 
## elapsed = 0.35   Round = 46  logCost = 14.4283   logGamma = -0.8029  Value = -0.0271 
## elapsed = 0.13   Round = 47  logCost = 1.0083    logGamma = -5.9347  Value = -0.4542 
## elapsed = 0.05   Round = 48  logCost = 10.7092   logGamma = -5.9287  Value = -0.0186 
## elapsed = 0.05   Round = 49  logCost = 9.8284    logGamma = -3.3159  Value = -0.0203 
## elapsed = 0.13   Round = 50  logCost = -1.8423   logGamma = -4.7731  Value = -0.6678 
## elapsed = 0.20   Round = 51  logCost = 20.0000   logGamma = -5.5360  Value = -0.0220 
## elapsed = 0.14   Round = 52  logCost = -5.0000   logGamma = -0.7500  Value = -0.6678 
## elapsed = 0.07   Round = 53  logCost = 1.0188    logGamma = -0.7500  Value = -0.0492 
## elapsed = 0.96   Round = 54  logCost = 20.0000   logGamma = -0.7500  Value = -0.0271 
## elapsed = 0.87   Round = 55  logCost = 20.0000   logGamma = -3.1638  Value = -0.0288 
## elapsed = 0.06   Round = 56  logCost = 4.4943    logGamma = -3.9107  Value = -0.0458 
## elapsed = 0.10   Round = 57  logCost = 9.1076    logGamma = -9.0000  Value = -0.0475 
## elapsed = 0.06   Round = 58  logCost = 4.7001    logGamma = -0.7500  Value = -0.0254 
## elapsed = 0.09   Round = 59  logCost = 18.2389   logGamma = -5.8728  Value = -0.0237 
## elapsed = 0.07   Round = 60  logCost = 20.0000   logGamma = -9.0000  Value = -0.0220 
## elapsed = 0.10   Round = 61  logCost = 20.0000   logGamma = -7.4594  Value = -0.0186 
## elapsed = 0.05   Round = 62  logCost = 13.5600   logGamma = -9.0000  Value = -0.0186 
## elapsed = 1.62   Round = 63  logCost = 18.2429   logGamma = -0.7500  Value = -0.0271 
## elapsed = 0.06   Round = 64  logCost = 16.5019   logGamma = -9.0000  Value = -0.0153 
## elapsed = 0.54   Round = 65  logCost = 16.0990   logGamma = -0.7500  Value = -0.0271 
## elapsed = 0.06   Round = 66  logCost = 6.3791    logGamma = -0.7500  Value = -0.0186 
## elapsed = 0.76   Round = 67  logCost = 20.0000   logGamma = -1.7383  Value = -0.0220 
## elapsed = 0.09   Round = 68  logCost = 0.5536    logGamma = -1.6880  Value = -0.0644 
## elapsed = 0.11   Round = 69  logCost = -0.2543   logGamma = -0.7500  Value = -0.0695 
## elapsed = 0.15   Round = 70  logCost = 4.4249    logGamma = -9.0000  Value = -0.4169 
## 
##  Best Parameters Found: 
## Round = 30   logCost = 17.0627   logGamma = -7.9927  Value = -0.0153
  OPT_Res$Best_Par
##   logCost  logGamma 
## 17.062727 -7.992727
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 25718660
  as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.0003379114
  # Re-run the cross-validation at the best parameters found, as a check on
  # the score reported by the optimiser. The parameters are read from OPT_Res
  # instead of being copied by hand from the printed output, so they cannot go
  # stale when the optimisation is re-run.
  best.cost <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))
  best.gamma <- as.numeric(exp(OPT_Res$Best_Par["logGamma"]))

  # Folds come from the data rather than a hard-coded 1:5, and the result
  # vector is preallocated instead of being grown inside the loop.
  fold.ids <- sort(unique(train.set$fold))
  CV.error <- numeric(length(fold.ids))

  for (k in seq_along(fold.ids)) {
    i <- fold.ids[k]
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)

    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "radial",
                  cost = best.cost,
                  gamma = best.gamma)

    svm.y <- valid.data$Group

    svm.predy <- predict(svmfit, valid.data)

    ith.test.error <- mean(svm.y != svm.predy)

    # Weight each fold's error by its share of the training rows.
    CV.error[k] <- (nrow(valid.data) / nrow(train.set)) * ith.test.error
  }

  sum(CV.error)
## [1] 0.01525424
# Plot the (logGamma, logCost) points evaluated during the radial-kernel
# optimisation held in OPT_Res. The coordinates are read from the optimiser's
# recorded history instead of being transcribed by hand from the console
# output, so the plot always matches the run that produced OPT_Res.
bo.history <- as.data.frame(OPT_Res$History)
logCost <- bo.history$logCost
logGamma <- bo.history$logGamma

# Not used by the plot below; kept in case later code refers to it.
Values <- c()

# The first `n.init` rounds are the initial points (init_points = 50 in the
# BayesianOptimization() call); the remaining rounds are the optimisation
# steps.
n.total <- length(logCost)
n.init <- min(50, n.total)
init.rows <- seq_len(n.init)
bo.rows <- setdiff(seq_len(n.total), init.rows)

init.points <- data.frame(logGamma = logGamma[init.rows],
                          logCost = logCost[init.rows])
bo.points <- data.frame(logGamma = logGamma[bo.rows],
                        logCost = logCost[bo.rows])

# One arrow from each optimisation round to the next one.
bo.steps <- data.frame(x = head(bo.points$logGamma, -1),
                       y = head(bo.points$logCost, -1),
                       xend = tail(bo.points$logGamma, -1),
                       yend = tail(bo.points$logCost, -1))

p <- ggplot() +
  geom_curve(data = bo.steps,
             aes(x = x, y = y, xend = xend, yend = yend),
             arrow = arrow(length = unit(0.03, "npc")), curvature = 0.15)

# Initial points in the default colour, optimisation rounds in red.
p + geom_point(data = init.points, aes(x = logGamma, y = logCost)) +
    geom_point(data = bo.points, aes(x = logGamma, y = logCost),
               colour = "red") +
    labs(x = "Log Gamma", y = "Log Cost", title = " B.O. Iterations")

4 Conclusion

Comparison between Bayesian Optimization and my sweep of hyperparameters.

SVM linear kernel:

SVM polynomial kernel:

SVM radial kernel: