1 Reading in the Data

# Load the ArcLake group summary and the Dundee lake data.
ArcLakeGroupSummary <- read_excel(
  "~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx"
)
dundeedata <- read_csv("~/Desktop/EPSRC Project /dundeedata.csv.xls")

# Rename the first column (GloboLID) to GloboLakes_ID so that both tables share
# the same key name, which makes the merge easier.
names(dundeedata)[1] <- "GloboLakes_ID"

# Full outer join on the lake id, then keep only rows that carry a group label.
# The data set is back to the original 732 rows just with extra columns of
# information.
Data <- merge(ArcLakeGroupSummary, dundeedata, by = "GloboLakes_ID", all = TRUE)
Data <- subset(Data, !is.na(Group) & Group != "NA")

Data$Group <- as.factor(Data$Group)

2 For Longitude + Latitude + OverallAvg

In order to use each model, I prepare a suitable data frame - splitting it into training and test sets and then splitting the training set into 5 folds.

# Keep only the response and the three predictors used by every model below.
Data2 <- data.frame(
  Data[, c("Group", "Latitude", "Longitude", "OverallAvg")]
)

# Stratify the entire data set into training (80%) and test (20%) sets

set.seed(234)

library(caret)
train.index <- createDataPartition(Data2$Group, p = 0.8, list = FALSE)
train.set <- Data2[train.index, ]
test.set <- Data2[-train.index, ]

# Stratify the training set into 5 folds; the fold ids are stored as a column
# so each cross-validation split can be selected with `fold == i`.

folds <- createFolds(y = factor(train.set$Group), k = 5, list = FALSE)
train.set$fold <- folds

3 Bayesian Optimization

Apart from trying every possible combination, or running a hand-picked sweep of values that we think would perform well, there are three main ways of choosing hyperparameters: Grid Search, Random Search and Bayesian Optimization.

The upper confidence bound was used as the acquisition function.

The log of cost and gamma were used for reasons of scale.

3.1 SVM Linear Kernel

#linear

#' Cross-validated objective for the linear-kernel SVM.
#'
#' For every fold id found in the global `train.set$fold`, fits `svm()` with a
#' linear kernel on the remaining folds and measures the misclassification rate
#' on the held-out fold.
#'
#' @param logCost Natural log of the SVM cost; `exp(logCost)` is passed to
#'   `svm()` as `cost`.
#' @return A list with `Score`, the negated sum of the per-fold error rates
#'   weighted by each fold's share of `train.set` (negated because
#'   `BayesianOptimization()` maximises its objective), and `Pred`, a
#'   placeholder of 0 because no predictions are kept.
svm_fit_bayes <- function(logCost) {
  # Use the fold ids actually present instead of a hard-coded 1:5, so the
  # objective stays correct if the number of folds is changed upstream.
  fold_ids <- sort(unique(train.set$fold))
  n_train <- nrow(train.set)

  weighted_errors <- vapply(fold_ids, function(k) {
    held_out <- subset(train.set, fold == k)
    fit_rows <- subset(train.set, fold != k)

    # NOTE(review): scale = FALSE feeds the raw predictors to the solver; the
    # refit at the optimum uses the same setting, so it is kept here.
    fit <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = fit_rows,
      kernel = "linear",
      cost = exp(logCost),
      scale = FALSE
    )

    fold_error <- mean(held_out$Group != predict(fit, held_out))

    # Weight by the fold's share of the training rows.
    (nrow(held_out) / n_train) * fold_error
  }, numeric(1))

  # The optimiser expects the components to be named exactly `Score` and
  # `Pred`; a lower-case `pred` is not picked up.
  list(Score = -sum(weighted_errors), Pred = 0)
}

# Fix the RNG so the random initial design is reproducible.
set.seed(234)

# 50 random initial points followed by 20 Bayesian-optimisation iterations,
# using the upper confidence bound as the acquisition function.
OPT_Res <- BayesianOptimization(
  svm_fit_bayes,
  bounds = list(logCost = c(-5, 20)),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 21.40  Round = 1   logCost = 13.6405   Value = -0.1119 
## elapsed = 22.29  Round = 2   logCost = 14.5428   Value = -0.1102 
## elapsed = 0.16   Round = 3   logCost = -4.4991   Value = -0.1102 
## elapsed = 21.75  Round = 4   logCost = 14.4021   Value = -0.1102 
## elapsed = 0.16   Round = 5   logCost = -3.3272   Value = -0.0746 
## elapsed = 1332.70    Round = 6   logCost = 11.1199   Value = -0.1475 
## elapsed = 29.29  Round = 7   logCost = 18.2346   Value = -0.1305 
## elapsed = 22.46  Round = 8   logCost = 12.9411   Value = -0.1271 
## elapsed = 26.16  Round = 9   logCost = 18.1934   Value = -0.1220 
## elapsed = 1.38   Round = 10  logCost = 2.1058    Value = -0.0576 
## elapsed = 19.01  Round = 11  logCost = 8.8931    Value = -0.0661 
## elapsed = 19.25  Round = 12  logCost = 8.6925    Value = -0.0797 
## elapsed = 19.98  Round = 13  logCost = 9.5712    Value = -0.0627 
## elapsed = 19.77  Round = 14  logCost = 9.5747    Value = -0.0712 
## elapsed = 0.13   Round = 15  logCost = -4.9700   Value = -0.1390 
## elapsed = 12.41  Round = 16  logCost = 6.0279    Value = -0.1220 
## elapsed = 1.87   Round = 17  logCost = 2.8288    Value = -0.0593 
## elapsed = 22.43  Round = 18  logCost = 13.5004   Value = -0.1339 
## elapsed = 0.19   Round = 19  logCost = -1.5418   Value = -0.0644 
## elapsed = 25.39  Round = 20  logCost = 16.7944   Value = -0.1169 
## elapsed = 19.75  Round = 21  logCost = 8.0767    Value = -0.1542 
## elapsed = 20.70  Round = 22  logCost = 9.4776    Value = -0.0610 
## elapsed = 25.95  Round = 23  logCost = 16.6300   Value = -0.1271 
## elapsed = 20.38  Round = 24  logCost = 10.4356   Value = -0.1254 
## elapsed = 16.29  Round = 25  logCost = 7.2445    Value = -0.0729 
## elapsed = 6.38   Round = 26  logCost = 4.3700    Value = -0.0576 
## elapsed = 22.07  Round = 27  logCost = 12.4167   Value = -0.1153 
## elapsed = 0.41   Round = 28  logCost = -0.2072   Value = -0.0610 
## elapsed = 24.95  Round = 29  logCost = 15.9732   Value = -0.1153 
## elapsed = 25.74  Round = 30  logCost = 17.0627   Value = -0.1169 
## elapsed = 20.28  Round = 31  logCost = 10.4807   Value = -0.1153 
## elapsed = 1.60   Round = 32  logCost = 1.3078    Value = -0.0593 
## elapsed = 0.28   Round = 33  logCost = -0.5958   Value = -0.0610 
## elapsed = 22.02  Round = 34  logCost = 12.6207   Value = -0.1254 
## elapsed = 18.17  Round = 35  logCost = 8.3371    Value = -0.1390 
## elapsed = 21.29  Round = 36  logCost = 12.2033   Value = -0.1356 
## elapsed = 22.11  Round = 37  logCost = 12.5207   Value = -0.1271 
## elapsed = 0.32   Round = 38  logCost = -1.1632   Value = -0.0627 
## elapsed = 19.30  Round = 39  logCost = 7.6583    Value = -0.1305 
## elapsed = 4.71   Round = 40  logCost = 3.8680    Value = -0.0627 
## elapsed = 19.73  Round = 41  logCost = 9.5338    Value = -0.0661 
## elapsed = 26.63  Round = 42  logCost = 17.6858   Value = -0.1237 
## elapsed = 25.13  Round = 43  logCost = 16.1313   Value = -0.1254 
## elapsed = 0.71   Round = 44  logCost = 0.8071    Value = -0.0576 
## elapsed = 20.60  Round = 45  logCost = 11.4682   Value = -0.1593 
## elapsed = 23.55  Round = 46  logCost = 14.4283   Value = -0.1186 
## elapsed = 0.97   Round = 47  logCost = 1.0083    Value = -0.0576 
## elapsed = 19.83  Round = 48  logCost = 10.7092   Value = -0.1695 
## elapsed = 20.19  Round = 49  logCost = 9.8284    Value = -0.0678 
## elapsed = 0.20   Round = 50  logCost = -1.8423   Value = -0.0627 
## elapsed = 0.18   Round = 51  logCost = -2.1029   Value = -0.0661 
## elapsed = 0.15   Round = 52  logCost = -3.9821   Value = -0.0864 
## elapsed = 0.14   Round = 53  logCost = -4.2291   Value = -0.0932 
## elapsed = 0.17   Round = 54  logCost = -1.9667   Value = -0.0661 
## elapsed = 0.26   Round = 55  logCost = -1.6643   Value = -0.0644 
## elapsed = 0.13   Round = 56  logCost = -5.0000   Value = -0.1373 
## elapsed = 1.45   Round = 57  logCost = 1.3483    Value = -0.0593 
## elapsed = 0.91   Round = 58  logCost = 0.9726    Value = -0.0576 
## elapsed = 0.53   Round = 59  logCost = 0.1759    Value = -0.0593 
## elapsed = 1.00   Round = 60  logCost = 0.5298    Value = -0.0576 
## elapsed = 1.50   Round = 61  logCost = 2.6920    Value = -0.0559 
## elapsed = 0.53   Round = 62  logCost = 0.1837    Value = -0.0593 
## elapsed = 1.33   Round = 63  logCost = 1.6624    Value = -0.0559 
## elapsed = 1.18   Round = 64  logCost = 1.2573    Value = -0.0610 
## elapsed = 0.68   Round = 65  logCost = 0.8416    Value = -0.0576 
## elapsed = 0.79   Round = 66  logCost = 1.0708    Value = -0.0576 
## elapsed = 0.70   Round = 67  logCost = 0.7018    Value = -0.0593 
## elapsed = 1.13   Round = 68  logCost = 2.0346    Value = -0.0576 
## elapsed = 0.33   Round = 69  logCost = 0.0781    Value = -0.0593 
## elapsed = 0.92   Round = 70  logCost = 1.0846    Value = -0.0610 
## 
##  Best Parameters Found: 
## Round = 61   logCost = 2.6920    Value = -0.0559
  OPT_Res$Best_Par
##  logCost 
## 2.692035
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 14.76168
  # Re-run the 5-fold cross-validation at the selected cost to confirm the
  # score reported by the optimiser.
  CV.error <- NULL

  for (i in seq_len(5)) {
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)

    svmfit <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = train.data,
      kernel = "linear",
      cost = exp(OPT_Res$Best_Par["logCost"]),
      scale = FALSE
    )

    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)

    # Misclassification rate on the held-out fold, weighted by the fold's
    # share of the training rows.
    ith.test.error <- mean(svm.y != svm.predy)
    CV.error <- c(
      CV.error,
      (nrow(valid.data) / nrow(train.set)) * ith.test.error
    )
  }

  sum(CV.error)
## [1] 0.0559322

3.2 SVM polynomial kernel

#' Cross-validated objective for the polynomial-kernel SVM.
#'
#' For every fold id found in the global `train.set$fold`, fits `svm()` with a
#' polynomial kernel on the remaining folds and measures the misclassification
#' rate on the held-out fold.
#'
#' @param logCost Natural log of the SVM cost; `exp(logCost)` is passed to
#'   `svm()` as `cost`.
#' @param logGamma Natural log of the kernel gamma; `exp(logGamma)` is passed
#'   to `svm()` as `gamma`.
#' @param Degree Polynomial degree, passed to `svm()` as `degree`.
#' @return A list with `Score`, the negated sum of the per-fold error rates
#'   weighted by each fold's share of `train.set` (negated because
#'   `BayesianOptimization()` maximises its objective), and `Pred`, a
#'   placeholder of 0 because no predictions are kept.
svm_fit_bayes <- function(logCost, logGamma, Degree) {
  # Use the fold ids actually present instead of a hard-coded 1:5, so the
  # objective stays correct if the number of folds is changed upstream.
  fold_ids <- sort(unique(train.set$fold))
  n_train <- nrow(train.set)

  weighted_errors <- vapply(fold_ids, function(k) {
    held_out <- subset(train.set, fold == k)
    fit_rows <- subset(train.set, fold != k)

    fit <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = fit_rows,
      kernel = "polynomial",
      cost = exp(logCost),
      gamma = exp(logGamma),
      degree = Degree
    )

    fold_error <- mean(held_out$Group != predict(fit, held_out))

    # Weight by the fold's share of the training rows.
    (nrow(held_out) / n_train) * fold_error
  }, numeric(1))

  # The optimiser expects the components to be named exactly `Score` and
  # `Pred`; a lower-case `pred` is not picked up.
  list(Score = -sum(weighted_errors), Pred = 0)
}

# Fix the RNG so the random initial design is reproducible.
set.seed(234)

# 50 random initial points followed by 20 Bayesian-optimisation iterations,
# using the upper confidence bound as the acquisition function. Degree is
# given integer bounds (1L to 5L).
OPT_Res <- BayesianOptimization(
  svm_fit_bayes,
  bounds = list(
    logCost = c(-5, 20),
    logGamma = c(-9, -0.75),
    Degree = c(1L, 5L)
  ),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 0.08   Round = 1   logCost = 13.6405   logGamma = -3.7612  Degree = 4.0000 Value = -0.1254 
## elapsed = 0.11   Round = 2   logCost = 14.5428   logGamma = -5.8652  Degree = 4.0000 Value = -0.4746 
## elapsed = 0.12   Round = 3   logCost = -4.4991   logGamma = -7.7302  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.33   Round = 4   logCost = 14.4021   logGamma = -2.3362  Degree = 3.0000 Value = -0.0542 
## elapsed = 0.11   Round = 5   logCost = -3.3272   logGamma = -8.3482  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.11   Round = 6   logCost = 11.1199   logGamma = -7.8096  Degree = 5.0000 Value = -0.6678 
## elapsed = 0.07   Round = 7   logCost = 18.2346   logGamma = -8.2880  Degree = 2.0000 Value = -0.0831 
## elapsed = 0.07   Round = 8   logCost = 12.9411   logGamma = -5.1917  Degree = 2.0000 Value = -0.0746 
## elapsed = 7.16   Round = 9   logCost = 18.1934   logGamma = -2.3282  Degree = 2.0000 Value = -0.0729 
## elapsed = 0.11   Round = 10  logCost = 2.1058    logGamma = -3.3696  Degree = 3.0000 Value = -0.5237 
## elapsed = 0.11   Round = 11  logCost = 8.8931    logGamma = -5.9625  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.08   Round = 12  logCost = 8.6925    logGamma = -4.9629  Degree = 2.0000 Value = -0.1441 
## elapsed = 0.11   Round = 13  logCost = 9.5712    logGamma = -6.8974  Degree = 3.0000 Value = -0.6678 
## elapsed = 0.07   Round = 14  logCost = 9.5747    logGamma = -1.8662  Degree = 3.0000 Value = -0.0559 
## elapsed = 0.11   Round = 15  logCost = -4.9700   logGamma = -5.2061  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.07   Round = 16  logCost = 6.0279    logGamma = -5.7480  Degree = 1.0000 Value = -0.0780 
## elapsed = 0.10   Round = 17  logCost = 2.8288    logGamma = -2.7361  Degree = 2.0000 Value = -0.2186 
## elapsed = 0.07   Round = 18  logCost = 13.5004   logGamma = -4.8409  Degree = 2.0000 Value = -0.0712 
## elapsed = 0.11   Round = 19  logCost = -1.5418   logGamma = -3.4781  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.08   Round = 20  logCost = 16.7944   logGamma = -4.0321  Degree = 3.0000 Value = -0.0559 
## elapsed = 0.09   Round = 21  logCost = 8.0767    logGamma = -1.3042  Degree = 4.0000 Value = -0.0780 
## elapsed = 0.11   Round = 22  logCost = 9.4776    logGamma = -6.1408  Degree = 3.0000 Value = -0.6051 
## elapsed = 0.08   Round = 23  logCost = 16.6300   logGamma = -4.1688  Degree = 3.0000 Value = -0.0559 
## elapsed = 0.11   Round = 24  logCost = 10.4356   logGamma = -6.4218  Degree = 3.0000 Value = -0.5966 
## elapsed = 0.11   Round = 25  logCost = 7.2445    logGamma = -8.4438  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.11   Round = 26  logCost = 4.3700    logGamma = -5.8644  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.10   Round = 27  logCost = 12.4167   logGamma = -4.4647  Degree = 4.0000 Value = -0.3424 
## elapsed = 0.11   Round = 28  logCost = -0.2072   logGamma = -5.7881  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.07   Round = 29  logCost = 15.9732   logGamma = -7.7657  Degree = 2.0000 Value = -0.0949 
## elapsed = 0.07   Round = 30  logCost = 17.0627   logGamma = -7.9927  Degree = 2.0000 Value = -0.0915 
## elapsed = 0.08   Round = 31  logCost = 10.4807   logGamma = -4.1554  Degree = 3.0000 Value = -0.1169 
## elapsed = 0.11   Round = 32  logCost = 1.3078    logGamma = -4.5781  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.12   Round = 33  logCost = -0.5958   logGamma = -6.2720  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.10   Round = 34  logCost = 12.6207   logGamma = -2.0301  Degree = 5.0000 Value = -0.0729 
## elapsed = 0.08   Round = 35  logCost = 8.3371    logGamma = -1.3244  Degree = 3.0000 Value = -0.0559 
## elapsed = 0.09   Round = 36  logCost = 12.2033   logGamma = -3.6795  Degree = 5.0000 Value = -0.3102 
## elapsed = 0.12   Round = 37  logCost = 12.5207   logGamma = -6.6158  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.10   Round = 38  logCost = -1.1632   logGamma = -1.1124  Degree = 2.0000 Value = -0.3661 
## elapsed = 0.12   Round = 39  logCost = 7.6583    logGamma = -6.2656  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.08   Round = 40  logCost = 3.8680    logGamma = -2.2080  Degree = 2.0000 Value = -0.1102 
## elapsed = 0.07   Round = 41  logCost = 9.5338    logGamma = -1.9580  Degree = 4.0000 Value = -0.0763 
## elapsed = 3.62   Round = 42  logCost = 17.6858   logGamma = -1.9475  Degree = 5.0000 Value = -0.0864 
## elapsed = 0.10   Round = 43  logCost = 16.1313   logGamma = -7.7448  Degree = 3.0000 Value = -0.4695 
## elapsed = 0.11   Round = 44  logCost = 0.8071    logGamma = -7.7905  Degree = 4.0000 Value = -0.6678 
## elapsed = 0.12   Round = 45  logCost = 11.4682   logGamma = -4.7437  Degree = 5.0000 Value = -0.5864 
## elapsed = 9.95   Round = 46  logCost = 14.4283   logGamma = -0.8029  Degree = 5.0000 Value = -0.0864 
## elapsed = 0.11   Round = 47  logCost = 1.0083    logGamma = -5.9347  Degree = 2.0000 Value = -0.6678 
## elapsed = 0.08   Round = 48  logCost = 10.7092   logGamma = -5.9287  Degree = 2.0000 Value = -0.1356 
## elapsed = 0.07   Round = 49  logCost = 9.8284    logGamma = -3.3159  Degree = 2.0000 Value = -0.0797 
## elapsed = 0.11   Round = 50  logCost = -1.8423   logGamma = -4.7731  Degree = 3.0000 Value = -0.6678 
## elapsed = 0.06   Round = 51  logCost = 11.9697   logGamma = -8.2641  Degree = 1.0000 Value = -0.0492 
## elapsed = 0.06   Round = 52  logCost = 10.6776   logGamma = -5.7603  Degree = 1.0000 Value = -0.0492 
## elapsed = 0.59   Round = 53  logCost = 18.7900   logGamma = -9.0000  Degree = 1.0000 Value = -0.0475 
## elapsed = 16.08  Round = 54  logCost = 20.0000   logGamma = -0.7500  Degree = 1.0000 Value = -0.1068 
## elapsed = 6.63   Round = 55  logCost = 20.0000   logGamma = -6.5306  Degree = 1.0000 Value = -0.0441 
## elapsed = 14.12  Round = 56  logCost = 20.0000   logGamma = -3.4631  Degree = 1.0000 Value = -0.0644 
## elapsed = 0.06   Round = 57  logCost = 5.6769    logGamma = -3.1207  Degree = 1.0000 Value = -0.0525 
## elapsed = 0.49   Round = 58  logCost = 20.0000   logGamma = -5.4035  Degree = 2.0000 Value = -0.0695 
## elapsed = 0.07   Round = 59  logCost = 20.0000   logGamma = -4.4330  Degree = 5.0000 Value = -0.1322 
## elapsed = 9.20   Round = 60  logCost = 15.1902   logGamma = -0.7500  Degree = 1.0000 Value = -0.0475 
## elapsed = 0.09   Round = 61  logCost = 6.2156    logGamma = -9.0000  Degree = 1.0000 Value = -0.3864 
## elapsed = 0.12   Round = 62  logCost = 15.7092   logGamma = -7.3439  Degree = 1.0000 Value = -0.0475 
## elapsed = 0.10   Round = 63  logCost = 14.4484   logGamma = -9.0000  Degree = 2.0000 Value = -0.3847 
## elapsed = 0.08   Round = 64  logCost = 20.0000   logGamma = -7.6508  Degree = 2.0000 Value = -0.0763 
## elapsed = 12.23  Round = 65  logCost = 20.0000   logGamma = -0.7500  Degree = 3.0000 Value = -0.2271 
## elapsed = 5.27   Round = 66  logCost = 12.5897   logGamma = -0.7500  Degree = 4.0000 Value = -0.0763 
## elapsed = 0.07   Round = 67  logCost = 2.8518    logGamma = -1.3725  Degree = 1.0000 Value = -0.0627 
## elapsed = 0.09   Round = 68  logCost = 8.3254    logGamma = -0.7500  Degree = 1.0000 Value = -0.0508 
## elapsed = 3.70   Round = 69  logCost = 13.7655   logGamma = -0.7500  Degree = 2.0000 Value = -0.0695 
## elapsed = 0.07   Round = 70  logCost = 9.3236    logGamma = -7.0984  Degree = 1.0000 Value = -0.0559 
## 
##  Best Parameters Found: 
## Round = 55   logCost = 20.0000   logGamma = -6.5306  Degree = 1.0000 Value = -0.0441
  OPT_Res$Best_Par
##   logCost  logGamma    Degree 
## 20.000000 -6.530609  1.000000
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 485165195
  as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.001458117
  # Re-run the 5-fold cross-validation at the selected cost, gamma and degree
  # to confirm the score reported by the optimiser.
  CV.error <- NULL

  for (i in seq_len(5)) {
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)

    svmfit <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = train.data,
      kernel = "polynomial",
      cost = exp(OPT_Res$Best_Par["logCost"]),
      gamma = exp(OPT_Res$Best_Par["logGamma"]),
      degree = OPT_Res$Best_Par["Degree"]
    )

    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)

    # Misclassification rate on the held-out fold, weighted by the fold's
    # share of the training rows.
    ith.test.error <- mean(svm.y != svm.predy)
    CV.error <- c(
      CV.error,
      (nrow(valid.data) / nrow(train.set)) * ith.test.error
    )
  }

  sum(CV.error)
## [1] 0.0440678

3.3 SVM Radial Kernel

#' Cross-validated objective for the radial-kernel SVM.
#'
#' For every fold id found in the global `train.set$fold`, fits `svm()` with a
#' radial kernel on the remaining folds and measures the misclassification
#' rate on the held-out fold.
#'
#' @param logCost Natural log of the SVM cost; `exp(logCost)` is passed to
#'   `svm()` as `cost`.
#' @param logGamma Natural log of the kernel gamma; `exp(logGamma)` is passed
#'   to `svm()` as `gamma`.
#' @return A list with `Score`, the negated sum of the per-fold error rates
#'   weighted by each fold's share of `train.set` (negated because
#'   `BayesianOptimization()` maximises its objective), and `Pred`, a
#'   placeholder of 0 because no predictions are kept.
svm_fit_bayes <- function(logCost, logGamma) {
  # Use the fold ids actually present instead of a hard-coded 1:5, so the
  # objective stays correct if the number of folds is changed upstream.
  fold_ids <- sort(unique(train.set$fold))
  n_train <- nrow(train.set)

  weighted_errors <- vapply(fold_ids, function(k) {
    held_out <- subset(train.set, fold == k)
    fit_rows <- subset(train.set, fold != k)

    fit <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = fit_rows,
      kernel = "radial",
      cost = exp(logCost),
      gamma = exp(logGamma)
    )

    fold_error <- mean(held_out$Group != predict(fit, held_out))

    # Weight by the fold's share of the training rows.
    (nrow(held_out) / n_train) * fold_error
  }, numeric(1))

  # The optimiser expects the components to be named exactly `Score` and
  # `Pred`; a lower-case `pred` is not picked up.
  list(Score = -sum(weighted_errors), Pred = 0)
}

# Fix the RNG so the random initial design is reproducible.
set.seed(234)

# 50 random initial points followed by 20 Bayesian-optimisation iterations,
# using the upper confidence bound as the acquisition function.
OPT_Res <- BayesianOptimization(
  svm_fit_bayes,
  bounds = list(
    logCost = c(-5, 20),
    logGamma = c(-9, -0.75)
  ),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 0.20   Round = 1   logCost = 13.6405   logGamma = -3.7612  Value = -0.0492 
## elapsed = 0.15   Round = 2   logCost = 14.5428   logGamma = -5.8652  Value = -0.0542 
## elapsed = 0.14   Round = 3   logCost = -4.4991   logGamma = -7.7302  Value = -0.6678 
## elapsed = 0.88   Round = 4   logCost = 14.4021   logGamma = -2.3362  Value = -0.0576 
## elapsed = 0.13   Round = 5   logCost = -3.3272   logGamma = -8.3482  Value = -0.6678 
## elapsed = 0.07   Round = 6   logCost = 11.1199   logGamma = -7.8096  Value = -0.0525 
## elapsed = 0.21   Round = 7   logCost = 18.2346   logGamma = -8.2880  Value = -0.0576 
## elapsed = 0.10   Round = 8   logCost = 12.9411   logGamma = -5.1917  Value = -0.0542 
## elapsed = 3.70   Round = 9   logCost = 18.1934   logGamma = -2.3282  Value = -0.0644 
## elapsed = 0.10   Round = 10  logCost = 2.1058    logGamma = -3.3696  Value = -0.1051 
## elapsed = 0.07   Round = 11  logCost = 8.8931    logGamma = -5.9625  Value = -0.0492 
## elapsed = 0.07   Round = 12  logCost = 8.6925    logGamma = -4.9629  Value = -0.0373 
## elapsed = 0.07   Round = 13  logCost = 9.5712    logGamma = -6.8974  Value = -0.0492 
## elapsed = 0.07   Round = 14  logCost = 9.5747    logGamma = -1.8662  Value = -0.0492 
## elapsed = 0.14   Round = 15  logCost = -4.9700   logGamma = -5.2061  Value = -0.6678 
## elapsed = 0.08   Round = 16  logCost = 6.0279    logGamma = -5.7480  Value = -0.0576 
## elapsed = 0.09   Round = 17  logCost = 2.8288    logGamma = -2.7361  Value = -0.0780 
## elapsed = 0.14   Round = 18  logCost = 13.5004   logGamma = -4.8409  Value = -0.0508 
## elapsed = 0.13   Round = 19  logCost = -1.5418   logGamma = -3.4781  Value = -0.5593 
## elapsed = 1.88   Round = 20  logCost = 16.7944   logGamma = -4.0321  Value = -0.0508 
## elapsed = 0.07   Round = 21  logCost = 8.0767    logGamma = -1.3042  Value = -0.0492 
## elapsed = 0.07   Round = 22  logCost = 9.4776    logGamma = -6.1408  Value = -0.0458 
## elapsed = 0.81   Round = 23  logCost = 16.6300   logGamma = -4.1688  Value = -0.0593 
## elapsed = 0.07   Round = 24  logCost = 10.4356   logGamma = -6.4218  Value = -0.0390 
## elapsed = 0.10   Round = 25  logCost = 7.2445    logGamma = -8.4438  Value = -0.1203 
## elapsed = 0.10   Round = 26  logCost = 4.3700    logGamma = -5.8644  Value = -0.1305 
## elapsed = 0.09   Round = 27  logCost = 12.4167   logGamma = -4.4647  Value = -0.0559 
## elapsed = 0.13   Round = 28  logCost = -0.2072   logGamma = -5.7881  Value = -0.5593 
## elapsed = 0.13   Round = 29  logCost = 15.9732   logGamma = -7.7657  Value = -0.0593 
## elapsed = 0.15   Round = 30  logCost = 17.0627   logGamma = -7.9927  Value = -0.0644 
## elapsed = 0.08   Round = 31  logCost = 10.4807   logGamma = -4.1554  Value = -0.0542 
## elapsed = 0.13   Round = 32  logCost = 1.3078    logGamma = -4.5781  Value = -0.3203 
## elapsed = 0.14   Round = 33  logCost = -0.5958   logGamma = -6.2720  Value = -0.6678 
## elapsed = 0.31   Round = 34  logCost = 12.6207   logGamma = -2.0301  Value = -0.0627 
## elapsed = 0.07   Round = 35  logCost = 8.3371    logGamma = -1.3244  Value = -0.0475 
## elapsed = 0.11   Round = 36  logCost = 12.2033   logGamma = -3.6795  Value = -0.0441 
## elapsed = 0.07   Round = 37  logCost = 12.5207   logGamma = -6.6158  Value = -0.0492 
## elapsed = 0.12   Round = 38  logCost = -1.1632   logGamma = -1.1124  Value = -0.1712 
## elapsed = 0.07   Round = 39  logCost = 7.6583    logGamma = -6.2656  Value = -0.0492 
## elapsed = 0.07   Round = 40  logCost = 3.8680    logGamma = -2.2080  Value = -0.0627 
## elapsed = 0.07   Round = 41  logCost = 9.5338    logGamma = -1.9580  Value = -0.0475 
## elapsed = 2.56   Round = 42  logCost = 17.6858   logGamma = -1.9475  Value = -0.0627 
## elapsed = 0.16   Round = 43  logCost = 16.1313   logGamma = -7.7448  Value = -0.0492 
## elapsed = 0.14   Round = 44  logCost = 0.8071    logGamma = -7.7905  Value = -0.6678 
## elapsed = 0.08   Round = 45  logCost = 11.4682   logGamma = -4.7437  Value = -0.0508 
## elapsed = 0.34   Round = 46  logCost = 14.4283   logGamma = -0.8029  Value = -0.0610 
## elapsed = 0.14   Round = 47  logCost = 1.0083    logGamma = -5.9347  Value = -0.5559 
## elapsed = 0.07   Round = 48  logCost = 10.7092   logGamma = -5.9287  Value = -0.0407 
## elapsed = 0.07   Round = 49  logCost = 9.8284    logGamma = -3.3159  Value = -0.0458 
## elapsed = 0.14   Round = 50  logCost = -1.8423   logGamma = -4.7731  Value = -0.6237 
## elapsed = 0.34   Round = 51  logCost = 20.0000   logGamma = -6.9054  Value = -0.0525 
## elapsed = 0.14   Round = 52  logCost = -5.0000   logGamma = -0.7500  Value = -0.6678 
## elapsed = 0.08   Round = 53  logCost = 2.1555    logGamma = -0.7500  Value = -0.0746 
## elapsed = 0.55   Round = 54  logCost = 20.0000   logGamma = -0.7500  Value = -0.0627 
## elapsed = 1.83   Round = 55  logCost = 20.0000   logGamma = -3.9462  Value = -0.0525 
## elapsed = 0.07   Round = 56  logCost = 5.2349    logGamma = -3.4542  Value = -0.0542 
## elapsed = 0.14   Round = 57  logCost = 4.0103    logGamma = -9.0000  Value = -0.5576 
## elapsed = 0.08   Round = 58  logCost = 9.9047    logGamma = -9.0000  Value = -0.0610 
## elapsed = 0.46   Round = 59  logCost = 17.3290   logGamma = -0.7500  Value = -0.0644 
## elapsed = 0.20   Round = 60  logCost = 20.0000   logGamma = -7.8768  Value = -0.0525 
## elapsed = 0.09   Round = 61  logCost = 0.4371    logGamma = -0.7500  Value = -0.0983 
## elapsed = 0.35   Round = 62  logCost = 17.6451   logGamma = -6.6718  Value = -0.0458 
## elapsed = 0.13   Round = 63  logCost = 20.0000   logGamma = -9.0000  Value = -0.0475 
## elapsed = 0.11   Round = 64  logCost = 5.3181    logGamma = -7.1587  Value = -0.1339 
## elapsed = 0.07   Round = 65  logCost = 5.4836    logGamma = -0.7500  Value = -0.0508 
## elapsed = 0.26   Round = 66  logCost = 15.9008   logGamma = -0.7500  Value = -0.0610 
## elapsed = 0.86   Round = 67  logCost = 20.0000   logGamma = -5.2973  Value = -0.0610 
## elapsed = 0.09   Round = 68  logCost = 1.3582    logGamma = -2.1158  Value = -0.0966 
## elapsed = 0.10   Round = 69  logCost = 10.6771   logGamma = -0.7500  Value = -0.0644 
## elapsed = 0.06   Round = 70  logCost = 6.9453    logGamma = -2.7971  Value = -0.0458 
## 
##  Best Parameters Found: 
## Round = 12   logCost = 8.6925    logGamma = -4.9629  Value = -0.0373
  OPT_Res$Best_Par
##   logCost  logGamma 
##  8.692541 -4.962923
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 5958.305
  as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.006992461
  # Re-run the 5-fold cross-validation at the selected cost and gamma to
  # confirm the score reported by the optimiser.
  CV.error <- NULL

  for (i in seq_len(5)) {
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)

    svmfit <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = train.data,
      kernel = "radial",
      cost = exp(OPT_Res$Best_Par["logCost"]),
      gamma = exp(OPT_Res$Best_Par["logGamma"])
    )

    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)

    # Misclassification rate on the held-out fold, weighted by the fold's
    # share of the training rows.
    ith.test.error <- mean(svm.y != svm.predy)
    CV.error <- c(
      CV.error,
      (nrow(valid.data) / nrow(train.set)) * ith.test.error
    )
  }

  sum(CV.error)
## [1] 0.03728814
# logCost values for rounds 1-70 of the radial-kernel optimisation, copied
# (rounded to 4 decimal places) from the log printed above. Rounds 1-50 are the
# initial points; rounds 51-70 are the Bayesian-optimisation iterations.
logCost<-c(13.6405, 14.5428, -4.4991, 14.4021, -3.3272, 11.1199, 18.2346, 12.9411, 18.1934, 2.1058, 8.8931, 8.6925, 9.5712, 9.5747, -4.9700, 6.0279, 2.8288, 13.5004, -1.5418, 16.7944, 8.0767, 9.4776, 16.6300, 10.4356, 7.2445, 4.3700, 12.4167, -0.2072, 15.9732, 17.0627, 10.4807, 1.3078, -0.5958, 12.6207, 8.3371, 12.2033, 12.5207, -1.1632, 7.6583, 3.8680, 9.5338, 17.6858, 16.1313, 0.8071, 11.4682, 14.4283, 1.0083, 10.7092, 9.8284, -1.8423, 20.0000, -5.0000, 2.1555, 20.0000, 20.0000, 5.2349, 4.0103, 9.9047, 17.3290, 20.0000, 0.4371, 17.6451, 20.0000, 5.3181, 5.4836, 15.9008, 20.0000, 1.3582, 10.6771, 6.9453)

# Matching logGamma values for the same 70 rounds, in the same order.
logGamma<-c(-3.7612, -5.8652, -7.7302, -2.3362, -8.3482, -7.8096, -8.2880, -5.1917, -2.3282, -3.3696, -5.9625, -4.9629, -6.8974, -1.8662, -5.2061, -5.7480, -2.7361, -4.8409, -3.4781, -4.0321, -1.3042, -6.1408, -4.1688, -6.4218, -8.4438, -5.8644, -4.4647, -5.7881, -7.7657, -7.9927, -4.1554, -4.5781, -6.2720, -2.0301, -1.3244, -3.6795, -6.6158, -1.1124, -6.2656, -2.2080, -1.9580, -1.9475, -7.7448, -7.7905, -4.7437, -0.8029, -5.9347, -5.9287, -3.3159, -4.7731, -6.9054, -0.7500, -0.7500, -0.7500, -3.9462, -3.4542, -9.0000, -9.0000, -0.7500, -7.8768, -0.7500, -6.6718, -9.0000, -7.1587, -0.7500, -0.7500, -5.2973, -2.1158, -0.7500, -2.7971)

Values <- c()

# Draw an arrow from each Bayesian-optimisation proposal (rounds 51-69) to the
# next one (rounds 52-70) to show the path the optimiser took.
p <- ggplot() +
  geom_curve(
    aes(
      x = logGamma[51:69], y = logCost[51:69],
      xend = logGamma[52:70], yend = logCost[52:70]
    ),
    arrow = arrow(length = unit(0.03, "npc")),
    curvature = 0.1
  )

# Overlay the 50 initial points (default colour) and the 20 optimisation
# iterations (red).
p +
  geom_point(aes(x = logGamma[1:50], y = logCost[1:50])) +
  geom_point(aes(x = logGamma[51:70], y = logCost[51:70]), colour = "red") +
  labs(x = "Log Gamma", y = "Log Cost", title = " B.O Iterations")

4 Conclusion

Comparison between Bayesian Optimization and my sweep of hyperparameters.

SVM linear kernel:

SVM polynomial kernel:

SVM radial kernel: