1 Reading in the Data

# Packages: readxl/readr for import, caret for the stratified splits,
# e1071 for svm(), rBayesianOptimization for the tuning routine.
library(readxl)
library(readr)
library(caret)
library(e1071)
library(rBayesianOptimization)

ArcLakeGroupSummary <- read_excel("~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx")
dundeedata <- read_csv("~/Desktop/EPSRC Project /dundeedata.csv.xls")

colnames(dundeedata)[1] <- "GloboLakes_ID" # rename the GloboLID column to GloboLakes_ID so both tables share the merge key

Data <- merge(ArcLakeGroupSummary, dundeedata, by = "GloboLakes_ID", all = TRUE)
Data <- subset(Data, Group != "NA") # drop rows with no Group label: back to the original 732 rows, just with extra columns of information

Data$Group <- as.factor(Data$Group)

Data1 <- data.frame(Data[, c("Group", "PC1", "PC2")])

# Stratified split of the full data set into training (80%) and test (20%) sets

set.seed(1)

train.index <- createDataPartition(Data1$Group, p = 0.8, list = FALSE)
train.set <- Data1[train.index, ]
test.set <- Data1[-train.index, ]

# Stratify the training set into 5 folds

set.seed(1)
folds <- createFolds(y = factor(train.set$Group), k = 5, list = FALSE)
train.set$fold <- folds
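
As a quick sanity check (not part of the original pipeline), the stratification can be verified by tabulating the class proportions within each fold; every fold should roughly mirror the Group balance of the full training set.

# Optional check: row-wise proportions of each Group level per fold
round(prop.table(table(train.set$fold, train.set$Group), margin = 1), 3)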

SVM Linear Kernel

# Linear kernel: tune log(cost) only

svm_fit_bayes <- function(logCost) {
  
  CV.error <- NULL
  
  for (i in 1:5) {
    # split the training set into the i-th validation fold and the rest
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)
    
    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "linear",
                  cost = exp(logCost), scale = FALSE)
    
    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)
    
    # misclassification rate on the held-out fold
    ith.test.error <- mean(svm.y != svm.predy)
    
    # weight each fold's error by its share of the training set
    CV.error <- c(CV.error, (nrow(valid.data) / nrow(train.set)) * ith.test.error)
  }
  
  # BayesianOptimization maximises Score, so return the negative CV error
  list(Score = -sum(CV.error), pred = 0)
}
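
Before handing the objective to BayesianOptimization it can help to call it once by hand and confirm it returns a list with a Score element; the trial value logCost = 0 (i.e. cost = 1) below is purely illustrative.

# One manual evaluation of the CV objective at cost = exp(0) = 1
svm_fit_bayes(logCost = 0)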



set.seed(1)

OPT_Res <- BayesianOptimization(svm_fit_bayes, bounds = list(logCost = c(-5, 20)),
                                init_grid_dt = NULL, init_points = 10,
                                n_iter = 10, acq = "ucb", kappa = 2.576,
                                eps = 0, verbose = TRUE)
## elapsed = 0.07   Round = 1   logCost = 1.6377    Value = -0.0305 
## elapsed = 0.17   Round = 2   logCost = 4.3031    Value = -0.0271 
## elapsed = 4.09   Round = 3   logCost = 9.3213    Value = -0.0220 
## elapsed = 8.37   Round = 4   logCost = 17.7052   Value = -0.0932 
## elapsed = 0.05   Round = 5   logCost = 0.0420    Value = -0.0271 
## elapsed = 8.11   Round = 6   logCost = 17.4597   Value = -0.0932 
## elapsed = 9.00   Round = 7   logCost = 18.6169   Value = -0.1492 
## elapsed = 5.46   Round = 8   logCost = 11.5199   Value = -0.0305 
## elapsed = 5.50   Round = 9   logCost = 10.7279   Value = -0.0271 
## elapsed = 0.05   Round = 10  logCost = -3.4553   Value = -0.0305 
## elapsed = 2.89   Round = 11  logCost = 8.0887    Value = -0.0254 
## elapsed = 0.08   Round = 12  logCost = 3.0179    Value = -0.0288 
## elapsed = 0.05   Round = 13  logCost = -1.3529   Value = -0.0288 
## elapsed = 1.23   Round = 14  logCost = 6.7139    Value = -0.0271 
## elapsed = 0.67   Round = 15  logCost = 5.5094    Value = -0.0271 
## elapsed = 0.05   Round = 16  logCost = -4.9678   Value = -0.0356 
## elapsed = 7.25   Round = 17  logCost = 13.2629   Value = -0.0288 
## elapsed = 7.03   Round = 18  logCost = 14.6334   Value = -0.0271 
## elapsed = 0.04   Round = 19  logCost = -2.3972   Value = -0.0271 
## elapsed = 0.82   Round = 20  logCost = 6.6037    Value = -0.0271 
## 
##  Best Parameters Found: 
## Round = 3    logCost = 9.3213    Value = -0.0220
  OPT_Res$Best_Par
##  logCost 
## 9.321334
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 11173.88
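
A natural next step is to refit the linear-kernel SVM on the whole training set with this tuned cost and score it on the held-out test set. The lines below are a sketch of that step, not output from the original analysis.

# Refit on the full training set with the tuned cost
best.cost <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))
svm.linear <- svm(Group ~ PC1 + PC2, data = train.set, kernel = "linear",
                  cost = best.cost, scale = FALSE)

# Test-set accuracy of the tuned linear SVM
mean(predict(svm.linear, test.set) == test.set$Group)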

1.1 SVM Polynomial Kernel

svm_fit_bayes <- function(logCost, Degree) {
  
  CV.error <- NULL
  
  for (i in 1:5) {
    # split the training set into the i-th validation fold and the rest
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)
    
    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "polynomial",
                  cost = exp(logCost), gamma = 1, degree = Degree)
    
    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)
    
    # misclassification rate on the held-out fold
    ith.test.error <- mean(svm.y != svm.predy)
    
    # weight each fold's error by its share of the training set
    CV.error <- c(CV.error, (nrow(valid.data) / nrow(train.set)) * ith.test.error)
  }
  
  # BayesianOptimization maximises Score, so return the negative CV error
  list(Score = -sum(CV.error), pred = 0)
}





set.seed(1)

OPT_Res <- BayesianOptimization(svm_fit_bayes, bounds = list(logCost = c(-5, 20),
                                                             Degree = c(1L, 5L)),
                                init_grid_dt = NULL, init_points = 20,
                                n_iter = 20, acq = "ucb", kappa = 2.576,
                                eps = 0, verbose = TRUE)
## elapsed = 0.06   Round = 1   logCost = 1.6377    Degree = 5.0000 Value = -0.0678 
## elapsed = 0.10   Round = 2   logCost = 4.3031    Degree = 2.0000 Value = -0.0949 
## elapsed = 8.94   Round = 3   logCost = 9.3213    Degree = 4.0000 Value = -0.0983 
## elapsed = 63.48  Round = 4   logCost = 17.7052   Degree = 2.0000 Value = -0.1153 
## elapsed = 0.06   Round = 5   logCost = 0.0420    Degree = 2.0000 Value = -0.1186 
## elapsed = 7.86   Round = 6   logCost = 17.4597   Degree = 3.0000 Value = -0.0322 
## elapsed = 8.42   Round = 7   logCost = 18.6169   Degree = 1.0000 Value = -0.1271 
## elapsed = 0.09   Round = 8   logCost = 11.5199   Degree = 3.0000 Value = -0.0288 
## elapsed = 12.56  Round = 9   logCost = 10.7279   Degree = 4.0000 Value = -0.1102 
## elapsed = 0.11   Round = 10  logCost = -3.4553   Degree = 2.0000 Value = -0.2746 
## elapsed = 0.06   Round = 11  logCost = 0.1494    Degree = 3.0000 Value = -0.0763 
## elapsed = 0.06   Round = 12  logCost = -0.5861   Degree = 3.0000 Value = -0.0966 
## elapsed = 0.11   Round = 13  logCost = 12.1756   Degree = 3.0000 Value = -0.0288 
## elapsed = 0.10   Round = 14  logCost = 4.6026    Degree = 2.0000 Value = -0.0932 
## elapsed = 28.37  Round = 15  logCost = 14.2460   Degree = 4.0000 Value = -0.1373 
## elapsed = 5.94   Round = 16  logCost = 7.4425    Degree = 4.0000 Value = -0.0966 
## elapsed = 25.15  Round = 17  logCost = 12.9405   Degree = 4.0000 Value = -0.1305 
## elapsed = 9.69   Round = 18  logCost = 19.7977   Degree = 1.0000 Value = -0.1203 
## elapsed = 1.09   Round = 19  logCost = 4.5009    Degree = 4.0000 Value = -0.1102 
## elapsed = 0.35   Round = 20  logCost = 14.4361   Degree = 3.0000 Value = -0.0305 
## elapsed = 0.12   Round = 21  logCost = 10.0906   Degree = 1.0000 Value = -0.0322 
## elapsed = 0.09   Round = 22  logCost = -5.0000   Degree = 5.0000 Value = -0.2508 
## elapsed = 7.32   Round = 23  logCost = 11.0685   Degree = 2.0000 Value = -0.0949 
## elapsed = 0.05   Round = 24  logCost = 6.1590    Degree = 1.0000 Value = -0.0271 
## elapsed = 0.07   Round = 25  logCost = 6.7293    Degree = 5.0000 Value = -0.0593 
## elapsed = 0.06   Round = 26  logCost = 0.7787    Degree = 1.0000 Value = -0.0576 
## elapsed = 11.31  Round = 27  logCost = 20.0000   Degree = 5.0000 Value = -0.1407 
## elapsed = 9.39   Round = 28  logCost = 20.0000   Degree = 3.0000 Value = -0.0407 
## elapsed = 0.08   Round = 29  logCost = 8.1137    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.36   Round = 30  logCost = 11.9032   Degree = 5.0000 Value = -0.0390 
## elapsed = 0.06   Round = 31  logCost = 5.2880    Degree = 3.0000 Value = -0.0373 
## elapsed = 0.07   Round = 32  logCost = 8.0336    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.06   Round = 33  logCost = 8.1854    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.07   Round = 34  logCost = 8.1789    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.06   Round = 35  logCost = 8.1685    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.06   Round = 36  logCost = 8.1660    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.06   Round = 37  logCost = 8.1597    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.06   Round = 38  logCost = 8.1524    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.12   Round = 39  logCost = 8.1463    Degree = 1.0000 Value = -0.0288 
## elapsed = 0.07   Round = 40  logCost = 8.1368    Degree = 1.0000 Value = -0.0288 
## 
##  Best Parameters Found: 
## Round = 24   logCost = 6.1590    Degree = 1.0000 Value = -0.0271
  OPT_Res$Best_Par
##  logCost   Degree 
## 6.159008 1.000000
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 472.9585
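
The optimum lands at Degree = 1; with gamma fixed at 1 and coef0 left at its default of 0, the polynomial kernel then reduces to a linear kernel. The tuned model can be refitted and scored on the test set in the same way as before; the lines below are a sketch rather than reproduced output.

# Refit with the tuned cost and degree; degree = 1, gamma = 1, coef0 = 0
# makes this effectively a linear kernel
best.cost <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))
best.degree <- as.numeric(OPT_Res$Best_Par["Degree"])
svm.poly <- svm(Group ~ PC1 + PC2, data = train.set, kernel = "polynomial",
                cost = best.cost, gamma = 1, degree = best.degree)

# Test-set accuracy of the tuned polynomial SVM
mean(predict(svm.poly, test.set) == test.set$Group)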

1.2 SVM Radial Kernel

svm_fit_bayes <- function(logCost, logGamma) {
  
  CV.error <- NULL
  
  for (i in 1:5) {
    # split the training set into the i-th validation fold and the rest
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)
    
    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "radial",
                  cost = exp(logCost), gamma = exp(logGamma))
    
    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)
    
    # misclassification rate on the held-out fold
    ith.test.error <- mean(svm.y != svm.predy)
    
    # weight each fold's error by its share of the training set
    CV.error <- c(CV.error, (nrow(valid.data) / nrow(train.set)) * ith.test.error)
  }
  
  # BayesianOptimization maximises Score, so return the negative CV error
  list(Score = -sum(CV.error), pred = 0)
}





set.seed(1)

OPT_Res <- BayesianOptimization(svm_fit_bayes, bounds = list(logCost = c(-5, 20),
                                                             logGamma = c(-9, -0.75)),
                                init_grid_dt = NULL, init_points = 50,
                                n_iter = 50, acq = "ucb", kappa = 2.576,
                                eps = 0, verbose = TRUE)
## elapsed = 0.13   Round = 1   logCost = 1.6377    logGamma = -5.0596  Value = -0.2458 
## elapsed = 0.07   Round = 2   logCost = 4.3031    logGamma = -1.8950  Value = -0.0424 
## elapsed = 0.08   Round = 3   logCost = 9.3213    logGamma = -5.3857  Value = -0.0339 
## elapsed = 0.14   Round = 4   logCost = 17.7052   logGamma = -6.9804  Value = -0.0322 
## elapsed = 0.17   Round = 5   logCost = 0.0420    logGamma = -8.4169  Value = -0.6678 
## elapsed = 0.09   Round = 6   logCost = 17.4597   logGamma = -8.1794  Value = -0.0356 
## elapsed = 0.18   Round = 7   logCost = 18.6169   logGamma = -6.3908  Value = -0.0288 
## elapsed = 0.10   Round = 8   logCost = 11.5199   logGamma = -4.7213  Value = -0.0271 
## elapsed = 0.14   Round = 9   logCost = 10.7279   logGamma = -3.5385  Value = -0.0254 
## elapsed = 0.20   Round = 10  logCost = -3.4553   logGamma = -5.6437  Value = -0.6678 
## elapsed = 0.12   Round = 11  logCost = 0.1494    logGamma = -1.4688  Value = -0.0780 
## elapsed = 0.17   Round = 12  logCost = -0.5861   logGamma = -6.5778  Value = -0.6678 
## elapsed = 0.08   Round = 13  logCost = 12.1756   logGamma = -5.2127  Value = -0.0288 
## elapsed = 0.14   Round = 14  logCost = 4.6026    logGamma = -6.2577  Value = -0.0797 
## elapsed = 0.29   Round = 15  logCost = 14.2460   logGamma = -3.6303  Value = -0.0220 
## elapsed = 0.10   Round = 16  logCost = 7.4425    logGamma = -6.8714  Value = -0.0525 
## elapsed = 0.10   Round = 17  logCost = 12.9405   logGamma = -5.0520  Value = -0.0271 
## elapsed = 2.01   Round = 18  logCost = 19.7977   logGamma = -2.6779  Value = -0.0441 
## elapsed = 0.15   Round = 19  logCost = 4.5009    logGamma = -8.3050  Value = -0.3136 
## elapsed = 0.34   Round = 20  logCost = 14.4361   logGamma = -1.7786  Value = -0.0271 
## elapsed = 0.13   Round = 21  logCost = 18.3676   logGamma = -6.2026  Value = -0.0288 
## elapsed = 0.13   Round = 22  logCost = 0.3036    logGamma = -2.0746  Value = -0.0881 
## elapsed = 0.08   Round = 23  logCost = 11.2918   logGamma = -6.1399  Value = -0.0288 
## elapsed = 0.15   Round = 24  logCost = -1.8611   logGamma = -6.2464  Value = -0.6678 
## elapsed = 0.12   Round = 25  logCost = 1.6805    logGamma = -5.0701  Value = -0.2390 
## elapsed = 0.09   Round = 26  logCost = 4.6529    logGamma = -1.6394  Value = -0.0390 
## elapsed = 0.17   Round = 27  logCost = -4.6652   logGamma = -1.8692  Value = -0.6678 
## elapsed = 0.09   Round = 28  logCost = 4.5597    logGamma = -5.7826  Value = -0.0780 
## elapsed = 0.79   Round = 29  logCost = 16.7423   logGamma = -2.5871  Value = -0.0271 
## elapsed = 0.13   Round = 30  logCost = 3.5087    logGamma = -1.0749  Value = -0.0441 
## elapsed = 0.10   Round = 31  logCost = 7.0520    logGamma = -5.4141  Value = -0.0525 
## elapsed = 0.08   Round = 32  logCost = 9.9891    logGamma = -3.1218  Value = -0.0254 
## elapsed = 0.08   Round = 33  logCost = 7.3385    logGamma = -5.7000  Value = -0.0525 
## elapsed = 0.18   Round = 34  logCost = -0.3446   logGamma = -6.3158  Value = -0.6678 
## elapsed = 0.46   Round = 35  logCost = 15.6843   logGamma = -2.7540  Value = -0.0288 
## elapsed = 0.07   Round = 36  logCost = 11.7117   logGamma = -7.3278  Value = -0.0288 
## elapsed = 0.23   Round = 37  logCost = 14.8560   logGamma = -3.1332  Value = -0.0220 
## elapsed = 0.15   Round = 38  logCost = -2.3014   logGamma = -7.9960  Value = -0.6678 
## elapsed = 0.07   Round = 39  logCost = 13.0928   logGamma = -6.9747  Value = -0.0305 
## elapsed = 0.14   Round = 40  logCost = 5.2819    logGamma = -7.8177  Value = -0.1169 
## elapsed = 0.08   Round = 41  logCost = 15.5237   logGamma = -7.0231  Value = -0.0271 
## elapsed = 0.09   Round = 42  logCost = 11.1765   logGamma = -8.5138  Value = -0.0407 
## elapsed = 0.18   Round = 43  logCost = 14.5733   logGamma = -3.7011  Value = -0.0220 
## elapsed = 0.07   Round = 44  logCost = 8.8259    logGamma = -1.7708  Value = -0.0254 
## elapsed = 0.08   Round = 45  logCost = 8.2430    logGamma = -2.5740  Value = -0.0271 
## elapsed = 0.31   Round = 46  logCost = 14.7339   logGamma = -2.4222  Value = -0.0220 
## elapsed = 0.17   Round = 47  logCost = -4.4167   logGamma = -5.2440  Value = -0.6678 
## elapsed = 0.08   Round = 48  logCost = 6.9308    logGamma = -5.6168  Value = -0.0508 
## elapsed = 0.14   Round = 49  logCost = 13.3078   logGamma = -2.3103  Value = -0.0237 
## elapsed = 0.07   Round = 50  logCost = 12.3183   logGamma = -4.0093  Value = -0.0254 
## elapsed = 0.08   Round = 51  logCost = 20.0000   logGamma = -9.0000  Value = -0.0305 
## elapsed = 4.02   Round = 52  logCost = 19.2323   logGamma = -0.7500  Value = -0.0492 
## elapsed = 0.09   Round = 53  logCost = 1.8564    logGamma = -2.0000  Value = -0.0644 
## elapsed = 0.06   Round = 54  logCost = 14.4783   logGamma = -9.0000  Value = -0.0305 
## elapsed = 0.09   Round = 55  logCost = 10.8986   logGamma = -0.7500  Value = -0.0305 
## elapsed = 0.32   Round = 56  logCost = 20.0000   logGamma = -5.1785  Value = -0.0305 
## elapsed = 3.47   Round = 57  logCost = 17.6014   logGamma = -0.7500  Value = -0.0390 
## elapsed = 0.08   Round = 58  logCost = 8.1570    logGamma = -9.0000  Value = -0.0729 
## elapsed = 0.08   Round = 59  logCost = 20.0000   logGamma = -7.4126  Value = -0.0305 
## elapsed = 0.12   Round = 60  logCost = -1.5878   logGamma = -0.7500  Value = -0.1153 
## elapsed = 0.06   Round = 61  logCost = 7.1208    logGamma = -0.7500  Value = -0.0288 
## elapsed = 0.97   Round = 62  logCost = 18.7157   logGamma = -3.5596  Value = -0.0288 
## elapsed = 0.06   Round = 63  logCost = 18.7582   logGamma = -9.0000  Value = -0.0322 
## elapsed = 2.05   Round = 64  logCost = 18.4870   logGamma = -1.7908  Value = -0.0322 
## elapsed = 0.09   Round = 65  logCost = 2.7137    logGamma = -3.4041  Value = -0.0712 
## elapsed = 0.06   Round = 66  logCost = 9.0777    logGamma = -0.7500  Value = -0.0288 
## elapsed = 0.05   Round = 67  logCost = 12.7748   logGamma = -9.0000  Value = -0.0322 
## elapsed = 0.22   Round = 68  logCost = 16.6634   logGamma = -4.7444  Value = -0.0288 
## elapsed = 0.06   Round = 69  logCost = 9.7680    logGamma = -7.6261  Value = -0.0458 
## elapsed = 0.06   Round = 70  logCost = 16.2111   logGamma = -9.0000  Value = -0.0322 
## elapsed = 0.14   Round = 71  logCost = 12.4044   logGamma = -0.7500  Value = -0.0339 
## elapsed = 3.93   Round = 72  logCost = 20.0000   logGamma = -0.7500  Value = -0.0525 
## elapsed = 0.08   Round = 73  logCost = 4.7604    logGamma = -0.7500  Value = -0.0373 
## elapsed = 0.12   Round = 74  logCost = 6.8592    logGamma = -9.0000  Value = -0.0966 
## elapsed = 0.08   Round = 75  logCost = 14.9251   logGamma = -5.8481  Value = -0.0271 
## elapsed = 0.09   Round = 76  logCost = 11.4591   logGamma = -1.7821  Value = -0.0237 
## elapsed = 0.11   Round = 77  logCost = 19.3660   logGamma = -8.2613  Value = -0.0288 
## elapsed = 0.91   Round = 78  logCost = 17.6739   logGamma = -3.5970  Value = -0.0271 
## elapsed = 0.06   Round = 79  logCost = 8.8437    logGamma = -3.9020  Value = -0.0288 
## elapsed = 0.06   Round = 80  logCost = 13.8841   logGamma = -8.2071  Value = -0.0288 
## elapsed = 0.06   Round = 81  logCost = 8.0300    logGamma = -0.7500  Value = -0.0288 
## elapsed = 0.15   Round = 82  logCost = 16.4134   logGamma = -6.4076  Value = -0.0271 
## elapsed = 0.07   Round = 83  logCost = 10.2696   logGamma = -1.7542  Value = -0.0254 
## elapsed = 0.30   Round = 84  logCost = 19.2910   logGamma = -5.0650  Value = -0.0322 
## elapsed = 0.70   Round = 85  logCost = 20.0000   logGamma = -3.9245  Value = -0.0288 
## elapsed = 0.07   Round = 86  logCost = 15.6145   logGamma = -8.3291  Value = -0.0271 
## elapsed = 1.66   Round = 87  logCost = 17.5340   logGamma = -1.9107  Value = -0.0339 
## elapsed = 0.11   Round = 88  logCost = 5.5756    logGamma = -5.9366  Value = -0.0559 
## elapsed = 0.05   Round = 89  logCost = 6.7310    logGamma = -1.9490  Value = -0.0305 
## elapsed = 0.06   Round = 90  logCost = 5.9593    logGamma = -0.7500  Value = -0.0322 
## elapsed = 0.06   Round = 91  logCost = 14.0498   logGamma = -6.0956  Value = -0.0288 
## elapsed = 0.08   Round = 92  logCost = 6.3014    logGamma = -7.1814  Value = -0.0763 
## elapsed = 0.14   Round = 93  logCost = -5.0000   logGamma = -9.0000  Value = -0.6678 
## elapsed = 1.26   Round = 94  logCost = 16.2558   logGamma = -0.7500  Value = -0.0356 
## elapsed = 0.08   Round = 95  logCost = 1.0194    logGamma = -0.7500  Value = -0.0576 
## elapsed = 0.06   Round = 96  logCost = 9.8648    logGamma = -9.0000  Value = -0.0508 
## elapsed = 0.12   Round = 97  logCost = -2.5825   logGamma = -0.7500  Value = -0.2797 
## elapsed = 0.07   Round = 98  logCost = 10.0041   logGamma = -0.7500  Value = -0.0288 
## elapsed = 0.60   Round = 99  logCost = 15.1660   logGamma = -0.7500  Value = -0.0356 
## elapsed = 0.05   Round = 100 logCost = 11.9830   logGamma = -9.0000  Value = -0.0390 
## 
##  Best Parameters Found: 
## Round = 15   logCost = 14.2460   logGamma = -3.6303  Value = -0.0220
  OPT_Res$Best_Par
##   logCost  logGamma 
## 14.246035 -3.630319
  as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 1538065
  as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.02650774
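
As with the other kernels, the tuned cost and gamma would typically be carried forward to a radial-kernel SVM fitted on the full training set and summarised on the test set, for example with a confusion table. The sketch below illustrates this; it is not output reproduced from the original analysis.

# Refit the radial-kernel SVM with the tuned cost and gamma
best.cost <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))
best.gamma <- as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
svm.radial <- svm(Group ~ PC1 + PC2, data = train.set, kernel = "radial",
                  cost = best.cost, gamma = best.gamma)

# Confusion table and accuracy on the held-out test set
table(predicted = predict(svm.radial, test.set), actual = test.set$Group)
mean(predict(svm.radial, test.set) == test.set$Group)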