# Packages used throughout this analysis
library(readxl)                   # read_excel()
library(readr)                    # read_csv()
library(caret)                    # createDataPartition(), createFolds()
library(e1071)                    # svm()
library(rBayesianOptimization)    # BayesianOptimization()

ArcLakeGroupSummary <- read_excel("~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx")
dundeedata <- read_csv("~/Desktop/EPSRC Project /dundeedata.csv.xls")
colnames(dundeedata)[1] <- "GloboLakes_ID" # rename the GloboLID column so both data sets share the merge key
Data <- merge(ArcLakeGroupSummary, dundeedata, by = "GloboLakes_ID", all = TRUE)
Data <- subset(Data, !is.na(Group)) # drop unlabelled lakes: back to the original 732 rows, now with the extra columns of information
Data$Group <- as.factor(Data$Group)
Data1 <- data.frame(Data[, c("Group", "PC1", "PC2")])
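A quick sanity check, not part of the original output, to confirm that the merge kept the 732 labelled lakes and to see how the groups are distributed before splitting:

nrow(Data1)         # expected to be 732
table(Data1$Group)  # class counts per ArcLake group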
# Stratified split of the full data set into training and test sets
set.seed(1)
train.index<-createDataPartition(Data1$Group, p=0.8, list = FALSE)
train.set<-Data1[train.index, ]
test.set<-Data1[-train.index, ]
# Stratify the training set into 5 folds
set.seed(1)
folds <- createFolds(y=factor(train.set$Group), k = 5, list = FALSE)
train.set$fold <- folds
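Optionally, the stratification can be verified before any tuning; the lines below are a sketch, not taken from the original run, and compare class proportions in the training set, the test set, and each of the five folds.

prop.table(table(train.set$Group))  # training-set class proportions
prop.table(table(test.set$Group))   # test-set class proportions
prop.table(table(train.set$Group, train.set$fold), margin = 2)  # per-fold class proportions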
SVM Linear
#linear
# Cross-validated objective for Bayesian optimisation of the linear-kernel SVM:
# for a given log(cost), fit on four folds, validate on the held-out fold, and
# return the negated weighted 5-fold CV misclassification error.
svm_fit_bayes <- function(logCost) {
  CV.error <- NULL
  for (i in 1:5) {
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)
    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "linear",
                  cost = exp(logCost), scale = FALSE)
    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)
    ith.test.error <- mean(svm.y != svm.predy)
    # weight each fold's error by its share of the training set
    CV.error <- c(CV.error, (nrow(valid.data) / nrow(train.set)) * ith.test.error)
  }
  # BayesianOptimization maximises, so return the negative CV error as the Score
  list(Score = -sum(CV.error), pred = 0)
}
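Before handing the objective to BayesianOptimization(), a single call such as the one below (illustrative only, output not shown) confirms that it returns a list whose Score is a negative weighted CV error.

svm_fit_bayes(logCost = 0)  # sanity check: Score should be a small negative number; pred is a placeholder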
set.seed(1)
OPT_Res<- BayesianOptimization(svm_fit_bayes, bounds= list(logCost = c(-5, 20)),
init_grid_dt = NULL, init_points = 10,
n_iter = 10, acq = "ucb", kappa =2.576,
eps=0, verbose = TRUE)
## elapsed = 0.07 Round = 1 logCost = 1.6377 Value = -0.0305
## elapsed = 0.17 Round = 2 logCost = 4.3031 Value = -0.0271
## elapsed = 4.09 Round = 3 logCost = 9.3213 Value = -0.0220
## elapsed = 8.37 Round = 4 logCost = 17.7052 Value = -0.0932
## elapsed = 0.05 Round = 5 logCost = 0.0420 Value = -0.0271
## elapsed = 8.11 Round = 6 logCost = 17.4597 Value = -0.0932
## elapsed = 9.00 Round = 7 logCost = 18.6169 Value = -0.1492
## elapsed = 5.46 Round = 8 logCost = 11.5199 Value = -0.0305
## elapsed = 5.50 Round = 9 logCost = 10.7279 Value = -0.0271
## elapsed = 0.05 Round = 10 logCost = -3.4553 Value = -0.0305
## elapsed = 2.89 Round = 11 logCost = 8.0887 Value = -0.0254
## elapsed = 0.08 Round = 12 logCost = 3.0179 Value = -0.0288
## elapsed = 0.05 Round = 13 logCost = -1.3529 Value = -0.0288
## elapsed = 1.23 Round = 14 logCost = 6.7139 Value = -0.0271
## elapsed = 0.67 Round = 15 logCost = 5.5094 Value = -0.0271
## elapsed = 0.05 Round = 16 logCost = -4.9678 Value = -0.0356
## elapsed = 7.25 Round = 17 logCost = 13.2629 Value = -0.0288
## elapsed = 7.03 Round = 18 logCost = 14.6334 Value = -0.0271
## elapsed = 0.04 Round = 19 logCost = -2.3972 Value = -0.0271
## elapsed = 0.82 Round = 20 logCost = 6.6037 Value = -0.0271
##
## Best Parameters Found:
## Round = 3 logCost = 9.3213 Value = -0.0220
OPT_Res$Best_Par
## logCost
## 9.321334
as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 11173.88
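A natural follow-up, sketched here rather than taken from the original run, is to refit the linear SVM on the full training set with the tuned cost and score it on the held-out test set.

best.cost.linear <- as.numeric(exp(OPT_Res$Best_Par["logCost"]))
svm.linear <- svm(Group ~ PC1 + PC2, data = train.set, kernel = "linear",
                  cost = best.cost.linear, scale = FALSE)
mean(predict(svm.linear, test.set) != test.set$Group)  # test misclassification rate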
SVM Polynomial
#polynomial
# Same CV objective, now tuning log(cost) and the polynomial degree (gamma fixed at 1).
svm_fit_bayes <- function(logCost, Degree) {
  CV.error <- NULL
  for (i in 1:5) {
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)
    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "polynomial",
                  cost = exp(logCost), gamma = 1, degree = Degree)
    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)
    ith.test.error <- mean(svm.y != svm.predy)
    CV.error <- c(CV.error, (nrow(valid.data) / nrow(train.set)) * ith.test.error)
  }
  list(Score = -sum(CV.error), pred = 0)
}
set.seed(1)
OPT_Res<- BayesianOptimization(svm_fit_bayes, bounds= list(logCost = c(-5, 20),
Degree = c(1L, 5L)),
init_grid_dt = NULL, init_points = 20,
n_iter = 20, acq = "ucb", kappa =2.576,
eps=0, verbose = TRUE)
## elapsed = 0.06 Round = 1 logCost = 1.6377 Degree = 5.0000 Value = -0.0678
## elapsed = 0.10 Round = 2 logCost = 4.3031 Degree = 2.0000 Value = -0.0949
## elapsed = 8.94 Round = 3 logCost = 9.3213 Degree = 4.0000 Value = -0.0983
## elapsed = 63.48 Round = 4 logCost = 17.7052 Degree = 2.0000 Value = -0.1153
## elapsed = 0.06 Round = 5 logCost = 0.0420 Degree = 2.0000 Value = -0.1186
## elapsed = 7.86 Round = 6 logCost = 17.4597 Degree = 3.0000 Value = -0.0322
## elapsed = 8.42 Round = 7 logCost = 18.6169 Degree = 1.0000 Value = -0.1271
## elapsed = 0.09 Round = 8 logCost = 11.5199 Degree = 3.0000 Value = -0.0288
## elapsed = 12.56 Round = 9 logCost = 10.7279 Degree = 4.0000 Value = -0.1102
## elapsed = 0.11 Round = 10 logCost = -3.4553 Degree = 2.0000 Value = -0.2746
## elapsed = 0.06 Round = 11 logCost = 0.1494 Degree = 3.0000 Value = -0.0763
## elapsed = 0.06 Round = 12 logCost = -0.5861 Degree = 3.0000 Value = -0.0966
## elapsed = 0.11 Round = 13 logCost = 12.1756 Degree = 3.0000 Value = -0.0288
## elapsed = 0.10 Round = 14 logCost = 4.6026 Degree = 2.0000 Value = -0.0932
## elapsed = 28.37 Round = 15 logCost = 14.2460 Degree = 4.0000 Value = -0.1373
## elapsed = 5.94 Round = 16 logCost = 7.4425 Degree = 4.0000 Value = -0.0966
## elapsed = 25.15 Round = 17 logCost = 12.9405 Degree = 4.0000 Value = -0.1305
## elapsed = 9.69 Round = 18 logCost = 19.7977 Degree = 1.0000 Value = -0.1203
## elapsed = 1.09 Round = 19 logCost = 4.5009 Degree = 4.0000 Value = -0.1102
## elapsed = 0.35 Round = 20 logCost = 14.4361 Degree = 3.0000 Value = -0.0305
## elapsed = 0.12 Round = 21 logCost = 10.0906 Degree = 1.0000 Value = -0.0322
## elapsed = 0.09 Round = 22 logCost = -5.0000 Degree = 5.0000 Value = -0.2508
## elapsed = 7.32 Round = 23 logCost = 11.0685 Degree = 2.0000 Value = -0.0949
## elapsed = 0.05 Round = 24 logCost = 6.1590 Degree = 1.0000 Value = -0.0271
## elapsed = 0.07 Round = 25 logCost = 6.7293 Degree = 5.0000 Value = -0.0593
## elapsed = 0.06 Round = 26 logCost = 0.7787 Degree = 1.0000 Value = -0.0576
## elapsed = 11.31 Round = 27 logCost = 20.0000 Degree = 5.0000 Value = -0.1407
## elapsed = 9.39 Round = 28 logCost = 20.0000 Degree = 3.0000 Value = -0.0407
## elapsed = 0.08 Round = 29 logCost = 8.1137 Degree = 1.0000 Value = -0.0288
## elapsed = 0.36 Round = 30 logCost = 11.9032 Degree = 5.0000 Value = -0.0390
## elapsed = 0.06 Round = 31 logCost = 5.2880 Degree = 3.0000 Value = -0.0373
## elapsed = 0.07 Round = 32 logCost = 8.0336 Degree = 1.0000 Value = -0.0288
## elapsed = 0.06 Round = 33 logCost = 8.1854 Degree = 1.0000 Value = -0.0288
## elapsed = 0.07 Round = 34 logCost = 8.1789 Degree = 1.0000 Value = -0.0288
## elapsed = 0.06 Round = 35 logCost = 8.1685 Degree = 1.0000 Value = -0.0288
## elapsed = 0.06 Round = 36 logCost = 8.1660 Degree = 1.0000 Value = -0.0288
## elapsed = 0.06 Round = 37 logCost = 8.1597 Degree = 1.0000 Value = -0.0288
## elapsed = 0.06 Round = 38 logCost = 8.1524 Degree = 1.0000 Value = -0.0288
## elapsed = 0.12 Round = 39 logCost = 8.1463 Degree = 1.0000 Value = -0.0288
## elapsed = 0.07 Round = 40 logCost = 8.1368 Degree = 1.0000 Value = -0.0288
##
## Best Parameters Found:
## Round = 24 logCost = 6.1590 Degree = 1.0000 Value = -0.0271
OPT_Res$Best_Par
## logCost Degree
## 6.159008 1.000000
as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 472.9585
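The selected degree of 1, with gamma fixed at 1 and the default coef0 of 0, makes the tuned polynomial kernel equivalent to a linear one. For completeness, a refit on the full training set would look like the sketch below (not part of the original output).

best.poly <- OPT_Res$Best_Par
svm.poly <- svm(Group ~ PC1 + PC2, data = train.set, kernel = "polynomial",
                cost = exp(best.poly["logCost"]), gamma = 1,
                degree = round(best.poly["Degree"]))
mean(predict(svm.poly, test.set) != test.set$Group)  # test misclassification rate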
SVM Radial
#radial
# Same CV objective, now tuning log(cost) and log(gamma) for the radial (RBF) kernel.
svm_fit_bayes <- function(logCost, logGamma) {
  CV.error <- NULL
  for (i in 1:5) {
    valid.data <- subset(train.set, fold == i)
    train.data <- subset(train.set, fold != i)
    svmfit <- svm(Group ~ PC1 + PC2, data = train.data, kernel = "radial",
                  cost = exp(logCost), gamma = exp(logGamma))
    svm.y <- valid.data$Group
    svm.predy <- predict(svmfit, valid.data)
    ith.test.error <- mean(svm.y != svm.predy)
    CV.error <- c(CV.error, (nrow(valid.data) / nrow(train.set)) * ith.test.error)
  }
  list(Score = -sum(CV.error), pred = 0)
}
set.seed(1)
OPT_Res<- BayesianOptimization(svm_fit_bayes, bounds= list(logCost = c(-5, 20),
logGamma = c(-9, -0.75)),
init_grid_dt = NULL, init_points = 50,
n_iter = 50, acq = "ucb", kappa =2.576,
eps=0, verbose = TRUE)
## elapsed = 0.13 Round = 1 logCost = 1.6377 logGamma = -5.0596 Value = -0.2458
## elapsed = 0.07 Round = 2 logCost = 4.3031 logGamma = -1.8950 Value = -0.0424
## elapsed = 0.08 Round = 3 logCost = 9.3213 logGamma = -5.3857 Value = -0.0339
## elapsed = 0.14 Round = 4 logCost = 17.7052 logGamma = -6.9804 Value = -0.0322
## elapsed = 0.17 Round = 5 logCost = 0.0420 logGamma = -8.4169 Value = -0.6678
## elapsed = 0.09 Round = 6 logCost = 17.4597 logGamma = -8.1794 Value = -0.0356
## elapsed = 0.18 Round = 7 logCost = 18.6169 logGamma = -6.3908 Value = -0.0288
## elapsed = 0.10 Round = 8 logCost = 11.5199 logGamma = -4.7213 Value = -0.0271
## elapsed = 0.14 Round = 9 logCost = 10.7279 logGamma = -3.5385 Value = -0.0254
## elapsed = 0.20 Round = 10 logCost = -3.4553 logGamma = -5.6437 Value = -0.6678
## elapsed = 0.12 Round = 11 logCost = 0.1494 logGamma = -1.4688 Value = -0.0780
## elapsed = 0.17 Round = 12 logCost = -0.5861 logGamma = -6.5778 Value = -0.6678
## elapsed = 0.08 Round = 13 logCost = 12.1756 logGamma = -5.2127 Value = -0.0288
## elapsed = 0.14 Round = 14 logCost = 4.6026 logGamma = -6.2577 Value = -0.0797
## elapsed = 0.29 Round = 15 logCost = 14.2460 logGamma = -3.6303 Value = -0.0220
## elapsed = 0.10 Round = 16 logCost = 7.4425 logGamma = -6.8714 Value = -0.0525
## elapsed = 0.10 Round = 17 logCost = 12.9405 logGamma = -5.0520 Value = -0.0271
## elapsed = 2.01 Round = 18 logCost = 19.7977 logGamma = -2.6779 Value = -0.0441
## elapsed = 0.15 Round = 19 logCost = 4.5009 logGamma = -8.3050 Value = -0.3136
## elapsed = 0.34 Round = 20 logCost = 14.4361 logGamma = -1.7786 Value = -0.0271
## elapsed = 0.13 Round = 21 logCost = 18.3676 logGamma = -6.2026 Value = -0.0288
## elapsed = 0.13 Round = 22 logCost = 0.3036 logGamma = -2.0746 Value = -0.0881
## elapsed = 0.08 Round = 23 logCost = 11.2918 logGamma = -6.1399 Value = -0.0288
## elapsed = 0.15 Round = 24 logCost = -1.8611 logGamma = -6.2464 Value = -0.6678
## elapsed = 0.12 Round = 25 logCost = 1.6805 logGamma = -5.0701 Value = -0.2390
## elapsed = 0.09 Round = 26 logCost = 4.6529 logGamma = -1.6394 Value = -0.0390
## elapsed = 0.17 Round = 27 logCost = -4.6652 logGamma = -1.8692 Value = -0.6678
## elapsed = 0.09 Round = 28 logCost = 4.5597 logGamma = -5.7826 Value = -0.0780
## elapsed = 0.79 Round = 29 logCost = 16.7423 logGamma = -2.5871 Value = -0.0271
## elapsed = 0.13 Round = 30 logCost = 3.5087 logGamma = -1.0749 Value = -0.0441
## elapsed = 0.10 Round = 31 logCost = 7.0520 logGamma = -5.4141 Value = -0.0525
## elapsed = 0.08 Round = 32 logCost = 9.9891 logGamma = -3.1218 Value = -0.0254
## elapsed = 0.08 Round = 33 logCost = 7.3385 logGamma = -5.7000 Value = -0.0525
## elapsed = 0.18 Round = 34 logCost = -0.3446 logGamma = -6.3158 Value = -0.6678
## elapsed = 0.46 Round = 35 logCost = 15.6843 logGamma = -2.7540 Value = -0.0288
## elapsed = 0.07 Round = 36 logCost = 11.7117 logGamma = -7.3278 Value = -0.0288
## elapsed = 0.23 Round = 37 logCost = 14.8560 logGamma = -3.1332 Value = -0.0220
## elapsed = 0.15 Round = 38 logCost = -2.3014 logGamma = -7.9960 Value = -0.6678
## elapsed = 0.07 Round = 39 logCost = 13.0928 logGamma = -6.9747 Value = -0.0305
## elapsed = 0.14 Round = 40 logCost = 5.2819 logGamma = -7.8177 Value = -0.1169
## elapsed = 0.08 Round = 41 logCost = 15.5237 logGamma = -7.0231 Value = -0.0271
## elapsed = 0.09 Round = 42 logCost = 11.1765 logGamma = -8.5138 Value = -0.0407
## elapsed = 0.18 Round = 43 logCost = 14.5733 logGamma = -3.7011 Value = -0.0220
## elapsed = 0.07 Round = 44 logCost = 8.8259 logGamma = -1.7708 Value = -0.0254
## elapsed = 0.08 Round = 45 logCost = 8.2430 logGamma = -2.5740 Value = -0.0271
## elapsed = 0.31 Round = 46 logCost = 14.7339 logGamma = -2.4222 Value = -0.0220
## elapsed = 0.17 Round = 47 logCost = -4.4167 logGamma = -5.2440 Value = -0.6678
## elapsed = 0.08 Round = 48 logCost = 6.9308 logGamma = -5.6168 Value = -0.0508
## elapsed = 0.14 Round = 49 logCost = 13.3078 logGamma = -2.3103 Value = -0.0237
## elapsed = 0.07 Round = 50 logCost = 12.3183 logGamma = -4.0093 Value = -0.0254
## elapsed = 0.08 Round = 51 logCost = 20.0000 logGamma = -9.0000 Value = -0.0305
## elapsed = 4.02 Round = 52 logCost = 19.2323 logGamma = -0.7500 Value = -0.0492
## elapsed = 0.09 Round = 53 logCost = 1.8564 logGamma = -2.0000 Value = -0.0644
## elapsed = 0.06 Round = 54 logCost = 14.4783 logGamma = -9.0000 Value = -0.0305
## elapsed = 0.09 Round = 55 logCost = 10.8986 logGamma = -0.7500 Value = -0.0305
## elapsed = 0.32 Round = 56 logCost = 20.0000 logGamma = -5.1785 Value = -0.0305
## elapsed = 3.47 Round = 57 logCost = 17.6014 logGamma = -0.7500 Value = -0.0390
## elapsed = 0.08 Round = 58 logCost = 8.1570 logGamma = -9.0000 Value = -0.0729
## elapsed = 0.08 Round = 59 logCost = 20.0000 logGamma = -7.4126 Value = -0.0305
## elapsed = 0.12 Round = 60 logCost = -1.5878 logGamma = -0.7500 Value = -0.1153
## elapsed = 0.06 Round = 61 logCost = 7.1208 logGamma = -0.7500 Value = -0.0288
## elapsed = 0.97 Round = 62 logCost = 18.7157 logGamma = -3.5596 Value = -0.0288
## elapsed = 0.06 Round = 63 logCost = 18.7582 logGamma = -9.0000 Value = -0.0322
## elapsed = 2.05 Round = 64 logCost = 18.4870 logGamma = -1.7908 Value = -0.0322
## elapsed = 0.09 Round = 65 logCost = 2.7137 logGamma = -3.4041 Value = -0.0712
## elapsed = 0.06 Round = 66 logCost = 9.0777 logGamma = -0.7500 Value = -0.0288
## elapsed = 0.05 Round = 67 logCost = 12.7748 logGamma = -9.0000 Value = -0.0322
## elapsed = 0.22 Round = 68 logCost = 16.6634 logGamma = -4.7444 Value = -0.0288
## elapsed = 0.06 Round = 69 logCost = 9.7680 logGamma = -7.6261 Value = -0.0458
## elapsed = 0.06 Round = 70 logCost = 16.2111 logGamma = -9.0000 Value = -0.0322
## elapsed = 0.14 Round = 71 logCost = 12.4044 logGamma = -0.7500 Value = -0.0339
## elapsed = 3.93 Round = 72 logCost = 20.0000 logGamma = -0.7500 Value = -0.0525
## elapsed = 0.08 Round = 73 logCost = 4.7604 logGamma = -0.7500 Value = -0.0373
## elapsed = 0.12 Round = 74 logCost = 6.8592 logGamma = -9.0000 Value = -0.0966
## elapsed = 0.08 Round = 75 logCost = 14.9251 logGamma = -5.8481 Value = -0.0271
## elapsed = 0.09 Round = 76 logCost = 11.4591 logGamma = -1.7821 Value = -0.0237
## elapsed = 0.11 Round = 77 logCost = 19.3660 logGamma = -8.2613 Value = -0.0288
## elapsed = 0.91 Round = 78 logCost = 17.6739 logGamma = -3.5970 Value = -0.0271
## elapsed = 0.06 Round = 79 logCost = 8.8437 logGamma = -3.9020 Value = -0.0288
## elapsed = 0.06 Round = 80 logCost = 13.8841 logGamma = -8.2071 Value = -0.0288
## elapsed = 0.06 Round = 81 logCost = 8.0300 logGamma = -0.7500 Value = -0.0288
## elapsed = 0.15 Round = 82 logCost = 16.4134 logGamma = -6.4076 Value = -0.0271
## elapsed = 0.07 Round = 83 logCost = 10.2696 logGamma = -1.7542 Value = -0.0254
## elapsed = 0.30 Round = 84 logCost = 19.2910 logGamma = -5.0650 Value = -0.0322
## elapsed = 0.70 Round = 85 logCost = 20.0000 logGamma = -3.9245 Value = -0.0288
## elapsed = 0.07 Round = 86 logCost = 15.6145 logGamma = -8.3291 Value = -0.0271
## elapsed = 1.66 Round = 87 logCost = 17.5340 logGamma = -1.9107 Value = -0.0339
## elapsed = 0.11 Round = 88 logCost = 5.5756 logGamma = -5.9366 Value = -0.0559
## elapsed = 0.05 Round = 89 logCost = 6.7310 logGamma = -1.9490 Value = -0.0305
## elapsed = 0.06 Round = 90 logCost = 5.9593 logGamma = -0.7500 Value = -0.0322
## elapsed = 0.06 Round = 91 logCost = 14.0498 logGamma = -6.0956 Value = -0.0288
## elapsed = 0.08 Round = 92 logCost = 6.3014 logGamma = -7.1814 Value = -0.0763
## elapsed = 0.14 Round = 93 logCost = -5.0000 logGamma = -9.0000 Value = -0.6678
## elapsed = 1.26 Round = 94 logCost = 16.2558 logGamma = -0.7500 Value = -0.0356
## elapsed = 0.08 Round = 95 logCost = 1.0194 logGamma = -0.7500 Value = -0.0576
## elapsed = 0.06 Round = 96 logCost = 9.8648 logGamma = -9.0000 Value = -0.0508
## elapsed = 0.12 Round = 97 logCost = -2.5825 logGamma = -0.7500 Value = -0.2797
## elapsed = 0.07 Round = 98 logCost = 10.0041 logGamma = -0.7500 Value = -0.0288
## elapsed = 0.60 Round = 99 logCost = 15.1660 logGamma = -0.7500 Value = -0.0356
## elapsed = 0.05 Round = 100 logCost = 11.9830 logGamma = -9.0000 Value = -0.0390
##
## Best Parameters Found:
## Round = 15 logCost = 14.2460 logGamma = -3.6303 Value = -0.0220
OPT_Res$Best_Par
## logCost logGamma
## 14.246035 -3.630319
as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 1538065
as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.02650774
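Finally, the tuned radial-kernel parameters can be carried forward to a refit on the full training set and an evaluation on the held-out test set. This is a sketch of that step, not output from the original analysis, and assumes caret's confusionMatrix() is available.

best.rbf <- OPT_Res$Best_Par
svm.rbf <- svm(Group ~ PC1 + PC2, data = train.set, kernel = "radial",
               cost = exp(best.rbf["logCost"]), gamma = exp(best.rbf["logGamma"]))
test.pred <- predict(svm.rbf, test.set)
mean(test.pred != test.set$Group)           # test misclassification rate
confusionMatrix(test.pred, test.set$Group)  # per-group breakdown via caret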