# Load the ArcLake group summary (Excel) and the Dundee lake data (CSV).
ArcLakeGroupSummary <- read_excel(
  "~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx"
)
dundeedata <- read_csv("~/Desktop/EPSRC Project /dundeedata.csv.xls")

# Rename the first Dundee column to GloboLakes_ID so that both tables share
# the key used by the merge below.
names(dundeedata)[1] <- "GloboLakes_ID"

# Full outer join on the lake identifier (all = TRUE keeps unmatched rows
# from both tables).
Data <- merge(ArcLakeGroupSummary, dundeedata, by = "GloboLakes_ID", all = TRUE)

# Keep only rows that have a group: missing values and the literal string
# "NA" are both dropped. Per the original note, this returns the data set to
# the original 732 rows, now with the extra columns of information.
Data <- subset(Data, !is.na(Group) & Group != "NA")

# The group label is the classification response, so store it as a factor.
Data$Group <- as.factor(Data$Group)
In order to use each model, I prepare a suitable data frame - splitting it into training and test sets and then splitting the training set into 5 folds.
# Keep only the response (Group) and the three predictors used by the models.
Data2 <- data.frame(Data[, c("Group", "Latitude", "Longitude", "OverallAvg")])

# Stratify the entire data set into training (p = 0.8) and test sets.
# The seed is fixed so that the partition can be reproduced.
set.seed(234)
library(caret)
train.index <- createDataPartition(Data2[["Group"]], p = 0.8, list = FALSE)
train.set <- Data2[train.index, ]
test.set <- Data2[-train.index, ]

# Stratify the training set into 5 folds. With list = FALSE, createFolds
# returns one fold number per training row, which is stored alongside the
# data in the `fold` column.
folds <- createFolds(y = factor(train.set[["Group"]]), k = 5, list = FALSE)
train.set[["fold"]] <- folds
Apart from trying every possible combination, or performing a self-selected sweep of the values we think would perform well, the three main ways of choosing hyperparameters are Grid Search, Random Search and Bayesian Optimization.
The upper confidence bound was used as the acquisition function.
The log of cost and gamma were used for reasons of scale.
#linear
# Objective function for Bayesian optimisation of the linear-kernel SVM.
#
# Runs 5-fold cross-validation over the global `train.set`, using its `fold`
# column to pick the held-out rows, and fits svm() with cost = exp(logCost)
# and scale = FALSE.
#
# Args:
#   logCost: natural log of the SVM cost parameter.
#
# Returns:
#   list(Score, pred) where Score is the negated fold-size-weighted
#   misclassification rate (negated because the optimiser maximises Score)
#   and pred is always 0.
svm_fit_bayes <- function(logCost) {
  n_train <- nrow(train.set)

  weighted_errors <- vapply(seq_len(5), function(fold_id) {
    held_out <- subset(train.set, fold == fold_id)
    fitting <- subset(train.set, fold != fold_id)

    model <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = fitting,
      kernel = "linear",
      cost = exp(logCost),
      scale = FALSE
    )

    predicted <- predict(model, held_out)
    fold_error <- mean(held_out$Group != predicted)

    # Weight each fold's error by its share of the training rows.
    (nrow(held_out) / n_train) * fold_error
  }, numeric(1))

  list(Score = -sum(weighted_errors), pred = 0)
}
# Tune the linear-kernel SVM: search log(cost) over [-5, 20] with 50 initial
# points followed by 20 optimisation iterations, using the upper confidence
# bound ("ucb") acquisition function. The seed is fixed for reproducibility.
set.seed(234)
OPT_Res <- BayesianOptimization(
  svm_fit_bayes,
  bounds = list(logCost = c(-5, 20)),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 21.40 Round = 1 logCost = 13.6405 Value = -0.1119
## elapsed = 22.29 Round = 2 logCost = 14.5428 Value = -0.1102
## elapsed = 0.16 Round = 3 logCost = -4.4991 Value = -0.1102
## elapsed = 21.75 Round = 4 logCost = 14.4021 Value = -0.1102
## elapsed = 0.16 Round = 5 logCost = -3.3272 Value = -0.0746
## elapsed = 1332.70 Round = 6 logCost = 11.1199 Value = -0.1475
## elapsed = 29.29 Round = 7 logCost = 18.2346 Value = -0.1305
## elapsed = 22.46 Round = 8 logCost = 12.9411 Value = -0.1271
## elapsed = 26.16 Round = 9 logCost = 18.1934 Value = -0.1220
## elapsed = 1.38 Round = 10 logCost = 2.1058 Value = -0.0576
## elapsed = 19.01 Round = 11 logCost = 8.8931 Value = -0.0661
## elapsed = 19.25 Round = 12 logCost = 8.6925 Value = -0.0797
## elapsed = 19.98 Round = 13 logCost = 9.5712 Value = -0.0627
## elapsed = 19.77 Round = 14 logCost = 9.5747 Value = -0.0712
## elapsed = 0.13 Round = 15 logCost = -4.9700 Value = -0.1390
## elapsed = 12.41 Round = 16 logCost = 6.0279 Value = -0.1220
## elapsed = 1.87 Round = 17 logCost = 2.8288 Value = -0.0593
## elapsed = 22.43 Round = 18 logCost = 13.5004 Value = -0.1339
## elapsed = 0.19 Round = 19 logCost = -1.5418 Value = -0.0644
## elapsed = 25.39 Round = 20 logCost = 16.7944 Value = -0.1169
## elapsed = 19.75 Round = 21 logCost = 8.0767 Value = -0.1542
## elapsed = 20.70 Round = 22 logCost = 9.4776 Value = -0.0610
## elapsed = 25.95 Round = 23 logCost = 16.6300 Value = -0.1271
## elapsed = 20.38 Round = 24 logCost = 10.4356 Value = -0.1254
## elapsed = 16.29 Round = 25 logCost = 7.2445 Value = -0.0729
## elapsed = 6.38 Round = 26 logCost = 4.3700 Value = -0.0576
## elapsed = 22.07 Round = 27 logCost = 12.4167 Value = -0.1153
## elapsed = 0.41 Round = 28 logCost = -0.2072 Value = -0.0610
## elapsed = 24.95 Round = 29 logCost = 15.9732 Value = -0.1153
## elapsed = 25.74 Round = 30 logCost = 17.0627 Value = -0.1169
## elapsed = 20.28 Round = 31 logCost = 10.4807 Value = -0.1153
## elapsed = 1.60 Round = 32 logCost = 1.3078 Value = -0.0593
## elapsed = 0.28 Round = 33 logCost = -0.5958 Value = -0.0610
## elapsed = 22.02 Round = 34 logCost = 12.6207 Value = -0.1254
## elapsed = 18.17 Round = 35 logCost = 8.3371 Value = -0.1390
## elapsed = 21.29 Round = 36 logCost = 12.2033 Value = -0.1356
## elapsed = 22.11 Round = 37 logCost = 12.5207 Value = -0.1271
## elapsed = 0.32 Round = 38 logCost = -1.1632 Value = -0.0627
## elapsed = 19.30 Round = 39 logCost = 7.6583 Value = -0.1305
## elapsed = 4.71 Round = 40 logCost = 3.8680 Value = -0.0627
## elapsed = 19.73 Round = 41 logCost = 9.5338 Value = -0.0661
## elapsed = 26.63 Round = 42 logCost = 17.6858 Value = -0.1237
## elapsed = 25.13 Round = 43 logCost = 16.1313 Value = -0.1254
## elapsed = 0.71 Round = 44 logCost = 0.8071 Value = -0.0576
## elapsed = 20.60 Round = 45 logCost = 11.4682 Value = -0.1593
## elapsed = 23.55 Round = 46 logCost = 14.4283 Value = -0.1186
## elapsed = 0.97 Round = 47 logCost = 1.0083 Value = -0.0576
## elapsed = 19.83 Round = 48 logCost = 10.7092 Value = -0.1695
## elapsed = 20.19 Round = 49 logCost = 9.8284 Value = -0.0678
## elapsed = 0.20 Round = 50 logCost = -1.8423 Value = -0.0627
## elapsed = 0.18 Round = 51 logCost = -2.1029 Value = -0.0661
## elapsed = 0.15 Round = 52 logCost = -3.9821 Value = -0.0864
## elapsed = 0.14 Round = 53 logCost = -4.2291 Value = -0.0932
## elapsed = 0.17 Round = 54 logCost = -1.9667 Value = -0.0661
## elapsed = 0.26 Round = 55 logCost = -1.6643 Value = -0.0644
## elapsed = 0.13 Round = 56 logCost = -5.0000 Value = -0.1373
## elapsed = 1.45 Round = 57 logCost = 1.3483 Value = -0.0593
## elapsed = 0.91 Round = 58 logCost = 0.9726 Value = -0.0576
## elapsed = 0.53 Round = 59 logCost = 0.1759 Value = -0.0593
## elapsed = 1.00 Round = 60 logCost = 0.5298 Value = -0.0576
## elapsed = 1.50 Round = 61 logCost = 2.6920 Value = -0.0559
## elapsed = 0.53 Round = 62 logCost = 0.1837 Value = -0.0593
## elapsed = 1.33 Round = 63 logCost = 1.6624 Value = -0.0559
## elapsed = 1.18 Round = 64 logCost = 1.2573 Value = -0.0610
## elapsed = 0.68 Round = 65 logCost = 0.8416 Value = -0.0576
## elapsed = 0.79 Round = 66 logCost = 1.0708 Value = -0.0576
## elapsed = 0.70 Round = 67 logCost = 0.7018 Value = -0.0593
## elapsed = 1.13 Round = 68 logCost = 2.0346 Value = -0.0576
## elapsed = 0.33 Round = 69 logCost = 0.0781 Value = -0.0593
## elapsed = 0.92 Round = 70 logCost = 1.0846 Value = -0.0610
##
## Best Parameters Found:
## Round = 61 logCost = 2.6920 Value = -0.0559
# Best log(cost) found by the optimiser, followed by the corresponding cost
# on the original scale.
OPT_Res$Best_Par
## logCost
## 2.692035
as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 14.76168
# Re-run the 5-fold cross-validation at the best cost. This repeats the
# computation inside the linear svm_fit_bayes, so the printed sum is the
# positive counterpart of the optimiser's best Value.
CV.error<-NULL
for (i in 1:5) {
# Fold i is held out for validation; the other four folds are used to fit.
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
svmfit<-svm(Group~Longitude + Latitude + OverallAvg, data = train.data, kernel="linear",
cost=exp(OPT_Res$Best_Par["logCost"]), scale = FALSE)
svm.y<-valid.data$Group
svm.predy<-predict(svmfit, valid.data)
# Misclassification rate on the held-out fold.
ith.test.error<- mean(svm.y!=svm.predy)
# Weight the fold's error by its share of the training rows.
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
# Fold-size-weighted cross-validated misclassification rate.
sum(CV.error)
## [1] 0.0559322
# Objective function for Bayesian optimisation of the polynomial-kernel SVM.
#
# Runs 5-fold cross-validation over the global `train.set`, using its `fold`
# column to pick the held-out rows.
#
# NOTE(review): unlike the linear objective, no `scale` argument is passed
# here, so svm() uses its default -- confirm this difference is intended.
#
# Args:
#   logCost:  natural log of the SVM cost parameter.
#   logGamma: natural log of the kernel gamma parameter.
#   Degree:   degree of the polynomial kernel.
#
# Returns:
#   list(Score, pred) where Score is the negated fold-size-weighted
#   misclassification rate (negated because the optimiser maximises Score)
#   and pred is always 0.
svm_fit_bayes <- function(logCost, logGamma, Degree) {
  n_train <- nrow(train.set)

  weighted_errors <- vapply(seq_len(5), function(fold_id) {
    held_out <- subset(train.set, fold == fold_id)
    fitting <- subset(train.set, fold != fold_id)

    model <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = fitting,
      kernel = "polynomial",
      cost = exp(logCost),
      gamma = exp(logGamma),
      degree = Degree
    )

    predicted <- predict(model, held_out)
    fold_error <- mean(held_out$Group != predicted)

    # Weight each fold's error by its share of the training rows.
    (nrow(held_out) / n_train) * fold_error
  }, numeric(1))

  list(Score = -sum(weighted_errors), pred = 0)
}
# Tune the polynomial-kernel SVM over log(cost) in [-5, 20], log(gamma) in
# [-9, -0.75] and Degree in 1..5 (the degree bounds are given as integers).
# 50 initial points are followed by 20 optimisation iterations with the
# upper confidence bound ("ucb") acquisition function; the seed is fixed for
# reproducibility.
set.seed(234)
OPT_Res <- BayesianOptimization(
  svm_fit_bayes,
  bounds = list(
    logCost = c(-5, 20),
    logGamma = c(-9, -0.75),
    Degree = c(1L, 5L)
  ),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 0.08 Round = 1 logCost = 13.6405 logGamma = -3.7612 Degree = 4.0000 Value = -0.1254
## elapsed = 0.11 Round = 2 logCost = 14.5428 logGamma = -5.8652 Degree = 4.0000 Value = -0.4746
## elapsed = 0.12 Round = 3 logCost = -4.4991 logGamma = -7.7302 Degree = 2.0000 Value = -0.6678
## elapsed = 0.33 Round = 4 logCost = 14.4021 logGamma = -2.3362 Degree = 3.0000 Value = -0.0542
## elapsed = 0.11 Round = 5 logCost = -3.3272 logGamma = -8.3482 Degree = 2.0000 Value = -0.6678
## elapsed = 0.11 Round = 6 logCost = 11.1199 logGamma = -7.8096 Degree = 5.0000 Value = -0.6678
## elapsed = 0.07 Round = 7 logCost = 18.2346 logGamma = -8.2880 Degree = 2.0000 Value = -0.0831
## elapsed = 0.07 Round = 8 logCost = 12.9411 logGamma = -5.1917 Degree = 2.0000 Value = -0.0746
## elapsed = 7.16 Round = 9 logCost = 18.1934 logGamma = -2.3282 Degree = 2.0000 Value = -0.0729
## elapsed = 0.11 Round = 10 logCost = 2.1058 logGamma = -3.3696 Degree = 3.0000 Value = -0.5237
## elapsed = 0.11 Round = 11 logCost = 8.8931 logGamma = -5.9625 Degree = 4.0000 Value = -0.6678
## elapsed = 0.08 Round = 12 logCost = 8.6925 logGamma = -4.9629 Degree = 2.0000 Value = -0.1441
## elapsed = 0.11 Round = 13 logCost = 9.5712 logGamma = -6.8974 Degree = 3.0000 Value = -0.6678
## elapsed = 0.07 Round = 14 logCost = 9.5747 logGamma = -1.8662 Degree = 3.0000 Value = -0.0559
## elapsed = 0.11 Round = 15 logCost = -4.9700 logGamma = -5.2061 Degree = 2.0000 Value = -0.6678
## elapsed = 0.07 Round = 16 logCost = 6.0279 logGamma = -5.7480 Degree = 1.0000 Value = -0.0780
## elapsed = 0.10 Round = 17 logCost = 2.8288 logGamma = -2.7361 Degree = 2.0000 Value = -0.2186
## elapsed = 0.07 Round = 18 logCost = 13.5004 logGamma = -4.8409 Degree = 2.0000 Value = -0.0712
## elapsed = 0.11 Round = 19 logCost = -1.5418 logGamma = -3.4781 Degree = 4.0000 Value = -0.6678
## elapsed = 0.08 Round = 20 logCost = 16.7944 logGamma = -4.0321 Degree = 3.0000 Value = -0.0559
## elapsed = 0.09 Round = 21 logCost = 8.0767 logGamma = -1.3042 Degree = 4.0000 Value = -0.0780
## elapsed = 0.11 Round = 22 logCost = 9.4776 logGamma = -6.1408 Degree = 3.0000 Value = -0.6051
## elapsed = 0.08 Round = 23 logCost = 16.6300 logGamma = -4.1688 Degree = 3.0000 Value = -0.0559
## elapsed = 0.11 Round = 24 logCost = 10.4356 logGamma = -6.4218 Degree = 3.0000 Value = -0.5966
## elapsed = 0.11 Round = 25 logCost = 7.2445 logGamma = -8.4438 Degree = 4.0000 Value = -0.6678
## elapsed = 0.11 Round = 26 logCost = 4.3700 logGamma = -5.8644 Degree = 2.0000 Value = -0.6678
## elapsed = 0.10 Round = 27 logCost = 12.4167 logGamma = -4.4647 Degree = 4.0000 Value = -0.3424
## elapsed = 0.11 Round = 28 logCost = -0.2072 logGamma = -5.7881 Degree = 4.0000 Value = -0.6678
## elapsed = 0.07 Round = 29 logCost = 15.9732 logGamma = -7.7657 Degree = 2.0000 Value = -0.0949
## elapsed = 0.07 Round = 30 logCost = 17.0627 logGamma = -7.9927 Degree = 2.0000 Value = -0.0915
## elapsed = 0.08 Round = 31 logCost = 10.4807 logGamma = -4.1554 Degree = 3.0000 Value = -0.1169
## elapsed = 0.11 Round = 32 logCost = 1.3078 logGamma = -4.5781 Degree = 2.0000 Value = -0.6678
## elapsed = 0.12 Round = 33 logCost = -0.5958 logGamma = -6.2720 Degree = 4.0000 Value = -0.6678
## elapsed = 0.10 Round = 34 logCost = 12.6207 logGamma = -2.0301 Degree = 5.0000 Value = -0.0729
## elapsed = 0.08 Round = 35 logCost = 8.3371 logGamma = -1.3244 Degree = 3.0000 Value = -0.0559
## elapsed = 0.09 Round = 36 logCost = 12.2033 logGamma = -3.6795 Degree = 5.0000 Value = -0.3102
## elapsed = 0.12 Round = 37 logCost = 12.5207 logGamma = -6.6158 Degree = 4.0000 Value = -0.6678
## elapsed = 0.10 Round = 38 logCost = -1.1632 logGamma = -1.1124 Degree = 2.0000 Value = -0.3661
## elapsed = 0.12 Round = 39 logCost = 7.6583 logGamma = -6.2656 Degree = 4.0000 Value = -0.6678
## elapsed = 0.08 Round = 40 logCost = 3.8680 logGamma = -2.2080 Degree = 2.0000 Value = -0.1102
## elapsed = 0.07 Round = 41 logCost = 9.5338 logGamma = -1.9580 Degree = 4.0000 Value = -0.0763
## elapsed = 3.62 Round = 42 logCost = 17.6858 logGamma = -1.9475 Degree = 5.0000 Value = -0.0864
## elapsed = 0.10 Round = 43 logCost = 16.1313 logGamma = -7.7448 Degree = 3.0000 Value = -0.4695
## elapsed = 0.11 Round = 44 logCost = 0.8071 logGamma = -7.7905 Degree = 4.0000 Value = -0.6678
## elapsed = 0.12 Round = 45 logCost = 11.4682 logGamma = -4.7437 Degree = 5.0000 Value = -0.5864
## elapsed = 9.95 Round = 46 logCost = 14.4283 logGamma = -0.8029 Degree = 5.0000 Value = -0.0864
## elapsed = 0.11 Round = 47 logCost = 1.0083 logGamma = -5.9347 Degree = 2.0000 Value = -0.6678
## elapsed = 0.08 Round = 48 logCost = 10.7092 logGamma = -5.9287 Degree = 2.0000 Value = -0.1356
## elapsed = 0.07 Round = 49 logCost = 9.8284 logGamma = -3.3159 Degree = 2.0000 Value = -0.0797
## elapsed = 0.11 Round = 50 logCost = -1.8423 logGamma = -4.7731 Degree = 3.0000 Value = -0.6678
## elapsed = 0.06 Round = 51 logCost = 11.9697 logGamma = -8.2641 Degree = 1.0000 Value = -0.0492
## elapsed = 0.06 Round = 52 logCost = 10.6776 logGamma = -5.7603 Degree = 1.0000 Value = -0.0492
## elapsed = 0.59 Round = 53 logCost = 18.7900 logGamma = -9.0000 Degree = 1.0000 Value = -0.0475
## elapsed = 16.08 Round = 54 logCost = 20.0000 logGamma = -0.7500 Degree = 1.0000 Value = -0.1068
## elapsed = 6.63 Round = 55 logCost = 20.0000 logGamma = -6.5306 Degree = 1.0000 Value = -0.0441
## elapsed = 14.12 Round = 56 logCost = 20.0000 logGamma = -3.4631 Degree = 1.0000 Value = -0.0644
## elapsed = 0.06 Round = 57 logCost = 5.6769 logGamma = -3.1207 Degree = 1.0000 Value = -0.0525
## elapsed = 0.49 Round = 58 logCost = 20.0000 logGamma = -5.4035 Degree = 2.0000 Value = -0.0695
## elapsed = 0.07 Round = 59 logCost = 20.0000 logGamma = -4.4330 Degree = 5.0000 Value = -0.1322
## elapsed = 9.20 Round = 60 logCost = 15.1902 logGamma = -0.7500 Degree = 1.0000 Value = -0.0475
## elapsed = 0.09 Round = 61 logCost = 6.2156 logGamma = -9.0000 Degree = 1.0000 Value = -0.3864
## elapsed = 0.12 Round = 62 logCost = 15.7092 logGamma = -7.3439 Degree = 1.0000 Value = -0.0475
## elapsed = 0.10 Round = 63 logCost = 14.4484 logGamma = -9.0000 Degree = 2.0000 Value = -0.3847
## elapsed = 0.08 Round = 64 logCost = 20.0000 logGamma = -7.6508 Degree = 2.0000 Value = -0.0763
## elapsed = 12.23 Round = 65 logCost = 20.0000 logGamma = -0.7500 Degree = 3.0000 Value = -0.2271
## elapsed = 5.27 Round = 66 logCost = 12.5897 logGamma = -0.7500 Degree = 4.0000 Value = -0.0763
## elapsed = 0.07 Round = 67 logCost = 2.8518 logGamma = -1.3725 Degree = 1.0000 Value = -0.0627
## elapsed = 0.09 Round = 68 logCost = 8.3254 logGamma = -0.7500 Degree = 1.0000 Value = -0.0508
## elapsed = 3.70 Round = 69 logCost = 13.7655 logGamma = -0.7500 Degree = 2.0000 Value = -0.0695
## elapsed = 0.07 Round = 70 logCost = 9.3236 logGamma = -7.0984 Degree = 1.0000 Value = -0.0559
##
## Best Parameters Found:
## Round = 55 logCost = 20.0000 logGamma = -6.5306 Degree = 1.0000 Value = -0.0441
# Best hyperparameters found for the polynomial kernel, followed by cost and
# gamma converted back from the log scale.
OPT_Res$Best_Par
## logCost logGamma Degree
## 20.000000 -6.530609 1.000000
as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 485165195
as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.001458117
# Re-run the 5-fold cross-validation at the best hyperparameters. This
# repeats the computation inside the polynomial svm_fit_bayes, so the printed
# sum is the positive counterpart of the optimiser's best Value.
CV.error<-NULL
for (i in 1:5) {
# Fold i is held out for validation; the other four folds are used to fit.
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
svmfit<-svm(Group~Longitude + Latitude + OverallAvg, data = train.data, kernel="polynomial",
cost=exp(OPT_Res$Best_Par["logCost"]),
gamma=exp(OPT_Res$Best_Par["logGamma"]),
degree=OPT_Res$Best_Par["Degree"])
svm.y<-valid.data$Group
svm.predy<-predict(svmfit, valid.data)
# Misclassification rate on the held-out fold.
ith.test.error<- mean(svm.y!=svm.predy)
# Weight the fold's error by its share of the training rows.
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
# Fold-size-weighted cross-validated misclassification rate.
sum(CV.error)
## [1] 0.0440678
# Objective function for Bayesian optimisation of the radial-kernel SVM.
#
# Runs 5-fold cross-validation over the global `train.set`, using its `fold`
# column to pick the held-out rows.
#
# NOTE(review): unlike the linear objective, no `scale` argument is passed
# here, so svm() uses its default -- confirm this difference is intended.
#
# Args:
#   logCost:  natural log of the SVM cost parameter.
#   logGamma: natural log of the kernel gamma parameter.
#
# Returns:
#   list(Score, pred) where Score is the negated fold-size-weighted
#   misclassification rate (negated because the optimiser maximises Score)
#   and pred is always 0.
svm_fit_bayes <- function(logCost, logGamma) {
  n_train <- nrow(train.set)

  weighted_errors <- vapply(seq_len(5), function(fold_id) {
    held_out <- subset(train.set, fold == fold_id)
    fitting <- subset(train.set, fold != fold_id)

    model <- svm(
      Group ~ Longitude + Latitude + OverallAvg,
      data = fitting,
      kernel = "radial",
      cost = exp(logCost),
      gamma = exp(logGamma)
    )

    predicted <- predict(model, held_out)
    fold_error <- mean(held_out$Group != predicted)

    # Weight each fold's error by its share of the training rows.
    (nrow(held_out) / n_train) * fold_error
  }, numeric(1))

  list(Score = -sum(weighted_errors), pred = 0)
}
# Tune the radial-kernel SVM over log(cost) in [-5, 20] and log(gamma) in
# [-9, -0.75]: 50 initial points followed by 20 optimisation iterations with
# the upper confidence bound ("ucb") acquisition function. The seed is fixed
# for reproducibility.
set.seed(234)
OPT_Res <- BayesianOptimization(
  svm_fit_bayes,
  bounds = list(
    logCost = c(-5, 20),
    logGamma = c(-9, -0.75)
  ),
  init_grid_dt = NULL,
  init_points = 50,
  n_iter = 20,
  acq = "ucb",
  kappa = 2.576,
  eps = 0,
  verbose = TRUE
)
## elapsed = 0.20 Round = 1 logCost = 13.6405 logGamma = -3.7612 Value = -0.0492
## elapsed = 0.15 Round = 2 logCost = 14.5428 logGamma = -5.8652 Value = -0.0542
## elapsed = 0.14 Round = 3 logCost = -4.4991 logGamma = -7.7302 Value = -0.6678
## elapsed = 0.88 Round = 4 logCost = 14.4021 logGamma = -2.3362 Value = -0.0576
## elapsed = 0.13 Round = 5 logCost = -3.3272 logGamma = -8.3482 Value = -0.6678
## elapsed = 0.07 Round = 6 logCost = 11.1199 logGamma = -7.8096 Value = -0.0525
## elapsed = 0.21 Round = 7 logCost = 18.2346 logGamma = -8.2880 Value = -0.0576
## elapsed = 0.10 Round = 8 logCost = 12.9411 logGamma = -5.1917 Value = -0.0542
## elapsed = 3.70 Round = 9 logCost = 18.1934 logGamma = -2.3282 Value = -0.0644
## elapsed = 0.10 Round = 10 logCost = 2.1058 logGamma = -3.3696 Value = -0.1051
## elapsed = 0.07 Round = 11 logCost = 8.8931 logGamma = -5.9625 Value = -0.0492
## elapsed = 0.07 Round = 12 logCost = 8.6925 logGamma = -4.9629 Value = -0.0373
## elapsed = 0.07 Round = 13 logCost = 9.5712 logGamma = -6.8974 Value = -0.0492
## elapsed = 0.07 Round = 14 logCost = 9.5747 logGamma = -1.8662 Value = -0.0492
## elapsed = 0.14 Round = 15 logCost = -4.9700 logGamma = -5.2061 Value = -0.6678
## elapsed = 0.08 Round = 16 logCost = 6.0279 logGamma = -5.7480 Value = -0.0576
## elapsed = 0.09 Round = 17 logCost = 2.8288 logGamma = -2.7361 Value = -0.0780
## elapsed = 0.14 Round = 18 logCost = 13.5004 logGamma = -4.8409 Value = -0.0508
## elapsed = 0.13 Round = 19 logCost = -1.5418 logGamma = -3.4781 Value = -0.5593
## elapsed = 1.88 Round = 20 logCost = 16.7944 logGamma = -4.0321 Value = -0.0508
## elapsed = 0.07 Round = 21 logCost = 8.0767 logGamma = -1.3042 Value = -0.0492
## elapsed = 0.07 Round = 22 logCost = 9.4776 logGamma = -6.1408 Value = -0.0458
## elapsed = 0.81 Round = 23 logCost = 16.6300 logGamma = -4.1688 Value = -0.0593
## elapsed = 0.07 Round = 24 logCost = 10.4356 logGamma = -6.4218 Value = -0.0390
## elapsed = 0.10 Round = 25 logCost = 7.2445 logGamma = -8.4438 Value = -0.1203
## elapsed = 0.10 Round = 26 logCost = 4.3700 logGamma = -5.8644 Value = -0.1305
## elapsed = 0.09 Round = 27 logCost = 12.4167 logGamma = -4.4647 Value = -0.0559
## elapsed = 0.13 Round = 28 logCost = -0.2072 logGamma = -5.7881 Value = -0.5593
## elapsed = 0.13 Round = 29 logCost = 15.9732 logGamma = -7.7657 Value = -0.0593
## elapsed = 0.15 Round = 30 logCost = 17.0627 logGamma = -7.9927 Value = -0.0644
## elapsed = 0.08 Round = 31 logCost = 10.4807 logGamma = -4.1554 Value = -0.0542
## elapsed = 0.13 Round = 32 logCost = 1.3078 logGamma = -4.5781 Value = -0.3203
## elapsed = 0.14 Round = 33 logCost = -0.5958 logGamma = -6.2720 Value = -0.6678
## elapsed = 0.31 Round = 34 logCost = 12.6207 logGamma = -2.0301 Value = -0.0627
## elapsed = 0.07 Round = 35 logCost = 8.3371 logGamma = -1.3244 Value = -0.0475
## elapsed = 0.11 Round = 36 logCost = 12.2033 logGamma = -3.6795 Value = -0.0441
## elapsed = 0.07 Round = 37 logCost = 12.5207 logGamma = -6.6158 Value = -0.0492
## elapsed = 0.12 Round = 38 logCost = -1.1632 logGamma = -1.1124 Value = -0.1712
## elapsed = 0.07 Round = 39 logCost = 7.6583 logGamma = -6.2656 Value = -0.0492
## elapsed = 0.07 Round = 40 logCost = 3.8680 logGamma = -2.2080 Value = -0.0627
## elapsed = 0.07 Round = 41 logCost = 9.5338 logGamma = -1.9580 Value = -0.0475
## elapsed = 2.56 Round = 42 logCost = 17.6858 logGamma = -1.9475 Value = -0.0627
## elapsed = 0.16 Round = 43 logCost = 16.1313 logGamma = -7.7448 Value = -0.0492
## elapsed = 0.14 Round = 44 logCost = 0.8071 logGamma = -7.7905 Value = -0.6678
## elapsed = 0.08 Round = 45 logCost = 11.4682 logGamma = -4.7437 Value = -0.0508
## elapsed = 0.34 Round = 46 logCost = 14.4283 logGamma = -0.8029 Value = -0.0610
## elapsed = 0.14 Round = 47 logCost = 1.0083 logGamma = -5.9347 Value = -0.5559
## elapsed = 0.07 Round = 48 logCost = 10.7092 logGamma = -5.9287 Value = -0.0407
## elapsed = 0.07 Round = 49 logCost = 9.8284 logGamma = -3.3159 Value = -0.0458
## elapsed = 0.14 Round = 50 logCost = -1.8423 logGamma = -4.7731 Value = -0.6237
## elapsed = 0.34 Round = 51 logCost = 20.0000 logGamma = -6.9054 Value = -0.0525
## elapsed = 0.14 Round = 52 logCost = -5.0000 logGamma = -0.7500 Value = -0.6678
## elapsed = 0.08 Round = 53 logCost = 2.1555 logGamma = -0.7500 Value = -0.0746
## elapsed = 0.55 Round = 54 logCost = 20.0000 logGamma = -0.7500 Value = -0.0627
## elapsed = 1.83 Round = 55 logCost = 20.0000 logGamma = -3.9462 Value = -0.0525
## elapsed = 0.07 Round = 56 logCost = 5.2349 logGamma = -3.4542 Value = -0.0542
## elapsed = 0.14 Round = 57 logCost = 4.0103 logGamma = -9.0000 Value = -0.5576
## elapsed = 0.08 Round = 58 logCost = 9.9047 logGamma = -9.0000 Value = -0.0610
## elapsed = 0.46 Round = 59 logCost = 17.3290 logGamma = -0.7500 Value = -0.0644
## elapsed = 0.20 Round = 60 logCost = 20.0000 logGamma = -7.8768 Value = -0.0525
## elapsed = 0.09 Round = 61 logCost = 0.4371 logGamma = -0.7500 Value = -0.0983
## elapsed = 0.35 Round = 62 logCost = 17.6451 logGamma = -6.6718 Value = -0.0458
## elapsed = 0.13 Round = 63 logCost = 20.0000 logGamma = -9.0000 Value = -0.0475
## elapsed = 0.11 Round = 64 logCost = 5.3181 logGamma = -7.1587 Value = -0.1339
## elapsed = 0.07 Round = 65 logCost = 5.4836 logGamma = -0.7500 Value = -0.0508
## elapsed = 0.26 Round = 66 logCost = 15.9008 logGamma = -0.7500 Value = -0.0610
## elapsed = 0.86 Round = 67 logCost = 20.0000 logGamma = -5.2973 Value = -0.0610
## elapsed = 0.09 Round = 68 logCost = 1.3582 logGamma = -2.1158 Value = -0.0966
## elapsed = 0.10 Round = 69 logCost = 10.6771 logGamma = -0.7500 Value = -0.0644
## elapsed = 0.06 Round = 70 logCost = 6.9453 logGamma = -2.7971 Value = -0.0458
##
## Best Parameters Found:
## Round = 12 logCost = 8.6925 logGamma = -4.9629 Value = -0.0373
# Best hyperparameters found for the radial kernel, followed by cost and
# gamma converted back from the log scale.
OPT_Res$Best_Par
## logCost logGamma
## 8.692541 -4.962923
as.numeric(exp(OPT_Res$Best_Par["logCost"]))
## [1] 5958.305
as.numeric(exp(OPT_Res$Best_Par["logGamma"]))
## [1] 0.006992461
# Re-run the 5-fold cross-validation at the best hyperparameters. This
# repeats the computation inside the radial svm_fit_bayes, so the printed sum
# is the positive counterpart of the optimiser's best Value.
CV.error<-NULL
for (i in 1:5) {
# Fold i is held out for validation; the other four folds are used to fit.
valid.data <- subset(train.set, fold == i)
train.data <- subset(train.set, fold != i)
svmfit<-svm(Group~Longitude + Latitude + OverallAvg, data = train.data, kernel="radial",
cost=exp(OPT_Res$Best_Par["logCost"]),
gamma=exp(OPT_Res$Best_Par["logGamma"]))
svm.y<-valid.data$Group
svm.predy<-predict(svmfit, valid.data)
# Misclassification rate on the held-out fold.
ith.test.error<- mean(svm.y!=svm.predy)
# Weight the fold's error by its share of the training rows.
CV.error<-c(CV.error,(nrow(valid.data)/nrow(train.set))*ith.test.error)
}
# Fold-size-weighted cross-validated misclassification rate.
sum(CV.error)
## [1] 0.03728814
# Hyperparameters sampled during the radial-kernel run, transcribed from the
# optimiser log above. Entries 1-50 are the initial points and entries 51-70
# are the Bayesian-optimisation iterations.
logCost <- c(
  13.6405, 14.5428, -4.4991, 14.4021, -3.3272, 11.1199, 18.2346, 12.9411, 18.1934, 2.1058,
  8.8931, 8.6925, 9.5712, 9.5747, -4.9700, 6.0279, 2.8288, 13.5004, -1.5418, 16.7944,
  8.0767, 9.4776, 16.6300, 10.4356, 7.2445, 4.3700, 12.4167, -0.2072, 15.9732, 17.0627,
  10.4807, 1.3078, -0.5958, 12.6207, 8.3371, 12.2033, 12.5207, -1.1632, 7.6583, 3.8680,
  9.5338, 17.6858, 16.1313, 0.8071, 11.4682, 14.4283, 1.0083, 10.7092, 9.8284, -1.8423,
  20.0000, -5.0000, 2.1555, 20.0000, 20.0000, 5.2349, 4.0103, 9.9047, 17.3290, 20.0000,
  0.4371, 17.6451, 20.0000, 5.3181, 5.4836, 15.9008, 20.0000, 1.3582, 10.6771, 6.9453
)
logGamma <- c(
  -3.7612, -5.8652, -7.7302, -2.3362, -8.3482, -7.8096, -8.2880, -5.1917, -2.3282, -3.3696,
  -5.9625, -4.9629, -6.8974, -1.8662, -5.2061, -5.7480, -2.7361, -4.8409, -3.4781, -4.0321,
  -1.3042, -6.1408, -4.1688, -6.4218, -8.4438, -5.8644, -4.4647, -5.7881, -7.7657, -7.9927,
  -4.1554, -4.5781, -6.2720, -2.0301, -1.3244, -3.6795, -6.6158, -1.1124, -6.2656, -2.2080,
  -1.9580, -1.9475, -7.7448, -7.7905, -4.7437, -0.8029, -5.9347, -5.9287, -3.3159, -4.7731,
  -6.9054, -0.7500, -0.7500, -0.7500, -3.9462, -3.4542, -9.0000, -9.0000, -0.7500, -7.8768,
  -0.7500, -6.6718, -9.0000, -7.1587, -0.7500, -0.7500, -5.2973, -2.1158, -0.7500, -2.7971
)
# NOTE(review): Values is initialised but not used in the code visible here.
Values <- c()

# Base layer: a curved arrow from each optimisation iteration to the next
# (round 51 -> 52 -> ... -> 70), showing the path taken through the space.
p <- ggplot() +
  geom_curve(
    aes(
      x = logGamma[51:69], y = logCost[51:69],
      xend = logGamma[52:70], yend = logCost[52:70]
    ),
    arrow = arrow(length = unit(0.03, "npc")),
    curvature = 0.1
  )

# Overlay the initial points (default colour) and the optimisation
# iterations (red), then label the axes and print the plot.
p +
  geom_point(aes(x = logGamma[1:50], y = logCost[1:50])) +
  geom_point(aes(x = logGamma[51:70], y = logCost[51:70]), colour = "red") +
  labs(x = "Log Gamma", y = "Log Cost", title = " B.O Iterations")
Comparison between Bayesian Optimization and my sweep of hyperparameters.
SVM linear kernel:
SVM polynomial kernel:
SVM radial kernel: