Merging & Looking at the Data
# Read the ArcLake group summary workbook and the Flake PC-score file.
ArcLakeGroupSummary <- read_excel(
  "~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx"
)
flakefpc_csv <- read_csv("~/Desktop/EPSRC Project /flakefpc.csv.xls")
## Warning: Missing column names filled in: 'X1' [1]
# The first column of the Flake file has no header (see the warning above);
# name it GloboLakes_ID so it can serve as the merge key.
names(flakefpc_csv)[1] <- "GloboLakes_ID"

# Full outer join (all = TRUE) of columns 1, 3, 11 and 12 of the ArcLake
# table with every column of the Flake table.
Data1 <- merge(
  ArcLakeGroupSummary[, c(1, 3, 11, 12)],
  flakefpc_csv,
  by = "GloboLakes_ID",
  all = TRUE
)

# Flake.Data: columns 2, 5 and 6 of the merged table, with the last two
# relabelled PC1/PC2 and Group stored as a factor.
Flake.Data <- Data1[, c(2, 5, 6)]
names(Flake.Data)[2:3] <- c("PC1", "PC2")
Flake.Data$Group <- factor(Flake.Data$Group)

# PC.Data: columns 3, 11 and 12 of the ArcLake table, Group as a factor.
PC.Data <- ArcLakeGroupSummary[, c(3, 11, 12)]
PC.Data$Group <- factor(PC.Data$Group)
SVM: PCs
# Radial-kernel SVM classifying Group from the two PC scores, with fixed
# cost and gamma values.
svmfit <- svm(
  Group ~ PC1 + PC2,
  data = PC.Data,
  kernel = "radial",
  cost = 1538065,
  gamma = 0.02650774
)
# Predicted labels for the training rows, and the observed labels they are
# compared against when the training error is computed.
svm.predy <- predict(svmfit, newdata = PC.Data)
svm.y <- PC.Data$Group
svmfit
##
## Call:
## svm(formula = Group ~ PC1 + PC2, data = PC.Data, kernel = "radial",
## cost = 1538065, gamma = 0.02650774)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1538065
## gamma: 0.02650774
##
## Number of Support Vectors: 73
# Training error: proportion of training rows whose predicted label differs
# from the observed one.
mean(svm.predy != svm.y)
## [1] 0.009562842
## svm.predy
## svm.y 1 2 3 4 5 6 7 8 9
## 1 55 0 0 0 0 0 0 0 0
## 2 0 43 0 0 0 0 0 0 0
## 3 0 0 79 0 0 0 0 0 0
## 4 0 0 0 117 4 0 0 0 0
## 5 0 0 0 1 241 0 0 0 2
## 6 0 0 0 0 0 42 0 0 0
## 7 0 0 0 0 0 0 19 0 0
## 8 0 0 0 0 0 0 0 29 0
## 9 0 0 0 0 0 0 0 0 100
# Regular 150 x 150 grid spanning the observed PC1/PC2 range of PC.Data,
# used to shade the SVM's predicted class across the plane.
xgrid <- expand.grid(
  PC1 = seq(min(PC.Data$PC1), max(PC.Data$PC1), length.out = 150),
  PC2 = seq(min(PC.Data$PC2), max(PC.Data$PC2), length.out = 150)
)
group.train.set.pred <- predict(svmfit, xgrid)
xgrid <- cbind(xgrid, group.train.set.pred)

# Faint points: predicted class at each grid node.
# Solid points: training rows not listed in svmfit$index.
# Crosses (shape 4): rows listed in svmfit$index (the support vectors,
# per the e1071 documentation).
r1 <- ggplot(xgrid, aes(x = PC1, y = PC2)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 1 / 10) +
  geom_point(
    data = PC.Data[-svmfit$index, ],
    aes(x = PC1, y = PC2, colour = Group)
  ) +
  geom_point(
    data = PC.Data[svmfit$index, ],
    aes(x = PC1, y = PC2, colour = Group),
    shape = 4
  ) +
  labs(
    colour = "Class",
    # Accuracy is computed from svm.y / svm.predy rather than typed in, so
    # the title cannot drift from the reported error rate.
    title = sprintf(
      "%.2f%% Correctly Classified",
      100 * mean(svm.y == svm.predy)
    )
  )
r1

SVM: Flake Data
# The SVM is trained on PC.Data (same specification as for the PC scores);
# Flake.Data is only used as new data for prediction.
svmfit <- svm(
  Group ~ PC1 + PC2,
  data = PC.Data,
  kernel = "radial",
  cost = 1538065,
  gamma = 0.02650774
)
# Predicted labels for the Flake rows and the observed labels to compare with.
svm.predy <- predict(svmfit, newdata = Flake.Data)
svm.y <- Flake.Data$Group
svmfit
##
## Call:
## svm(formula = Group ~ PC1 + PC2, data = PC.Data, kernel = "radial",
## cost = 1538065, gamma = 0.02650774)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1538065
## gamma: 0.02650774
##
## Number of Support Vectors: 73
## [1] 0.1502732
## svm.predy
## svm.y 1 2 3 4 5 6 7 8 9
## 1 44 6 0 0 0 0 0 0 5
## 2 1 41 1 0 0 0 0 0 0
## 3 0 2 68 0 0 9 0 0 0
## 4 0 0 0 85 35 0 0 0 1
## 5 0 0 0 10 222 0 0 0 12
## 6 0 0 0 0 0 41 0 1 0
## 7 0 0 0 0 0 0 16 3 0
## 8 0 0 0 0 0 0 0 29 0
## 9 7 0 0 0 17 0 0 0 76
# Regular 150 x 150 grid spanning the observed PC1/PC2 range of Flake.Data,
# used to shade the SVM's predicted class across the plane.
xgrid <- expand.grid(
  PC1 = seq(min(Flake.Data$PC1), max(Flake.Data$PC1), length.out = 150),
  PC2 = seq(min(Flake.Data$PC2), max(Flake.Data$PC2), length.out = 150)
)
group.train.set.pred <- predict(svmfit, xgrid)
xgrid <- cbind(xgrid, group.train.set.pred)

# Faint points: predicted class at each grid node.
# Solid points: Flake.Data rows coloured by their observed Group.
r2 <- ggplot(xgrid, aes(x = PC1, y = PC2)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 1 / 10) +
  geom_point(data = Flake.Data, aes(x = PC1, y = PC2, colour = Group)) +
  labs(
    colour = "Class",
    # Accuracy is computed from svm.y / svm.predy (the Flake.Data labels and
    # predictions) rather than typed in, so the title stays in sync.
    title = sprintf(
      "%.2f%% Correctly Classified",
      100 * mean(svm.y == svm.predy)
    )
  )
r2

QDA: PCs
# Quadratic discriminant analysis of Group on the two PC scores.
qdafit <- qda(Group ~ PC1 + PC2, data = PC.Data)
# Predicted classes for the training rows, and the observed labels they are
# compared against when the training error is computed.
qda.predy <- predict(qdafit, newdata = PC.Data)[["class"]]
qda.y <- PC.Data$Group
qdafit
## Call:
## qda(Group ~ PC1 + PC2, data = PC.Data)
##
## Prior probabilities of groups:
## 1 2 3 4 5 6
## 0.07513661 0.05874317 0.10792350 0.16530055 0.33333333 0.05737705
## 7 8 9
## 0.02595628 0.03961749 0.13661202
##
## Group means:
## PC1 PC2
## 1 23.66381 26.2872858
## 2 66.50153 26.5181533
## 3 124.82346 -0.5058272
## 4 -57.09390 -21.5758004
## 5 -42.14542 6.8994269
## 6 96.28750 -25.2998674
## 7 -12.99238 -62.4515947
## 8 41.41366 -56.0352703
## 9 -18.28499 22.5528829
# Training error: proportion of training rows whose predicted class differs
# from the observed one.
mean(qda.predy != qda.y)
## [1] 0.03961749
## qda.predy
## qda.y 1 2 3 4 5 6 7 8 9
## 1 54 0 0 0 0 0 0 0 1
## 2 1 42 0 0 0 0 0 0 0
## 3 0 0 79 0 0 0 0 0 0
## 4 0 0 0 115 6 0 0 0 0
## 5 0 0 0 6 232 0 0 0 6
## 6 0 0 0 0 0 42 0 0 0
## 7 0 0 0 0 0 0 18 1 0
## 8 0 0 0 0 0 0 0 29 0
## 9 1 0 0 0 7 0 0 0 92
# Regular 150 x 150 grid spanning the observed PC1/PC2 range of PC.Data,
# used to shade the QDA predicted class across the plane.
xgrid <- expand.grid(
  PC1 = seq(min(PC.Data$PC1), max(PC.Data$PC1), length.out = 150),
  PC2 = seq(min(PC.Data$PC2), max(PC.Data$PC2), length.out = 150)
)
group.train.set.pred <- predict(qdafit, xgrid)$class
xgrid <- cbind(xgrid, group.train.set.pred)

# Faint points: predicted class at each grid node.
# Solid points: PC.Data rows coloured by their observed Group.
r3 <- ggplot(xgrid, aes(x = PC1, y = PC2)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 1 / 10) +
  geom_point(data = PC.Data, aes(x = PC1, y = PC2, colour = Group)) +
  labs(
    colour = "Class",
    # Accuracy is computed from qda.y / qda.predy rather than typed in, so
    # the title cannot drift from the reported error rate.
    title = sprintf(
      "%.2f%% Correctly Classified",
      100 * mean(qda.y == qda.predy)
    )
  )
r3

QDA: Flake Data
# The QDA model is trained on PC.Data; Flake.Data is only used as new data
# for prediction.
qdafit <- qda(Group ~ PC1 + PC2, data = PC.Data)
# Predicted classes for the Flake rows and the observed labels to compare with.
qda.predy <- predict(qdafit, newdata = Flake.Data)[["class"]]
qda.y <- Flake.Data$Group
qdafit
## Call:
## qda(Group ~ PC1 + PC2, data = PC.Data)
##
## Prior probabilities of groups:
## 1 2 3 4 5 6
## 0.07513661 0.05874317 0.10792350 0.16530055 0.33333333 0.05737705
## 7 8 9
## 0.02595628 0.03961749 0.13661202
##
## Group means:
## PC1 PC2
## 1 23.66381 26.2872858
## 2 66.50153 26.5181533
## 3 124.82346 -0.5058272
## 4 -57.09390 -21.5758004
## 5 -42.14542 6.8994269
## 6 96.28750 -25.2998674
## 7 -12.99238 -62.4515947
## 8 41.41366 -56.0352703
## 9 -18.28499 22.5528829
## [1] 0.1489071
## qda.predy
## qda.y 1 2 3 4 5 6 7 8 9
## 1 43 6 0 0 0 0 0 0 6
## 2 1 41 1 0 0 0 0 0 0
## 3 0 2 69 0 0 8 0 0 0
## 4 0 0 0 80 40 0 0 0 1
## 5 0 0 0 9 228 0 0 0 7
## 6 0 0 0 0 0 40 0 2 0
## 7 0 0 0 0 0 0 18 1 0
## 8 0 0 0 0 0 1 1 27 0
## 9 2 0 0 0 21 0 0 0 77
# Regular 150 x 150 grid spanning the observed PC1/PC2 range of Flake.Data,
# used to shade the QDA predicted class across the plane.
xgrid <- expand.grid(
  PC1 = seq(min(Flake.Data$PC1), max(Flake.Data$PC1), length.out = 150),
  PC2 = seq(min(Flake.Data$PC2), max(Flake.Data$PC2), length.out = 150)
)
group.train.set.pred <- predict(qdafit, xgrid)$class
xgrid <- cbind(xgrid, group.train.set.pred)

# Faint points: predicted class at each grid node.
# Solid points: Flake.Data rows coloured by their observed Group.
r4 <- ggplot(xgrid, aes(x = PC1, y = PC2)) +
  geom_point(aes(colour = group.train.set.pred), alpha = 1 / 10) +
  geom_point(data = Flake.Data, aes(x = PC1, y = PC2, colour = Group)) +
  labs(
    colour = "Class",
    # Accuracy is computed from qda.y / qda.predy (the Flake.Data labels and
    # predictions) rather than typed in, so the title stays in sync.
    title = sprintf(
      "%.2f%% Correctly Classified",
      100 * mean(qda.y == qda.predy)
    )
  )
r4
