library(GGally)
library(ggiraph)
library(ggplot2)
library(stringr)
library(DT)
library(corrplot)
library(klaR)
library(plotly)
library(plyr)
library(caret)
library(leaflet)
library(shiny)
library(MASS) # for LDA and QDA.
library(readxl)
library(readr)
library(e1071)
library(knitr)
library(DT)
# Load the two input tables from fixed locations under the user's Desktop.
# read_excel() is provided by readxl and read_csv() by readr.
ArcLakeGroupSummary <- read_excel(
  path = "~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx"
)
# NOTE(review): this file name ends in ".csv.xls" but is parsed as CSV here;
# presumably the content really is comma-separated text -- confirm.
dundeedata <- read_csv(
  file = "~/Desktop/EPSRC Project /dundeedata.csv.xls"
)
## Parsed with column specification:
## cols(
##   GloboLID = col_integer(),
##   `1_L_Area_Perimeter_UTM_(997L).AREA_sqkm` = col_double(),
##   `1_L_Area_Perimeter_UTM_(997L).PERIMETER_km` = col_double(),
##   KG_Coding = col_character(),
##   KG_Class = col_character(),
##   KG_ID = col_integer(),
##   KG_RGB = col_character(),
##   TEOW = col_character(),
##   Rocktype = col_character(),
##   GLiM = col_integer(),
##   RiverDensity = col_double(),
##   `1_c_Area_Perimeter_UTM_(996c).AREA_sqkm` = col_double(),
##   `1_c_Area_Perimeter_UTM_(996c).PERIMETER_km` = col_double()
## )
# Rename the first column of dundeedata to GloboLakes_ID so that both tables
# share the key used for the merge below.
names(dundeedata)[1] <- "GloboLakes_ID"

# Full outer merge on the lake ID (rows from either table are kept).
Data <- merge(
  x = ArcLakeGroupSummary,
  y = dundeedata,
  by = "GloboLakes_ID",
  all = TRUE
)

# Keep only rows that carry a group label: drop both genuinely missing values
# and the literal string "NA". Per the original note, this brings the data
# back to the original 732 rows, just with extra columns of information.
Data <- Data[!is.na(Data$Group) & Data$Group != "NA", , drop = FALSE]

Data$Group <- as.factor(Data$Group)
set.seed(35)

# Hold out 20% of the rows as a test set. createDataPartition() comes from
# caret, which is already attached with the other packages at the top of the
# script, so it is not loaded again here.
train.index <- createDataPartition(Data$Group, p = 0.8, list = FALSE)
train.set <- Data[train.index, ]
test.set <- Data[-train.index, ]

# Cross-validated QDA error for Group ~ PC1 on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ PC1, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ PC1, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.1627119 8.905487e-05 Non-Stratified
## 2     10 0.1610169 2.186473e-04 Non-Stratified
## 3     20 0.1661017 2.099586e-04 Non-Stratified
## 4     50 0.1677966 2.708457e-04 Non-Stratified
## 5    100 0.1677966 2.689686e-04 Non-Stratified
## 6      5 0.1542373 2.856142e-04     Stratified
## 7     10 0.1610169 5.780209e-05     Stratified
## 8     20 0.1576271 2.025203e-04     Stratified
## 9     50 0.1644068 2.109892e-04     Stratified
## 10   100 0.1644068 2.077677e-04     Stratified
# Cross-validated QDA error for Group ~ PC2 on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ PC2, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ PC2, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.4000000 2.039644e-04 Non-Stratified
## 2     10 0.3983051 4.165470e-04 Non-Stratified
## 3     20 0.4033898 2.318238e-04 Non-Stratified
## 4     50 0.4033898 3.160342e-04 Non-Stratified
## 5    100 0.4016949 2.883266e-04 Non-Stratified
## 6      5 0.4016949 4.472328e-05     Stratified
## 7     10 0.3983051 1.753652e-04     Stratified
## 8     20 0.3949153 2.424563e-04     Stratified
## 9     50 0.4016949 1.735399e-04     Stratified
## 10   100 0.4016949 3.422186e-04     Stratified
# Cross-validated QDA error for Group ~ PC1 + PC2 on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ PC1 + PC2, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ PC1 + PC2, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.04576271 1.867280e-05 Non-Stratified
## 2     10 0.04406780 3.957994e-05 Non-Stratified
## 3     20 0.04576271 3.755668e-05 Non-Stratified
## 4     50 0.04745763 7.281769e-05 Non-Stratified
## 5    100 0.04745763 7.053984e-05 Non-Stratified
## 6      5 0.04237288 5.802235e-05     Stratified
## 7     10 0.04576271 1.018619e-04     Stratified
## 8     20 0.04406780 7.395330e-05     Stratified
## 9     50 0.04745763 7.413477e-05     Stratified
## 10   100 0.04576271 8.081163e-05     Stratified
# Cross-validated QDA error for Group ~ Latitude on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Latitude, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Latitude, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.3000000 0.0003059466 Non-Stratified
## 2     10 0.2915254 0.0005221999 Non-Stratified
## 3     20 0.3050847 0.0002939255 Non-Stratified
## 4     50 0.3067797 0.0002728585 Non-Stratified
## 5    100 0.3118644 0.0002951246 Non-Stratified
## 6      5 0.3000000 0.0004472251     Stratified
## 7     10 0.3050847 0.0001286575     Stratified
## 8     20 0.3016949 0.0004303967     Stratified
## 9     50 0.3084746 0.0003090873     Stratified
## 10   100 0.3084746 0.0003002881     Stratified
# Cross-validated QDA error for Group ~ Longitude on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Longitude, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Longitude, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.6491525 1.910371e-04 Non-Stratified
## 2     10 0.6474576 5.094322e-04 Non-Stratified
## 3     20 0.6559322 2.354334e-04 Non-Stratified
## 4     50 0.6542373 2.495943e-04 Non-Stratified
## 5    100 0.6542373 2.791874e-04 Non-Stratified
## 6      5 0.6491525 1.516560e-04     Stratified
## 7     10 0.6525424 3.144451e-05     Stratified
## 8     20 0.6542373 6.920554e-05     Stratified
## 9     50 0.6542373 1.135882e-04     Stratified
## 10   100 0.6542373 1.131025e-04     Stratified
# Cross-validated QDA error for Group ~ OverallAvg on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ OverallAvg, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ OverallAvg, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.1593220 1.005458e-05 Non-Stratified
## 2     10 0.1542373 4.755977e-05 Non-Stratified
## 3     20 0.1576271 1.863061e-04 Non-Stratified
## 4     50 0.1542373 3.029877e-04 Non-Stratified
## 5    100 0.1542373 2.316622e-04 Non-Stratified
## 6      5 0.1508475 1.462038e-04     Stratified
## 7     10 0.1457627 8.131736e-05     Stratified
## 8     20 0.1491525 1.114989e-04     Stratified
## 9     50 0.1525424 1.913996e-04     Stratified
## 10   100 0.1525424 1.712387e-04     Stratified
# Cross-validated QDA error for Group ~ OverallMeanAmp on the training set.
# One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ OverallMeanAmp, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ OverallMeanAmp, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.4050847 0.0000459638 Non-Stratified
## 2     10 0.4050847 0.0004369753 Non-Stratified
## 3     20 0.4101695 0.0002739518 Non-Stratified
## 4     50 0.4152542 0.0004184709 Non-Stratified
## 5    100 0.4135593 0.0003665309 Non-Stratified
## 6      5 0.4203390 0.0001103696     Stratified
## 7     10 0.4152542 0.0001257227     Stratified
## 8     20 0.4118644 0.0002342849     Stratified
## 9     50 0.4135593 0.0002229094     Stratified
## 10   100 0.4135593 0.0002755137     Stratified
# Cross-validated QDA error for Group ~ Latitude + Longitude on the training
# set. One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Latitude + Longitude, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Latitude + Longitude, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.2694915 0.0004337834 Non-Stratified
## 2     10 0.2711864 0.0004341026 Non-Stratified
## 3     20 0.2711864 0.0002347208 Non-Stratified
## 4     50 0.2762712 0.0002713166 Non-Stratified
## 5    100 0.2813559 0.0002811347 Non-Stratified
## 6      5 0.2881356 0.0002618622     Stratified
## 7     10 0.2779661 0.0002462504     Stratified
## 8     20 0.2847458 0.0003507084     Stratified
## 9     50 0.2813559 0.0003440222     Stratified
## 10   100 0.2847458 0.0002578309     Stratified
# Cross-validated QDA error for Group ~ Latitude + OverallAvg on the training
# set. One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Latitude + OverallAvg, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Latitude + OverallAvg, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.04915254 4.596380e-05 Non-Stratified
## 2     10 0.05593220 1.279964e-04 Non-Stratified
## 3     20 0.05423729 6.172119e-05 Non-Stratified
## 4     50 0.04915254 5.426435e-05 Non-Stratified
## 5    100 0.04915254 6.429181e-05 Non-Stratified
## 6      5 0.05423729 1.157842e-05     Stratified
## 7     10 0.05084746 4.487324e-05     Stratified
## 8     20 0.05084746 6.831031e-05     Stratified
## 9     50 0.05084746 1.062618e-04     Stratified
## 10   100 0.04915254 9.167317e-05     Stratified
# Cross-validated QDA error for Group ~ Latitude + OverallMeanAmp on the
# training set. One row per (partitioning scheme, fold count): rows 1-5 use
# cv.qda() (non-stratified folds) and rows 6-10 use strat.cv.qda()
# (stratified folds). Both helpers are defined outside this section; only the
# qda_error_rate and qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Latitude + OverallMeanAmp,
      y = "Group", K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Latitude + OverallMeanAmp,
      y = "Group", K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.2254237 0.0001192186 Non-Stratified
## 2     10 0.2254237 0.0003386639 Non-Stratified
## 3     20 0.2220339 0.0001830249 Non-Stratified
## 4     50 0.2220339 0.0002109762 Non-Stratified
## 5    100 0.2237288 0.0002065937 Non-Stratified
## 6      5 0.2186441 0.0003414589     Stratified
## 7     10 0.2203390 0.0002413760     Stratified
## 8     20 0.2254237 0.0004166039     Stratified
## 9     50 0.2220339 0.0003508542     Stratified
## 10   100 0.2237288 0.0003204718     Stratified
# Cross-validated QDA error for Group ~ Longitude + OverallAvg on the training
# set. One row per (partitioning scheme, fold count): rows 1-5 use cv.qda()
# (non-stratified folds) and rows 6-10 use strat.cv.qda() (stratified folds).
# Both helpers are defined outside this section; only the qda_error_rate and
# qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Longitude + OverallAvg, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Longitude + OverallAvg, y = "Group",
      K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.1338983 1.752370e-04 Non-Stratified
## 2     10 0.1355932 1.915158e-04 Non-Stratified
## 3     20 0.1389831 1.435722e-04 Non-Stratified
## 4     50 0.1338983 1.877912e-04 Non-Stratified
## 5    100 0.1355932 1.678294e-04 Non-Stratified
## 6      5 0.1372881 1.425363e-05     Stratified
## 7     10 0.1389831 6.028036e-05     Stratified
## 8     20 0.1372881 2.032629e-04     Stratified
## 9     50 0.1389831 1.864347e-04     Stratified
## 10   100 0.1355932 1.401540e-04     Stratified
# Cross-validated QDA error for Group ~ Longitude + OverallMeanAmp on the
# training set. One row per (partitioning scheme, fold count): rows 1-5 use
# cv.qda() (non-stratified folds) and rows 6-10 use strat.cv.qda()
# (stratified folds). Both helpers are defined outside this section; only the
# qda_error_rate and qda_sd_error_rate fields of their result are used here.
Matrix <- data.frame(
  Folds = rep(c(5, 10, 20, 50, 100), times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"), each = 5),
  stringsAsFactors = FALSE
)

# Fill the table row by row, in the same order as the rows are listed.
for (cv_row in seq_len(nrow(Matrix))) {
  if (Matrix$Partitioning[cv_row] == "Stratified") {
    info <- strat.cv.qda(
      data = train.set, model = Group ~ Longitude + OverallMeanAmp,
      y = "Group", K = Matrix$Folds[cv_row], seed = 35
    )
  } else {
    info <- cv.qda(
      data = train.set, model = Group ~ Longitude + OverallMeanAmp,
      y = "Group", K = Matrix$Folds[cv_row], seed = 35
    )
  }
  Matrix$Error[cv_row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv_row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.4135593 8.187302e-05 Non-Stratified
## 2     10 0.4135593 3.396214e-04 Non-Stratified
## 3     20 0.4135593 1.918133e-04 Non-Stratified
## 4     50 0.4169492 4.216544e-04 Non-Stratified
## 5    100 0.4186441 4.137946e-04 Non-Stratified
## 6      5 0.4118644 1.761805e-04     Stratified
## 7     10 0.4067797 1.359387e-04     Stratified
## 8     20 0.4118644 1.599069e-04     Stratified
## 9     50 0.4169492 2.427546e-04     Stratified
## 10   100 0.4152542 2.686508e-04     Stratified
# Cross-validated QDA summary for Group ~ OverallAvg + OverallMeanAmp on
# train.set. Rows 1-5 come from cv.qda ("Non-Stratified") and rows 6-10 from
# strat.cv.qda ("Stratified"), one row per fold count. Both helpers are defined
# outside this section; only their $qda_error_rate and $qda_sd_error_rate
# results are read here.
fold_counts <- c(5, 10, 20, 50, 100)
n_counts <- length(fold_counts)

Matrix <- as.data.frame(matrix(NA, nrow = 2 * n_counts, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix$Partitioning <- rep(c("Non-Stratified", "Stratified"), each = n_counts)

# Non-Stratified
for (k_idx in seq_len(n_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[k_idx, 1] <- fold_counts[k_idx]
  Matrix[k_idx, 2] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix[k_idx, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows offset by n_counts so they follow the non-stratified rows)
for (k_idx in seq_len(n_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[n_counts + k_idx, 1] <- fold_counts[k_idx]
  Matrix[n_counts + k_idx, 2] <- info$qda_error_rate
  Matrix[n_counts + k_idx, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.08813559 6.176386e-05 Non-Stratified
## 2     10 0.08813559 4.979412e-05 Non-Stratified
## 3     20 0.08644068 1.625270e-04 Non-Stratified
## 4     50 0.08813559 1.309215e-04 Non-Stratified
## 5    100 0.08813559 1.343479e-04 Non-Stratified
## 6      5 0.08644068 5.669977e-05     Stratified
## 7     10 0.08983051 7.086856e-05     Stratified
## 8     20 0.08644068 1.537240e-04     Stratified
## 9     50 0.08983051 1.897113e-04     Stratified
## 10   100 0.08644068 1.526075e-04     Stratified
# Cross-validated QDA summary for Group ~ Latitude + Longitude + OverallAvg on
# train.set. Rows 1-5 come from cv.qda ("Non-Stratified") and rows 6-10 from
# strat.cv.qda ("Stratified"), one row per fold count. Both helpers are defined
# outside this section; only their $qda_error_rate and $qda_sd_error_rate
# results are read here.
fold_counts <- c(5, 10, 20, 50, 100)
n_counts <- length(fold_counts)

Matrix <- as.data.frame(matrix(NA, nrow = 2 * n_counts, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix$Partitioning <- rep(c("Non-Stratified", "Stratified"), each = n_counts)

# Non-Stratified
for (k_idx in seq_len(n_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude + Longitude + OverallAvg,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[k_idx, 1] <- fold_counts[k_idx]
  Matrix[k_idx, 2] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix[k_idx, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows offset by n_counts so they follow the non-stratified rows)
for (k_idx in seq_len(n_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude + Longitude + OverallAvg,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[n_counts + k_idx, 1] <- fold_counts[k_idx]
  Matrix[n_counts + k_idx, 2] <- info$qda_error_rate
  Matrix[n_counts + k_idx, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.05423729 7.612755e-05 Non-Stratified
## 2     10 0.05423729 8.809729e-05 Non-Stratified
## 3     20 0.04915254 3.934453e-05 Non-Stratified
## 4     50 0.05423729 8.287888e-05 Non-Stratified
## 5    100 0.05084746 8.316049e-05 Non-Stratified
## 6      5 0.05762712 1.052475e-04     Stratified
## 7     10 0.05084746 8.490871e-05     Stratified
## 8     20 0.04745763 7.114697e-05     Stratified
## 9     50 0.04915254 9.429893e-05     Stratified
## 10   100 0.05084746 1.006815e-04     Stratified
# Cross-validated QDA summary for Group ~ Latitude + Longitude + OverallMeanAmp
# on train.set. Rows 1-5 come from cv.qda ("Non-Stratified") and rows 6-10 from
# strat.cv.qda ("Stratified"), one row per fold count. Both helpers are defined
# outside this section; only their $qda_error_rate and $qda_sd_error_rate
# results are read here.
fold_counts <- c(5, 10, 20, 50, 100)
n_counts <- length(fold_counts)

Matrix <- as.data.frame(matrix(NA, nrow = 2 * n_counts, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix$Partitioning <- rep(c("Non-Stratified", "Stratified"), each = n_counts)

# Non-Stratified
for (k_idx in seq_len(n_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude + Longitude + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[k_idx, 1] <- fold_counts[k_idx]
  Matrix[k_idx, 2] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix[k_idx, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows offset by n_counts so they follow the non-stratified rows)
for (k_idx in seq_len(n_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude + Longitude + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[n_counts + k_idx, 1] <- fold_counts[k_idx]
  Matrix[n_counts + k_idx, 2] <- info$qda_error_rate
  Matrix[n_counts + k_idx, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.2271186 0.0002255099 Non-Stratified
## 2     10 0.2237288 0.0002157745 Non-Stratified
## 3     20 0.2288136 0.0001377798 Non-Stratified
## 4     50 0.2220339 0.0002216394 Non-Stratified
## 5    100 0.2203390 0.0002152402 Non-Stratified
## 6      5 0.2186441 0.0003109048     Stratified
## 7     10 0.2237288 0.0003399354     Stratified
## 8     20 0.2271186 0.0002830925     Stratified
## 9     50 0.2135593 0.0003496768     Stratified
## 10   100 0.2186441 0.0003360817     Stratified
# Cross-validated QDA summary for Group ~ Latitude + OverallAvg + OverallMeanAmp
# on train.set. Rows 1-5 come from cv.qda ("Non-Stratified") and rows 6-10 from
# strat.cv.qda ("Stratified"), one row per fold count. Both helpers are defined
# outside this section; only their $qda_error_rate and $qda_sd_error_rate
# results are read here.
fold_counts <- c(5, 10, 20, 50, 100)
n_counts <- length(fold_counts)

Matrix <- as.data.frame(matrix(NA, nrow = 2 * n_counts, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix$Partitioning <- rep(c("Non-Stratified", "Stratified"), each = n_counts)

# Non-Stratified
for (k_idx in seq_len(n_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude + OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[k_idx, 1] <- fold_counts[k_idx]
  Matrix[k_idx, 2] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix[k_idx, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows offset by n_counts so they follow the non-stratified rows)
for (k_idx in seq_len(n_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude + OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[n_counts + k_idx, 1] <- fold_counts[k_idx]
  Matrix[n_counts + k_idx, 2] <- info$qda_error_rate
  Matrix[n_counts + k_idx, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.06610169 3.878196e-05 Non-Stratified
## 2     10 0.06610169 5.394363e-05 Non-Stratified
## 3     20 0.06101695 7.651983e-05 Non-Stratified
## 4     50 0.05932203 9.674716e-05 Non-Stratified
## 5    100 0.05932203 8.879461e-05 Non-Stratified
## 6      5 0.05762712 6.958333e-05     Stratified
## 7     10 0.05932203 3.918959e-05     Stratified
## 8     20 0.05932203 7.722765e-05     Stratified
## 9     50 0.05762712 1.302408e-04     Stratified
## 10   100 0.05762712 1.162857e-04     Stratified
# Cross-validated QDA summary for
# Group ~ Longitude + OverallAvg + OverallMeanAmp on train.set.
# Rows 1-5 come from cv.qda ("Non-Stratified") and rows 6-10 from
# strat.cv.qda ("Stratified"), one row per fold count. Both helpers are defined
# outside this section; only their $qda_error_rate and $qda_sd_error_rate
# results are read here.
fold_counts <- c(5, 10, 20, 50, 100)
n_counts <- length(fold_counts)

Matrix <- as.data.frame(matrix(NA, nrow = 2 * n_counts, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix$Partitioning <- rep(c("Non-Stratified", "Stratified"), each = n_counts)

# Non-Stratified
for (k_idx in seq_len(n_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Longitude + OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[k_idx, 1] <- fold_counts[k_idx]
  Matrix[k_idx, 2] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix[k_idx, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows offset by n_counts so they follow the non-stratified rows)
for (k_idx in seq_len(n_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Longitude + OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[n_counts + k_idx, 1] <- fold_counts[k_idx]
  Matrix[n_counts + k_idx, 2] <- info$qda_error_rate
  Matrix[n_counts + k_idx, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.08813559 3.303648e-05 Non-Stratified
## 2     10 0.08813559 6.256184e-05 Non-Stratified
## 3     20 0.09322034 1.573828e-04 Non-Stratified
## 4     50 0.08983051 1.581797e-04 Non-Stratified
## 5    100 0.08813559 1.455713e-04 Non-Stratified
## 6      5 0.09322034 1.447659e-04     Stratified
## 7     10 0.09322034 1.306155e-04     Stratified
## 8     20 0.08813559 9.498660e-05     Stratified
## 9     50 0.08983051 1.778303e-04     Stratified
## 10   100 0.08644068 1.655649e-04     Stratified
# Cross-validated QDA summary for
# Group ~ Latitude + Longitude + OverallAvg + OverallMeanAmp on train.set.
# Rows 1-5 come from cv.qda ("Non-Stratified") and rows 6-10 from
# strat.cv.qda ("Stratified"), one row per fold count. Both helpers are defined
# outside this section; only their $qda_error_rate and $qda_sd_error_rate
# results are read here.
fold_counts <- c(5, 10, 20, 50, 100)
n_counts <- length(fold_counts)

Matrix <- as.data.frame(matrix(NA, nrow = 2 * n_counts, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix$Partitioning <- rep(c("Non-Stratified", "Stratified"), each = n_counts)

# Non-Stratified
for (k_idx in seq_len(n_counts)) {
  info <- cv.qda(
    data = train.set,
    model = Group ~ Latitude + Longitude + OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[k_idx, 1] <- fold_counts[k_idx]
  Matrix[k_idx, 2] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix[k_idx, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows offset by n_counts so they follow the non-stratified rows)
for (k_idx in seq_len(n_counts)) {
  info <- strat.cv.qda(
    data = train.set,
    model = Group ~ Latitude + Longitude + OverallAvg + OverallMeanAmp,
    y = "Group", K = fold_counts[k_idx], seed = 35
  )
  Matrix[n_counts + k_idx, 1] <- fold_counts[k_idx]
  Matrix[n_counts + k_idx, 2] <- info$qda_error_rate
  Matrix[n_counts + k_idx, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.07118644 4.021833e-05 Non-Stratified
## 2     10 0.07118644 7.532957e-05 Non-Stratified
## 3     20 0.06440678 8.256728e-05 Non-Stratified
## 4     50 0.06610169 1.090595e-04 Non-Stratified
## 5    100 0.06271186 1.007082e-04 Non-Stratified
## 6      5 0.06610169 6.968849e-05     Stratified
## 7     10 0.06779661 8.886140e-05     Stratified
## 8     20 0.06610169 7.315631e-05     Stratified
## 9     50 0.06271186 1.124009e-04     Stratified
## 10   100 0.06271186 1.353914e-04     Stratified