library(GGally)
library(ggiraph)
library(ggplot2)
library(stringr)
library(DT)
library(corrplot)
library(klaR)
library(plotly)
library(plyr)
library(caret)
library(leaflet)
library(shiny)
library(MASS) # for LDA and QDA.
library(readxl)
library(readr)
library(e1071)
library(knitr)
library(DT)
# Load the two input tables. Both paths are absolute and point into the
# author's Desktop project folder, so they only resolve on that machine.
ArcLakeGroupSummary <- read_excel("~/Desktop/EPSRC Project /ArcLakeGroupSummary.xlsx")
# Despite the ".csv.xls" file name this file is read with read_csv(); the
# column specification it reported is echoed in the comment lines below.
dundeedata <- read_csv("~/Desktop/EPSRC Project /dundeedata.csv.xls")
## Parsed with column specification:
## cols(
##   GloboLID = col_integer(),
##   `1_L_Area_Perimeter_UTM_(997L).AREA_sqkm` = col_double(),
##   `1_L_Area_Perimeter_UTM_(997L).PERIMETER_km` = col_double(),
##   KG_Coding = col_character(),
##   KG_Class = col_character(),
##   KG_ID = col_integer(),
##   KG_RGB = col_character(),
##   TEOW = col_character(),
##   Rocktype = col_character(),
##   GLiM = col_integer(),
##   RiverDensity = col_double(),
##   `1_c_Area_Perimeter_UTM_(996c).AREA_sqkm` = col_double(),
##   `1_c_Area_Perimeter_UTM_(996c).PERIMETER_km` = col_double()
## )
# Rename the first column of dundeedata (GloboLID) to "GloboLakes_ID" so that
# both tables share the key name used for the merge.
colnames(dundeedata)[1] <- "GloboLakes_ID"

# Full outer join on the lake ID.
Data <- merge(ArcLakeGroupSummary, dundeedata, by = "GloboLakes_ID", all = TRUE)

# Keep only rows that have a group. Missing values are tested explicitly with
# is.na() instead of relying on subset() silently dropping the NA results of a
# comparison against the string "NA"; a literal "NA" label is still excluded,
# exactly as before. Per the original author's note this returns the data set
# to the original 732 rows, now with the extra columns of information.
has_group <- !is.na(Data$Group) & Data$Group != "NA"
Data <- Data[has_group, ]

Data$Group <- as.factor(Data$Group)

# Seed the RNG immediately before the partition so the split is reproducible.
set.seed(38)

# 80/20 train/test split on Group (caret is already attached at the top of the
# script, so it is not loaded again here).
train.index <- createDataPartition(Data$Group, p = 0.8, list = FALSE)
train.set <- Data[train.index, ]
test.set <- Data[-train.index, ]

# Cross-validation summary for the QDA model Group ~ PC1 on the training set.
# Rows 1-5 hold the cv.qda() results and rows 6-10 the strat.cv.qda() results,
# one row per fold count. Both helpers are defined outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ PC1, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ PC1, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.1694915 2.441827e-04 Non-Stratified
## 2     10 0.1728814 2.668454e-04 Non-Stratified
## 3     20 0.1677966 3.456893e-04 Non-Stratified
## 4     50 0.1728814 3.742551e-04 Non-Stratified
## 5    100 0.1728814 2.937329e-04 Non-Stratified
## 6      5 0.1661017 1.763365e-04     Stratified
## 7     10 0.1728814 8.961251e-05     Stratified
## 8     20 0.1711864 3.091951e-04     Stratified
## 9     50 0.1711864 2.463290e-04     Stratified
## 10   100 0.1694915 2.279250e-04     Stratified
# Cross-validation summary for the QDA model Group ~ PC2 on the training set.
# Rows 1-5 hold the cv.qda() results and rows 6-10 the strat.cv.qda() results,
# one row per fold count. Both helpers are defined outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ PC2, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ PC2, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.3983051 0.0002226372 Non-Stratified
## 2     10 0.4000000 0.0003013183 Non-Stratified
## 3     20 0.3983051 0.0002924667 Non-Stratified
## 4     50 0.4000000 0.0004566931 Non-Stratified
## 5    100 0.4000000 0.0004556678 Non-Stratified
## 6      5 0.4050847 0.0002446748     Stratified
## 7     10 0.3983051 0.0001025774     Stratified
## 8     20 0.4000000 0.0001762496     Stratified
## 9     50 0.3932203 0.0002753981     Stratified
## 10   100 0.3949153 0.0003534662     Stratified
# Cross-validation summary for the QDA model Group ~ PC1 + PC2 on the training
# set. Rows 1-5 hold the cv.qda() results and rows 6-10 the strat.cv.qda()
# results, one row per fold count. Both helpers are defined outside this
# section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ PC1 + PC2, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ PC1 + PC2, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.04745763 1.551278e-04 Non-Stratified
## 2     10 0.04576271 1.024610e-04 Non-Stratified
## 3     20 0.04745763 8.082561e-05 Non-Stratified
## 4     50 0.04576271 5.917056e-05 Non-Stratified
## 5    100 0.04576271 6.931089e-05 Non-Stratified
## 6      5 0.04406780 3.856508e-05     Stratified
## 7     10 0.04915254 1.397393e-04     Stratified
## 8     20 0.04406780 3.840637e-05     Stratified
## 9     50 0.04406780 8.185923e-05     Stratified
## 10   100 0.04576271 7.985987e-05     Stratified
# Cross-validation summary for the QDA model Group ~ Latitude on the training
# set. Rows 1-5 hold the cv.qda() results and rows 6-10 the strat.cv.qda()
# results, one row per fold count. Both helpers are defined outside this
# section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.3050847 5.745475e-05 Non-Stratified
## 2     10 0.2983051 1.098024e-04 Non-Stratified
## 3     20 0.2932203 4.356229e-04 Non-Stratified
## 4     50 0.2915254 2.664043e-04 Non-Stratified
## 5    100 0.2932203 3.321425e-04 Non-Stratified
## 6      5 0.2949153 3.002685e-04     Stratified
## 7     10 0.2898305 3.154574e-04     Stratified
## 8     20 0.2915254 3.325383e-04     Stratified
## 9     50 0.2915254 2.953677e-04     Stratified
## 10   100 0.2949153 3.426487e-04     Stratified
# Cross-validation summary for the QDA model Group ~ Longitude on the training
# set. Rows 1-5 hold the cv.qda() results and rows 6-10 the strat.cv.qda()
# results, one row per fold count. Both helpers are defined outside this
# section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Longitude, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Longitude, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.6491525 3.346739e-04 Non-Stratified
## 2     10 0.6508475 2.055603e-04 Non-Stratified
## 3     20 0.6525424 1.978002e-04 Non-Stratified
## 4     50 0.6491525 4.235472e-04 Non-Stratified
## 5    100 0.6508475 3.944826e-04 Non-Stratified
## 6      5 0.6491525 1.066235e-04     Stratified
## 7     10 0.6525424 5.185409e-05     Stratified
## 8     20 0.6491525 4.246543e-05     Stratified
## 9     50 0.6491525 1.102279e-04     Stratified
## 10   100 0.6508475 1.535187e-04     Stratified
# Cross-validation summary for the QDA model Group ~ OverallAvg on the
# training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ OverallAvg, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ OverallAvg, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.1677966 1.824188e-04 Non-Stratified
## 2     10 0.1593220 2.885505e-04 Non-Stratified
## 3     20 0.1559322 3.160609e-04 Non-Stratified
## 4     50 0.1610169 2.663012e-04 Non-Stratified
## 5    100 0.1576271 2.712402e-04 Non-Stratified
## 6      5 0.1627119 5.127479e-05     Stratified
## 7     10 0.1593220 3.313464e-04     Stratified
## 8     20 0.1542373 1.900090e-04     Stratified
## 9     50 0.1576271 1.638498e-04     Stratified
## 10   100 0.1542373 1.870022e-04     Stratified
# Cross-validation summary for the QDA model Group ~ OverallMeanAmp on the
# training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ OverallMeanAmp, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ OverallMeanAmp, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.4050847 0.0005271474 Non-Stratified
## 2     10 0.4050847 0.0003029142 Non-Stratified
## 3     20 0.3949153 0.0001894021 Non-Stratified
## 4     50 0.4000000 0.0003662488 Non-Stratified
## 5    100 0.3983051 0.0003662896 Non-Stratified
## 6      5 0.4000000 0.0001485166     Stratified
## 7     10 0.3966102 0.0002566395     Stratified
## 8     20 0.3966102 0.0001124670     Stratified
## 9     50 0.3949153 0.0002755572     Stratified
## 10   100 0.3966102 0.0002740585     Stratified
# Cross-validation summary for the QDA model Group ~ Latitude + Longitude on
# the training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude + Longitude, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude + Longitude, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.2847458 7.612755e-05 Non-Stratified
## 2     10 0.2830508 3.067445e-04 Non-Stratified
## 3     20 0.2830508 2.942464e-04 Non-Stratified
## 4     50 0.2847458 3.143213e-04 Non-Stratified
## 5    100 0.2898305 3.251403e-04 Non-Stratified
## 6      5 0.2966102 5.614980e-05     Stratified
## 7     10 0.2779661 2.062769e-04     Stratified
## 8     20 0.2813559 2.093759e-04     Stratified
## 9     50 0.2864407 2.371660e-04     Stratified
## 10   100 0.2847458 2.674923e-04     Stratified
# Cross-validation summary for the QDA model Group ~ Latitude + OverallAvg on
# the training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude + OverallAvg, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude + OverallAvg, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.05762712 9.623671e-05 Non-Stratified
## 2     10 0.05423729 1.327843e-04 Non-Stratified
## 3     20 0.05423729 9.985631e-05 Non-Stratified
## 4     50 0.05593220 8.526827e-05 Non-Stratified
## 5    100 0.05423729 9.031987e-05 Non-Stratified
## 6      5 0.05932203 2.772618e-05     Stratified
## 7     10 0.05254237 5.943084e-05     Stratified
## 8     20 0.05423729 1.196775e-04     Stratified
## 9     50 0.05593220 8.199551e-05     Stratified
## 10   100 0.05593220 7.648669e-05     Stratified
# Cross-validation summary for the QDA model Group ~ Latitude + OverallMeanAmp
# on the training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Latitude + OverallMeanAmp, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Latitude + OverallMeanAmp, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.2152542 0.0001479460 Non-Stratified
## 2     10 0.2271186 0.0002310958 Non-Stratified
## 3     20 0.2237288 0.0002836928 Non-Stratified
## 4     50 0.2220339 0.0001823448 Non-Stratified
## 5    100 0.2237288 0.0002866622 Non-Stratified
## 6      5 0.2254237 0.0001148861     Stratified
## 7     10 0.2203390 0.0001510409     Stratified
## 8     20 0.2186441 0.0002080653     Stratified
## 9     50 0.2220339 0.0002087534     Stratified
## 10   100 0.2203390 0.0002536754     Stratified
# Cross-validation summary for the QDA model Group ~ Longitude + OverallAvg on
# the training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Longitude + OverallAvg, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Longitude + OverallAvg, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.1406780 0.0002341281 Non-Stratified
## 2     10 0.1406780 0.0002429059 Non-Stratified
## 3     20 0.1423729 0.0003151564 Non-Stratified
## 4     50 0.1440678 0.0002964098 Non-Stratified
## 5    100 0.1423729 0.0002604658 Non-Stratified
## 6      5 0.1423729 0.0001305974     Stratified
## 7     10 0.1440678 0.0001474994     Stratified
## 8     20 0.1440678 0.0002673919     Stratified
## 9     50 0.1440678 0.0001434750     Stratified
## 10   100 0.1406780 0.0001946942     Stratified
# Cross-validation summary for the QDA model Group ~ Longitude + OverallMeanAmp
# on the training set. Rows 1-5 hold the cv.qda() results and rows 6-10 the
# strat.cv.qda() results, one row per fold count. Both helpers are defined
# outside this section.
fold_counts <- c(5, 10, 20, 50, 100)

Matrix <- as.data.frame(matrix(data = NA, nrow = 10, ncol = 4))
colnames(Matrix) <- c("Folds", "Error", "Est.Variance", "Partitioning")
Matrix[1:5, 4] <- "Non-Stratified"
Matrix[6:10, 4] <- "Stratified"

# Non-Stratified (rows 1-5)
for (i in seq_along(fold_counts)) {
  info <- cv.qda(
    data = train.set, model = Group ~ Longitude + OverallMeanAmp, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i, 1] <- fold_counts[i]
  Matrix[i, 2] <- info$qda_error_rate
  # The estimated variance is the square of the estimated standard deviation.
  Matrix[i, 3] <- info$qda_sd_error_rate^2
}

# Stratified (rows 6-10)
for (i in seq_along(fold_counts)) {
  info <- strat.cv.qda(
    data = train.set, model = Group ~ Longitude + OverallMeanAmp, y = "Group",
    K = fold_counts[i], seed = 38
  )
  Matrix[i + 5, 1] <- fold_counts[i]
  Matrix[i + 5, 2] <- info$qda_error_rate
  Matrix[i + 5, 3] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.3966102 4.553289e-04 Non-Stratified
## 2     10 0.3932203 3.945226e-04 Non-Stratified
## 3     20 0.4033898 2.306619e-04 Non-Stratified
## 4     50 0.4000000 3.526925e-04 Non-Stratified
## 5    100 0.4016949 3.238653e-04 Non-Stratified
## 6      5 0.3915254 3.287400e-04     Stratified
## 7     10 0.3881356 9.669624e-05     Stratified
## 8     20 0.3966102 1.568625e-04     Stratified
## 9     50 0.3915254 2.946672e-04     Stratified
## 10   100 0.3932203 3.193421e-04     Stratified
# Cross-validated QDA error for Group ~ OverallAvg + OverallMeanAmp.
#
# Fills the 10-row table `Matrix` with one row per run: rows 1-5 use cv.qda
# ("Non-Stratified"), rows 6-10 use strat.cv.qda ("Stratified"), each at
# 5, 10, 20, 50 and 100 folds and with seed = 38. The runs execute in row
# order, and `info` is left holding the result of the last one.
cv.folds <- c(5, 10, 20, 50, 100)

Matrix <- data.frame(
  Folds = rep(cv.folds, times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"),
                     each = length(cv.folds)),
  stringsAsFactors = FALSE # keep the labels as character on any R version
)

for (cv.row in seq_len(nrow(Matrix))) {
  # Choose the partitioning scheme from the row's label.
  cv.runner <- if (Matrix$Partitioning[cv.row] == "Stratified") {
    strat.cv.qda
  } else {
    cv.qda
  }
  # The fold count is read from the table, so K and Folds cannot disagree.
  info <- cv.runner(
    data = train.set,
    model = Group ~ OverallAvg + OverallMeanAmp,
    y = "Group",
    K = Matrix$Folds[cv.row],
    seed = 38
  )
  Matrix$Error[cv.row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv.row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.09152542 1.321459e-04 Non-Stratified
## 2     10 0.09152542 1.736410e-04 Non-Stratified
## 3     20 0.09322034 1.060074e-04 Non-Stratified
## 4     50 0.09152542 1.144544e-04 Non-Stratified
## 5    100 0.09152542 1.257856e-04 Non-Stratified
## 6      5 0.08813559 6.979111e-05     Stratified
## 7     10 0.08983051 4.824734e-05     Stratified
## 8     20 0.08983051 1.281874e-04     Stratified
## 9     50 0.09322034 1.710002e-04     Stratified
## 10   100 0.09322034 1.591984e-04     Stratified
# Cross-validated QDA error for Group ~ Latitude + Longitude + OverallAvg.
#
# Fills the 10-row table `Matrix` with one row per run: rows 1-5 use cv.qda
# ("Non-Stratified"), rows 6-10 use strat.cv.qda ("Stratified"), each at
# 5, 10, 20, 50 and 100 folds and with seed = 38. The runs execute in row
# order, and `info` is left holding the result of the last one.
cv.folds <- c(5, 10, 20, 50, 100)

Matrix <- data.frame(
  Folds = rep(cv.folds, times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"),
                     each = length(cv.folds)),
  stringsAsFactors = FALSE # keep the labels as character on any R version
)

for (cv.row in seq_len(nrow(Matrix))) {
  # Choose the partitioning scheme from the row's label.
  cv.runner <- if (Matrix$Partitioning[cv.row] == "Stratified") {
    strat.cv.qda
  } else {
    cv.qda
  }
  # The fold count is read from the table, so K and Folds cannot disagree.
  info <- cv.runner(
    data = train.set,
    model = Group ~ Latitude + Longitude + OverallAvg,
    y = "Group",
    K = Matrix$Folds[cv.row],
    seed = 38
  )
  Matrix$Error[cv.row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv.row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.07796610 3.878196e-05 Non-Stratified
## 2     10 0.07457627 1.225701e-04 Non-Stratified
## 3     20 0.06610169 1.354032e-04 Non-Stratified
## 4     50 0.06440678 1.062222e-04 Non-Stratified
## 5    100 0.06440678 1.086016e-04 Non-Stratified
## 6      5 0.07118644 1.310797e-04     Stratified
## 7     10 0.06440678 1.431101e-04     Stratified
## 8     20 0.06610169 6.552619e-05     Stratified
## 9     50 0.06610169 1.044415e-04     Stratified
## 10   100 0.06440678 9.330015e-05     Stratified
# Cross-validated QDA error for Group ~ Latitude + Longitude + OverallMeanAmp.
#
# Fills the 10-row table `Matrix` with one row per run: rows 1-5 use cv.qda
# ("Non-Stratified"), rows 6-10 use strat.cv.qda ("Stratified"), each at
# 5, 10, 20, 50 and 100 folds and with seed = 38. The runs execute in row
# order, and `info` is left holding the result of the last one.
cv.folds <- c(5, 10, 20, 50, 100)

Matrix <- data.frame(
  Folds = rep(cv.folds, times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"),
                     each = length(cv.folds)),
  stringsAsFactors = FALSE # keep the labels as character on any R version
)

for (cv.row in seq_len(nrow(Matrix))) {
  # Choose the partitioning scheme from the row's label.
  cv.runner <- if (Matrix$Partitioning[cv.row] == "Stratified") {
    strat.cv.qda
  } else {
    cv.qda
  }
  # The fold count is read from the table, so K and Folds cannot disagree.
  info <- cv.runner(
    data = train.set,
    model = Group ~ Latitude + Longitude + OverallMeanAmp,
    y = "Group",
    K = Matrix$Folds[cv.row],
    seed = 38
  )
  Matrix$Error[cv.row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv.row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds     Error Est.Variance   Partitioning
## 1      5 0.2254237 0.0003203103 Non-Stratified
## 2     10 0.2203390 0.0003191931 Non-Stratified
## 3     20 0.2101695 0.0001985282 Non-Stratified
## 4     50 0.2101695 0.0002048331 Non-Stratified
## 5    100 0.2084746 0.0002487935 Non-Stratified
## 6      5 0.2169492 0.0001244966     Stratified
## 7     10 0.2067797 0.0001956487     Stratified
## 8     20 0.2084746 0.0002011095     Stratified
## 9     50 0.2050847 0.0002265820     Stratified
## 10   100 0.2067797 0.0003193924     Stratified
# Cross-validated QDA error for Group ~ Latitude + OverallAvg + OverallMeanAmp.
#
# Fills the 10-row table `Matrix` with one row per run: rows 1-5 use cv.qda
# ("Non-Stratified"), rows 6-10 use strat.cv.qda ("Stratified"), each at
# 5, 10, 20, 50 and 100 folds and with seed = 38. The runs execute in row
# order, and `info` is left holding the result of the last one.
cv.folds <- c(5, 10, 20, 50, 100)

Matrix <- data.frame(
  Folds = rep(cv.folds, times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"),
                     each = length(cv.folds)),
  stringsAsFactors = FALSE # keep the labels as character on any R version
)

for (cv.row in seq_len(nrow(Matrix))) {
  # Choose the partitioning scheme from the row's label.
  cv.runner <- if (Matrix$Partitioning[cv.row] == "Stratified") {
    strat.cv.qda
  } else {
    cv.qda
  }
  # The fold count is read from the table, so K and Folds cannot disagree.
  info <- cv.runner(
    data = train.set,
    model = Group ~ Latitude + OverallAvg + OverallMeanAmp,
    y = "Group",
    K = Matrix$Folds[cv.row],
    seed = 38
  )
  Matrix$Error[cv.row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv.row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.06949153 7.469118e-05 Non-Stratified
## 2     10 0.07966102 1.790673e-04 Non-Stratified
## 3     20 0.07118644 7.322896e-05 Non-Stratified
## 4     50 0.06779661 9.599520e-05 Non-Stratified
## 5    100 0.07118644 1.019125e-04 Non-Stratified
## 6      5 0.07118644 1.761383e-05     Stratified
## 7     10 0.07796610 2.311420e-04     Stratified
## 8     20 0.07627119 1.031133e-04     Stratified
## 9     50 0.07288136 1.463037e-04     Stratified
## 10   100 0.07288136 1.230996e-04     Stratified
# Cross-validated QDA error for Group ~ Longitude + OverallAvg + OverallMeanAmp.
#
# Fills the 10-row table `Matrix` with one row per run: rows 1-5 use cv.qda
# ("Non-Stratified"), rows 6-10 use strat.cv.qda ("Stratified"), each at
# 5, 10, 20, 50 and 100 folds and with seed = 38. The runs execute in row
# order, and `info` is left holding the result of the last one.
cv.folds <- c(5, 10, 20, 50, 100)

Matrix <- data.frame(
  Folds = rep(cv.folds, times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"),
                     each = length(cv.folds)),
  stringsAsFactors = FALSE # keep the labels as character on any R version
)

for (cv.row in seq_len(nrow(Matrix))) {
  # Choose the partitioning scheme from the row's label.
  cv.runner <- if (Matrix$Partitioning[cv.row] == "Stratified") {
    strat.cv.qda
  } else {
    cv.qda
  }
  # The fold count is read from the table, so K and Folds cannot disagree.
  info <- cv.runner(
    data = train.set,
    model = Group ~ Longitude + OverallAvg + OverallMeanAmp,
    y = "Group",
    K = Matrix$Folds[cv.row],
    seed = 38
  )
  Matrix$Error[cv.row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv.row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.09661017 1.120368e-04 Non-Stratified
## 2     10 0.10000000 1.305500e-04 Non-Stratified
## 3     20 0.09491525 9.698085e-05 Non-Stratified
## 4     50 0.09491525 1.177007e-04 Non-Stratified
## 5    100 0.10000000 1.318732e-04 Non-Stratified
## 6      5 0.09661017 2.446247e-04     Stratified
## 7     10 0.09491525 1.035357e-04     Stratified
## 8     20 0.09830508 1.059261e-04     Stratified
## 9     50 0.10000000 1.521168e-04     Stratified
## 10   100 0.10000000 1.584357e-04     Stratified
# Cross-validated QDA error for the four-predictor model
# Group ~ Latitude + Longitude + OverallAvg + OverallMeanAmp.
#
# Fills the 10-row table `Matrix` with one row per run: rows 1-5 use cv.qda
# ("Non-Stratified"), rows 6-10 use strat.cv.qda ("Stratified"), each at
# 5, 10, 20, 50 and 100 folds and with seed = 38. The runs execute in row
# order, and `info` is left holding the result of the last one.
cv.folds <- c(5, 10, 20, 50, 100)

Matrix <- data.frame(
  Folds = rep(cv.folds, times = 2),
  Error = NA_real_,
  Est.Variance = NA_real_,
  Partitioning = rep(c("Non-Stratified", "Stratified"),
                     each = length(cv.folds)),
  stringsAsFactors = FALSE # keep the labels as character on any R version
)

for (cv.row in seq_len(nrow(Matrix))) {
  # Choose the partitioning scheme from the row's label.
  cv.runner <- if (Matrix$Partitioning[cv.row] == "Stratified") {
    strat.cv.qda
  } else {
    cv.qda
  }
  # The fold count is read from the table, so K and Folds cannot disagree.
  info <- cv.runner(
    data = train.set,
    model = Group ~ Latitude + Longitude + OverallAvg + OverallMeanAmp,
    y = "Group",
    K = Matrix$Folds[cv.row],
    seed = 38
  )
  Matrix$Error[cv.row] <- info$qda_error_rate
  # Squaring the estimated standard deviation gives the estimated variance.
  Matrix$Est.Variance[cv.row] <- info$qda_sd_error_rate^2
}

print(Matrix)
##    Folds      Error Est.Variance   Partitioning
## 1      5 0.08813559 9.767308e-05 Non-Stratified
## 2     10 0.09152542 2.502474e-04 Non-Stratified
## 3     20 0.08135593 8.055592e-05 Non-Stratified
## 4     50 0.08305085 1.129083e-04 Non-Stratified
## 5    100 0.08305085 1.121201e-04 Non-Stratified
## 6      5 0.08813559 1.613706e-04     Stratified
## 7     10 0.09152542 2.126416e-04     Stratified
## 8     20 0.08983051 1.486268e-04     Stratified
## 9     50 0.08644068 1.428786e-04     Stratified
## 10   100 0.08305085 1.333597e-04     Stratified