###Assignment 14.

#Part 1: Classification of Wines

library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.5
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(pROC)
## Warning: package 'pROC' was built under R version 4.0.5
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(raster)
## Warning: package 'raster' was built under R version 4.0.5
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.0.4
library(rgdal)
## Warning: package 'rgdal' was built under R version 4.0.5
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/proj
library(tmap)
## Warning: package 'tmap' was built under R version 4.0.5
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
library(caret)
## Loading required package: lattice
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
## 
##     intersect, select, union
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(sf)
## Warning: package 'sf' was built under R version 4.0.5
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.0.5
## 
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
## 
##     precision, recall
## The following object is masked from 'package:pROC':
## 
##     auc
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(gvlma)
library(spdep)
## Warning: package 'spdep' was built under R version 4.0.5
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.0.5
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spgwr)
## Warning: package 'spgwr' was built under R version 4.0.5
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(ModelMetrics)
## 
## Attaching package: 'ModelMetrics'
## The following objects are masked from 'package:Metrics':
## 
##     auc, ce, logLoss, mae, mse, msle, precision, recall, rmse, rmsle
## The following objects are masked from 'package:caret':
## 
##     confusionMatrix, precision, recall, sensitivity, specificity
## The following object is masked from 'package:pROC':
## 
##     auc
## The following object is masked from 'package:base':
## 
##     kappa
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## The following objects are masked from 'package:raster':
## 
##     buffer, rotated
library(readr)
# Load the wine data set used in Part 1 (one row per sample, `class` label
# plus 13 numeric measurements, per the column specification printed below).
wine <- read_csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/wine_data.csv"
)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   class = col_character(),
##   Alc = col_double(),
##   Mal = col_double(),
##   Ash = col_double(),
##   Alca = col_double(),
##   Mag = col_double(),
##   Phen = col_double(),
##   Flav = col_double(),
##   Nonflav = col_double(),
##   Pro = col_double(),
##   ColorInt = col_double(),
##   Hue = col_double(),
##   Dil = col_double(),
##   Proline = col_double()
## )
# Hold out 30 randomly drawn samples per wine class for validation and keep
# every remaining row for training.
set.seed(49)

# Tag each row with an id so the split is made by row identity rather than
# row content. dplyr::setdiff() treats data frames as sets: it silently drops
# duplicated rows and would also remove any training row whose values happen
# to equal a validation row.
wine_ids <- wine %>% mutate(.row_id = row_number())

val_ids <- wine_ids %>%
  group_by(class) %>%
  sample_n(30, replace = FALSE) %>%
  ungroup() # do not leave the validation set grouped

train <- wine_ids %>%
  anti_join(val_ids, by = ".row_id") %>%
  dplyr::select(-.row_id)
val <- val_ids %>% dplyr::select(-.row_id)

# caret is given plain data frames rather than tibbles.
val <- as.data.frame(val)
train <- as.data.frame(train)

# Shared resampling scheme for every model below: 5-fold cross-validation.
set.seed(42)
trainctrl <- trainControl(method = "cv", number = 5, verboseIter = FALSE)
#Run models using caret

# k-nearest neighbours on the wine training set. Predictors are centred and
# scaled; tuning uses the resampling scheme in `trainctrl` and picks the
# candidate with the best Kappa.
set.seed(49)
knn.model <- train(
  class ~ .,
  data = train,
  method = "knn",
  tuneLength = 10,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)


# Single decision tree (rpart) on the wine training set, tuned with the
# resampling scheme in `trainctrl` and selected on Kappa.
set.seed(49)
dt.model <- train(
  class ~ .,
  data = train,
  method = "rpart",
  tuneLength = 10,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)


# Random forest on the wine training set. `ntree` and `importance` are passed
# through to the underlying randomForest fit; importance = TRUE is what makes
# the per-class importance table available later.
set.seed(49)
rf.model <- train(
  class ~ .,
  data = train,
  method = "rf",
  tuneLength = 10,
  ntree = 100,
  importance = TRUE,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)


# Support vector machine with a radial kernel on the wine training set,
# tuned with the resampling scheme in `trainctrl` and selected on Kappa.
set.seed(49)
svm.model <- train(
  class ~ .,
  data = train,
  method = "svmRadial",
  tuneLength = 10,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)
# Predict the class of every held-out validation sample with each model.
knn.predict <- predict(knn.model, newdata = val)
dt.predict <- predict(dt.model, newdata = val)
rf.predict <- predict(rf.model, newdata = val)
svm.predict <- predict(svm.model, newdata = val)
# Error matrix and accuracy statistics for KNN on the validation set.
# caret:: is explicit because ModelMetrics masks confusionMatrix().
caret::confusionMatrix(
  data = knn.predict,
  reference = as.factor(val$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction WineA WineB WineC
##      WineA    30     2     0
##      WineB     0    28     1
##      WineC     0     0    29
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.9057, 0.9931)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: WineA Class: WineB Class: WineC
## Sensitivity                1.0000       0.9333       0.9667
## Specificity                0.9667       0.9833       1.0000
## Pos Pred Value             0.9375       0.9655       1.0000
## Neg Pred Value             1.0000       0.9672       0.9836
## Prevalence                 0.3333       0.3333       0.3333
## Detection Rate             0.3333       0.3111       0.3222
## Detection Prevalence       0.3556       0.3222       0.3222
## Balanced Accuracy          0.9833       0.9583       0.9833
# Error matrix and accuracy statistics for the decision tree on the
# validation set.
caret::confusionMatrix(
  data = dt.predict,
  reference = as.factor(val$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction WineA WineB WineC
##      WineA    26     5     0
##      WineB     4    24     1
##      WineC     0     1    29
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8778          
##                  95% CI : (0.7918, 0.9374)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8167          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: WineA Class: WineB Class: WineC
## Sensitivity                0.8667       0.8000       0.9667
## Specificity                0.9167       0.9167       0.9833
## Pos Pred Value             0.8387       0.8276       0.9667
## Neg Pred Value             0.9322       0.9016       0.9833
## Prevalence                 0.3333       0.3333       0.3333
## Detection Rate             0.2889       0.2667       0.3222
## Detection Prevalence       0.3444       0.3222       0.3333
## Balanced Accuracy          0.8917       0.8583       0.9750
# Error matrix and accuracy statistics for the random forest on the
# validation set.
caret::confusionMatrix(
  data = rf.predict,
  reference = as.factor(val$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction WineA WineB WineC
##      WineA    27     0     0
##      WineB     3    29     0
##      WineC     0     1    30
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9556          
##                  95% CI : (0.8901, 0.9878)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9333          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: WineA Class: WineB Class: WineC
## Sensitivity                0.9000       0.9667       1.0000
## Specificity                1.0000       0.9500       0.9833
## Pos Pred Value             1.0000       0.9062       0.9677
## Neg Pred Value             0.9524       0.9828       1.0000
## Prevalence                 0.3333       0.3333       0.3333
## Detection Rate             0.3000       0.3222       0.3333
## Detection Prevalence       0.3000       0.3556       0.3444
## Balanced Accuracy          0.9500       0.9583       0.9917
# Error matrix and accuracy statistics for the SVM on the validation set.
caret::confusionMatrix(
  data = svm.predict,
  reference = as.factor(val$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction WineA WineB WineC
##      WineA    29     0     0
##      WineB     1    30     1
##      WineC     0     0    29
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9778         
##                  95% CI : (0.922, 0.9973)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9667         
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: WineA Class: WineB Class: WineC
## Sensitivity                0.9667       1.0000       0.9667
## Specificity                1.0000       0.9667       1.0000
## Pos Pred Value             1.0000       0.9375       1.0000
## Neg Pred Value             0.9836       1.0000       0.9836
## Prevalence                 0.3333       0.3333       0.3333
## Detection Rate             0.3222       0.3333       0.3222
## Detection Prevalence       0.3222       0.3556       0.3222
## Balanced Accuracy          0.9833       0.9833       0.9833
# Pull the fitted randomForest object out of the caret wrapper and print its
# variable-importance table (per-class scores, MeanDecreaseAccuracy and
# MeanDecreaseGini).
rf.model.final <- rf.model[["finalModel"]]
randomForest::importance(rf.model.final)
##               WineA      WineB      WineC MeanDecreaseAccuracy MeanDecreaseGini
## Alc       3.6607021  3.4134825  0.8036150            5.4418616        3.4916712
## Mal       2.1287432  2.0311387 -0.6576224            2.1956113        0.5235603
## Ash       0.0000000 -0.3868606  0.0000000           -0.3819779        0.2595416
## Alca      1.4632636  0.5246714  1.4507584            1.7205594        0.5300444
## Mag       1.8469094  0.6901531 -1.0050378            1.7365986        0.7370374
## Phen      2.6892831  0.4997858  2.7862972            3.2819460        1.6130932
## Flav      4.9747644  1.6874109  7.2791687            7.4746930        7.1216210
## Nonflav   0.6814502  1.0189200 -1.1017687            0.3848599        0.1405057
## Pro      -0.2000400  0.8378383  0.4476615            0.3935664        0.3751403
## ColorInt  7.1977421  8.8340892  9.1747753           11.7214428       15.0041466
## Hue       2.9670518  2.3898045  5.3062724            5.3033557        4.4562642
## Dil       4.2921833  2.3023016  5.5434501            6.1172550        5.5120552
## Proline  12.5843155  9.6098845  6.0264802           13.5241133       15.2655461

Questions for part 1:

Q1. Which algorithm yielded the highest overall accuracy?

A: SVM had the highest overall accuracy (0.9778), followed by KNN (0.9667) and random forest (0.9556).

Q2. Which algorithm yielded the best Kappa statistic?

A: SVM had the highest kappa statistic of 0.9667.

Q3. Based on the error matrices, what were the primary sources of confusion in this classification? Or, which wines were most confused?

A: Looking at the four error matrices, almost all misclassifications involved WineB. The decision tree classification had the worst accuracy overall.

Q4. Make a copy of the decision tree and provide it with your answers. What variables were used to split the data in the decision tree?

A: The decision tree used the ColorInt and Flav variables.

# Extract the fitted tree from the caret wrapper so it can be plotted.
dt.model.final <- dt.model[["finalModel"]]
# Plot call left disabled; prp() is presumably from the rpart.plot package,
# which is not among the libraries loaded above.
#prp(dt.model.final)

Q5. What variables were found to be most important based on the RF importance measures?

A: The variables ColorInt, Alc, Flav, Proline, and Dil had the highest importance measures.

#Part 2: Forest Type Classification

# Load the Part 2 inputs: labelled training samples, labelled validation
# samples, and the band-order table (presumably describing the predictor
# bands; it is not used by the code visible in this file).
training <- read.csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/training.csv"
)
validation <- read.csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/validation.csv"
)
band <- read.csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/Band_Order.csv"
)
# Draw a balanced training subset: 100 random samples per forest class.
# The seed is set immediately BEFORE sampling so the draw is reproducible on
# its own and does not depend on RNG state left behind by earlier code.
set.seed(49)
train1 <- training %>%
  group_by(class) %>%
  sample_n(100, replace = FALSE) %>%
  ungroup() # do not leave the training set grouped

# Hand caret a plain data frame, matching how the wine data is prepared.
train1 <- as.data.frame(train1)

# Shared resampling scheme for the Part 2 models: 5-fold cross-validation.
trainctrl <- trainControl(method = "cv", number = 5, verboseIter = FALSE)
# k-nearest neighbours on the forest-type training subset. Predictors are
# centred and scaled; tuning uses `trainctrl` and selects on Kappa.
set.seed(49)
knn.model1 <- train(
  class ~ .,
  data = train1,
  method = "knn",
  tuneLength = 10,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)


# Single decision tree (rpart) on the forest-type training subset, tuned
# with `trainctrl` and selected on Kappa.
set.seed(49)
dt.model1 <- train(
  class ~ .,
  data = train1,
  method = "rpart",
  tuneLength = 10,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)


# Random forest on the forest-type training subset. `ntree` and `importance`
# are passed through to the underlying randomForest fit; importance = TRUE
# enables the per-class importance table printed later.
set.seed(49)
rf.model1 <- train(
  class ~ .,
  data = train1,
  method = "rf",
  tuneLength = 10,
  ntree = 100,
  importance = TRUE,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)


# Radial-kernel support vector machine on the forest-type training subset,
# tuned with `trainctrl` and selected on Kappa.
set.seed(49)
svm.model1 <- train(
  class ~ .,
  data = train1,
  method = "svmRadial",
  tuneLength = 10,
  preProcess = c("center", "scale"),
  trControl = trainctrl,
  metric = "Kappa"
)
# Predict the forest type of every validation sample with each model.
knn.predict1 <- predict(knn.model1, newdata = validation)
dt.predict1 <- predict(dt.model1, newdata = validation)
rf.predict1 <- predict(rf.model1, newdata = validation)
svm.predict1 <- predict(svm.model1, newdata = validation)
# Error matrix and accuracy statistics for KNN on the forest validation set.
# caret:: is explicit because ModelMetrics masks confusionMatrix().
caret::confusionMatrix(
  data = knn.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction ever  mix north  oak poak
##      ever  1626  116   381    3  144
##      mix     37  841   430  306   59
##      north  300  586  1048   26   16
##      oak     16  412   135 1154  277
##      poak    21   45     6  511 1504
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6173          
##                  95% CI : (0.6077, 0.6268)
##     No Information Rate : 0.2             
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.5216          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity               0.8130     0.4205       0.5240     0.5770      0.7520
## Specificity               0.9195     0.8960       0.8840     0.8950      0.9271
## Pos Pred Value            0.7163     0.5027       0.5304     0.5787      0.7207
## Neg Pred Value            0.9516     0.8608       0.8814     0.8943      0.9373
## Prevalence                0.2000     0.2000       0.2000     0.2000      0.2000
## Detection Rate            0.1626     0.0841       0.1048     0.1154      0.1504
## Detection Prevalence      0.2270     0.1673       0.1976     0.1994      0.2087
## Balanced Accuracy         0.8662     0.6583       0.7040     0.7360      0.8396
# Error matrix and accuracy statistics for the decision tree on the forest
# validation set.
caret::confusionMatrix(
  data = dt.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction ever  mix north  oak poak
##      ever  1463  106   334    0   77
##      mix     57  652   412  148   44
##      north  425  547  1078   98   73
##      oak     18  626   163 1321  389
##      poak    37   69    13  433 1417
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5931          
##                  95% CI : (0.5834, 0.6028)
##     No Information Rate : 0.2             
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.4914          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity               0.7315     0.3260       0.5390     0.6605      0.7085
## Specificity               0.9354     0.9174       0.8571     0.8505      0.9310
## Pos Pred Value            0.7389     0.4966       0.4854     0.5248      0.7197
## Neg Pred Value            0.9330     0.8448       0.8815     0.9093      0.9274
## Prevalence                0.2000     0.2000       0.2000     0.2000      0.2000
## Detection Rate            0.1463     0.0652       0.1078     0.1321      0.1417
## Detection Prevalence      0.1980     0.1313       0.2221     0.2517      0.1969
## Balanced Accuracy         0.8334     0.6217       0.6981     0.7555      0.8197
# Error matrix and accuracy statistics for the random forest on the forest
# validation set.
caret::confusionMatrix(
  data = rf.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction ever  mix north  oak poak
##      ever  1632   90   328    1   73
##      mix     30  868   420  183   26
##      north  297  541  1145   37   14
##      oak     25  458   101 1308  306
##      poak    16   43     6  471 1581
## 
## Overall Statistics
##                                          
##                Accuracy : 0.6534         
##                  95% CI : (0.644, 0.6627)
##     No Information Rate : 0.2            
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.5668         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
## 
## Statistics by Class:
## 
##                      Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity               0.8160     0.4340       0.5725     0.6540      0.7905
## Specificity               0.9385     0.9176       0.8889     0.8888      0.9330
## Pos Pred Value            0.7684     0.5684       0.5629     0.5951      0.7468
## Neg Pred Value            0.9533     0.8664       0.8927     0.9113      0.9468
## Prevalence                0.2000     0.2000       0.2000     0.2000      0.2000
## Detection Rate            0.1632     0.0868       0.1145     0.1308      0.1581
## Detection Prevalence      0.2124     0.1527       0.2034     0.2198      0.2117
## Balanced Accuracy         0.8772     0.6758       0.7307     0.7714      0.8618
# Error matrix and accuracy statistics for the SVM on the forest validation
# set.
caret::confusionMatrix(
  data = svm.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction ever  mix north  oak poak
##      ever  1577   82   262    0   73
##      mix     36  949   462  172    6
##      north  353  517  1163   11    4
##      oak     23  431   111 1394  304
##      poak    11   21     2  423 1613
## 
## Overall Statistics
##                                           
##                Accuracy : 0.6696          
##                  95% CI : (0.6603, 0.6788)
##     No Information Rate : 0.2             
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.587           
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity               0.7885     0.4745       0.5815     0.6970      0.8065
## Specificity               0.9479     0.9155       0.8894     0.8914      0.9429
## Pos Pred Value            0.7909     0.5840       0.5679     0.6160      0.7792
## Neg Pred Value            0.9472     0.8745       0.8947     0.9217      0.9512
## Prevalence                0.2000     0.2000       0.2000     0.2000      0.2000
## Detection Rate            0.1577     0.0949       0.1163     0.1394      0.1613
## Detection Prevalence      0.1994     0.1625       0.2048     0.2263      0.2070
## Balanced Accuracy         0.8682     0.6950       0.7354     0.7942      0.8747
#names(all) <- c("all.1","all.10","all.11","all.12","all.13","all.14","all.15","all.16","all.17","all.18","all.19","all.2","all.20","all.21","all.22","all.23","all.24","all.25","all.26","all.27","all.28","all.29","all.3","all.3","all.30","all.31","all.32","all.33","all.34","all.35","all.36","all.37","all.38","all.39","all.4","all.40","all.41","all.42","all.43","all.44","all.45","all.46","all.47","all.48", "all.49", "all.5", "all.50", "all.51", "all.52", "all.53", "all.54", "all.55", "all.56", "all.57", "all.58", "all.59", "all.6", "all.60", "all.61", "all.62", "all.63", "all.64", "all.65", "all.66", "all.67", "all.68", "all.69", "all.7", "all.70", "all.71", "all.72", "all.73", "all.74", "all.75", "all.76", "all.77", "all.78", "all.79", "all.8", "all.80", "all.81", "all.82", "all.83", "all.84", "all.85", "all.86", "all.87", "all.88", "all.89", "all.9") 

# Could not get the names to apply. At first I did this to the 'train1' variable; however, I realised I had to do this to the raster output.
#knn_model
#predict_test <- predict(pred, knn_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE)
#knn_raster_result <- raster("knn_class")
#masked_result1 <- mask*knn_raster_result
#tm_shape(masked_result1)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "KNN Model", title.size = 1.5)
#dt_model
#predict_test <- predict(pred, dt_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE, filename)
#dt_raster_result <- raster("dt_class")
#masked_result2 <- mask*dt_raster_result
#tm_shape(masked_result2)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "DT Model", title.size = 1.5)
#rf_model
#predict_test <- predict(pred, rf_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE)
#rf_raster_result <- raster("rf_class")
#masked_result3 <- mask*rf_raster_result
#tm_shape(masked_result3)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "RF Model", title.size = 1.5)
#svm_model
#predict_test <- predict(pred, svm_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE)
#svm_raster_result <- raster("svm_class")
#masked_result4 <- mask*svm_raster_result
#tm_shape(masked_result4)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "SVM Model", title.size = 1.5)
# Pull the fitted randomForest object out of the caret wrapper and print its
# variable-importance table (per-class scores, MeanDecreaseAccuracy and
# MeanDecreaseGini) for the forest-type predictors.
rf.model.final1 <- rf.model1[["finalModel"]]
randomForest::importance(rf.model.final1)
##               ever         mix        north          oak        poak
## all.1   2.33986423  0.94218427 -1.079451156  1.197341704  2.44103581
## all.2  -0.13251720  1.73714729  0.610240659  0.220128330  2.43610826
## all.3   2.15590088  2.55579350 -1.037808783  1.851369203  1.01468302
## all.4   1.87455152  0.57728000  0.577388866 -0.683716240  1.48398984
## all.5   2.36408461  2.15225593 -0.989842324  2.819082925  2.90579080
## all.6   3.86330484  1.46250204 -2.398904958 -0.657183407  5.60445181
## all.7   1.78265127  1.76873610  0.694103357 -0.238285407  1.34983447
## all.8   3.38669752  2.59314718  1.893846897  2.719918533  2.21305298
## all.9   3.36717326  4.65708085  1.297000438  3.623522381  0.57227969
## all.10  3.08922658  2.76226415 -0.807303511 -0.306613455  4.87986949
## all.11  0.73106458  1.14150642  0.507851625  0.169579429  3.38028960
## all.12  2.51988167  5.06225951  1.645352955  2.985402453  0.18430640
## all.13  4.61524987  3.01899782  0.087466946  4.406130857  2.88059568
## all.14  2.29696216  2.78511847  1.317147687  4.712274611  2.07962085
## all.15  3.57215291 -1.23417972 -1.378777901  2.564684197  1.67519978
## all.16  1.28693573  0.51502284  0.001738139  0.540264941  0.99731412
## all.17 -1.36880211 -2.21791682  2.087434644 -0.122024448  1.42842686
## all.18  1.65373377  1.18011431  2.359919906  0.305488127  1.09234878
## all.19  0.89665168 -0.45330273 -0.650168153  1.114893082  0.58332893
## all.20  0.73466137 -0.23380393 -0.399385571 -0.430464832  1.37699342
## all.21  2.07553367  1.00503782  1.153855976  0.182944251  2.34238783
## all.22 -0.52155328  0.17787152 -0.115738830 -0.399159362 -0.34670202
## all.23  2.86034369  5.18670916  0.903748464  2.106136602  3.34291083
## all.24 -0.91826283  0.34613776  2.848226501 -0.410089203  3.46901541
## all.25  0.77263487  0.74078870 -0.902755858  1.306086376 -1.42288306
## all.26  0.97431080  3.05857383 -0.761002576  0.772780282  1.35257295
## all.27  1.62680960  2.39478043  1.191232704 -0.572097660  0.43405201
## all.28  6.07569957  3.83432284 -1.768330290  6.117737588  2.12601818
## all.29  1.38135273 -2.24315634  0.538314754 -0.107994866 -0.42095263
## all.30  1.88124025  1.52193664 -1.186246976  0.155939814  3.62045611
## all.31  1.69808131  0.63932068 -0.154462281 -0.037816997  1.38811665
## all.32  1.77873910  0.70870613  1.247623827  0.597148435  1.34273603
## all.33  1.69321731  0.00000000 -1.233168410 -1.005037815 -0.93744041
## all.34  2.84284548 -0.01150385  1.733552299 -0.380937532  2.18076280
## all.35  1.52090888  0.64912717 -0.265045003  0.872200462 -0.06520337
## all.36  1.31399277  0.62292947 -0.615850773  1.151959666 -0.94384182
## all.37  1.47129410  1.94992484  3.036228762  5.468343355 -0.60632362
## all.38  1.18481602  2.79955885  0.912928614  1.265861395  2.25151069
## all.39  1.27394561  1.57765907  1.283753740 -0.825299202  1.07488854
## all.40  1.57446744 -1.20315890  1.469404163  2.032822087  0.57798789
## all.41  0.92962796  0.41833466  3.743968870  4.513599882  1.06328504
## all.42  1.33278518 -0.06655494  0.513557718  3.133203734  1.33330321
## all.43  0.70922373  0.37786762 -1.140924118  1.482612020  0.60511043
## all.44  1.03668676 -1.82868490  2.346098647 -0.301974917  2.22910311
## all.45 -1.00183203  2.65459068  0.484251598 -1.235433334  1.17224074
## all.46  2.45009774  1.65069837 -1.340783655 -1.096749878  0.33143045
## all.47  2.07893071 -0.95303668  0.593782859  0.411852173  1.71604341
## all.48  3.01960722  2.69405278  2.322458535 -0.610495218  4.94718911
## all.49  1.77838699 -0.33157445 -0.306469014  2.182200110 -0.05254188
## all.50  2.58885315  0.60114140 -1.103532163 -1.005037815  0.78746133
## all.51  2.23771023  0.22958235  0.525589313  0.961578275  1.83227636
## all.52  2.18415731  4.85474376  7.740344347  4.703334111  9.98102040
## all.53  2.31125344  1.09940739 -1.641405752 -0.668949634  1.47432453
## all.54  2.21389662  0.22596932 -1.571826721  0.021050951  1.01175711
## all.55 -1.86762258  1.02514233  3.909977893  0.022200334  0.65610765
## all.56  2.22582306  2.09850904 -1.705207117 -0.646943367  1.96673717
## all.57  3.02794231  1.09227222  0.133260853  0.548996282  1.32396158
## all.58  0.07437449  1.54697962  4.390225865  0.919536701  0.85540981
## all.59  0.77462752  0.23022456  0.843196354  0.548318152  2.23438861
## all.60  2.10722974  1.68541636 -1.428347147  0.906698844 -0.33146681
## all.61 -0.18694038 -0.21844506  1.290534963  1.438946844 -0.49766629
## all.62  4.15036773  0.45354973  1.110735548 -0.812467007  0.90498897
## all.63  1.00503782  0.30105710  1.005037815 -1.178597970  1.28761895
## all.64  1.87738779 -0.45930651 -1.915473589 -1.688468452  0.63074241
## all.65  1.06227822 -0.65378668 -0.190447499  2.060928957 -0.05705317
## all.66  5.91325535 -2.22602067  2.170660152 -0.138866103  3.47950110
## all.67  0.42388217  1.01422496  0.021754609  0.298327913  1.50427220
## all.68  2.65314904 -0.14378728  0.208929175  0.333455832  0.57747105
## all.69 -0.12919664  0.62841192  2.918114475 -0.212741791 -0.10066026
## all.70  1.10156174  1.51517775 -0.462095403  1.241597085  2.98318011
## all.71 11.38498675 -0.86196592 10.960867520  8.351042872 16.23463541
## all.72  1.11638927  0.60095348  0.616859344  0.231260172 -0.45712426
## all.73 -1.17439142 -0.89106582  0.904401332 -0.740204310  1.89310187
## all.74  0.62180544 -0.71136762  0.282174685 -1.782616997  2.33662641
## all.75  3.03997337 -0.34740153 -0.357905237 -0.436348062  0.28415787
## all.76  1.81948318  0.21872602 -1.478234043 -0.197246399  1.62076987
## all.77  1.99879957 -1.29865820 -1.243422993  0.823731898 -0.71791914
## all.78  0.33058280 -0.76390171  1.821966230 -0.739620414 -1.44619537
## all.79 -0.52327489  0.02840870  2.136449209 -0.540464718 -0.96405148
## all.80 -1.15565153  1.04066562  1.677866656 -0.542873058  1.29345734
## all.81  2.58631355  0.89645219 -0.890754562  2.279824411 -0.67479207
## all.82  1.89466459 -1.45351946  0.263378637  0.822577972 -2.36020307
## all.83  0.27445017  1.23552776 -0.278985199  0.693709834 -0.67233293
## all.84  1.73744597  0.54760029 -2.485342883 -0.123699953 -0.16486454
## all.85  2.06423090 -1.14165604 -1.280038843  0.009863994  0.83092169
## all.86  0.77022799 -0.20381155  1.707840167  1.694211806  0.24756283
## all.87  0.24196601  0.90004112  0.654679128 -0.415226228  2.01300796
## all.88 -0.12149536  1.44808458  0.124790066  0.568666283 -0.47575017
## all.89 -0.46026798 -0.31769355  1.471595005  1.554353337  0.83844105
##        MeanDecreaseAccuracy MeanDecreaseGini
## all.1            2.38994729        4.2761145
## all.2            2.06607528        1.6730604
## all.3            2.94724112        5.2328727
## all.4            1.71422008        1.5401739
## all.5            3.21383349        5.1455515
## all.6            6.06700698        4.2146804
## all.7            2.07986261        2.2673081
## all.8            3.77918148        7.4245030
## all.9            5.46317264        8.9005648
## all.10           5.32204651        7.8947352
## all.11           3.02823599        1.7784507
## all.12           5.35893218        6.4469728
## all.13           5.34659643       15.5671847
## all.14           6.17142264        9.8243326
## all.15           4.55930544        2.5130964
## all.16           1.83042427        1.7480193
## all.17           0.64990521        1.3580779
## all.18           2.74240124        1.7769279
## all.19           1.38712385        0.8140621
## all.20           1.12852530        1.3760124
## all.21           2.68803811        0.8384222
## all.22          -0.71701078        0.8640131
## all.23           6.00722812       12.8794615
## all.24           3.29556625        4.8558756
## all.25           1.17666722        1.2030445
## all.26           2.72076516        2.5192793
## all.27           2.37139566        1.7796481
## all.28           7.91286806       26.7037708
## all.29          -0.82847360        3.1630354
## all.30           3.94697874        3.7085605
## all.31           2.21276291        3.3500351
## all.32           2.62760559        2.1869784
## all.33           1.50295848        0.5837549
## all.34           3.50969928        1.8348586
## all.35           1.68742150        1.4824160
## all.36           1.33486052        1.1754626
## all.37           5.64200218        7.4322231
## all.38           3.81551126        4.3028271
## all.39           2.08170756        1.5815296
## all.40           2.52872671        2.4248936
## all.41           4.99796971        6.0584873
## all.42           3.05851045        3.9102291
## all.43           0.82153800        3.2459309
## all.44           2.25075376        3.3298010
## all.45           1.95607612        3.9342314
## all.46           2.37140809        1.6702323
## all.47           2.40705123        1.8930974
## all.48           5.09522253        8.1262895
## all.49           1.97506187        1.5153295
## all.50           2.73843646        1.6638517
## all.51           2.78761348        3.6841138
## all.52          10.90178179       30.4118956
## all.53           2.28531517        1.7474956
## all.54           2.11324312        2.4801089
## all.55           3.27588279        3.2756865
## all.56           2.44608003        4.3168605
## all.57           3.20680217        4.3615595
## all.58           4.40032937        4.1846086
## all.59           2.20004745        2.1231182
## all.60           2.13488083        1.1637343
## all.61           0.86593528        0.9951390
## all.62           4.25369346        2.3945606
## all.63           0.85135758        0.8643895
## all.64           0.99235474        0.8053013
## all.65           1.63052035        2.4443377
## all.66           5.92494659        7.1440673
## all.67           1.70681326        1.1019673
## all.68           2.82227665        1.7042117
## all.69           2.48857226        3.3968484
## all.70           3.48776818        5.6936890
## all.71          16.94356778       53.5947647
## all.72           1.08865809        1.4953152
## all.73           0.62333202        2.6405992
## all.74           1.22523816        1.0394304
## all.75           1.54915973        3.1545937
## all.76           1.46493519        3.3002767
## all.77           0.91716056        2.2429708
## all.78          -0.12282588        2.0063177
## all.79           0.08884405        2.6834238
## all.80           1.55806010        2.3672658
## all.81           1.93273963        2.4384450
## all.82          -0.38872175        2.8527733
## all.83           1.19902145        4.1841188
## all.84           0.16155058        1.8710386
## all.85           0.66516248        3.0383082
## all.86           1.97647580        2.8645847
## all.87           1.74805599        4.1030804
## all.88           0.85683886        3.4110566
## all.89           1.25059838        3.5942316

Questions for part 2.

Q6. Which algorithm yielded the highest overall accuracy?

A: SVM had the highest overall accuracy (0.6696).

Q7. Which algorithm yielded the best Kappa statistic?

A: SVM had the highest kappa value.

Q8. Which forest types proved most difficult to map?

A: The mixed forest type was the most difficult to map; it had the lowest sensitivity in every model.

Q9. Based on the error matrices, what were the primary sources of confusion in this classification? Or, which forest types were most confused?

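A: All of the forest types were confused to some degree, but overall the mixed forest type was the hardest to map in this lab: mixed samples were most often misclassified as northern hardwood or oak. Oak and pine-oak were also frequently confused with each other, as were evergreen and northern hardwood.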

Q10. What variables were found to be most important based on the RF importance measures?

A: Based on the RF importance measures, the most important variable was "all.71", followed by "all.52" and "all.28".