###Assignment 14.
#Part 1: Classification of Wines
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.0.5
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(pROC)
## Warning: package 'pROC' was built under R version 4.0.5
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(raster)
## Warning: package 'raster' was built under R version 4.0.5
## Loading required package: sp
## Warning: package 'sp' was built under R version 4.0.4
library(rgdal)
## Warning: package 'rgdal' was built under R version 4.0.5
## rgdal: version: 1.5-23, (SVN revision 1121)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
## Path to GDAL shared files: C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
## Path to PROJ shared files: C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/proj
## PROJ CDN enabled: FALSE
## Linking to sp version:1.4-5
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
## Overwritten PROJ_LIB was C:/Users/jmhp2/OneDrive/Documents/R/win-library/4.0/rgdal/proj
library(tmap)
## Warning: package 'tmap' was built under R version 4.0.5
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
library(caret)
## Loading required package: lattice
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sf)
## Warning: package 'sf' was built under R version 4.0.5
## Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.0.5
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
## The following object is masked from 'package:pROC':
##
## auc
library(car)
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(gvlma)
library(spdep)
## Warning: package 'spdep' was built under R version 4.0.5
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.0.5
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spgwr)
## Warning: package 'spgwr' was built under R version 4.0.5
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)
library(ModelMetrics)
##
## Attaching package: 'ModelMetrics'
## The following objects are masked from 'package:Metrics':
##
## auc, ce, logLoss, mae, mse, msle, precision, recall, rmse, rmsle
## The following objects are masked from 'package:caret':
##
## confusionMatrix, precision, recall, sensitivity, specificity
## The following object is masked from 'package:pROC':
##
## auc
## The following object is masked from 'package:base':
##
## kappa
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following objects are masked from 'package:raster':
##
## buffer, rotated
library(readr)
# Load the wine table: `class` is the character label column and every other
# column is numeric (see the column specification echoed below).
wine <- read_csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/wine_data.csv"
)
##
## -- Column specification --------------------------------------------------------
## cols(
## class = col_character(),
## Alc = col_double(),
## Mal = col_double(),
## Ash = col_double(),
## Alca = col_double(),
## Mag = col_double(),
## Phen = col_double(),
## Flav = col_double(),
## Nonflav = col_double(),
## Pro = col_double(),
## ColorInt = col_double(),
## Hue = col_double(),
## Dil = col_double(),
## Proline = col_double()
## )
# Stratified hold-out split: 30 randomly chosen rows per wine class go to the
# validation set, everything else goes to the training set.
#
# The split is done on an explicit row id rather than with setdiff(wine, val):
# dplyr's setdiff() compares whole rows and returns only distinct rows, so
# duplicated observations (or training rows identical to a sampled validation
# row) would silently disappear from the training data.
set.seed(49)
wine_keyed <- wine %>% mutate(row_id = row_number())
val_keyed <- wine_keyed %>%
  group_by(class) %>%
  sample_n(30, replace = FALSE) %>%
  ungroup()
# Keep every row whose id was not drawn for validation (original order kept).
train_keyed <- wine_keyed[!wine_keyed$row_id %in% val_keyed$row_id, ]
# Drop the helper id so it cannot leak into `class ~ .` as a predictor, and
# hand caret plain data frames.
val <- as.data.frame(val_keyed[, names(wine)])
train <- as.data.frame(train_keyed[, names(wine)])
# Resampling scheme shared by the models below: 5-fold cross-validation with
# per-iteration logging turned off.
set.seed(42)
trainctrl <- trainControl(
  method = "cv",
  number = 5,
  verboseIter = FALSE
)
# Fit four classifiers with caret on the wine training set.
# Every model uses the same recipe: all columns predict `class`, predictors are
# centered and scaled, 10 candidate tuning values are tried, and the winner is
# chosen by cross-validated Kappa. The seed is reset before each fit.

# k-nearest neighbours
set.seed(49)
knn.model <- train(
  class ~ .,
  data = train,
  method = "knn",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Single decision tree (rpart)
set.seed(49)
dt.model <- train(
  class ~ .,
  data = train,
  method = "rpart",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Random forest: 100 trees, with importance = TRUE so the importance table
# requested later is available.
set.seed(49)
rf.model <- train(
  class ~ .,
  data = train,
  method = "rf",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10,
  ntree = 100,
  importance = TRUE
)

# Support vector machine with a radial kernel
set.seed(49)
svm.model <- train(
  class ~ .,
  data = train,
  method = "svmRadial",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
# Predict the wine class of every validation row with each fitted model.
knn.predict <- predict(knn.model, newdata = val)
dt.predict  <- predict(dt.model, newdata = val)
rf.predict  <- predict(rf.model, newdata = val)
svm.predict <- predict(svm.model, newdata = val)
# k-NN: error matrix of predicted vs. reference wine class on the validation set.
# caret:: is spelled out because ModelMetrics masks confusionMatrix().
caret::confusionMatrix(data = knn.predict, reference = as.factor(val$class))
## Confusion Matrix and Statistics
##
## Reference
## Prediction WineA WineB WineC
## WineA 30 2 0
## WineB 0 28 1
## WineC 0 0 29
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9057, 0.9931)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: WineA Class: WineB Class: WineC
## Sensitivity 1.0000 0.9333 0.9667
## Specificity 0.9667 0.9833 1.0000
## Pos Pred Value 0.9375 0.9655 1.0000
## Neg Pred Value 1.0000 0.9672 0.9836
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3111 0.3222
## Detection Prevalence 0.3556 0.3222 0.3222
## Balanced Accuracy 0.9833 0.9583 0.9833
# Decision tree: error matrix of predicted vs. reference wine class on the
# validation set.
caret::confusionMatrix(data = dt.predict, reference = as.factor(val$class))
## Confusion Matrix and Statistics
##
## Reference
## Prediction WineA WineB WineC
## WineA 26 5 0
## WineB 4 24 1
## WineC 0 1 29
##
## Overall Statistics
##
## Accuracy : 0.8778
## 95% CI : (0.7918, 0.9374)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8167
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: WineA Class: WineB Class: WineC
## Sensitivity 0.8667 0.8000 0.9667
## Specificity 0.9167 0.9167 0.9833
## Pos Pred Value 0.8387 0.8276 0.9667
## Neg Pred Value 0.9322 0.9016 0.9833
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.2889 0.2667 0.3222
## Detection Prevalence 0.3444 0.3222 0.3333
## Balanced Accuracy 0.8917 0.8583 0.9750
# Random forest: error matrix of predicted vs. reference wine class on the
# validation set.
caret::confusionMatrix(data = rf.predict, reference = as.factor(val$class))
## Confusion Matrix and Statistics
##
## Reference
## Prediction WineA WineB WineC
## WineA 27 0 0
## WineB 3 29 0
## WineC 0 1 30
##
## Overall Statistics
##
## Accuracy : 0.9556
## 95% CI : (0.8901, 0.9878)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9333
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: WineA Class: WineB Class: WineC
## Sensitivity 0.9000 0.9667 1.0000
## Specificity 1.0000 0.9500 0.9833
## Pos Pred Value 1.0000 0.9062 0.9677
## Neg Pred Value 0.9524 0.9828 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3000 0.3222 0.3333
## Detection Prevalence 0.3000 0.3556 0.3444
## Balanced Accuracy 0.9500 0.9583 0.9917
# SVM: error matrix of predicted vs. reference wine class on the validation set.
caret::confusionMatrix(data = svm.predict, reference = as.factor(val$class))
## Confusion Matrix and Statistics
##
## Reference
## Prediction WineA WineB WineC
## WineA 29 0 0
## WineB 1 30 1
## WineC 0 0 29
##
## Overall Statistics
##
## Accuracy : 0.9778
## 95% CI : (0.922, 0.9973)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9667
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: WineA Class: WineB Class: WineC
## Sensitivity 0.9667 1.0000 0.9667
## Specificity 1.0000 0.9667 1.0000
## Pos Pred Value 1.0000 0.9375 1.0000
## Neg Pred Value 0.9836 1.0000 0.9836
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3222 0.3333 0.3222
## Detection Prevalence 0.3222 0.3556 0.3222
## Balanced Accuracy 0.9833 0.9833 0.9833
# Extract the fitted forest from the caret wrapper and print its variable
# importance table (per-class scores plus the two overall measures).
rf.model.final <- rf.model[["finalModel"]]
importance(rf.model.final)
## WineA WineB WineC MeanDecreaseAccuracy MeanDecreaseGini
## Alc 3.6607021 3.4134825 0.8036150 5.4418616 3.4916712
## Mal 2.1287432 2.0311387 -0.6576224 2.1956113 0.5235603
## Ash 0.0000000 -0.3868606 0.0000000 -0.3819779 0.2595416
## Alca 1.4632636 0.5246714 1.4507584 1.7205594 0.5300444
## Mag 1.8469094 0.6901531 -1.0050378 1.7365986 0.7370374
## Phen 2.6892831 0.4997858 2.7862972 3.2819460 1.6130932
## Flav 4.9747644 1.6874109 7.2791687 7.4746930 7.1216210
## Nonflav 0.6814502 1.0189200 -1.1017687 0.3848599 0.1405057
## Pro -0.2000400 0.8378383 0.4476615 0.3935664 0.3751403
## ColorInt 7.1977421 8.8340892 9.1747753 11.7214428 15.0041466
## Hue 2.9670518 2.3898045 5.3062724 5.3033557 4.4562642
## Dil 4.2921833 2.3023016 5.5434501 6.1172550 5.5120552
## Proline 12.5843155 9.6098845 6.0264802 13.5241133 15.2655461
Questions for part 1:
Q1. Which algorithm yielded the highest overall accuracy?
A: SVM had the highest overall accuracy (0.9778), followed by k-NN (0.9667), random forest (0.9556), and the decision tree (0.8778).
Q2. Which algorithm yielded the best Kappa statistic?
A: SVM had the highest Kappa statistic, 0.9667.
Q3. Based on the error matrices, what were the primary sources of confusion in this classification? Or, which wines were most confused?
A: Looking at the four error matrices, almost all misclassifications involved WineB. The decision tree classification had the worst accuracy overall.
Q4. Make a copy of the decision tree and provide it with your answers. What variables were used to split the data in the decision tree?
A: The decision tree used the ColorInt and Flav variables.
# Extract the fitted tree from the caret wrapper so it can be plotted.
dt.model.final <- dt.model[["finalModel"]]
# Plot call left disabled, as in the original.
# NOTE(review): prp() presumably comes from rpart.plot, which is not among the
# packages loaded at the top of this file -- confirm before enabling.
#prp(dt.model.final)
Q5. What variables were found to be most important based on the RF importance measures?
A: The variables ColorInt, Alc, Flav, Proline, and Dil had the highest importance measures.
#Part 2: Forest Type Classification
# Forest-type inputs: training samples, validation samples, and the band-order
# lookup table.
training <- read.csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/training.csv"
)
validation <- read.csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/validation.csv"
)
band <- read.csv(
  file = "C:/Users/jmhp2/Downloads/ml_classification/ml_classification/Band_Order.csv"
)
# Draw a balanced training sample of 100 rows per forest class.
# The seed is set BEFORE sampling so the draw is reproducible on its own;
# previously it was set afterwards, which left the sample dependent on whatever
# RNG state the earlier part of the script happened to leave behind.
# NOTE: this changes which rows are drawn, so the echoed results below must be
# regenerated by re-knitting.
set.seed(49)
train1 <- training %>%
  group_by(class) %>%
  sample_n(100, replace = FALSE) %>%
  ungroup() %>%
  as.data.frame()  # plain data frame, matching how Part 1 feeds caret

# 5-fold cross-validation shared by the forest-type models.
trainctrl <- trainControl(method = "cv", number = 5, verboseIter = FALSE)
# Fit the same four classifiers on the forest-type training sample.
# Shared recipe: all columns predict `class`, predictors are centered and
# scaled, 10 candidate tuning values are tried, and selection is by
# cross-validated Kappa. The seed is reset before each fit.

# k-nearest neighbours
set.seed(49)
knn.model1 <- train(
  class ~ .,
  data = train1,
  method = "knn",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Single decision tree (rpart)
set.seed(49)
dt.model1 <- train(
  class ~ .,
  data = train1,
  method = "rpart",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Random forest: 100 trees, importance = TRUE for the importance table below.
set.seed(49)
rf.model1 <- train(
  class ~ .,
  data = train1,
  method = "rf",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10,
  ntree = 100,
  importance = TRUE
)

# Support vector machine with a radial kernel
set.seed(49)
svm.model1 <- train(
  class ~ .,
  data = train1,
  method = "svmRadial",
  metric = "Kappa",
  trControl = trainctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)
# Predict the forest type of every validation row with each fitted model.
knn.predict1 <- predict(knn.model1, newdata = validation)
dt.predict1  <- predict(dt.model1, newdata = validation)
rf.predict1  <- predict(rf.model1, newdata = validation)
svm.predict1 <- predict(svm.model1, newdata = validation)
# k-NN: error matrix of predicted vs. reference forest type on the validation
# set. caret:: is spelled out because ModelMetrics masks confusionMatrix().
caret::confusionMatrix(
  data = knn.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction ever mix north oak poak
## ever 1626 116 381 3 144
## mix 37 841 430 306 59
## north 300 586 1048 26 16
## oak 16 412 135 1154 277
## poak 21 45 6 511 1504
##
## Overall Statistics
##
## Accuracy : 0.6173
## 95% CI : (0.6077, 0.6268)
## No Information Rate : 0.2
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5216
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity 0.8130 0.4205 0.5240 0.5770 0.7520
## Specificity 0.9195 0.8960 0.8840 0.8950 0.9271
## Pos Pred Value 0.7163 0.5027 0.5304 0.5787 0.7207
## Neg Pred Value 0.9516 0.8608 0.8814 0.8943 0.9373
## Prevalence 0.2000 0.2000 0.2000 0.2000 0.2000
## Detection Rate 0.1626 0.0841 0.1048 0.1154 0.1504
## Detection Prevalence 0.2270 0.1673 0.1976 0.1994 0.2087
## Balanced Accuracy 0.8662 0.6583 0.7040 0.7360 0.8396
# Decision tree: error matrix of predicted vs. reference forest type on the
# validation set.
caret::confusionMatrix(
  data = dt.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction ever mix north oak poak
## ever 1463 106 334 0 77
## mix 57 652 412 148 44
## north 425 547 1078 98 73
## oak 18 626 163 1321 389
## poak 37 69 13 433 1417
##
## Overall Statistics
##
## Accuracy : 0.5931
## 95% CI : (0.5834, 0.6028)
## No Information Rate : 0.2
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4914
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity 0.7315 0.3260 0.5390 0.6605 0.7085
## Specificity 0.9354 0.9174 0.8571 0.8505 0.9310
## Pos Pred Value 0.7389 0.4966 0.4854 0.5248 0.7197
## Neg Pred Value 0.9330 0.8448 0.8815 0.9093 0.9274
## Prevalence 0.2000 0.2000 0.2000 0.2000 0.2000
## Detection Rate 0.1463 0.0652 0.1078 0.1321 0.1417
## Detection Prevalence 0.1980 0.1313 0.2221 0.2517 0.1969
## Balanced Accuracy 0.8334 0.6217 0.6981 0.7555 0.8197
# Random forest: error matrix of predicted vs. reference forest type on the
# validation set.
caret::confusionMatrix(
  data = rf.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction ever mix north oak poak
## ever 1632 90 328 1 73
## mix 30 868 420 183 26
## north 297 541 1145 37 14
## oak 25 458 101 1308 306
## poak 16 43 6 471 1581
##
## Overall Statistics
##
## Accuracy : 0.6534
## 95% CI : (0.644, 0.6627)
## No Information Rate : 0.2
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5668
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity 0.8160 0.4340 0.5725 0.6540 0.7905
## Specificity 0.9385 0.9176 0.8889 0.8888 0.9330
## Pos Pred Value 0.7684 0.5684 0.5629 0.5951 0.7468
## Neg Pred Value 0.9533 0.8664 0.8927 0.9113 0.9468
## Prevalence 0.2000 0.2000 0.2000 0.2000 0.2000
## Detection Rate 0.1632 0.0868 0.1145 0.1308 0.1581
## Detection Prevalence 0.2124 0.1527 0.2034 0.2198 0.2117
## Balanced Accuracy 0.8772 0.6758 0.7307 0.7714 0.8618
# SVM: error matrix of predicted vs. reference forest type on the validation
# set.
caret::confusionMatrix(
  data = svm.predict1,
  reference = as.factor(validation$class)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction ever mix north oak poak
## ever 1577 82 262 0 73
## mix 36 949 462 172 6
## north 353 517 1163 11 4
## oak 23 431 111 1394 304
## poak 11 21 2 423 1613
##
## Overall Statistics
##
## Accuracy : 0.6696
## 95% CI : (0.6603, 0.6788)
## No Information Rate : 0.2
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.587
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: ever Class: mix Class: north Class: oak Class: poak
## Sensitivity 0.7885 0.4745 0.5815 0.6970 0.8065
## Specificity 0.9479 0.9155 0.8894 0.8914 0.9429
## Pos Pred Value 0.7909 0.5840 0.5679 0.6160 0.7792
## Neg Pred Value 0.9472 0.8745 0.8947 0.9217 0.9512
## Prevalence 0.2000 0.2000 0.2000 0.2000 0.2000
## Detection Rate 0.1577 0.0949 0.1163 0.1394 0.1613
## Detection Prevalence 0.1994 0.1625 0.2048 0.2263 0.2070
## Balanced Accuracy 0.8682 0.6950 0.7354 0.7942 0.8747
#names(all) <- c("all.1","all.10","all.11","all.12","all.13","all.14","all.15","all.16","all.17","all.18","all.19","all.2","all.20","all.21","all.22","all.23","all.24","all.25","all.26","all.27","all.28","all.29","all.3","all.3","all.30","all.31","all.32","all.33","all.34","all.35","all.36","all.37","all.38","all.39","all.4","all.40","all.41","all.42","all.43","all.44","all.45","all.46","all.47","all.48", "all.49", "all.5", "all.50", "all.51", "all.52", "all.53", "all.54", "all.55", "all.56", "all.57", "all.58", "all.59", "all.6", "all.60", "all.61", "all.62", "all.63", "all.64", "all.65", "all.66", "all.67", "all.68", "all.69", "all.7", "all.70", "all.71", "all.72", "all.73", "all.74", "all.75", "all.76", "all.77", "all.78", "all.79", "all.8", "all.80", "all.81", "all.82", "all.83", "all.84", "all.85", "all.86", "all.87", "all.88", "all.89", "all.9")
# Could not get the names to apply. At first I did this to the 'train1' variable, but then realised it has to be done to the raster output.
#knn_model
#predict_test <- predict(pred, knn_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE)
#knn_raster_result <- raster("knn_class")
#masked_result1 <- mask*knn_raster_result
#tm_shape(masked_result1)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "KNN Model", title.size = 1.5)
#dt_model
#predict_test <- predict(pred, dt_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE, filename)
#dt_raster_result <- raster("dt_class")
#masked_result2 <- mask*dt_raster_result
#tm_shape(masked_result2)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "DT Model", title.size = 1.5)
#rf_model
#predict_test <- predict(pred, rf_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE)
#rf_raster_result <- raster("rf_class")
#masked_result3 <- mask*rf_raster_result
#tm_shape(masked_result3)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "RF Model", title.size = 1.5)
#svm_model
#predict_test <- predict(pred, svm_model1, type="prob", index=2, na.rm=TRUE, progress="window", overwrite=TRUE)
#svm_raster_result <- raster("svm_class")
#masked_result4 <- mask*svm_raster_result
#tm_shape(masked_result4)+
#tm_raster(style="cat", labels=c("Evergreen", "Mixed", "Northern Hardwood", "Oak", "Pine-Oak"), palette = c("forestgreen", "darkorange1", "brown1", "cadetblue", "gold3"), title="Forest Type")+
#tm_layout(legend.outside = TRUE)+
#tm_layout(title = "SVM Model", title.size = 1.5)
# Extract the fitted forest-type random forest from the caret wrapper and print
# its variable importance table (per-class scores plus the overall measures).
rf.model.final1 <- rf.model1[["finalModel"]]
importance(rf.model.final1)
## ever mix north oak poak
## all.1 2.33986423 0.94218427 -1.079451156 1.197341704 2.44103581
## all.2 -0.13251720 1.73714729 0.610240659 0.220128330 2.43610826
## all.3 2.15590088 2.55579350 -1.037808783 1.851369203 1.01468302
## all.4 1.87455152 0.57728000 0.577388866 -0.683716240 1.48398984
## all.5 2.36408461 2.15225593 -0.989842324 2.819082925 2.90579080
## all.6 3.86330484 1.46250204 -2.398904958 -0.657183407 5.60445181
## all.7 1.78265127 1.76873610 0.694103357 -0.238285407 1.34983447
## all.8 3.38669752 2.59314718 1.893846897 2.719918533 2.21305298
## all.9 3.36717326 4.65708085 1.297000438 3.623522381 0.57227969
## all.10 3.08922658 2.76226415 -0.807303511 -0.306613455 4.87986949
## all.11 0.73106458 1.14150642 0.507851625 0.169579429 3.38028960
## all.12 2.51988167 5.06225951 1.645352955 2.985402453 0.18430640
## all.13 4.61524987 3.01899782 0.087466946 4.406130857 2.88059568
## all.14 2.29696216 2.78511847 1.317147687 4.712274611 2.07962085
## all.15 3.57215291 -1.23417972 -1.378777901 2.564684197 1.67519978
## all.16 1.28693573 0.51502284 0.001738139 0.540264941 0.99731412
## all.17 -1.36880211 -2.21791682 2.087434644 -0.122024448 1.42842686
## all.18 1.65373377 1.18011431 2.359919906 0.305488127 1.09234878
## all.19 0.89665168 -0.45330273 -0.650168153 1.114893082 0.58332893
## all.20 0.73466137 -0.23380393 -0.399385571 -0.430464832 1.37699342
## all.21 2.07553367 1.00503782 1.153855976 0.182944251 2.34238783
## all.22 -0.52155328 0.17787152 -0.115738830 -0.399159362 -0.34670202
## all.23 2.86034369 5.18670916 0.903748464 2.106136602 3.34291083
## all.24 -0.91826283 0.34613776 2.848226501 -0.410089203 3.46901541
## all.25 0.77263487 0.74078870 -0.902755858 1.306086376 -1.42288306
## all.26 0.97431080 3.05857383 -0.761002576 0.772780282 1.35257295
## all.27 1.62680960 2.39478043 1.191232704 -0.572097660 0.43405201
## all.28 6.07569957 3.83432284 -1.768330290 6.117737588 2.12601818
## all.29 1.38135273 -2.24315634 0.538314754 -0.107994866 -0.42095263
## all.30 1.88124025 1.52193664 -1.186246976 0.155939814 3.62045611
## all.31 1.69808131 0.63932068 -0.154462281 -0.037816997 1.38811665
## all.32 1.77873910 0.70870613 1.247623827 0.597148435 1.34273603
## all.33 1.69321731 0.00000000 -1.233168410 -1.005037815 -0.93744041
## all.34 2.84284548 -0.01150385 1.733552299 -0.380937532 2.18076280
## all.35 1.52090888 0.64912717 -0.265045003 0.872200462 -0.06520337
## all.36 1.31399277 0.62292947 -0.615850773 1.151959666 -0.94384182
## all.37 1.47129410 1.94992484 3.036228762 5.468343355 -0.60632362
## all.38 1.18481602 2.79955885 0.912928614 1.265861395 2.25151069
## all.39 1.27394561 1.57765907 1.283753740 -0.825299202 1.07488854
## all.40 1.57446744 -1.20315890 1.469404163 2.032822087 0.57798789
## all.41 0.92962796 0.41833466 3.743968870 4.513599882 1.06328504
## all.42 1.33278518 -0.06655494 0.513557718 3.133203734 1.33330321
## all.43 0.70922373 0.37786762 -1.140924118 1.482612020 0.60511043
## all.44 1.03668676 -1.82868490 2.346098647 -0.301974917 2.22910311
## all.45 -1.00183203 2.65459068 0.484251598 -1.235433334 1.17224074
## all.46 2.45009774 1.65069837 -1.340783655 -1.096749878 0.33143045
## all.47 2.07893071 -0.95303668 0.593782859 0.411852173 1.71604341
## all.48 3.01960722 2.69405278 2.322458535 -0.610495218 4.94718911
## all.49 1.77838699 -0.33157445 -0.306469014 2.182200110 -0.05254188
## all.50 2.58885315 0.60114140 -1.103532163 -1.005037815 0.78746133
## all.51 2.23771023 0.22958235 0.525589313 0.961578275 1.83227636
## all.52 2.18415731 4.85474376 7.740344347 4.703334111 9.98102040
## all.53 2.31125344 1.09940739 -1.641405752 -0.668949634 1.47432453
## all.54 2.21389662 0.22596932 -1.571826721 0.021050951 1.01175711
## all.55 -1.86762258 1.02514233 3.909977893 0.022200334 0.65610765
## all.56 2.22582306 2.09850904 -1.705207117 -0.646943367 1.96673717
## all.57 3.02794231 1.09227222 0.133260853 0.548996282 1.32396158
## all.58 0.07437449 1.54697962 4.390225865 0.919536701 0.85540981
## all.59 0.77462752 0.23022456 0.843196354 0.548318152 2.23438861
## all.60 2.10722974 1.68541636 -1.428347147 0.906698844 -0.33146681
## all.61 -0.18694038 -0.21844506 1.290534963 1.438946844 -0.49766629
## all.62 4.15036773 0.45354973 1.110735548 -0.812467007 0.90498897
## all.63 1.00503782 0.30105710 1.005037815 -1.178597970 1.28761895
## all.64 1.87738779 -0.45930651 -1.915473589 -1.688468452 0.63074241
## all.65 1.06227822 -0.65378668 -0.190447499 2.060928957 -0.05705317
## all.66 5.91325535 -2.22602067 2.170660152 -0.138866103 3.47950110
## all.67 0.42388217 1.01422496 0.021754609 0.298327913 1.50427220
## all.68 2.65314904 -0.14378728 0.208929175 0.333455832 0.57747105
## all.69 -0.12919664 0.62841192 2.918114475 -0.212741791 -0.10066026
## all.70 1.10156174 1.51517775 -0.462095403 1.241597085 2.98318011
## all.71 11.38498675 -0.86196592 10.960867520 8.351042872 16.23463541
## all.72 1.11638927 0.60095348 0.616859344 0.231260172 -0.45712426
## all.73 -1.17439142 -0.89106582 0.904401332 -0.740204310 1.89310187
## all.74 0.62180544 -0.71136762 0.282174685 -1.782616997 2.33662641
## all.75 3.03997337 -0.34740153 -0.357905237 -0.436348062 0.28415787
## all.76 1.81948318 0.21872602 -1.478234043 -0.197246399 1.62076987
## all.77 1.99879957 -1.29865820 -1.243422993 0.823731898 -0.71791914
## all.78 0.33058280 -0.76390171 1.821966230 -0.739620414 -1.44619537
## all.79 -0.52327489 0.02840870 2.136449209 -0.540464718 -0.96405148
## all.80 -1.15565153 1.04066562 1.677866656 -0.542873058 1.29345734
## all.81 2.58631355 0.89645219 -0.890754562 2.279824411 -0.67479207
## all.82 1.89466459 -1.45351946 0.263378637 0.822577972 -2.36020307
## all.83 0.27445017 1.23552776 -0.278985199 0.693709834 -0.67233293
## all.84 1.73744597 0.54760029 -2.485342883 -0.123699953 -0.16486454
## all.85 2.06423090 -1.14165604 -1.280038843 0.009863994 0.83092169
## all.86 0.77022799 -0.20381155 1.707840167 1.694211806 0.24756283
## all.87 0.24196601 0.90004112 0.654679128 -0.415226228 2.01300796
## all.88 -0.12149536 1.44808458 0.124790066 0.568666283 -0.47575017
## all.89 -0.46026798 -0.31769355 1.471595005 1.554353337 0.83844105
## MeanDecreaseAccuracy MeanDecreaseGini
## all.1 2.38994729 4.2761145
## all.2 2.06607528 1.6730604
## all.3 2.94724112 5.2328727
## all.4 1.71422008 1.5401739
## all.5 3.21383349 5.1455515
## all.6 6.06700698 4.2146804
## all.7 2.07986261 2.2673081
## all.8 3.77918148 7.4245030
## all.9 5.46317264 8.9005648
## all.10 5.32204651 7.8947352
## all.11 3.02823599 1.7784507
## all.12 5.35893218 6.4469728
## all.13 5.34659643 15.5671847
## all.14 6.17142264 9.8243326
## all.15 4.55930544 2.5130964
## all.16 1.83042427 1.7480193
## all.17 0.64990521 1.3580779
## all.18 2.74240124 1.7769279
## all.19 1.38712385 0.8140621
## all.20 1.12852530 1.3760124
## all.21 2.68803811 0.8384222
## all.22 -0.71701078 0.8640131
## all.23 6.00722812 12.8794615
## all.24 3.29556625 4.8558756
## all.25 1.17666722 1.2030445
## all.26 2.72076516 2.5192793
## all.27 2.37139566 1.7796481
## all.28 7.91286806 26.7037708
## all.29 -0.82847360 3.1630354
## all.30 3.94697874 3.7085605
## all.31 2.21276291 3.3500351
## all.32 2.62760559 2.1869784
## all.33 1.50295848 0.5837549
## all.34 3.50969928 1.8348586
## all.35 1.68742150 1.4824160
## all.36 1.33486052 1.1754626
## all.37 5.64200218 7.4322231
## all.38 3.81551126 4.3028271
## all.39 2.08170756 1.5815296
## all.40 2.52872671 2.4248936
## all.41 4.99796971 6.0584873
## all.42 3.05851045 3.9102291
## all.43 0.82153800 3.2459309
## all.44 2.25075376 3.3298010
## all.45 1.95607612 3.9342314
## all.46 2.37140809 1.6702323
## all.47 2.40705123 1.8930974
## all.48 5.09522253 8.1262895
## all.49 1.97506187 1.5153295
## all.50 2.73843646 1.6638517
## all.51 2.78761348 3.6841138
## all.52 10.90178179 30.4118956
## all.53 2.28531517 1.7474956
## all.54 2.11324312 2.4801089
## all.55 3.27588279 3.2756865
## all.56 2.44608003 4.3168605
## all.57 3.20680217 4.3615595
## all.58 4.40032937 4.1846086
## all.59 2.20004745 2.1231182
## all.60 2.13488083 1.1637343
## all.61 0.86593528 0.9951390
## all.62 4.25369346 2.3945606
## all.63 0.85135758 0.8643895
## all.64 0.99235474 0.8053013
## all.65 1.63052035 2.4443377
## all.66 5.92494659 7.1440673
## all.67 1.70681326 1.1019673
## all.68 2.82227665 1.7042117
## all.69 2.48857226 3.3968484
## all.70 3.48776818 5.6936890
## all.71 16.94356778 53.5947647
## all.72 1.08865809 1.4953152
## all.73 0.62333202 2.6405992
## all.74 1.22523816 1.0394304
## all.75 1.54915973 3.1545937
## all.76 1.46493519 3.3002767
## all.77 0.91716056 2.2429708
## all.78 -0.12282588 2.0063177
## all.79 0.08884405 2.6834238
## all.80 1.55806010 2.3672658
## all.81 1.93273963 2.4384450
## all.82 -0.38872175 2.8527733
## all.83 1.19902145 4.1841188
## all.84 0.16155058 1.8710386
## all.85 0.66516248 3.0383082
## all.86 1.97647580 2.8645847
## all.87 1.74805599 4.1030804
## all.88 0.85683886 3.4110566
## all.89 1.25059838 3.5942316
Questions for part 2.
Q6. Which algorithm yielded the highest overall accuracy?
A: SVM had the highest overall accuracy.
Q7. Which algorithm yielded the best Kappa statistic?
A: SVM had the highest kappa value.
Q8. Which forest types proved most difficult to map?
A: In my results, the mixed forest type was the most difficult to map; it had the lowest sensitivity in all four models.
Q9. Based on the error matrices, what were the primary sources of confusion in this classification? Or, which forest types were most confused?
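A: All of the forest types were confused to some degree, but overall the mixed forest type was the hardest to map in this lab. Mixed was most often confused with northern hardwood and oak; oak and pine-oak were also frequently confused with each other, as were evergreen and northern hardwood.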
Q10. What variables were found to be most important based on the RF importance measures?
A: Based on the RF importance measures, the most important variable was "all.71" (highest MeanDecreaseAccuracy and MeanDecreaseGini), followed by "all.52" and "all.28".