Conclusion: The class imbalance can be handled at the splitting stage by using stratified random sampling, so we can still split the data into training and test sets.
Note: This fulfills the requirement of exercise 12.2A.
library(caret)  # modeling framework; also provides the oil data
library(e1071)  # skewness()
library(pROC)   # roc() and auc()
data(oil)
summary(fattyAcids)
## Palmitic Stearic Oleic Linoleic
## Min. : 4.50 Min. :1.700 Min. :22.80 Min. : 7.90
## 1st Qu.: 6.20 1st Qu.:3.475 1st Qu.:26.30 1st Qu.:43.10
## Median : 9.85 Median :4.200 Median :30.70 Median :50.80
## Mean : 9.04 Mean :4.200 Mean :36.73 Mean :46.49
## 3rd Qu.:11.12 3rd Qu.:5.000 3rd Qu.:38.62 3rd Qu.:58.08
## Max. :14.90 Max. :6.700 Max. :76.70 Max. :66.10
## Linolenic Eicosanoic Eicosenoic
## Min. :0.100 Min. :0.100 Min. :0.1000
## 1st Qu.:0.375 1st Qu.:0.100 1st Qu.:0.1000
## Median :0.800 Median :0.400 Median :0.1000
## Mean :2.272 Mean :0.399 Mean :0.3115
## 3rd Qu.:2.650 3rd Qu.:0.400 3rd Qu.:0.3000
## Max. :9.500 Max. :2.800 Max. :1.8000
str(fattyAcids)
## 'data.frame': 96 obs. of 7 variables:
## $ Palmitic : num 9.7 11.1 11.5 10 12.2 9.8 10.5 10.5 11.5 10 ...
## $ Stearic : num 5.2 5 5.2 4.8 5 4.2 5 5 5.2 4.8 ...
## $ Oleic : num 31 32.9 35 30.4 31.1 43 31.8 31.8 35 30.4 ...
## $ Linoleic : num 52.7 49.8 47.2 53.5 50.5 39.2 51.3 51.3 47.2 53.5 ...
## $ Linolenic : num 0.4 0.3 0.2 0.3 0.3 2.4 0.4 0.4 0.2 0.3 ...
## $ Eicosanoic: num 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 ...
## $ Eicosenoic: num 0.1 0.1 0.1 0.1 0.1 0.5 0.1 0.1 0.1 0.1 ...
table(oilType)
## oilType
## A B C D E F G
## 37 26 3 7 11 10 2
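Given this imbalance, stratified sampling via createDataPartition roughly preserves the class proportions in the training split. A minimal sketch (the same seed and call are reused in the setup section below):
set.seed(476)
idx <- createDataPartition(oilType, p = .8, list = FALSE)
# Class proportions: full data vs. the stratified training split
round(cbind(full  = prop.table(table(oilType)),
            train = prop.table(table(oilType[idx]))), 3)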
# Checking for degenerate (near-zero variance) predictors
nearZeroVar(fattyAcids, saveMetrics = T)
## freqRatio percentUnique zeroVar nzv
## Palmitic 1.333333 46.87500 FALSE FALSE
## Stearic 1.500000 42.70833 FALSE FALSE
## Oleic 1.000000 78.12500 FALSE FALSE
## Linoleic 1.500000 84.37500 FALSE FALSE
## Linolenic 1.000000 37.50000 FALSE FALSE
## Eicosanoic 1.033333 12.50000 FALSE FALSE
## Eicosenoic 3.176471 14.58333 FALSE FALSE
# Checking for skewness
skw <- apply(fattyAcids, 2, skewness)
skw
## Palmitic Stearic Oleic Linoleic Linolenic Eicosanoic Eicosenoic
## -0.1504192 -0.2796900 1.3404599 -1.0493839 1.2924149 3.2268113 2.1973736
highcorr <- findCorrelation(cor(fattyAcids), cutoff = .75)
removeCorr <- fattyAcids[,-highcorr]
#corrplot(cor(fattyAcids), method = "number", tl.cex = .35)
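As a quick check on what was removed (a small sketch using only the objects created above):
colnames(fattyAcids)[highcorr]  # predictor(s) flagged at the 0.75 cutoff
ncol(removeCorr)                # predictors retained for modeling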
par(mfrow = c(3, 3))
hist(fattyAcids$Palmitic)
hist(fattyAcids$Stearic)
hist(fattyAcids$Oleic)
hist(fattyAcids$Linoleic)
hist(fattyAcids$Linolenic)
hist(fattyAcids$Eicosanoic)
hist(fattyAcids$Eicosenoic)
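Given the right skew in Oleic, Linolenic, Eicosanoic, and Eicosenoic, one optional remedy is a Yeo-Johnson transformation. A sketch using caret's preProcess (the analysis below proceeds on the untransformed values):
ppYJ <- preProcess(fattyAcids, method = "YeoJohnson")
apply(predict(ppYJ, fattyAcids), 2, skewness)  # skewness after transforming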
### Setup Split and ctrl ###
set.seed(476)
inTrain <- createDataPartition(oilType, p = .8, list = FALSE)
#ClassTrainx <- fattyAcids[inTrain, ]
#ClassTestx <- fattyAcids[-inTrain, ]
ClassTrainx <- removeCorr[inTrain, ]
ClassTestx <- removeCorr[-inTrain, ]
ClassTrainy <- oilType[inTrain]
ClassTesty <- oilType[-inTrain]
ctrl <- trainControl(summaryFunction = defaultSummary, method = "cv",
classProbs = TRUE,
savePredictions = TRUE)
########################## Logistic Regression #########################
set.seed(476)
logisticTune <- train(x = ClassTrainx, y = ClassTrainy,
method = "multinom", metric = "Accuracy",
trace = FALSE,  # suppress multinom's per-fold iteration log
trControl = ctrl)
logisticTune
## Penalized Multinomial Regression
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## decay Accuracy Kappa
## 0e+00 0.9513889 0.9354931
## 1e-04 0.9763889 0.9689162
## 1e-01 0.9246032 0.8980935
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was decay = 1e-04.
### Save the test set results in a data frame
testResults <- data.frame(obs = ClassTesty,
logistic = predict(logisticTune, ClassTestx))
### Class-A probabilities on the test set from the logistic regression,
### stored in a separate vector so ClassTestx keeps only predictors
logisticProbA <- predict(logisticTune, ClassTestx, type = "prob")[, 1]
#ROC for logistic model (pROC compares the first two response levels, A vs B)
logisticROC <- roc(ClassTesty, logisticProbA)
## Setting levels: control = A, case = B
## Setting direction: controls > cases
plot(logisticROC, col=1, lty=1, lwd=2)
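To attach a number to the curve (a one-line sketch on the roc object above):
auc(logisticROC)  # area under the A-vs-B ROC curve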
#Confusion matrix of logistic model
confusionMatrix(data = predict(logisticTune, ClassTestx),
reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 1 0 0 0 0 0
## B 0 4 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9412
## 95% CI : (0.7131, 0.9985)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 7.111e-06
##
## Kappa : 0.9167
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 0.8000 NA 1.00000 1.0000 1.0000
## Specificity 0.9000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 0.8750 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 0.9231 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2353 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4706 0.2353 0 0.05882 0.1176 0.1176
## Balanced Accuracy 0.9500 0.9000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
################################## LDA ###################################
set.seed(476)
ldaTune <- train(x = ClassTrainx, y = ClassTrainy, method = "lda",
preProc = c('center', 'scale'), metric = "Accuracy",
trControl = ctrl)
ldaTune
## Linear Discriminant Analysis
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## Pre-processing: centered (6), scaled (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results:
##
## Accuracy Kappa
## 0.9339286 0.9124878
### Save the test set results in a data frame
testResults$LDA <- predict(ldaTune, ClassTestx)
#Confusion Matrix of lda model
confusionMatrix(data = predict(ldaTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###################################Partial least squares discriminant analysis ###################################
set.seed(476)
plsdaTune <- train(x = ClassTrainx, y = ClassTrainy, method = "pls",
tuneGrid = expand.grid(.ncomp = 1:5), trControl = ctrl)
plsdaTune
## Partial Least Squares
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## ncomp Accuracy Kappa
## 1 0.4676587 0.2168237
## 2 0.7720238 0.6744838
## 3 0.8849206 0.8431781
## 4 0.8960317 0.8605032
## 5 0.9228175 0.8957322
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was ncomp = 5.
### Save the test set results in a data frame
testResults$plsda <- predict(plsdaTune, ClassTestx)
#Confusion matrix of partial least squares discriminant analysis
confusionMatrix(data = predict(plsdaTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###################################Penalized Models #########################
glmnGrid <- expand.grid(.alpha = c(0, .1, .2, .4, .6, .8, 1),
.lambda = seq(.01, .2, length = 40))
set.seed(476)
glmnTune <- train(x = ClassTrainx, y = ClassTrainy, method = "glmnet",
tuneGrid = glmnGrid, metric = "Accuracy", trControl = ctrl)
glmnTune
## glmnet
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## alpha lambda Accuracy Kappa
## 0.0 0.01000000 0.9352679 0.9137585
## 0.0 0.01487179 0.9352679 0.9137585
## 0.0 0.01974359 0.9352679 0.9137585
## 0.0 0.02461538 0.9352679 0.9137585
## 0.0 0.02948718 0.9352679 0.9137585
## 0.0 0.03435897 0.9352679 0.9137585
## 0.0 0.03923077 0.9352679 0.9137585
## 0.0 0.04410256 0.9352679 0.9137585
## 0.0 0.04897436 0.9352679 0.9137585
## 0.0 0.05384615 0.9352679 0.9137585
## 0.0 0.05871795 0.9352679 0.9137585
## 0.0 0.06358974 0.9352679 0.9137585
## 0.0 0.06846154 0.9352679 0.9137585
## 0.0 0.07333333 0.9352679 0.9137585
## 0.0 0.07820513 0.9352679 0.9137585
## 0.0 0.08307692 0.9352679 0.9137585
## 0.0 0.08794872 0.9352679 0.9137585
## 0.0 0.09282051 0.9352679 0.9137585
## 0.0 0.09769231 0.9352679 0.9137585
## 0.0 0.10256410 0.9352679 0.9137585
## 0.0 0.10743590 0.9352679 0.9137585
## 0.0 0.11230769 0.9352679 0.9137585
## 0.0 0.11717949 0.9352679 0.9137585
## 0.0 0.12205128 0.9352679 0.9137585
## 0.0 0.12692308 0.9196429 0.8920194
## 0.0 0.13179487 0.9196429 0.8920194
## 0.0 0.13666667 0.9196429 0.8920194
## 0.0 0.14153846 0.9196429 0.8920194
## 0.0 0.14641026 0.9196429 0.8920194
## 0.0 0.15128205 0.9196429 0.8920194
## 0.0 0.15615385 0.9017857 0.8639891
## 0.0 0.16102564 0.9017857 0.8639891
## 0.0 0.16589744 0.9017857 0.8639891
## 0.0 0.17076923 0.9017857 0.8639891
## 0.0 0.17564103 0.9017857 0.8639891
## 0.0 0.18051282 0.8861607 0.8422499
## 0.0 0.18538462 0.8861607 0.8422499
## 0.0 0.19025641 0.8861607 0.8422499
## 0.0 0.19512821 0.8861607 0.8422499
## 0.0 0.20000000 0.8861607 0.8422499
## 0.1 0.01000000 0.9508929 0.9345918
## 0.1 0.01487179 0.9508929 0.9345918
## 0.1 0.01974359 0.9508929 0.9345918
## 0.1 0.02461538 0.9508929 0.9345918
## 0.1 0.02948718 0.9508929 0.9345918
## 0.1 0.03435897 0.9508929 0.9345918
## 0.1 0.03923077 0.9508929 0.9345918
## 0.1 0.04410256 0.9508929 0.9345918
## 0.1 0.04897436 0.9508929 0.9345918
## 0.1 0.05384615 0.9508929 0.9345918
## 0.1 0.05871795 0.9352679 0.9137585
## 0.1 0.06358974 0.9352679 0.9137585
## 0.1 0.06846154 0.9352679 0.9137585
## 0.1 0.07333333 0.9352679 0.9137585
## 0.1 0.07820513 0.9352679 0.9137585
## 0.1 0.08307692 0.9352679 0.9137585
## 0.1 0.08794872 0.9352679 0.9137585
## 0.1 0.09282051 0.9352679 0.9137585
## 0.1 0.09769231 0.9196429 0.8920194
## 0.1 0.10256410 0.9196429 0.8920194
## 0.1 0.10743590 0.9196429 0.8920194
## 0.1 0.11230769 0.9196429 0.8920194
## 0.1 0.11717949 0.9017857 0.8639891
## 0.1 0.12205128 0.9017857 0.8639891
## 0.1 0.12692308 0.9017857 0.8639891
## 0.1 0.13179487 0.9017857 0.8639891
## 0.1 0.13666667 0.9017857 0.8639891
## 0.1 0.14153846 0.9017857 0.8639891
## 0.1 0.14641026 0.9017857 0.8639891
## 0.1 0.15128205 0.9017857 0.8639891
## 0.1 0.15615385 0.9017857 0.8639891
## 0.1 0.16102564 0.8861607 0.8422499
## 0.1 0.16589744 0.8861607 0.8422499
## 0.1 0.17076923 0.8861607 0.8422499
## 0.1 0.17564103 0.8705357 0.8201049
## 0.1 0.18051282 0.8705357 0.8201049
## 0.1 0.18538462 0.8705357 0.8201049
## 0.1 0.19025641 0.8705357 0.8201049
## 0.1 0.19512821 0.8549107 0.7963895
## 0.1 0.20000000 0.8549107 0.7963895
## 0.2 0.01000000 0.9508929 0.9345918
## 0.2 0.01487179 0.9508929 0.9345918
## 0.2 0.01974359 0.9508929 0.9345918
## 0.2 0.02461538 0.9508929 0.9345918
## 0.2 0.02948718 0.9508929 0.9345918
## 0.2 0.03435897 0.9508929 0.9345918
## 0.2 0.03923077 0.9508929 0.9345918
## 0.2 0.04410256 0.9508929 0.9345918
## 0.2 0.04897436 0.9508929 0.9345918
## 0.2 0.05384615 0.9508929 0.9345918
## 0.2 0.05871795 0.9508929 0.9345918
## 0.2 0.06358974 0.9508929 0.9345918
## 0.2 0.06846154 0.9508929 0.9345918
## 0.2 0.07333333 0.9508929 0.9345918
## 0.2 0.07820513 0.9508929 0.9345918
## 0.2 0.08307692 0.9352679 0.9128527
## 0.2 0.08794872 0.9352679 0.9128527
## 0.2 0.09282051 0.9174107 0.8848224
## 0.2 0.09769231 0.9174107 0.8848224
## 0.2 0.10256410 0.9174107 0.8848224
## 0.2 0.10743590 0.9174107 0.8848224
## 0.2 0.11230769 0.9017857 0.8639891
## 0.2 0.11717949 0.9017857 0.8639891
## 0.2 0.12205128 0.9017857 0.8639891
## 0.2 0.12692308 0.9017857 0.8639891
## 0.2 0.13179487 0.8861607 0.8418440
## 0.2 0.13666667 0.8861607 0.8418440
## 0.2 0.14153846 0.8861607 0.8418440
## 0.2 0.14641026 0.8861607 0.8418440
## 0.2 0.15128205 0.8705357 0.8201049
## 0.2 0.15615385 0.8705357 0.8201049
## 0.2 0.16102564 0.8549107 0.7963895
## 0.2 0.16589744 0.8549107 0.7963895
## 0.2 0.17076923 0.8549107 0.7963895
## 0.2 0.17564103 0.8549107 0.7963895
## 0.2 0.18051282 0.8549107 0.7963895
## 0.2 0.18538462 0.8392857 0.7726741
## 0.2 0.19025641 0.8214286 0.7410270
## 0.2 0.19512821 0.8214286 0.7410270
## 0.2 0.20000000 0.7901786 0.6954783
## 0.4 0.01000000 0.9508929 0.9345918
## 0.4 0.01487179 0.9508929 0.9345918
## 0.4 0.01974359 0.9508929 0.9345918
## 0.4 0.02461538 0.9508929 0.9345918
## 0.4 0.02948718 0.9508929 0.9345918
## 0.4 0.03435897 0.9508929 0.9345918
## 0.4 0.03923077 0.9508929 0.9345918
## 0.4 0.04410256 0.9508929 0.9345918
## 0.4 0.04897436 0.9508929 0.9345918
## 0.4 0.05384615 0.9508929 0.9345918
## 0.4 0.05871795 0.9508929 0.9345918
## 0.4 0.06358974 0.9352679 0.9128527
## 0.4 0.06846154 0.9174107 0.8892416
## 0.4 0.07333333 0.9174107 0.8892416
## 0.4 0.07820513 0.9174107 0.8892416
## 0.4 0.08307692 0.9174107 0.8892416
## 0.4 0.08794872 0.9174107 0.8892416
## 0.4 0.09282051 0.9174107 0.8848224
## 0.4 0.09769231 0.9017857 0.8626774
## 0.4 0.10256410 0.9017857 0.8626774
## 0.4 0.10743590 0.9017857 0.8626774
## 0.4 0.11230769 0.9017857 0.8626774
## 0.4 0.11717949 0.9017857 0.8626774
## 0.4 0.12205128 0.8861607 0.8409382
## 0.4 0.12692308 0.8861607 0.8409382
## 0.4 0.13179487 0.8705357 0.8201049
## 0.4 0.13666667 0.8549107 0.7963895
## 0.4 0.14153846 0.8549107 0.7963895
## 0.4 0.14641026 0.8549107 0.7963895
## 0.4 0.15128205 0.7901786 0.6966429
## 0.4 0.15615385 0.7745536 0.6728334
## 0.4 0.16102564 0.7745536 0.6728334
## 0.4 0.16589744 0.7433036 0.6270532
## 0.4 0.17076923 0.7433036 0.6270532
## 0.4 0.17564103 0.7433036 0.6270532
## 0.4 0.18051282 0.7433036 0.6250770
## 0.4 0.18538462 0.7433036 0.6250770
## 0.4 0.19025641 0.7120536 0.5770486
## 0.4 0.19512821 0.6964286 0.5510746
## 0.4 0.20000000 0.6964286 0.5510746
## 0.6 0.01000000 0.9508929 0.9345918
## 0.6 0.01487179 0.9508929 0.9345918
## 0.6 0.01974359 0.9508929 0.9345918
## 0.6 0.02461538 0.9508929 0.9345918
## 0.6 0.02948718 0.9508929 0.9345918
## 0.6 0.03435897 0.9508929 0.9345918
## 0.6 0.03923077 0.9508929 0.9345918
## 0.6 0.04410256 0.9508929 0.9345918
## 0.6 0.04897436 0.9508929 0.9345918
## 0.6 0.05384615 0.9508929 0.9345918
## 0.6 0.05871795 0.9174107 0.8901474
## 0.6 0.06358974 0.9174107 0.8901474
## 0.6 0.06846154 0.9174107 0.8901474
## 0.6 0.07333333 0.9174107 0.8892416
## 0.6 0.07820513 0.9174107 0.8892416
## 0.6 0.08307692 0.9017857 0.8696498
## 0.6 0.08794872 0.9017857 0.8696498
## 0.6 0.09282051 0.9017857 0.8696498
## 0.6 0.09769231 0.9017857 0.8670966
## 0.6 0.10256410 0.9017857 0.8670966
## 0.6 0.10743590 0.9017857 0.8626774
## 0.6 0.11230769 0.8861607 0.8389620
## 0.6 0.11717949 0.8705357 0.8172228
## 0.6 0.12205128 0.8705357 0.8172228
## 0.6 0.12692308 0.8214286 0.7420975
## 0.6 0.13179487 0.8214286 0.7420975
## 0.6 0.13666667 0.7901786 0.6941745
## 0.6 0.14153846 0.7276786 0.6038648
## 0.6 0.14641026 0.7276786 0.6038648
## 0.6 0.15128205 0.7276786 0.6007641
## 0.6 0.15615385 0.7120536 0.5745736
## 0.6 0.16102564 0.7433036 0.6157132
## 0.6 0.16589744 0.7276786 0.5914003
## 0.6 0.17076923 0.6964286 0.5417109
## 0.6 0.17564103 0.6808036 0.5173980
## 0.6 0.18051282 0.6830357 0.5200267
## 0.6 0.18538462 0.6830357 0.5200267
## 0.6 0.19025641 0.6830357 0.5200267
## 0.6 0.19512821 0.6830357 0.5200267
## 0.6 0.20000000 0.6830357 0.5200267
## 0.8 0.01000000 0.9665179 0.9550000
## 0.8 0.01487179 0.9508929 0.9345918
## 0.8 0.01974359 0.9508929 0.9345918
## 0.8 0.02461538 0.9508929 0.9345918
## 0.8 0.02948718 0.9508929 0.9345918
## 0.8 0.03435897 0.9508929 0.9345918
## 0.8 0.03923077 0.9508929 0.9345918
## 0.8 0.04410256 0.9508929 0.9345918
## 0.8 0.04897436 0.9508929 0.9345918
## 0.8 0.05384615 0.9174107 0.8901474
## 0.8 0.05871795 0.9174107 0.8901474
## 0.8 0.06358974 0.9017857 0.8705556
## 0.8 0.06846154 0.9017857 0.8696498
## 0.8 0.07333333 0.9017857 0.8696498
## 0.8 0.07820513 0.9017857 0.8696498
## 0.8 0.08307692 0.9017857 0.8696498
## 0.8 0.08794872 0.9017857 0.8696498
## 0.8 0.09282051 0.9017857 0.8696498
## 0.8 0.09769231 0.9017857 0.8670966
## 0.8 0.10256410 0.9017857 0.8641952
## 0.8 0.10743590 0.9017857 0.8597760
## 0.8 0.11230769 0.8861607 0.8384994
## 0.8 0.11717949 0.8370536 0.7619453
## 0.8 0.12205128 0.8058036 0.7164908
## 0.8 0.12692308 0.7433036 0.6226019
## 0.8 0.13179487 0.7611607 0.6461682
## 0.8 0.13666667 0.7455357 0.6208452
## 0.8 0.14153846 0.7455357 0.6208452
## 0.8 0.14641026 0.7299107 0.5981399
## 0.8 0.15128205 0.7142857 0.5721659
## 0.8 0.15615385 0.7142857 0.5721659
## 0.8 0.16102564 0.6830357 0.5231275
## 0.8 0.16589744 0.6830357 0.5200267
## 0.8 0.17076923 0.6517857 0.4714009
## 0.8 0.17564103 0.6517857 0.4714009
## 0.8 0.18051282 0.6361607 0.4464009
## 0.8 0.18538462 0.6205357 0.4209302
## 0.8 0.19025641 0.6205357 0.4209302
## 0.8 0.19512821 0.6205357 0.4209302
## 0.8 0.20000000 0.6205357 0.4209302
## 1.0 0.01000000 0.9665179 0.9550000
## 1.0 0.01487179 0.9665179 0.9550000
## 1.0 0.01974359 0.9665179 0.9550000
## 1.0 0.02461538 0.9665179 0.9550000
## 1.0 0.02948718 0.9665179 0.9550000
## 1.0 0.03435897 0.9665179 0.9550000
## 1.0 0.03923077 0.9508929 0.9345918
## 1.0 0.04410256 0.9174107 0.8901474
## 1.0 0.04897436 0.9174107 0.8901474
## 1.0 0.05384615 0.9017857 0.8705556
## 1.0 0.05871795 0.9017857 0.8705556
## 1.0 0.06358974 0.9017857 0.8705556
## 1.0 0.06846154 0.9174107 0.8901474
## 1.0 0.07333333 0.9174107 0.8892416
## 1.0 0.07820513 0.9174107 0.8892416
## 1.0 0.08307692 0.9174107 0.8892416
## 1.0 0.08794872 0.9174107 0.8892416
## 1.0 0.09282051 0.9174107 0.8892416
## 1.0 0.09769231 0.8861607 0.8442496
## 1.0 0.10256410 0.8883929 0.8465841
## 1.0 0.10743590 0.8392857 0.7660672
## 1.0 0.11230769 0.7924107 0.6961682
## 1.0 0.11717949 0.7611607 0.6461682
## 1.0 0.12205128 0.7455357 0.6208452
## 1.0 0.12692308 0.7455357 0.6208452
## 1.0 0.13179487 0.6986607 0.5536735
## 1.0 0.13666667 0.6830357 0.5314513
## 1.0 0.14153846 0.6830357 0.5314513
## 1.0 0.14641026 0.6517857 0.4839500
## 1.0 0.15128205 0.6517857 0.4839500
## 1.0 0.15615385 0.6205357 0.4331924
## 1.0 0.16102564 0.6205357 0.4299456
## 1.0 0.16589744 0.6205357 0.4299456
## 1.0 0.17076923 0.6205357 0.4268449
## 1.0 0.17564103 0.6205357 0.4268449
## 1.0 0.18051282 0.6205357 0.4225159
## 1.0 0.18538462 0.6205357 0.4225159
## 1.0 0.19025641 0.6205357 0.4247307
## 1.0 0.19512821 0.6049107 0.3992600
## 1.0 0.20000000 0.5892857 0.3753750
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were alpha = 1 and lambda = 0.03435897.
plot(glmnTune)
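The winning parameter pair can also be pulled programmatically (a one-line sketch on the caret object above):
glmnTune$bestTune  # alpha = 1, lambda ~ 0.0344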
### Save the test set results in a data frame
testResults$glmn <- predict(glmnTune, ClassTestx)
#Confusion matrix of penalized models
confusionMatrix(data = predict(glmnTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###############################Nearest Shrunken Centroids######################
set.seed(476)
nscGrid <- data.frame(.threshold = 0:25)
nscTune <- train(x = ClassTrainx, y = ClassTrainy, method = "pam",
preProc = c("center", "scale"),
tuneGrid = nscGrid,
metric = "Accuracy", trControl = ctrl)
## Warning: a class contains only 1 sample (warning repeated across resampling folds)
nscTune
## Nearest Shrunken Centroids
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## Pre-processing: centered (6), scaled (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## threshold Accuracy Kappa
## 0 0.9607143 0.9476735
## 1 0.9464286 0.9264613
## 2 0.9353175 0.9121756
## 3 0.8960317 0.8609466
## 4 0.8835317 0.8435553
## 5 0.8335317 0.7732711
## 6 0.6295635 0.4743221
## 7 0.5287698 0.3107974
## 8 0.3815476 0.0000000
## 9 0.3815476 0.0000000
## 10 0.3815476 0.0000000
## 11 0.3815476 0.0000000
## 12 0.3815476 0.0000000
## 13 0.3815476 0.0000000
## 14 0.3815476 0.0000000
## 15 0.3815476 0.0000000
## 16 0.3815476 0.0000000
## 17 0.3815476 0.0000000
## 18 0.3815476 0.0000000
## 19 0.3815476 0.0000000
## 20 0.3815476 0.0000000
## 21 0.3815476 0.0000000
## 22 0.3815476 0.0000000
## 23 0.3815476 0.0000000
## 24 0.3815476 0.0000000
## 25 0.3815476 0.0000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was threshold = 0.
plot(nscTune)
#var importance
plot(varImp(nscTune, scale =FALSE))
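Since NSC performs built-in feature selection, it is worth listing which predictors survive at the chosen threshold (a sketch; threshold = 0 should retain all six):
predictors(nscTune)  # predictors kept by the shrunken-centroid model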
### Save the test set results in a data frame
testResults$NSC <- predict(nscTune, ClassTestx)
#Confusion matrix of nearest shrunken centroids
confusionMatrix(data = predict(nscTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#########################Create the confusion matrix from the test set######################
#Confusion matrix of logistic model
set.seed(476)
confusionMatrix(data = predict(logisticTune, ClassTestx),
reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 1 0 0 0 0 0
## B 0 4 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9412
## 95% CI : (0.7131, 0.9985)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 7.111e-06
##
## Kappa : 0.9167
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 0.8000 NA 1.00000 1.0000 1.0000
## Specificity 0.9000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 0.8750 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 0.9231 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2353 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4706 0.2353 0 0.05882 0.1176 0.1176
## Balanced Accuracy 0.9500 0.9000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion Matrix of lda model
confusionMatrix(data = predict(ldaTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of partial least squares discriminant analysis
confusionMatrix(data = predict(plsdaTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of penalized models
confusionMatrix(data = predict(glmnTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of nearest shrunken centroids
confusionMatrix(data = predict(nscTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Resamples of training data
res <- resamples(list(Logistic = logisticTune, LDA = ldaTune, PLSDA = plsdaTune,
Penalized = glmnTune, NSC = nscTune))
dotplot(res)
dotplot(res,metric="Accuracy")
Conclusion: We should optimize Kappa, since it is a more robust measure than raw accuracy when the classes are imbalanced, as we saw at the outset with this data set.
Note: This fulfills the requirement of exercise 12.2B.
# No code needed
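Although no additional modeling code is required, a toy example (not part of the original analysis) shows why Kappa is the safer metric: a classifier that always predicts the majority class earns high accuracy on imbalanced data while Kappa collapses to zero.
obs  <- factor(rep(c("A", "G"), times = c(37, 2)))   # mimic the A/G imbalance
pred <- factor(rep("A", 39), levels = levels(obs))   # always predict the majority class
confusionMatrix(pred, obs)$overall[c("Accuracy", "Kappa")]
# Accuracy is about 0.95, yet Kappa is 0 -- accuracy alone hides the failure on G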
Conclusion1: Based on the resampled accuracy and Kappa results, the best-performing model was the logistic (penalized multinomial) regression.
Conclusion2: Its test-set confusion matrix shows that oil types A, D, E, and F are predicted well; the only error is one type B sample misclassified as A. Types C and G did not appear in the test set, so their statistics are undefined (NA).
Note: This fulfills the requirement of exercise 12.2C.
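One compact way to compare every fitted model on the held-out samples (a sketch built on the testResults frame assembled above; postResample is caret's accuracy/Kappa helper for factor predictions):
# Test-set accuracy and Kappa, one column per model
sapply(testResults[, names(testResults) != "obs"],
       function(pred) postResample(pred, testResults$obs))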
#Confusion matrix of logistic model
set.seed(476)
confusionMatrix(data = predict(logisticTune, ClassTestx),
reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 1 0 0 0 0 0
## B 0 4 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9412
## 95% CI : (0.7131, 0.9985)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 7.111e-06
##
## Kappa : 0.9167
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 0.8000 NA 1.00000 1.0000 1.0000
## Specificity 0.9000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 0.8750 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 0.9231 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2353 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4706 0.2353 0 0.05882 0.1176 0.1176
## Balanced Accuracy 0.9500 0.9000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
Conclusion1: Based on the accuracy results, the best of the chapter 13 models is KNN (QDA, RDA, and SVM failed to run with errors that could not be resolved).
Conclusion2: Comparing KNN to the logistic regression, the logistic regression came out ahead.
Conclusion3: Since a linear model outperformed KNN, the class boundaries in this data appear to be more nearly linear than nonlinear.
Note: This fulfills the requirement of exercise 13.2A.
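The comparison can be made explicit with resamples once the chapter 13 models below have been fit (a sketch mirroring the dotplot used for the chapter 12 models):
res13 <- resamples(list(Logistic = logisticTune, MDA = MDATune, NB = NBTune,
                        KNN = KNNTune, NN = NNTune, FDA = FDATune))
dotplot(res13, metric = "Accuracy")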
################################Quadratic discriminant analysis################
# Model not working: errored when run (see the note after this block)
#set.seed(476)
#QDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "qda",
# metric = "Accuracy",
# trControl = ctrl)
#QDATune
#plot(QDATune)
#var importance
#plot(varImp(QDATune, scale =FALSE))
### Save the test set results in a data frame
#testResults$QDA <- predict(QDATune, ClassTestx)
#Confusion matrix of QDA model
#confusionMatrix(data = predict(QDATune, ClassTestx), reference = ClassTesty)
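A plausible explanation for the QDA failure (an assumption, since the original error messages were not preserved): QDA estimates a separate covariance matrix for every class, which requires more observations per class than predictors. A quick count makes the problem visible:
table(ClassTrainy)  # classes C and G have fewer training samples than the 6 predictors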
###########################regularized discriminant analysis###################
#Model not working: errored when run, likely for the same small-class reason as QDA
#set.seed(476)
#RDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "rda",
# preProc = c('center', 'scale'), metric = "Accuracy",
# trControl = ctrl)
#RDATune
#plot(RDATune)
#var importance
#plot(varImp(RDATune, scale =FALSE))
### Save the test set results in a data frame
#testResults$RDA <- predict(RDATune, ClassTestx)
#Confusion matrix of RDA model
#confusionMatrix(data = predict(RDATune, ClassTestx), reference = ClassTesty)
###################################mixture discriminant analysis###############
set.seed(476)
MDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "mda",
tuneGrid = expand.grid(.subclasses = 1:6),
metric = "Accuracy", trControl = ctrl)
MDATune
## Mixture Discriminant Analysis
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## subclasses Accuracy Kappa
## 1 0.9339286 0.9124878
## 2 0.9339286 0.9124878
## 3 0.9339286 0.9124878
## 4 0.9589286 0.9458356
## 5 0.9464286 0.9288144
## 6 0.9589286 0.9451409
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was subclasses = 4.
plot(MDATune)
#var importance
plot(varImp(MDATune, scale =FALSE))
### Save the test set results in a data frame
testResults$MDA <- predict(MDATune, ClassTestx)
#Confusion matrix of MDA model
confusionMatrix(data = predict(MDATune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###############################Naive Bayes###################################
set.seed(476)
NBTune <- train(x = ClassTrainx, y = ClassTrainy, method = "nb",
preProc = c('center', 'scale'), metric = "Accuracy",
trControl = ctrl)
NBTune
## Naive Bayes
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## Pre-processing: centered (6), scaled (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9375 0.9187074
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
plot(NBTune)
#var importance
plot(varImp(NBTune, scale =FALSE))
### Save the test set results in a data frame
testResults$NB <- predict(NBTune, ClassTestx)
#Confusion matrix of NB model
confusionMatrix(data = predict(NBTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###############################K-nearest neighbors###########################
set.seed(476)
KNNTune <- train(x = ClassTrainx, y = ClassTrainy, method = "knn",
metric = "Accuracy", preProc = c("center", "scale"),
tuneGrid = data.frame(.k = seq(1,400, by=10)),
trControl = ctrl)
KNNTune
## k-Nearest Neighbors
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## Pre-processing: centered (6), scaled (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## k Accuracy Kappa
## 1 0.9625000 0.948426877
## 11 0.8567460 0.802654262
## 21 0.6845238 0.534158299
## 31 0.6484127 0.477969529
## 41 0.4083333 0.069975835
## 51 0.3815476 0.020679970
## 61 0.3815476 0.008326325
## 71 0.3815476 0.000000000
## 81 0.3815476 0.000000000
## 91 0.3815476 0.000000000
## 101 0.3815476 0.000000000
## 111 0.3815476 0.000000000
## 121 0.3815476 0.000000000
## 131 0.3815476 0.000000000
## 141 0.3815476 0.000000000
## 151 0.3815476 0.000000000
## 161 0.3815476 0.000000000
## 171 0.3815476 0.000000000
## 181 0.3815476 0.000000000
## 191 0.3815476 0.000000000
## 201 0.3815476 0.000000000
## 211 0.3815476 0.000000000
## 221 0.3815476 0.000000000
## 231 0.3815476 0.000000000
## 241 0.3815476 0.000000000
## 251 0.3815476 0.000000000
## 261 0.3815476 0.000000000
## 271 0.3815476 0.000000000
## 281 0.3815476 0.000000000
## 291 0.3815476 0.000000000
## 301 0.3815476 0.000000000
## 311 0.3815476 0.000000000
## 321 0.3815476 0.000000000
## 331 0.3815476 0.000000000
## 341 0.3815476 0.000000000
## 351 0.3815476 0.000000000
## 361 0.3815476 0.000000000
## 371 0.3815476 0.000000000
## 381 0.3815476 0.000000000
## 391 0.3815476 0.000000000
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 1.
plot(KNNTune)
#var importance
plot(varImp(KNNTune, scale =FALSE))
### Save the test set results in a data frame
testResults$KNN <- predict(KNNTune, ClassTestx)
#Confusion matrix of KNN model
confusionMatrix(data = predict(KNNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###############################Neural networks##############################
set.seed(476)
nnetGrid <- expand.grid(.size = 1:10, .decay = c(0, .1, 1, 2))
maxSize <- max(nnetGrid$.size)
# 200 comfortably bounds the largest network: (6+1)*10 + (10+1)*7 = 147 weights
numWts <- 200
NNTune <- train(x = ClassTrainx, y = ClassTrainy, method = "nnet",
metric = "Accuracy",
preProc = c("center", "scale", "spatialSign"),
tuneGrid = nnetGrid, trace = FALSE, maxit = 2000,
MaxNWts = numWts, trControl = ctrl)
NNTune
## Neural Network
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## Pre-processing: centered (6), scaled (6), spatial sign transformation (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 1 0.0 0.7309524 0.6355807
## 1 0.1 0.5823413 0.3897512
## 1 1.0 0.6234127 0.4347511
## 1 2.0 0.3815476 0.0000000
## 2 0.0 0.8099206 0.7448108
## 2 0.1 0.8349206 0.7788992
## 2 1.0 0.6091270 0.4129366
## 2 2.0 0.3815476 0.0000000
## 3 0.0 0.9121032 0.8819571
## 3 0.1 0.8835317 0.8415722
## 3 1.0 0.6091270 0.4173741
## 3 2.0 0.4837302 0.1944929
## 4 0.0 0.9035714 0.8670496
## 4 0.1 0.8853175 0.8439308
## 4 1.0 0.6216270 0.4356337
## 4 2.0 0.4837302 0.1944929
## 5 0.0 0.9277778 0.9090930
## 5 0.1 0.8978175 0.8611857
## 5 1.0 0.6091270 0.4154173
## 5 2.0 0.5591270 0.3209333
## 6 0.0 0.9496032 0.9347094
## 6 0.1 0.9103175 0.8787109
## 6 1.0 0.6091270 0.4185862
## 6 2.0 0.5591270 0.3209333
## 7 0.0 0.9250000 0.9023620
## 7 0.1 0.9103175 0.8787109
## 7 1.0 0.6327381 0.4528627
## 7 2.0 0.5716270 0.3418393
## 8 0.0 0.9255952 0.9015627
## 8 0.1 0.9103175 0.8787109
## 8 1.0 0.6327381 0.4528627
## 8 2.0 0.5716270 0.3418393
## 9 0.0 0.9371032 0.9183403
## 9 0.1 0.8960317 0.8608731
## 9 1.0 0.6452381 0.4744300
## 9 2.0 0.5841270 0.3622158
## 10 0.0 0.9371032 0.9191841
## 10 0.1 0.9103175 0.8787109
## 10 1.0 0.6438492 0.4704715
## 10 2.0 0.5841270 0.3622158
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 6 and decay = 0.
plot(NNTune)
#var importance
#plot(varImp(NNTune, scale =T))
### Save the test set results in a data frame
testResults$NN <- predict(NNTune, ClassTestx)
#Confusion matrix of NN model
confusionMatrix(data = predict(NNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###############################Flexible discriminant analysis#################
set.seed(476)
FDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "fda",
preProc = c('center', 'scale'),
metric = "Accuracy", trControl = ctrl)
FDATune
## Flexible Discriminant Analysis
##
## 79 samples
## 6 predictor
## 7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G'
##
## Pre-processing: centered (6), scaled (6)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ...
## Resampling results across tuning parameters:
##
## nprune Accuracy Kappa
## 2 0.5055556 0.2682867
## 8 0.9482143 0.9313469
## 14 0.9482143 0.9313469
##
## Tuning parameter 'degree' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 1 and nprune = 8.
plot(FDATune)
#var importance
plot(varImp(FDATune, scale =FALSE))
### Save the test set results in a data frame
testResults$FDA <- predict(FDATune, ClassTestx)
#Confusion matrix of FDA model
confusionMatrix(data = predict(FDATune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
###############################Support Vector Machines#######################
# This model errored when run, so it is left commented out; a corrected sketch follows this block.
#set.seed(476)
#SVMGrid <- expand.grid(.size = 1:10, .decay = c(0, .1, 1, 2))
#maxSize <- max(SVMGrid$.size)
#numWts <-200
#sigmaRangeReduced <- sigest(as.matrix(removeCorr))
#svmRGridReduced <- expand.grid(.sigma = sigmaRangeReduced[1],
# .C = 2^(seq(-4, 6)))
#SVMTune <- train(x = ClassTrainx, y = ClassTrainy, method = "svmRadial",
# metric = "Accuracy", preProc = c("center", "scale"),
# tuneGrid = svmRGridReduced, MaxNWts = 200,
# fit = FALSE,
# trControl = ctrl)
#SVMTune
#plot(SVMTune)
#var importance
#plot(varImp(SVMTune, scale =FALSE))
### Save the test set results in a data frame
#testResults$SVM <- predict(SVMTune, ClassTestx)
#Confusion matrix of SVM model
#confusionMatrix(data = predict(SVMTune, ClassTestx), reference = ClassTesty)
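Two plausible causes of the failure above, offered as assumptions rather than a confirmed diagnosis: the .size/.decay grid and the MaxNWts argument are neural network settings that do not apply to svmRadial, and classProbs = TRUE makes caret fit kernlab's internal probability model, which can break when a class has only one or two training samples (class G here). A minimal sketch that keeps only the SVM-relevant pieces and trains without class probabilities:
# Control object without class probabilities (hypothetical workaround).
ctrlNoProb <- trainControl(method = "cv", summaryFunction = defaultSummary,
                           savePredictions = TRUE)
# Estimate sigma from the training predictors only, to avoid test-set leakage.
set.seed(476)
sigmaRangeReduced <- sigest(as.matrix(ClassTrainx))
svmRGridReduced <- expand.grid(.sigma = sigmaRangeReduced[1],
                               .C = 2^(seq(-4, 6)))
SVMTune <- train(x = ClassTrainx, y = ClassTrainy, method = "svmRadial",
                 metric = "Accuracy", preProc = c("center", "scale"),
                 tuneGrid = svmRGridReduced, trControl = ctrlNoProb)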
#########################Create the confusion matrix from the test set######################
#Confusion matrix of QDA
#confusionMatrix(data = predict(QDATune, ClassTestx), reference = ClassTesty)
#Confusion Matrix of RDA
#confusionMatrix(data = predict(RDATune, ClassTestx), reference = ClassTesty)
#Confusion matrix of MDA
confusionMatrix(data = predict(MDATune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of NB
confusionMatrix(data = predict(NBTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of KNN
confusionMatrix(data = predict(KNNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of NN
confusionMatrix(data = predict(NNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of FDA
confusionMatrix(data = predict(FDATune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
#Confusion matrix of SVM
#confusionMatrix(data = predict(SVMTune, ClassTestx), reference = ClassTesty)
#Resamples of training data
res <- resamples(list(MDA = MDATune, NB = NBTune,
                      KNN = KNNTune, NN = NNTune, FDA = FDATune))
dotplot(res)
res1 <- resamples(list(Logistic = logisticTune, LDA = ldaTune, PLSDA = plsdaTune,
                       Penalized = glmnTune, NSC = nscTune,
                       MDA = MDATune, NB = NBTune, KNN = KNNTune,
                       NN = NNTune, FDA = FDATune))
dotplot(res1)
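The resampling dotplots compare cross-validated accuracy across models; the saved test-set predictions can be summarized the same way. A minimal sketch, assuming testResults was initialized earlier with the observed classes in a column named obs (that column name is an assumption about code not shown in this section):
# Test-set Accuracy for each model's saved prediction column.
predCols <- setdiff(names(testResults), "obs")  # "obs" column is assumed
sapply(testResults[predCols],
       function(p) postResample(pred = p, obs = ClassTesty)["Accuracy"])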
Conclusion: Based on the confusion matrices, every tuned model classifies the held-out test set perfectly (accuracy = 1, Kappa = 1). Note, however, that classes C and G have zero prevalence in the test split, so the models' ability to predict those two rare types is never actually tested; see the cross-validation check at the end of this section.
Note: This fulfills the requirement of exercise 13.2B.
#Confusion matrix of KNN model (prediction is deterministic, so no seed is required)
confusionMatrix(data = predict(KNNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E F G
## A 7 0 0 0 0 0 0
## B 0 5 0 0 0 0 0
## C 0 0 0 0 0 0 0
## D 0 0 0 1 0 0 0
## E 0 0 0 0 2 0 0
## F 0 0 0 0 0 2 0
## G 0 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8049, 1)
## No Information Rate : 0.4118
## P-Value [Acc > NIR] : 2.812e-07
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Specificity 1.0000 1.0000 1 1.00000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Rate 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Detection Prevalence 0.4118 0.2941 0 0.05882 0.1176 0.1176
## Balanced Accuracy 1.0000 1.0000 NA 1.00000 1.0000 1.0000
## Class: G
## Sensitivity NA
## Specificity 1
## Pos Pred Value NA
## Neg Pred Value NA
## Prevalence 0
## Detection Rate 0
## Detection Prevalence 0
## Balanced Accuracy NA
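Because classes C and G never reach the test split, their predictive performance is better judged from the cross-validation held-out predictions, which the savePredictions = TRUE setting in ctrl retained. A minimal sketch using the fitted KNN model above (assuming KNNTune was fit with method = "knn", whose tuning parameter is k):
# Held-out CV predictions at the best k; unlike the test split, these
# cover every class, including the rare C and G.
bestK <- KNNTune$bestTune$k
cvPreds <- subset(KNNTune$pred, k == bestK)
confusionMatrix(data = cvPreds$pred, reference = cvPreds$obs)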