Problem 12.2 - In Exercise 4.4, we described a data set which contained 96
oil samples, each from one of seven types of oil (pumpkin, sunflower,
peanut, olive, soybean, rapeseed, and corn). Gas chromatography was
performed on each sample and the percentage of each of seven fatty acids
was determined. We would like to use these data to build a model that
predicts the type of oil based on a sample's fatty acid percentages.

Exercise 12.2A: Like the hepatic injury data, these data suffer from
extreme imbalance. Given this imbalance, should the data be split into
training and test sets?

Conclusion: Yes. The imbalance can be handled at the splitting stage: a stratified split (caret's createDataPartition samples within each class) keeps the class proportions roughly equal across the training and test sets, so the data can still be split.

Note: This fulfills the requirement of exercise 12.2A.

data(oil)
summary(fattyAcids)
##     Palmitic        Stearic          Oleic          Linoleic    
##  Min.   : 4.50   Min.   :1.700   Min.   :22.80   Min.   : 7.90  
##  1st Qu.: 6.20   1st Qu.:3.475   1st Qu.:26.30   1st Qu.:43.10  
##  Median : 9.85   Median :4.200   Median :30.70   Median :50.80  
##  Mean   : 9.04   Mean   :4.200   Mean   :36.73   Mean   :46.49  
##  3rd Qu.:11.12   3rd Qu.:5.000   3rd Qu.:38.62   3rd Qu.:58.08  
##  Max.   :14.90   Max.   :6.700   Max.   :76.70   Max.   :66.10  
##    Linolenic       Eicosanoic      Eicosenoic    
##  Min.   :0.100   Min.   :0.100   Min.   :0.1000  
##  1st Qu.:0.375   1st Qu.:0.100   1st Qu.:0.1000  
##  Median :0.800   Median :0.400   Median :0.1000  
##  Mean   :2.272   Mean   :0.399   Mean   :0.3115  
##  3rd Qu.:2.650   3rd Qu.:0.400   3rd Qu.:0.3000  
##  Max.   :9.500   Max.   :2.800   Max.   :1.8000
str(fattyAcids)
## 'data.frame':    96 obs. of  7 variables:
##  $ Palmitic  : num  9.7 11.1 11.5 10 12.2 9.8 10.5 10.5 11.5 10 ...
##  $ Stearic   : num  5.2 5 5.2 4.8 5 4.2 5 5 5.2 4.8 ...
##  $ Oleic     : num  31 32.9 35 30.4 31.1 43 31.8 31.8 35 30.4 ...
##  $ Linoleic  : num  52.7 49.8 47.2 53.5 50.5 39.2 51.3 51.3 47.2 53.5 ...
##  $ Linolenic : num  0.4 0.3 0.2 0.3 0.3 2.4 0.4 0.4 0.2 0.3 ...
##  $ Eicosanoic: num  0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 0.4 ...
##  $ Eicosenoic: num  0.1 0.1 0.1 0.1 0.1 0.5 0.1 0.1 0.1 0.1 ...
table(oilType)
## oilType
##  A  B  C  D  E  F  G 
## 37 26  3  7 11 10  2
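
Given these counts, a stratified split is important. As a quick illustration (a sketch; the split actually used for modeling is created further below), createDataPartition() samples within each class and so keeps the training proportions close to the full-data proportions:

# Sketch: compare class proportions in the full data vs. a stratified split
set.seed(476)
idx <- createDataPartition(oilType, p = .8, list = FALSE)
round(rbind(full  = table(oilType) / length(oilType),
            train = table(oilType[idx]) / length(idx)), 2)
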
# Checking for degenerate (near-zero-variance) predictors
nearZeroVar(fattyAcids, saveMetrics = TRUE)
##            freqRatio percentUnique zeroVar   nzv
## Palmitic    1.333333      46.87500   FALSE FALSE
## Stearic     1.500000      42.70833   FALSE FALSE
## Oleic       1.000000      78.12500   FALSE FALSE
## Linoleic    1.500000      84.37500   FALSE FALSE
## Linolenic   1.000000      37.50000   FALSE FALSE
## Eicosanoic  1.033333      12.50000   FALSE FALSE
## Eicosenoic  3.176471      14.58333   FALSE FALSE
# Sample skewness of each predictor (skewness() is from e1071)
skw <- apply(fattyAcids, 2, skewness)
skw
##   Palmitic    Stearic      Oleic   Linoleic  Linolenic Eicosanoic Eicosenoic 
## -0.1504192 -0.2796900  1.3404599 -1.0493839  1.2924149  3.2268113  2.1973736
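
The Eicosanoic and Eicosenoic percentages are heavily right-skewed. If that were a concern, a Yeo-Johnson transformation could be estimated with caret's preProcess (a sketch for illustration only; it is not applied to the models below):

# Sketch: estimate Yeo-Johnson transformations and re-check skewness
yjTrans <- preProcess(fattyAcids, method = "YeoJohnson")
apply(predict(yjTrans, fattyAcids), 2, skewness)
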
# Flag predictors with pairwise correlations above 0.75 and drop them
highcorr <- findCorrelation(cor(fattyAcids), cutoff = .75)
removeCorr <- fattyAcids[, -highcorr]
# corrplot(cor(fattyAcids), method = "number", tl.cex = .35)
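
For reference, the predictor(s) dropped by the correlation filter can be listed by name:

names(fattyAcids)[highcorr]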


par(mfrow = c(3, 3))
hist(fattyAcids$Palmitic)
hist(fattyAcids$Stearic)
hist(fattyAcids$Oleic)
hist(fattyAcids$Linoleic)
hist(fattyAcids$Linolenic)
hist(fattyAcids$Eicosanoic)
hist(fattyAcids$Eicosenoic)

### Setup split and resampling control ###
set.seed(476)
# createDataPartition() samples within each class (stratified split)
inTrain <- createDataPartition(oilType, p = .8, list = FALSE)

# Alternative: keep all seven predictors instead of the filtered set
# ClassTrainx <- fattyAcids[inTrain, ]
# ClassTestx  <- fattyAcids[-inTrain, ]

ClassTrainx <- removeCorr[inTrain, ]
ClassTestx  <- removeCorr[-inTrain, ]

ClassTrainy <- oilType[inTrain]
ClassTesty  <- oilType[-inTrain]

ctrl <- trainControl(summaryFunction = defaultSummary, method = "cv", 
                     classProbs = TRUE,
                     savePredictions = TRUE)
########################## Logistic Regression #########################
set.seed(476)
logisticTune <- train(x = ClassTrainx, y = ClassTrainy,
                      method = "multinom", metric = "Accuracy",
                      trace = FALSE,  # suppress multinom's iteration log
                      trControl = ctrl)
logisticTune 
## Penalized Multinomial Regression 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   decay  Accuracy   Kappa    
##   0e+00  0.9513889  0.9354931
##   1e-04  0.9763889  0.9689162
##   1e-01  0.9246032  0.8980935
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was decay = 1e-04.
### Save the test set results in a data frame                 
testResults <- data.frame(obs = ClassTesty,
                          logistic = predict(logisticTune, ClassTestx))


### Class probabilities for the test set from the logistic model
logisticProbs <- predict(logisticTune, ClassTestx, type = "prob")[, 1]

# ROC for the logistic model; roc() is binary, so this uses only the
# first class's probability (a one-vs-all view of a seven-class problem)
logisticROC <- roc(ClassTesty, logisticProbs)
## Setting levels: control = A, case = B
## Setting direction: controls > cases
plot(logisticROC, col=1, lty=1, lwd=2)
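
A single one-vs-all curve understates a seven-class problem. pROC also offers a multi-class AUC that averages pairwise comparisons; the sketch below assumes a pROC version whose multiclass.roc() accepts a matrix of class probabilities:

# Sketch: multi-class AUC from the full matrix of class probabilities
allProbs <- predict(logisticTune, ClassTestx, type = "prob")
multiclass.roc(ClassTesty, allProbs)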

#Confusion matrix of logistic model
confusionMatrix(data = predict(logisticTune, ClassTestx), 
                reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 1 0 0 0 0 0
##          B 0 4 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9412          
##                  95% CI : (0.7131, 0.9985)
##     No Information Rate : 0.4118          
##     P-Value [Acc > NIR] : 7.111e-06       
##                                           
##                   Kappa : 0.9167          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   0.8000       NA  1.00000   1.0000   1.0000
## Specificity            0.9000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         0.8750   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   0.9231       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2353        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4706   0.2353        0  0.05882   0.1176   0.1176
## Balanced Accuracy      0.9500   0.9000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
################################## LDA ###################################
set.seed(476)
ldaTune <- train(x = ClassTrainx, y = ClassTrainy, method = "lda", 
                  preProc = c('center', 'scale'), metric = "Accuracy", 
                  trControl = ctrl)
ldaTune
## Linear Discriminant Analysis 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results:
## 
##   Accuracy   Kappa    
##   0.9339286  0.9124878
### Save the test set results in a data frame  
testResults$LDA <- predict(ldaTune, ClassTestx)

#Confusion Matrix of lda model
confusionMatrix(data = predict(ldaTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
######################## Partial Least Squares Discriminant Analysis ########################
set.seed(476)
plsdaTune <- train(x = ClassTrainx, y = ClassTrainy, method = "pls", 
                   tuneGrid = expand.grid(.ncomp = 1:5), trControl = ctrl)

plsdaTune
## Partial Least Squares 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   ncomp  Accuracy   Kappa    
##   1      0.4676587  0.2168237
##   2      0.7720238  0.6744838
##   3      0.8849206  0.8431781
##   4      0.8960317  0.8605032
##   5      0.9228175  0.8957322
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was ncomp = 5.
### Save the test set results in a data frame  
testResults$plsda <- predict(plsdaTune, ClassTestx)

#Confusion matrix of partial least squares discriminant analysis
confusionMatrix(data = predict(plsdaTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
######################## Penalized Models ########################
glmnGrid <- expand.grid(.alpha = c(0, .1, .2, .4, .6, .8, 1), 
                        .lambda = seq(.01, .2, length = 40))
set.seed(476)
glmnTune <- train(x = ClassTrainx, y = ClassTrainy, method = "glmnet", 
                  tuneGrid = glmnGrid, metric = "Accuracy", trControl = ctrl)
glmnTune
## glmnet 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   alpha  lambda      Accuracy   Kappa    
##   0.0    0.01000000  0.9352679  0.9137585
##   0.0    0.01487179  0.9352679  0.9137585
##   0.0    0.01974359  0.9352679  0.9137585
##   0.0    0.02461538  0.9352679  0.9137585
##   0.0    0.02948718  0.9352679  0.9137585
##   0.0    0.03435897  0.9352679  0.9137585
##   0.0    0.03923077  0.9352679  0.9137585
##   0.0    0.04410256  0.9352679  0.9137585
##   0.0    0.04897436  0.9352679  0.9137585
##   0.0    0.05384615  0.9352679  0.9137585
##   0.0    0.05871795  0.9352679  0.9137585
##   0.0    0.06358974  0.9352679  0.9137585
##   0.0    0.06846154  0.9352679  0.9137585
##   0.0    0.07333333  0.9352679  0.9137585
##   0.0    0.07820513  0.9352679  0.9137585
##   0.0    0.08307692  0.9352679  0.9137585
##   0.0    0.08794872  0.9352679  0.9137585
##   0.0    0.09282051  0.9352679  0.9137585
##   0.0    0.09769231  0.9352679  0.9137585
##   0.0    0.10256410  0.9352679  0.9137585
##   0.0    0.10743590  0.9352679  0.9137585
##   0.0    0.11230769  0.9352679  0.9137585
##   0.0    0.11717949  0.9352679  0.9137585
##   0.0    0.12205128  0.9352679  0.9137585
##   0.0    0.12692308  0.9196429  0.8920194
##   0.0    0.13179487  0.9196429  0.8920194
##   0.0    0.13666667  0.9196429  0.8920194
##   0.0    0.14153846  0.9196429  0.8920194
##   0.0    0.14641026  0.9196429  0.8920194
##   0.0    0.15128205  0.9196429  0.8920194
##   0.0    0.15615385  0.9017857  0.8639891
##   0.0    0.16102564  0.9017857  0.8639891
##   0.0    0.16589744  0.9017857  0.8639891
##   0.0    0.17076923  0.9017857  0.8639891
##   0.0    0.17564103  0.9017857  0.8639891
##   0.0    0.18051282  0.8861607  0.8422499
##   0.0    0.18538462  0.8861607  0.8422499
##   0.0    0.19025641  0.8861607  0.8422499
##   0.0    0.19512821  0.8861607  0.8422499
##   0.0    0.20000000  0.8861607  0.8422499
##   0.1    0.01000000  0.9508929  0.9345918
##   0.1    0.01487179  0.9508929  0.9345918
##   0.1    0.01974359  0.9508929  0.9345918
##   0.1    0.02461538  0.9508929  0.9345918
##   0.1    0.02948718  0.9508929  0.9345918
##   0.1    0.03435897  0.9508929  0.9345918
##   0.1    0.03923077  0.9508929  0.9345918
##   0.1    0.04410256  0.9508929  0.9345918
##   0.1    0.04897436  0.9508929  0.9345918
##   0.1    0.05384615  0.9508929  0.9345918
##   0.1    0.05871795  0.9352679  0.9137585
##   0.1    0.06358974  0.9352679  0.9137585
##   0.1    0.06846154  0.9352679  0.9137585
##   0.1    0.07333333  0.9352679  0.9137585
##   0.1    0.07820513  0.9352679  0.9137585
##   0.1    0.08307692  0.9352679  0.9137585
##   0.1    0.08794872  0.9352679  0.9137585
##   0.1    0.09282051  0.9352679  0.9137585
##   0.1    0.09769231  0.9196429  0.8920194
##   0.1    0.10256410  0.9196429  0.8920194
##   0.1    0.10743590  0.9196429  0.8920194
##   0.1    0.11230769  0.9196429  0.8920194
##   0.1    0.11717949  0.9017857  0.8639891
##   0.1    0.12205128  0.9017857  0.8639891
##   0.1    0.12692308  0.9017857  0.8639891
##   0.1    0.13179487  0.9017857  0.8639891
##   0.1    0.13666667  0.9017857  0.8639891
##   0.1    0.14153846  0.9017857  0.8639891
##   0.1    0.14641026  0.9017857  0.8639891
##   0.1    0.15128205  0.9017857  0.8639891
##   0.1    0.15615385  0.9017857  0.8639891
##   0.1    0.16102564  0.8861607  0.8422499
##   0.1    0.16589744  0.8861607  0.8422499
##   0.1    0.17076923  0.8861607  0.8422499
##   0.1    0.17564103  0.8705357  0.8201049
##   0.1    0.18051282  0.8705357  0.8201049
##   0.1    0.18538462  0.8705357  0.8201049
##   0.1    0.19025641  0.8705357  0.8201049
##   0.1    0.19512821  0.8549107  0.7963895
##   0.1    0.20000000  0.8549107  0.7963895
##   0.2    0.01000000  0.9508929  0.9345918
##   0.2    0.01487179  0.9508929  0.9345918
##   0.2    0.01974359  0.9508929  0.9345918
##   0.2    0.02461538  0.9508929  0.9345918
##   0.2    0.02948718  0.9508929  0.9345918
##   0.2    0.03435897  0.9508929  0.9345918
##   0.2    0.03923077  0.9508929  0.9345918
##   0.2    0.04410256  0.9508929  0.9345918
##   0.2    0.04897436  0.9508929  0.9345918
##   0.2    0.05384615  0.9508929  0.9345918
##   0.2    0.05871795  0.9508929  0.9345918
##   0.2    0.06358974  0.9508929  0.9345918
##   0.2    0.06846154  0.9508929  0.9345918
##   0.2    0.07333333  0.9508929  0.9345918
##   0.2    0.07820513  0.9508929  0.9345918
##   0.2    0.08307692  0.9352679  0.9128527
##   0.2    0.08794872  0.9352679  0.9128527
##   0.2    0.09282051  0.9174107  0.8848224
##   0.2    0.09769231  0.9174107  0.8848224
##   0.2    0.10256410  0.9174107  0.8848224
##   0.2    0.10743590  0.9174107  0.8848224
##   0.2    0.11230769  0.9017857  0.8639891
##   0.2    0.11717949  0.9017857  0.8639891
##   0.2    0.12205128  0.9017857  0.8639891
##   0.2    0.12692308  0.9017857  0.8639891
##   0.2    0.13179487  0.8861607  0.8418440
##   0.2    0.13666667  0.8861607  0.8418440
##   0.2    0.14153846  0.8861607  0.8418440
##   0.2    0.14641026  0.8861607  0.8418440
##   0.2    0.15128205  0.8705357  0.8201049
##   0.2    0.15615385  0.8705357  0.8201049
##   0.2    0.16102564  0.8549107  0.7963895
##   0.2    0.16589744  0.8549107  0.7963895
##   0.2    0.17076923  0.8549107  0.7963895
##   0.2    0.17564103  0.8549107  0.7963895
##   0.2    0.18051282  0.8549107  0.7963895
##   0.2    0.18538462  0.8392857  0.7726741
##   0.2    0.19025641  0.8214286  0.7410270
##   0.2    0.19512821  0.8214286  0.7410270
##   0.2    0.20000000  0.7901786  0.6954783
##   0.4    0.01000000  0.9508929  0.9345918
##   0.4    0.01487179  0.9508929  0.9345918
##   0.4    0.01974359  0.9508929  0.9345918
##   0.4    0.02461538  0.9508929  0.9345918
##   0.4    0.02948718  0.9508929  0.9345918
##   0.4    0.03435897  0.9508929  0.9345918
##   0.4    0.03923077  0.9508929  0.9345918
##   0.4    0.04410256  0.9508929  0.9345918
##   0.4    0.04897436  0.9508929  0.9345918
##   0.4    0.05384615  0.9508929  0.9345918
##   0.4    0.05871795  0.9508929  0.9345918
##   0.4    0.06358974  0.9352679  0.9128527
##   0.4    0.06846154  0.9174107  0.8892416
##   0.4    0.07333333  0.9174107  0.8892416
##   0.4    0.07820513  0.9174107  0.8892416
##   0.4    0.08307692  0.9174107  0.8892416
##   0.4    0.08794872  0.9174107  0.8892416
##   0.4    0.09282051  0.9174107  0.8848224
##   0.4    0.09769231  0.9017857  0.8626774
##   0.4    0.10256410  0.9017857  0.8626774
##   0.4    0.10743590  0.9017857  0.8626774
##   0.4    0.11230769  0.9017857  0.8626774
##   0.4    0.11717949  0.9017857  0.8626774
##   0.4    0.12205128  0.8861607  0.8409382
##   0.4    0.12692308  0.8861607  0.8409382
##   0.4    0.13179487  0.8705357  0.8201049
##   0.4    0.13666667  0.8549107  0.7963895
##   0.4    0.14153846  0.8549107  0.7963895
##   0.4    0.14641026  0.8549107  0.7963895
##   0.4    0.15128205  0.7901786  0.6966429
##   0.4    0.15615385  0.7745536  0.6728334
##   0.4    0.16102564  0.7745536  0.6728334
##   0.4    0.16589744  0.7433036  0.6270532
##   0.4    0.17076923  0.7433036  0.6270532
##   0.4    0.17564103  0.7433036  0.6270532
##   0.4    0.18051282  0.7433036  0.6250770
##   0.4    0.18538462  0.7433036  0.6250770
##   0.4    0.19025641  0.7120536  0.5770486
##   0.4    0.19512821  0.6964286  0.5510746
##   0.4    0.20000000  0.6964286  0.5510746
##   0.6    0.01000000  0.9508929  0.9345918
##   0.6    0.01487179  0.9508929  0.9345918
##   0.6    0.01974359  0.9508929  0.9345918
##   0.6    0.02461538  0.9508929  0.9345918
##   0.6    0.02948718  0.9508929  0.9345918
##   0.6    0.03435897  0.9508929  0.9345918
##   0.6    0.03923077  0.9508929  0.9345918
##   0.6    0.04410256  0.9508929  0.9345918
##   0.6    0.04897436  0.9508929  0.9345918
##   0.6    0.05384615  0.9508929  0.9345918
##   0.6    0.05871795  0.9174107  0.8901474
##   0.6    0.06358974  0.9174107  0.8901474
##   0.6    0.06846154  0.9174107  0.8901474
##   0.6    0.07333333  0.9174107  0.8892416
##   0.6    0.07820513  0.9174107  0.8892416
##   0.6    0.08307692  0.9017857  0.8696498
##   0.6    0.08794872  0.9017857  0.8696498
##   0.6    0.09282051  0.9017857  0.8696498
##   0.6    0.09769231  0.9017857  0.8670966
##   0.6    0.10256410  0.9017857  0.8670966
##   0.6    0.10743590  0.9017857  0.8626774
##   0.6    0.11230769  0.8861607  0.8389620
##   0.6    0.11717949  0.8705357  0.8172228
##   0.6    0.12205128  0.8705357  0.8172228
##   0.6    0.12692308  0.8214286  0.7420975
##   0.6    0.13179487  0.8214286  0.7420975
##   0.6    0.13666667  0.7901786  0.6941745
##   0.6    0.14153846  0.7276786  0.6038648
##   0.6    0.14641026  0.7276786  0.6038648
##   0.6    0.15128205  0.7276786  0.6007641
##   0.6    0.15615385  0.7120536  0.5745736
##   0.6    0.16102564  0.7433036  0.6157132
##   0.6    0.16589744  0.7276786  0.5914003
##   0.6    0.17076923  0.6964286  0.5417109
##   0.6    0.17564103  0.6808036  0.5173980
##   0.6    0.18051282  0.6830357  0.5200267
##   0.6    0.18538462  0.6830357  0.5200267
##   0.6    0.19025641  0.6830357  0.5200267
##   0.6    0.19512821  0.6830357  0.5200267
##   0.6    0.20000000  0.6830357  0.5200267
##   0.8    0.01000000  0.9665179  0.9550000
##   0.8    0.01487179  0.9508929  0.9345918
##   0.8    0.01974359  0.9508929  0.9345918
##   0.8    0.02461538  0.9508929  0.9345918
##   0.8    0.02948718  0.9508929  0.9345918
##   0.8    0.03435897  0.9508929  0.9345918
##   0.8    0.03923077  0.9508929  0.9345918
##   0.8    0.04410256  0.9508929  0.9345918
##   0.8    0.04897436  0.9508929  0.9345918
##   0.8    0.05384615  0.9174107  0.8901474
##   0.8    0.05871795  0.9174107  0.8901474
##   0.8    0.06358974  0.9017857  0.8705556
##   0.8    0.06846154  0.9017857  0.8696498
##   0.8    0.07333333  0.9017857  0.8696498
##   0.8    0.07820513  0.9017857  0.8696498
##   0.8    0.08307692  0.9017857  0.8696498
##   0.8    0.08794872  0.9017857  0.8696498
##   0.8    0.09282051  0.9017857  0.8696498
##   0.8    0.09769231  0.9017857  0.8670966
##   0.8    0.10256410  0.9017857  0.8641952
##   0.8    0.10743590  0.9017857  0.8597760
##   0.8    0.11230769  0.8861607  0.8384994
##   0.8    0.11717949  0.8370536  0.7619453
##   0.8    0.12205128  0.8058036  0.7164908
##   0.8    0.12692308  0.7433036  0.6226019
##   0.8    0.13179487  0.7611607  0.6461682
##   0.8    0.13666667  0.7455357  0.6208452
##   0.8    0.14153846  0.7455357  0.6208452
##   0.8    0.14641026  0.7299107  0.5981399
##   0.8    0.15128205  0.7142857  0.5721659
##   0.8    0.15615385  0.7142857  0.5721659
##   0.8    0.16102564  0.6830357  0.5231275
##   0.8    0.16589744  0.6830357  0.5200267
##   0.8    0.17076923  0.6517857  0.4714009
##   0.8    0.17564103  0.6517857  0.4714009
##   0.8    0.18051282  0.6361607  0.4464009
##   0.8    0.18538462  0.6205357  0.4209302
##   0.8    0.19025641  0.6205357  0.4209302
##   0.8    0.19512821  0.6205357  0.4209302
##   0.8    0.20000000  0.6205357  0.4209302
##   1.0    0.01000000  0.9665179  0.9550000
##   1.0    0.01487179  0.9665179  0.9550000
##   1.0    0.01974359  0.9665179  0.9550000
##   1.0    0.02461538  0.9665179  0.9550000
##   1.0    0.02948718  0.9665179  0.9550000
##   1.0    0.03435897  0.9665179  0.9550000
##   1.0    0.03923077  0.9508929  0.9345918
##   1.0    0.04410256  0.9174107  0.8901474
##   1.0    0.04897436  0.9174107  0.8901474
##   1.0    0.05384615  0.9017857  0.8705556
##   1.0    0.05871795  0.9017857  0.8705556
##   1.0    0.06358974  0.9017857  0.8705556
##   1.0    0.06846154  0.9174107  0.8901474
##   1.0    0.07333333  0.9174107  0.8892416
##   1.0    0.07820513  0.9174107  0.8892416
##   1.0    0.08307692  0.9174107  0.8892416
##   1.0    0.08794872  0.9174107  0.8892416
##   1.0    0.09282051  0.9174107  0.8892416
##   1.0    0.09769231  0.8861607  0.8442496
##   1.0    0.10256410  0.8883929  0.8465841
##   1.0    0.10743590  0.8392857  0.7660672
##   1.0    0.11230769  0.7924107  0.6961682
##   1.0    0.11717949  0.7611607  0.6461682
##   1.0    0.12205128  0.7455357  0.6208452
##   1.0    0.12692308  0.7455357  0.6208452
##   1.0    0.13179487  0.6986607  0.5536735
##   1.0    0.13666667  0.6830357  0.5314513
##   1.0    0.14153846  0.6830357  0.5314513
##   1.0    0.14641026  0.6517857  0.4839500
##   1.0    0.15128205  0.6517857  0.4839500
##   1.0    0.15615385  0.6205357  0.4331924
##   1.0    0.16102564  0.6205357  0.4299456
##   1.0    0.16589744  0.6205357  0.4299456
##   1.0    0.17076923  0.6205357  0.4268449
##   1.0    0.17564103  0.6205357  0.4268449
##   1.0    0.18051282  0.6205357  0.4225159
##   1.0    0.18538462  0.6205357  0.4225159
##   1.0    0.19025641  0.6205357  0.4247307
##   1.0    0.19512821  0.6049107  0.3992600
##   1.0    0.20000000  0.5892857  0.3753750
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were alpha = 1 and lambda = 0.03435897.
plot(glmnTune)
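
The selected model is the lasso (alpha = 1) with a small lambda. The sparse per-class coefficients of the underlying multinomial glmnet fit can be inspected at that penalty (a sketch):

# Sketch: per-class coefficient matrices at the selected lambda
coef(glmnTune$finalModel, s = glmnTune$bestTune$lambda)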

### Save the test set results in a data frame  
testResults$glmn <- predict(glmnTune, ClassTestx)

#Confusion matrix of penalized models
confusionMatrix(data = predict(glmnTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
######################## Nearest Shrunken Centroids ########################
set.seed(476)

nscGrid <- data.frame(.threshold = 0:25)
nscTune <- train(x = ClassTrainx, y = ClassTrainy, method = "pam",
                 preProc = c("center", "scale"),
                 tuneGrid = nscGrid,
                 metric = "Accuracy", trControl = ctrl)
## Warning: a class contains only 1 sample (repeated across resampling folds)
nscTune
## Nearest Shrunken Centroids 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   threshold  Accuracy   Kappa    
##    0         0.9607143  0.9476735
##    1         0.9464286  0.9264613
##    2         0.9353175  0.9121756
##    3         0.8960317  0.8609466
##    4         0.8835317  0.8435553
##    5         0.8335317  0.7732711
##    6         0.6295635  0.4743221
##    7         0.5287698  0.3107974
##    8         0.3815476  0.0000000
##    9         0.3815476  0.0000000
##   10         0.3815476  0.0000000
##   11         0.3815476  0.0000000
##   12         0.3815476  0.0000000
##   13         0.3815476  0.0000000
##   14         0.3815476  0.0000000
##   15         0.3815476  0.0000000
##   16         0.3815476  0.0000000
##   17         0.3815476  0.0000000
##   18         0.3815476  0.0000000
##   19         0.3815476  0.0000000
##   20         0.3815476  0.0000000
##   21         0.3815476  0.0000000
##   22         0.3815476  0.0000000
##   23         0.3815476  0.0000000
##   24         0.3815476  0.0000000
##   25         0.3815476  0.0000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was threshold = 0.
plot(nscTune)

#var importance 
plot(varImp(nscTune, scale =FALSE))
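
Because the selected threshold is 0, no centroids are shrunken and every predictor should be retained; this can be confirmed directly (a sketch):

# Predictors retained by the final nearest-shrunken-centroids model
predictors(nscTune)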

### Save the test set results in a data frame  
testResults$NSC <- predict(nscTune, ClassTestx)

#Confusion matrix of nearest shrunken centroids
confusionMatrix(data = predict(nscTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
######################## Model Comparison ########################
# Compare resampling performance of the training models
res <- resamples(list(Logistic = logisticTune, LDA = ldaTune, PLSDA = plsdaTune,
                      Penalized = glmnTune, NSC = nscTune))
dotplot(res)

dotplot(res, metric = "Accuracy")
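
Because testResults collects each model's test-set predictions alongside the observed classes, test-set accuracy can also be summarized in one line (a sketch):

# Sketch: test-set accuracy for every model stored in testResults
sapply(testResults[, -1], function(p) mean(p == testResults$obs))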

Exercise 12.2B: Which classification statistic would you choose to optimize for this exercise and why?

Conclusion: We should choose to optimize Kappa. Kappa compares the observed accuracy to the accuracy expected by chance from the marginal class frequencies, so it is more robust than raw accuracy when the classes are as imbalanced as they are here.

Note: This fulfills the requirement of exercise 12.2B.
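As a reminder of why Kappa discounts chance agreement, it can be computed by hand from any confusion table; the sketch below uses the logistic model's test-set predictions:

# Sketch: Cohen's Kappa = (observed accuracy - expected) / (1 - expected)
tab    <- table(predicted = testResults$logistic, actual = testResults$obs)
obsAcc <- sum(diag(tab)) / sum(tab)
expAcc <- sum(rowSums(tab) * colSums(tab)) / sum(tab)^2
(obsAcc - expAcc) / (1 - expAcc)
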

Exercise 12.2C: Of the models presented in this chapter, which performs best on these data? Which oil type does the model most accurately predict? Least accurately predict?

Conclusion1: Based on the cross-validated accuracy and Kappa results, the best-performing model was the penalized multinomial logistic regression (accuracy 0.976, Kappa 0.969).

Conclusion2: Its test-set confusion matrix below shows that types A, D, E, and F are predicted perfectly, while type B is predicted least accurately (one B sample is misclassified as A). Types C and G do not appear in the test set, so their test performance cannot be assessed.

Note: This fulfills the requirement of exercise 12.2C.

# Confusion matrix of the logistic model on the test set
confusionMatrix(data = predict(logisticTune, ClassTestx), 
                reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 1 0 0 0 0 0
##          B 0 4 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9412          
##                  95% CI : (0.7131, 0.9985)
##     No Information Rate : 0.4118          
##     P-Value [Acc > NIR] : 7.111e-06       
##                                           
##                   Kappa : 0.9167          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   0.8000       NA  1.00000   1.0000   1.0000
## Specificity            0.9000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         0.8750   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   0.9231       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2353        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4706   0.2353        0  0.05882   0.1176   0.1176
## Balanced Accuracy      0.9500   0.9000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA

Exercise 13.2: Use the fatty acid data from the previous exercise set (Exercise 12.2).

A. Use the same data splitting approach (if any) and pre-processing steps that you did in the previous chapter. Using the same classification statistic as before, build models described in this chapter for these data. Which model has the best predictive ability? How does this optimal model's performance compare to the best linear model's performance? Would you infer that the data have nonlinear separation boundaries based on this comparison?

Conclusion1: Based on the accuracy results, the best of the chapter 13 models was KNN (QDA, RDA, and SVM failed to run with errors that could not be resolved; the QDA and RDA attempts are kept below, commented out).

Conclusion2: Comparing KNN to the penalized logistic regression from chapter 12, the logistic regression still performed better.

Conclusion3: Since the best linear model outperformed the best nonlinear one, the data appear to have linear rather than nonlinear separation boundaries.

Note: This fulfills the requirement of exercise 13.2A.

######################## Quadratic Discriminant Analysis ########################

# QDA could not be fit here: classes C and G have fewer training samples
# than predictors, so the per-class covariance matrices are singular and
# qda() errors out during resampling.

#set.seed(476)
#QDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "qda", 
#                 metric = "Accuracy", 
#                 trControl = ctrl)
#QDATune

#plot(QDATune)

#var importance 
#plot(varImp(QDATune, scale =FALSE))

### Save the test set results in a data frame  
#testResults$QDA <- predict(QDATune, ClassTestx)

#Confusion matrix of QDA model
#confusionMatrix(data = predict(QDATune, ClassTestx), reference = ClassTesty)
######################## Regularized Discriminant Analysis ########################

# RDA also failed with errors during resampling, likely for the same
# reason: the rare classes are too small to support the covariance
# estimates even with regularization.

#set.seed(476)
#RDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "rda", 
#                 preProc = c('center', 'scale'), metric = "Accuracy", 
#                 trControl = ctrl)
#RDATune

#plot(RDATune)

#var importance 
#plot(varImp(RDATune, scale =FALSE))

### Save the test set results in a data frame  
#testResults$RDA <- predict(RDATune, ClassTestx)

#Confusion matrix of RDA model
#confusionMatrix(data = predict(RDATune, ClassTestx), reference = ClassTesty)
######################## Mixture Discriminant Analysis ########################
set.seed(476)
MDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "mda", 
                 tuneGrid = expand.grid(.subclasses = 1:6), 
                 metric = "Accuracy", trControl = ctrl)
MDATune
## Mixture Discriminant Analysis 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   subclasses  Accuracy   Kappa    
##   1           0.9339286  0.9124878
##   2           0.9339286  0.9124878
##   3           0.9339286  0.9124878
##   4           0.9589286  0.9458356
##   5           0.9464286  0.9288144
##   6           0.9589286  0.9451409
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was subclasses = 4.
plot(MDATune)

#var importance 
plot(varImp(MDATune, scale =FALSE))

### Save the test set results in a data frame  
testResults$MDA <- predict(MDATune, ClassTestx)

#Confusion matrix of MDA model
confusionMatrix(data = predict(MDATune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
###############################Naive Bayes###################################
set.seed(476)
NBTune <- train(x = ClassTrainx, y = ClassTrainy, method = "nb", 
                preProc = c('center', 'scale'), metric = "Accuracy", 
                trControl = ctrl)
NBTune
## Naive Bayes 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   usekernel  Accuracy  Kappa    
##   FALSE         NaN          NaN
##    TRUE      0.9375    0.9187074
## 
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
##  parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
##  = 1.
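
The NaN row for usekernel = FALSE most likely reflects failed Gaussian
density estimates: some predictors take nearly a single value within a
class, so the within-class variance is (near) zero. A quick look (a sketch)
at the within-class standard deviations:

# (Near-)zero entries break the Gaussian densities used when usekernel = FALSE
aggregate(ClassTrainx, by = list(oil = ClassTrainy), FUN = sd)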
plot(NBTune)

#var importance 
plot(varImp(NBTune, scale =FALSE))

### Save the test set results in a data frame  
testResults$NB <- predict(NBTune, ClassTestx)

#Confusion matrix of NB model
confusionMatrix(data = predict(NBTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
###############################K-nearest neighbors###########################
set.seed(476)
KNNTune <- train(x = ClassTrainx, y = ClassTrainy, method = "knn", 
                 metric = "Accuracy", preProc = c("center", "scale"),
                 # k values beyond the training-set size (79) add nothing,
                 # but the wide grid makes the accuracy decay easy to see
                 tuneGrid = data.frame(.k = seq(1, 400, by = 10)), 
                 trControl = ctrl)
KNNTune
## k-Nearest Neighbors 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   k    Accuracy   Kappa      
##     1  0.9625000  0.948426877
##    11  0.8567460  0.802654262
##    21  0.6845238  0.534158299
##    31  0.6484127  0.477969529
##    41  0.4083333  0.069975835
##    51  0.3815476  0.020679970
##    61  0.3815476  0.008326325
##    71  0.3815476  0.000000000
##    81  0.3815476  0.000000000
##    91  0.3815476  0.000000000
##   101  0.3815476  0.000000000
##   111  0.3815476  0.000000000
##   121  0.3815476  0.000000000
##   131  0.3815476  0.000000000
##   141  0.3815476  0.000000000
##   151  0.3815476  0.000000000
##   161  0.3815476  0.000000000
##   171  0.3815476  0.000000000
##   181  0.3815476  0.000000000
##   191  0.3815476  0.000000000
##   201  0.3815476  0.000000000
##   211  0.3815476  0.000000000
##   221  0.3815476  0.000000000
##   231  0.3815476  0.000000000
##   241  0.3815476  0.000000000
##   251  0.3815476  0.000000000
##   261  0.3815476  0.000000000
##   271  0.3815476  0.000000000
##   281  0.3815476  0.000000000
##   291  0.3815476  0.000000000
##   301  0.3815476  0.000000000
##   311  0.3815476  0.000000000
##   321  0.3815476  0.000000000
##   331  0.3815476  0.000000000
##   341  0.3815476  0.000000000
##   351  0.3815476  0.000000000
##   361  0.3815476  0.000000000
##   371  0.3815476  0.000000000
##   381  0.3815476  0.000000000
##   391  0.3815476  0.000000000
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was k = 1.
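
The long plateau at 0.3815 is what we would expect once k spans essentially
the whole training set: every prediction collapses to the majority class. As
a sanity check (a sketch), that plateau should sit close to the proportion
of the largest class in the training data:

# Training-set proportion of the most frequent oil type (A)
max(table(ClassTrainy)) / length(ClassTrainy)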
plot(KNNTune)

#var importance 
plot(varImp(KNNTune, scale =FALSE))

### Save the test set results in a data frame  
testResults$KNN <- predict(KNNTune, ClassTestx)

#Confusion matrix of KNN model
confusionMatrix(data = predict(KNNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
###############################Neural networks##############################
set.seed(476)
nnetGrid <- expand.grid(.size = 1:10, .decay = c(0, .1, 1, 2))
maxSize <- max(nnetGrid$.size)
numWts <- 200   # cap on the number of network weights, passed to MaxNWts below
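# A sketch of a quick check that the cap is large enough: a single-hidden-
# layer nnet with p inputs, H hidden units, and K classes fits
# (p + 1) * H + (H + 1) * K weights, so the largest grid point here (H = 10,
# p = 6, K = 7) needs 147 weights, comfortably under 200.
(ncol(ClassTrainx) + 1) * maxSize + (maxSize + 1) * nlevels(ClassTrainy)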

NNTune <- train(x = ClassTrainx, y = ClassTrainy, method = "nnet", 
                metric = "Accuracy", 
                preProc = c("center", "scale", "spatialSign"), 
                tuneGrid = nnetGrid, trace = FALSE, maxit = 2000, 
                MaxNWts = numWts, trControl = ctrl)
NNTune
## Neural Network 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## Pre-processing: centered (6), scaled (6), spatial sign transformation (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   size  decay  Accuracy   Kappa    
##    1    0.0    0.7309524  0.6355807
##    1    0.1    0.5823413  0.3897512
##    1    1.0    0.6234127  0.4347511
##    1    2.0    0.3815476  0.0000000
##    2    0.0    0.8099206  0.7448108
##    2    0.1    0.8349206  0.7788992
##    2    1.0    0.6091270  0.4129366
##    2    2.0    0.3815476  0.0000000
##    3    0.0    0.9121032  0.8819571
##    3    0.1    0.8835317  0.8415722
##    3    1.0    0.6091270  0.4173741
##    3    2.0    0.4837302  0.1944929
##    4    0.0    0.9035714  0.8670496
##    4    0.1    0.8853175  0.8439308
##    4    1.0    0.6216270  0.4356337
##    4    2.0    0.4837302  0.1944929
##    5    0.0    0.9277778  0.9090930
##    5    0.1    0.8978175  0.8611857
##    5    1.0    0.6091270  0.4154173
##    5    2.0    0.5591270  0.3209333
##    6    0.0    0.9496032  0.9347094
##    6    0.1    0.9103175  0.8787109
##    6    1.0    0.6091270  0.4185862
##    6    2.0    0.5591270  0.3209333
##    7    0.0    0.9250000  0.9023620
##    7    0.1    0.9103175  0.8787109
##    7    1.0    0.6327381  0.4528627
##    7    2.0    0.5716270  0.3418393
##    8    0.0    0.9255952  0.9015627
##    8    0.1    0.9103175  0.8787109
##    8    1.0    0.6327381  0.4528627
##    8    2.0    0.5716270  0.3418393
##    9    0.0    0.9371032  0.9183403
##    9    0.1    0.8960317  0.8608731
##    9    1.0    0.6452381  0.4744300
##    9    2.0    0.5841270  0.3622158
##   10    0.0    0.9371032  0.9191841
##   10    0.1    0.9103175  0.8787109
##   10    1.0    0.6438492  0.4704715
##   10    2.0    0.5841270  0.3622158
## 
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 6 and decay = 0.
plot(NNTune)

#var importance 
#plot(varImp(NNTune, scale =T))

### Save the test set results in a data frame  
testResults$NN <- predict(NNTune, ClassTestx)

#Confusion matrix of NN model
confusionMatrix(data = predict(NNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
###############################Flexible discriminant analysis#################
set.seed(476)
FDATune <- train(x = ClassTrainx, y = ClassTrainy, method = "fda", 
                 preProc = c('center', 'scale'), 
                 metric = "Accuracy", trControl = ctrl)
FDATune
## Flexible Discriminant Analysis 
## 
## 79 samples
##  6 predictor
##  7 classes: 'A', 'B', 'C', 'D', 'E', 'F', 'G' 
## 
## Pre-processing: centered (6), scaled (6) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 71, 70, 71, 71, 71, 71, ... 
## Resampling results across tuning parameters:
## 
##   nprune  Accuracy   Kappa    
##    2      0.5055556  0.2682867
##    8      0.9482143  0.9313469
##   14      0.9482143  0.9313469
## 
## Tuning parameter 'degree' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were degree = 1 and nprune = 8.
plot(FDATune)

#var importance 
plot(varImp(FDATune, scale =FALSE))

### Save the test set results in a data frame  
testResults$FDA <- predict(FDATune, ClassTestx)

#Confusion matrix of FDA model
confusionMatrix(data = predict(FDATune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA
###############################Support Vector Machines#######################

# Model not working: train() errors when run (possibly because several of
# the rare classes end up with too few samples inside the resampling folds).

#set.seed(476)
#sigmaRangeReduced <- sigest(as.matrix(removeCorr))
#svmRGridReduced <- expand.grid(.sigma = sigmaRangeReduced[1], 
#                               .C = 2^(seq(-4, 6)))

#SVMTune <- train(x = ClassTrainx, y = ClassTrainy, method = "svmRadial", 
#                 metric = "Accuracy", preProc = c("center", "scale"), 
#                 tuneGrid = svmRGridReduced, fit = FALSE, 
#                 trControl = ctrl)
#SVMTune

#plot(SVMTune)

#var importance 
#plot(varImp(SVMTune, scale =FALSE))

### Save the test set results in a data frame  
#testResults$SVM <- predict(SVMTune, ClassTestx)

#Confusion matrix of SVM model
#confusionMatrix(data = predict(SVMTune, ClassTestx), reference = ClassTesty)
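
If one only wants to see whether a radial-basis SVM can be fit at all on
these data, a fallback (a sketch, not the tuned caret fit; C = 1 is an
arbitrary, untuned choice) is to call kernlab's ksvm() directly:

library(kernlab)   # also provides sigest(), used above
svmFit <- ksvm(x = as.matrix(ClassTrainx), y = ClassTrainy, 
               kernel = "rbfdot", kpar = "automatic", C = 1)
table(predicted = predict(svmFit, as.matrix(ClassTestx)), actual = ClassTesty)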
#########################Create the confusion matrix from the test set######################
#QDA, RDA, and SVM did not train, so their confusion matrices are omitted
#confusionMatrix(data = predict(QDATune, ClassTestx), reference = ClassTesty)
#confusionMatrix(data = predict(RDATune, ClassTestx), reference = ClassTesty)
#confusionMatrix(data = predict(SVMTune, ClassTestx), reference = ClassTesty)

# The test-set confusion matrices for the MDA, NB, KNN, NN, and FDA models
# were already printed with each model above; all five are identical, with
# every one of the 17 test samples classified correctly (accuracy = 1,
# kappa = 1).


#Resamples of training data
res = resamples(list(MDA = MDATune, NB = NBTune, 
                     KNN = KNNTune, NN = NNTune, FDA = FDATune))
dotplot(res)

res1 = resamples(list(Logistic = logisticTune, LDA = ldaTune, PLSDA = plsdaTune, 
                      Penalized = glmnTune, NSC = nscTune, 
                      MDA = MDATune, NB = NBTune, KNN = KNNTune, 
                      NN = NNTune, FDA = FDATune))
dotplot(res1)
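
summary() on the same resamples object gives the numeric version of the
dotplot comparison, listing the resampled accuracy and kappa distributions
for each model:

summary(res1)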

Exercise 13.2B: Which oil type does the optimal model most accurately
predict? Least accurately predict?

Conclusion: Based on the test-set confusion matrix, the optimal (KNN) model
predicted every oil type present in the test set with perfect accuracy.
Note, however, that types C and G have no samples in the test set
(prevalence 0, sensitivity NA), so accuracy on those two rare types cannot
be assessed from this split.

Note: This fulfills the requirement of exercise 13.2B.

#Confusion matrix of KNN model (the optimal chapter 13 model)
confusionMatrix(data = predict(KNNTune, ClassTestx), reference = ClassTesty)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction A B C D E F G
##          A 7 0 0 0 0 0 0
##          B 0 5 0 0 0 0 0
##          C 0 0 0 0 0 0 0
##          D 0 0 0 1 0 0 0
##          E 0 0 0 0 2 0 0
##          F 0 0 0 0 0 2 0
##          G 0 0 0 0 0 0 0
## 
## Overall Statistics
##                                      
##                Accuracy : 1          
##                  95% CI : (0.8049, 1)
##     No Information Rate : 0.4118     
##     P-Value [Acc > NIR] : 2.812e-07  
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E Class: F
## Sensitivity            1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Specificity            1.0000   1.0000        1  1.00000   1.0000   1.0000
## Pos Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Neg Pred Value         1.0000   1.0000       NA  1.00000   1.0000   1.0000
## Prevalence             0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Rate         0.4118   0.2941        0  0.05882   0.1176   0.1176
## Detection Prevalence   0.4118   0.2941        0  0.05882   0.1176   0.1176
## Balanced Accuracy      1.0000   1.0000       NA  1.00000   1.0000   1.0000
##                      Class: G
## Sensitivity                NA
## Specificity                 1
## Pos Pred Value             NA
## Neg Pred Value             NA
## Prevalence                  0
## Detection Rate              0
## Detection Prevalence        0
## Balanced Accuracy          NA