Load Libraries

require(caret)
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.2
require(class)
## Loading required package: class
require(dbscan)
## Loading required package: dbscan
## Warning: package 'dbscan' was built under R version 4.3.2
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
require(e1071)
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 4.3.2
require(factoextra)
## Loading required package: factoextra
## Warning: package 'factoextra' was built under R version 4.3.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
require(fdm2id)
## Loading required package: fdm2id
## Warning: package 'fdm2id' was built under R version 4.3.2
## Loading required package: arules
## Warning: package 'arules' was built under R version 4.3.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.3.2
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
## Loading required package: arulesViz
## Warning: package 'arulesViz' was built under R version 4.3.2
## Loading required package: FactoMineR
## Warning: package 'FactoMineR' was built under R version 4.3.2
## 
## Attaching package: 'fdm2id'
## The following objects are masked from 'package:FactoMineR':
## 
##     CA, MCA, PCA
## The following object is masked from 'package:class':
## 
##     SOM
require(ggpubr)
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.3.2
require(kableExtra)
## Loading required package: kableExtra
## Warning: package 'kableExtra' was built under R version 4.3.2
## Error: package or namespace load failed for 'kableExtra':
##  .onLoad failed in loadNamespace() for 'kableExtra', details:
##   call: !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output %in% 
##   error: 'length = 3' in coercion to 'logical(1)'
require(keras)
## Loading required package: keras
## Warning: package 'keras' was built under R version 4.3.2
require(neuralnet)
## Loading required package: neuralnet
## Warning: package 'neuralnet' was built under R version 4.3.2
require(nnet)
## Loading required package: nnet
require(parsnip)
## Loading required package: parsnip
## Warning: package 'parsnip' was built under R version 4.3.2
## 
## Attaching package: 'parsnip'
## The following object is masked from 'package:e1071':
## 
##     tune
require(psych)
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.3.2
## 
## Attaching package: 'psych'
## The following object is masked from 'package:fdm2id':
## 
##     kaiser
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
require(randomForest)
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.3.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:ggplot2':
## 
##     margin
require(rpart)
## Loading required package: rpart
## Warning: package 'rpart' was built under R version 4.3.2
require(rpart.plot)
## Loading required package: rpart.plot
## Warning: package 'rpart.plot' was built under R version 4.3.2

Functions

# Print `x` via print(); returns whatever print() returns.
myprint <- function(x) {
  print(x)
}
# Return the citation() entry for the package named by `x`.
mycite <- function(x) {
  citation(x)
}
# Confusion matrix (caret::confusionMatrix) of predicted labels vs. true labels.
#
# x          Predicted class labels (factor or character vector).
# reference  True class labels. Defaults to the global `test_scale$Species`,
#            which is what every call in this script compares against.
#
# Predictions are re-levelled to the reference's levels so that a class that
# is never predicted still gets its own (empty) row instead of producing a
# factor whose levels disagree with the reference.
mycm <- function(x, reference = test_scale$Species) {
  reference <- as.factor(reference)
  labels <- as.character(x)

  # Fail loudly on labels the reference does not know about; factor() below
  # would otherwise turn them into NA silently.
  unknown <- setdiff(unique(labels[!is.na(labels)]), levels(reference))
  if (length(unknown) > 0) {
    stop("Predicted labels not found in reference levels: ",
         paste(unknown, collapse = ", "), call. = FALSE)
  }

  confusionMatrix(factor(labels, levels = levels(reference)), reference)
}

Train/Test

Stratified

# Stratified 70/30 split of iris: draw 35 row indices from each species'
# 50-row range, in the order setosa, versicolor, virginica.
set.seed(1234)
species_rows <- list(1:50, 51:100, 101:150)
picks <- lapply(species_rows, function(rows) sample(rows, .7 * 50))
p1 <- picks[[1]]  # setosa
p2 <- picks[[2]]  # versicolor
p3 <- picks[[3]]  # virginica
mys <- c(p1, p2, p3)

# Sampled rows form the training set; everything else is held out for testing.
train <- iris[mys, ]
test <- iris[-mys, ]
print(noquote(c("Size of Train/Test:", nrow(train), nrow(test))))
## [1] Size of Train/Test: 105                 45

Scaling

# Standardise the four numeric features. The centring/scaling constants are
# estimated on the training set only and then reused for the test set, so both
# sets live in the same feature space and no test-set statistics leak into
# preprocessing.
train_scaled_mat <- scale(train[, 1:4])
train_scale <- as.data.frame(train_scaled_mat)
train_scale$Species <- train$Species

test_scale <- as.data.frame(scale(
  test[, 1:4],
  center = attr(train_scaled_mat, "scaled:center"),
  scale = attr(train_scaled_mat, "scaled:scale")
))
test_scale$Species <- test$Species

# NOTE: model metrics rendered further down were produced with the test set
# scaled by its own mean/sd; re-knit to refresh them.
myprint(describe(train_scale[, 1:4]))
##              vars   n mean sd median trimmed  mad   min  max range  skew
## Sepal.Length    1 105    0  1  -0.06   -0.04 1.07 -1.85 2.46  4.32  0.31
## Sepal.Width     2 105    0  1  -0.10   -0.04 0.70 -1.98 3.19  5.17  0.48
## Petal.Length    3 105    0  1   0.31    0.00 1.09 -1.50 1.78  3.28 -0.25
## Petal.Width     4 105    0  1   0.15   -0.03 1.38 -1.45 1.74  3.19 -0.09
##              kurtosis  se
## Sepal.Length    -0.60 0.1
## Sepal.Width      0.43 0.1
## Petal.Length    -1.43 0.1
## Petal.Width     -1.33 0.1
# Class counts in the training set (Species is the fifth column).
myprint(table(train_scale[["Species"]]))
## 
##     setosa versicolor  virginica 
##         35         35         35

Naive Bayes

# Fit naive Bayes on the scaled training data, predict class labels for the
# test set, and report the confusion matrix.
c1 <- naiveBayes(Species ~ ., data = train_scale)
pred1 <- predict(c1, newdata = test_scale, type = "class")
mycm(pred1)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         1
##   virginica       0          2        14
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.8667           0.9333
## Specificity                 1.0000            0.9667           0.9333
## Pos Pred Value              1.0000            0.9286           0.8750
## Neg Pred Value              1.0000            0.9355           0.9655
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.2889           0.3111
## Detection Prevalence        0.3333            0.3111           0.3556
## Balanced Accuracy           1.0000            0.9167           0.9333

Neural Network

# Neural network with a single hidden layer of 4 units.
c2 <- neuralnet(Species ~ ., data = train_scale, hidden = 4)
plot(c2, rep = "best")

# Take the highest-scoring output column per test row and translate the column
# index into a species name. Labels come from the factor levels rather than a
# hard-coded vector; this assumes the output columns follow the level order
# (the same assumption the hard-coded labels made). Ties resolve to the first
# column so the result does not depend on the RNG state.
mylabs <- levels(train_scale$Species)
pred2 <- mylabs[max.col(predict(c2, test_scale), ties.method = "first")]
mycm(pred2)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         1
##   virginica       0          2        14
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.8667           0.9333
## Specificity                 1.0000            0.9667           0.9333
## Pos Pred Value              1.0000            0.9286           0.8750
## Neg Pred Value              1.0000            0.9355           0.9655
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.2889           0.3111
## Detection Prevalence        0.3333            0.3111           0.3556
## Balanced Accuracy           1.0000            0.9167           0.9333

Multinomial Logistic Regression

# Multinomial logistic regression on the scaled training data.
c3 <- nnet::multinom(Species ~ ., data = train_scale)
## # weights:  18 (10 variable)
## initial  value 115.354290 
## iter  10 value 7.746044
## iter  20 value 0.047736
## iter  30 value 0.009575
## iter  40 value 0.007000
## iter  50 value 0.003940
## iter  60 value 0.003405
## iter  70 value 0.002791
## iter  80 value 0.002252
## iter  90 value 0.002068
## iter 100 value 0.001854
## final  value 0.001854 
## stopped after 100 iterations
# Predicted classes for the test set and the resulting confusion matrix.
pred3 <- predict(c3, newdata = test_scale, type = "class")
mycm(pred3)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         1
##   virginica       0          2        14
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.8667           0.9333
## Specificity                 1.0000            0.9667           0.9333
## Pos Pred Value              1.0000            0.9286           0.8750
## Neg Pred Value              1.0000            0.9355           0.9655
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.2889           0.3111
## Detection Prevalence        0.3333            0.3111           0.3556
## Balanced Accuracy           1.0000            0.9167           0.9333

Tree

# Classification tree on the scaled training data.
c4 <- rpart(Species ~ ., data = train_scale)
rpart.plot(c4)

# Ask rpart for class labels directly instead of taking max.col() over the
# probability matrix: leaf probabilities can tie, and max.col() breaks ties at
# random by default. This also returns a factor carrying all species levels
# and removes the dependency on a label vector defined in another section.
pred4 <- predict(c4, test_scale, type = "class")
mycm(pred4)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         14         1
##   virginica       0          1        14
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9556          
##                  95% CI : (0.8485, 0.9946)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9333          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9333           0.9333
## Specificity                 1.0000            0.9667           0.9667
## Pos Pred Value              1.0000            0.9333           0.9333
## Neg Pred Value              1.0000            0.9667           0.9667
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3111           0.3111
## Detection Prevalence        0.3333            0.3333           0.3333
## Balanced Accuracy           1.0000            0.9500           0.9500

Forest

# Random forest on the scaled training data; plot(c5) draws the forest's
# default diagnostic plot.
c5 <- randomForest(Species ~ ., data = train_scale)
plot(c5)

# Predict the test set and report the confusion matrix.
pred5 <- predict(c5, newdata = test_scale)
mycm(pred5)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         12         0
##   virginica       0          3        15
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.8000           1.0000
## Specificity                 1.0000            1.0000           0.9000
## Pos Pred Value              1.0000            1.0000           0.8333
## Neg Pred Value              1.0000            0.9091           1.0000
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.2667           0.3333
## Detection Prevalence        0.3333            0.2667           0.4000
## Balanced Accuracy           1.0000            0.9000           0.9500

Linear SVM

# Support vector machine with a linear kernel, fitted on the training data.
c6 <- svm(Species ~ ., data = train_scale, kernel = "linear")

# Plot the fitted model over the test data in the Petal.Length / Petal.Width
# plane, with filled regions.
plot(c6, data = test_scale, formula = Petal.Length ~ Petal.Width, fill = TRUE)

# Predict the test set and report the confusion matrix.
pred6 <- predict(c6, newdata = test_scale)
mycm(pred6)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         1
##   virginica       0          2        14
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.8667           0.9333
## Specificity                 1.0000            0.9667           0.9333
## Pos Pred Value              1.0000            0.9286           0.8750
## Neg Pred Value              1.0000            0.9355           0.9655
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.2889           0.3111
## Detection Prevalence        0.3333            0.3111           0.3556
## Balanced Accuracy           1.0000            0.9167           0.9333

KNN

# 3-nearest-neighbour classification. class::knn() has no separate fit step:
# its return value already is the predicted class for each test row.
c7 <- class::knn(train_scale[, 1:4], test_scale[, 1:4], train_scale[, 5], k = 3)

# Evaluate the KNN predictions themselves. This previously called
# predict(c6, ...), i.e. re-scored the SVM model, so any output rendered from
# that version shows SVM results and needs a re-knit.
pred7 <- c7
mycm(pred7)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         13         1
##   virginica       0          2        14
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9333         
##                  95% CI : (0.8173, 0.986)
##     No Information Rate : 0.3333         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9            
##                                          
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.8667           0.9333
## Specificity                 1.0000            0.9667           0.9333
## Pos Pred Value              1.0000            0.9286           0.8750
## Neg Pred Value              1.0000            0.9355           0.9655
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.2889           0.3111
## Detection Prevalence        0.3333            0.3111           0.3556
## Balanced Accuracy           1.0000            0.9167           0.9333

K-Means (Unsupervised)

# Cluster the scaled training features into 3 groups (25 random starts) and
# plot the clusters.
c8 <- kmeans(train_scale[, 1:4], centers = 3, nstart = 25)
fviz_cluster(c8, data = train_scale[, 1:4], palette = c("#2E9FDF", "#00AFBB", "#E7B800"),
             geom = "point", ellipse.type = "convex", ggtheme = theme_bw())

# Assign each test row to a fitted cluster.
# NOTE(review): base R has no predict method for kmeans objects; presumably
# one of the attached packages (fdm2id?) supplies it - confirm.
pred8 <- predict(c8, test_scale[, 1:4])

# Cluster ids are arbitrary and change between runs, so a hard-coded
# id -> species vector silently mislabels every cluster when the numbering
# differs. Derive the mapping instead: each cluster is named after the species
# most common among its training members (ties go to the first species).
# Two clusters may map to the same species if the clustering does not separate
# the classes. Re-knit to refresh any output rendered with the old mapping.
votes <- table(c8$cluster, train_scale$Species)
newlabs <- colnames(votes)[max.col(votes, ties.method = "first")]
names(newlabs) <- rownames(votes)
pred8 <- unname(newlabs[as.character(pred8)])
mycm(pred8)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa          0         11         5
##   versicolor      0          4        10
##   virginica      15          0         0
## 
## Overall Statistics
##                                           
##                Accuracy : 0.0889          
##                  95% CI : (0.0248, 0.2122)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 1               
##                                           
##                   Kappa : -0.3667         
##                                           
##  Mcnemar's Test P-Value : 9.537e-06       
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 0.0000           0.26667           0.0000
## Specificity                 0.4667           0.66667           0.5000
## Pos Pred Value              0.0000           0.28571           0.0000
## Neg Pred Value              0.4828           0.64516           0.5000
## Prevalence                  0.3333           0.33333           0.3333
## Detection Rate              0.0000           0.08889           0.0000
## Detection Prevalence        0.3556           0.31111           0.3333
## Balanced Accuracy           0.2333           0.46667           0.2500