require(caret)
## Loading required package: caret
## Warning: package 'caret' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.2
require(class)
## Loading required package: class
require(dbscan)
## Loading required package: dbscan
## Warning: package 'dbscan' was built under R version 4.3.2
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
require(e1071)
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 4.3.2
require(factoextra)
## Loading required package: factoextra
## Warning: package 'factoextra' was built under R version 4.3.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
require(fdm2id)
## Loading required package: fdm2id
## Warning: package 'fdm2id' was built under R version 4.3.2
## Loading required package: arules
## Warning: package 'arules' was built under R version 4.3.2
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.3.2
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
## Loading required package: arulesViz
## Warning: package 'arulesViz' was built under R version 4.3.2
## Loading required package: FactoMineR
## Warning: package 'FactoMineR' was built under R version 4.3.2
##
## Attaching package: 'fdm2id'
## The following objects are masked from 'package:FactoMineR':
##
## CA, MCA, PCA
## The following object is masked from 'package:class':
##
## SOM
require(ggpubr)
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.3.2
require(kableExtra)
## Loading required package: kableExtra
## Warning: package 'kableExtra' was built under R version 4.3.2
## Error: package or namespace load failed for 'kableExtra':
## .onLoad failed in loadNamespace() for 'kableExtra', details:
## call: !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output %in%
## error: 'length = 3' in coercion to 'logical(1)'
require(keras)
## Loading required package: keras
## Warning: package 'keras' was built under R version 4.3.2
require(neuralnet)
## Loading required package: neuralnet
## Warning: package 'neuralnet' was built under R version 4.3.2
require(nnet)
## Loading required package: nnet
require(parsnip)
## Loading required package: parsnip
## Warning: package 'parsnip' was built under R version 4.3.2
##
## Attaching package: 'parsnip'
## The following object is masked from 'package:e1071':
##
## tune
require(psych)
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.3.2
##
## Attaching package: 'psych'
## The following object is masked from 'package:fdm2id':
##
## kaiser
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
require(randomForest)
## Loading required package: randomForest
## Warning: package 'randomForest' was built under R version 4.3.2
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:psych':
##
## outlier
## The following object is masked from 'package:ggplot2':
##
## margin
require(rpart)
## Loading required package: rpart
## Warning: package 'rpart' was built under R version 4.3.2
require(rpart.plot)
## Loading required package: rpart.plot
## Warning: package 'rpart.plot' was built under R version 4.3.2
# Print `x` with print() and return whatever print() returns.
myprint <- function(x) {
  print(x)
}
# Forward `x` as the first argument of citation() and return its result.
mycite <- function(x) {
  citation(x)
}
# Confusion matrix of predicted labels against the true labels.
#
# x:         predicted class labels (factor or character), one per row of
#            `reference`.
# reference: factor of true labels. Defaults to the Species column of the
#            global `test_scale`, looked up when the function is called, so
#            existing one-argument calls keep working.
#
# The predictions are coded with the levels of `reference`, so a class that
# was never predicted still gets its own (all-zero) row. Labels that are not
# a level of `reference` raise an error rather than silently becoming NA.
mycm <- function(x, reference = test_scale$Species) {
  labels <- as.character(x)
  unknown <- setdiff(unique(labels[!is.na(labels)]), levels(reference))
  if (length(unknown) > 0) {
    stop(
      "predicted labels not among the reference levels: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  predicted <- factor(labels, levels = levels(reference))
  confusionMatrix(predicted, reference)
}
# Stratified train/test split ----
# Seed the RNG so the row draws below are repeatable.
set.seed(1234)

# Draw 70% of the supplied row numbers using sample()'s defaults.
draw_train_rows <- function(rows) {
  sample(rows, 0.7 * length(rows))
}

# One draw per block of 50 rows, in this order (the order fixes the RNG stream).
p1 <- draw_train_rows(1:50)     # setosa
p2 <- draw_train_rows(51:100)   # versicolor
p3 <- draw_train_rows(101:150)  # virginica

# Drawn rows form the training set; every remaining row goes to the test set.
mys <- c(p1, p2, p3)
train <- iris[mys, ]
test <- iris[-mys, ]

split_sizes <- c("Size of Train/Test:", nrow(train), nrow(test))
print(noquote(split_sizes))
## [1] Size of Train/Test: 105 45
# Standardise the four numeric columns (1:4) of the training set and re-attach
# the Species label.
train_scaled_matrix <- scale(train[, 1:4])
train_scale <- as.data.frame(train_scaled_matrix)
train_scale$Species <- train$Species

# Standardise the test set with the TRAINING centre and scale. Scaling the
# test rows by their own mean/sd would put the two sets on different scales
# and let test-set statistics influence the evaluation.
test_scale <- as.data.frame(scale(
  test[, 1:4],
  center = attr(train_scaled_matrix, "scaled:center"),
  scale = attr(train_scaled_matrix, "scaled:scale")
))
test_scale$Species <- test$Species

# NOTE: the confusion matrices recorded further down in this file were
# generated while the test set was standardised with its own mean/sd; expect
# them to shift when the script is re-run. The training summary that follows
# is unaffected.
myprint(describe(train_scale[, 1:4]))
## vars n mean sd median trimmed mad min max range skew
## Sepal.Length 1 105 0 1 -0.06 -0.04 1.07 -1.85 2.46 4.32 0.31
## Sepal.Width 2 105 0 1 -0.10 -0.04 0.70 -1.98 3.19 5.17 0.48
## Petal.Length 3 105 0 1 0.31 0.00 1.09 -1.50 1.78 3.28 -0.25
## Petal.Width 4 105 0 1 0.15 -0.03 1.38 -1.45 1.74 3.19 -0.09
## kurtosis se
## Sepal.Length -0.60 0.1
## Sepal.Width 0.43 0.1
## Petal.Length -1.43 0.1
## Petal.Width -1.33 0.1
# Tabulate column 5 of the scaled training set (the Species label added above).
myprint(table(train_scale[[5]]))
##
## setosa versicolor virginica
## 35 35 35
# Model 1: naive Bayes on all four scaled predictors, scored on the test set.
c1 <- naiveBayes(Species ~ ., data = train_scale)
pred1 <- predict(c1, newdata = test_scale)
mycm(pred1)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 1
## virginica 0 2 14
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 0.9333
## Specificity 1.0000 0.9667 0.9333
## Pos Pred Value 1.0000 0.9286 0.8750
## Neg Pred Value 1.0000 0.9355 0.9655
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3111
## Detection Prevalence 0.3333 0.3111 0.3556
## Balanced Accuracy 1.0000 0.9167 0.9333
# Model 2: neural network with a single hidden layer of 4 units.
c2 <- neuralnet(Species ~ ., data = train_scale, hidden = 4)
plot(c2, rep = "best")

# For each test row take the column index of the largest predicted score,
# then translate that index into a species name through `mylabs`.
# (Assumes the score columns follow the order of `mylabs` - the recorded
# confusion matrix is consistent with that.)
c2_scores <- predict(c2, test_scale)
pred2 <- max.col(c2_scores)
mylabs <- c("setosa", "versicolor", "virginica")
pred2 <- mylabs[pred2]
mycm(pred2)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 1
## virginica 0 2 14
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 0.9333
## Specificity 1.0000 0.9667 0.9333
## Pos Pred Value 1.0000 0.9286 0.8750
## Neg Pred Value 1.0000 0.9355 0.9655
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3111
## Detection Prevalence 0.3333 0.3111 0.3556
## Balanced Accuracy 1.0000 0.9167 0.9333
# Model 3: multinomial log-linear model fitted with nnet::multinom().
# The recorded trace below ends with "stopped after 100 iterations", i.e. the
# optimiser ran to its iteration limit instead of reporting convergence.
c3 <- nnet::multinom(formula = Species ~ ., data = train_scale)
## # weights: 18 (10 variable)
## initial value 115.354290
## iter 10 value 7.746044
## iter 20 value 0.047736
## iter 30 value 0.009575
## iter 40 value 0.007000
## iter 50 value 0.003940
## iter 60 value 0.003405
## iter 70 value 0.002791
## iter 80 value 0.002252
## iter 90 value 0.002068
## iter 100 value 0.001854
## final value 0.001854
## stopped after 100 iterations
# Predict the test rows and compare with the true species.
pred3 <- predict(c3, newdata = test_scale)
mycm(pred3)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 1
## virginica 0 2 14
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 0.9333
## Specificity 1.0000 0.9667 0.9333
## Pos Pred Value 1.0000 0.9286 0.8750
## Neg Pred Value 1.0000 0.9355 0.9655
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3111
## Detection Prevalence 0.3333 0.3111 0.3556
## Balanced Accuracy 1.0000 0.9167 0.9333
# Model 4: classification tree on the scaled training data.
c4 <- rpart(Species ~ ., data = train_scale)
rpart.plot(c4)

# Ask rpart for the predicted class directly. The earlier version took
# max.col() over the class-probability matrix and mapped the index through
# `mylabs`; max.col() breaks ties at random by default, and the mapping
# depended on a label vector defined in another chunk.
# as.character() keeps pred4 a character vector, as it was before.
pred4 <- as.character(predict(c4, test_scale, type = "class"))
mycm(pred4)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 14 1
## virginica 0 1 14
##
## Overall Statistics
##
## Accuracy : 0.9556
## 95% CI : (0.8485, 0.9946)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9333
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9333 0.9333
## Specificity 1.0000 0.9667 0.9667
## Pos Pred Value 1.0000 0.9333 0.9333
## Neg Pred Value 1.0000 0.9667 0.9667
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3111 0.3111
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 0.9500 0.9500
# Model 5: random forest with the package defaults.
c5 <- randomForest(Species ~ ., data = train_scale)
# Default plot method for the fitted forest.
plot(c5)
# Predict the test rows and compare with the true species.
pred5 <- predict(c5, newdata = test_scale)
mycm(pred5)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 12 0
## virginica 0 3 15
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8000 1.0000
## Specificity 1.0000 1.0000 0.9000
## Pos Pred Value 1.0000 1.0000 0.8333
## Neg Pred Value 1.0000 0.9091 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2667 0.3333
## Detection Prevalence 0.3333 0.2667 0.4000
## Balanced Accuracy 1.0000 0.9000 0.9500
# Model 6: support vector machine with a linear kernel.
c6 <- svm(Species ~ ., data = train_scale, kernel = "linear")
# Plot the fitted model over the test rows for Petal.Length vs Petal.Width.
plot(c6, test_scale, Petal.Length ~ Petal.Width, fill = TRUE)
# Predict the test rows and compare with the true species.
pred6 <- predict(c6, newdata = test_scale)
mycm(pred6)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 1
## virginica 0 2 14
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 0.9333
## Specificity 1.0000 0.9667 0.9333
## Pos Pred Value 1.0000 0.9286 0.8750
## Neg Pred Value 1.0000 0.9355 0.9655
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3111
## Detection Prevalence 0.3333 0.3111 0.3556
## Balanced Accuracy 1.0000 0.9167 0.9333
# Model 7: k-nearest neighbours (k = 3) on the four scaled predictors.
# class::knn() classifies the test rows itself, so its return value already
# holds the predictions; there is no separate predict() step.
c7 <- class::knn(
  train = train_scale[, 1:4],
  test = test_scale[, 1:4],
  cl = train_scale[, 5],
  k = 3
)
# Fix: pred7 used to be predict(c6, test_scale), which re-scored the SVM
# model and never evaluated the kNN result.
# NOTE: the confusion matrix recorded below was produced by that earlier
# version (it is identical to the SVM one) and must be regenerated.
pred7 <- c7
mycm(pred7)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 13 1
## virginica 0 2 14
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8173, 0.986)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8667 0.9333
## Specificity 1.0000 0.9667 0.9333
## Pos Pred Value 1.0000 0.9286 0.8750
## Neg Pred Value 1.0000 0.9355 0.9655
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.2889 0.3111
## Detection Prevalence 0.3333 0.3111 0.3556
## Balanced Accuracy 1.0000 0.9167 0.9333
# Model 8: k-means with 3 centres on the scaled predictors (labels not used
# for fitting), keeping the best of 25 random starts.
c8 <- kmeans(train_scale[, 1:4], 3, nstart = 25)
fviz_cluster(c8, data = train_scale[, 1:4],
             palette = c("#2E9FDF", "#00AFBB", "#E7B800"),
             geom = "point", ellipse.type = "convex", ggtheme = theme_bw())

# Assign each test row to a cluster id.
# NOTE(review): relies on a predict() method for kmeans objects being
# available from one of the packages loaded at the top of the file
# (presumably fdm2id) - confirm.
pred8 <- predict(c8, test_scale[, 1:4])

# Cluster ids are arbitrary, so derive the id -> species mapping from the
# training data: each cluster is named after the species most common among
# its training rows. The previous hard-coded ordering did not match the
# fitted clusters (recorded accuracy below: 0.0889).
# Two clusters can end up with the same species name if the clustering does
# not separate the classes.
cluster_ids <- factor(c8$cluster, levels = seq_len(nrow(c8$centers)))
cluster_by_species <- table(cluster_ids, train_scale$Species)
newlabs <- colnames(cluster_by_species)[apply(cluster_by_species, 1, which.max)]
pred8 <- newlabs[pred8]

# NOTE: the confusion matrix recorded below predates this fix and must be
# regenerated.
mycm(pred8)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 0 11 5
## versicolor 0 4 10
## virginica 15 0 0
##
## Overall Statistics
##
## Accuracy : 0.0889
## 95% CI : (0.0248, 0.2122)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.3667
##
## Mcnemar's Test P-Value : 9.537e-06
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 0.0000 0.26667 0.0000
## Specificity 0.4667 0.66667 0.5000
## Pos Pred Value 0.0000 0.28571 0.0000
## Neg Pred Value 0.4828 0.64516 0.5000
## Prevalence 0.3333 0.33333 0.3333
## Detection Rate 0.0000 0.08889 0.0000
## Detection Prevalence 0.3556 0.31111 0.3333
## Balanced Accuracy 0.2333 0.46667 0.2500