library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(klaR)
## Loading required package: MASS
data("iris")
#define an 80%/20%/ test split of the dataset
trainindex <- createDataPartition(iris$Species, p=0.80, list = FALSE)
datatrain<-iris[trainindex,]
datatest<-iris[-trainindex,]
#train a naive bayes model
fit<- NaiveBayes(Species~., data=datatest)
#make predictions
predictions<-predict (fit,datatest[,1:4])
#summarize result
confusionMatrix(predictions$class, datatest$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 9 1
## virginica 0 1 9
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.7793, 0.9918)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 8.747e-12
##
## Kappa : 0.9
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.9000 0.9000
## Specificity 1.0000 0.9500 0.9500
## Pos Pred Value 1.0000 0.9000 0.9000
## Neg Pred Value 1.0000 0.9500 0.9500
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3000 0.3000
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 0.9250 0.9250
#define training control
traincontrol<-trainControl(method = "boot",number = 100)
#evaluate the model
fit<-train(Species~., data=iris,trControl=traincontrol,method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Bootstrapped (100 reps)
## Summary of sample sizes: 150, 150, 150, 150, 150, 150, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9488169 0.9224679
## TRUE 0.9521251 0.9274750
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# cross validation
#define training control
traincontrol<-trainControl(method = "cv",number = 10)
#evaluate the model
fit<-train(Species~., data=iris,trControl=traincontrol,method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9466667 0.92
## TRUE 0.9533333 0.93
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# repeated cross valitation
traincontrol<-trainControl(method = "repeatedcv",number = 10, repeats = 3)
#evaluate the model
fit<-train(Species~., data=iris,trControl=traincontrol,method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.9300000
## TRUE 0.9555556 0.9333333
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Loocv
traincontrol<-trainControl(method = "LOOCV")
#evaluate the model
fit<-train(Species~., data=iris,trControl=traincontrol,method="nb")
print(fit)
## Naive Bayes
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation
## Summary of sample sizes: 149, 149, 149, 149, 149, 149, ...
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE 0.9533333 0.93
## TRUE 0.9600000 0.94
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
You can also embed plots, for example:
Note that the echo = FALSE
parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.