This solution shows how the iris data set can be explored and how a Naive Bayes classifier can be used to predict the species.

Load the required packages

library('TSA')
## Warning: package 'TSA' was built under R version 3.4.3
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 3.4.3
## Loading required package: locfit
## Warning: package 'locfit' was built under R version 3.4.3
## locfit 1.5-9.1    2013-03-22
## Loading required package: mgcv
## Warning: package 'mgcv' was built under R version 3.4.3
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 3.4.3
## This is mgcv 1.8-23. For overview type 'help("mgcv-package")'.
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.4.3
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library('forecast')
## Warning: package 'forecast' was built under R version 3.4.3
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
## 
##     getResponse
library('tseries')
library('ggplot2') # visualization
library('ggthemes') # visualization
## Warning: package 'ggthemes' was built under R version 3.4.3
library('scales') # visualization
library('dplyr') # data manipulation
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('mice') # imputation
## Warning: package 'mice' was built under R version 3.4.3
## Loading required package: lattice
library('randomForest') # classification algorithm
## Warning: package 'randomForest' was built under R version 3.4.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library('rpart') # for decision tree
## Warning: package 'rpart' was built under R version 3.4.3
library('ROCR')
## Warning: package 'ROCR' was built under R version 3.4.3
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# library('ROCR')
# library('randomForest')
# library('corrr')
# library('corrplot')
# library('glue')
# library('caTools')
# library('data.table')
# require("knitr")
# require("geosphere")
# require("gmapsdistance")
require("tidyr")
## Loading required package: tidyr
## Warning: package 'tidyr' was built under R version 3.4.3
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:mice':
## 
##     complete
library('corrplot')
## Warning: package 'corrplot' was built under R version 3.4.3
## corrplot 0.84 loaded
#source("distance.R")
library('car')
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library('caret')
## Warning: package 'caret' was built under R version 3.4.3
library('gclus')
## Loading required package: cluster
library('MASS')
## Warning: package 'MASS' was built under R version 3.4.3
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library('ggcorrplot')
## Warning: package 'ggcorrplot' was built under R version 3.4.3
library('cluster')
library('caTools')
## Warning: package 'caTools' was built under R version 3.4.3
library('rpart')
library('rpart.plot')
## Warning: package 'rpart.plot' was built under R version 3.4.3
library('rattle')
## Warning: package 'rattle' was built under R version 3.4.3
## Rattle: A free graphical interface for data science with R.
## Version 5.1.0 Copyright (c) 2006-2017 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
## 
## Attaching package: 'rattle'
## The following object is masked from 'package:randomForest':
## 
##     importance
library('RColorBrewer')
library('data.table')
## Warning: package 'data.table' was built under R version 3.4.3
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library('ROCR')
library('purrr')
## Warning: package 'purrr' was built under R version 3.4.3
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:data.table':
## 
##     transpose
## The following object is masked from 'package:caret':
## 
##     lift
## The following object is masked from 'package:car':
## 
##     some
## The following object is masked from 'package:scales':
## 
##     discard
library('tidyr')
library('ggplot2')
library('dummies')
## dummies-1.5.6 provided by Decision Patterns
library('corrplot')
library('usdm')
## Warning: package 'usdm' was built under R version 3.4.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.4.3
## Loading required package: raster
## 
## Attaching package: 'raster'
## The following object is masked from 'package:data.table':
## 
##     shift
## The following objects are masked from 'package:MASS':
## 
##     area, select
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:nlme':
## 
##     getData
## 
## Attaching package: 'usdm'
## The following object is masked from 'package:car':
## 
##     vif
## The following object is masked from 'package:nlme':
## 
##     Variogram
library('e1071')
## Warning: package 'e1071' was built under R version 3.4.3
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:raster':
## 
##     interpolate
## The following objects are masked from 'package:TSA':
## 
##     kurtosis, skewness
library('ElemStatLearn')
## Warning: package 'ElemStatLearn' was built under R version 3.4.3

Load and inspect the data

Read the iris data set, then look at its summary statistics and the number of rows per class:

# Read the iris measurements. stringsAsFactors = TRUE is passed explicitly so
# that `class` is a factor on every R version (since R 4.0 read.csv no longer
# converts strings to factors by default); the per-class counts in summary()
# and the factor labels used for classification later depend on this.
mydata <- read.csv('iris.csv', stringsAsFactors = TRUE)
# Per-column summary statistics (and level counts for the factor column)
summary(mydata)
##   sepal.length    sepal.width     petal.length    petal.width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.550   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.300   Median :1.300  
##  Mean   :5.839   Mean   :3.058   Mean   :3.741   Mean   :1.191  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##         class   
##  setosa    :51  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Number of rows per class
table(mydata$class)
## 
##     setosa versicolor  virginica 
##         51         50         50

Split the data into training and validation data (roughly 70% / 30%)

# Train / validation partition
# Every row is independently assigned the label 1 (training, probability 0.7)
# or 2 (validation, probability 0.3).
# The seed is fixed so the same split is obtained on every run.
set.seed(3451)
pd <- sample(x = 2, size = nrow(mydata), replace = TRUE, prob = c(0.7, 0.3))
train <- mydata[which(pd == 1), , drop = FALSE]
val <- mydata[which(pd == 2), , drop = FALSE]

A simple scatter plot to understand the nature of the data

# Scatter plot of sepal length against petal width for the training rows,
# coloured by class. Columns are referenced by bare name so they are looked up
# in `data`; this also gives clean axis and legend titles instead of
# "train$sepal.length" etc.
qplot(sepal.length, petal.width, colour = class, data = train)

Keep two predictor variables (sepal length and petal length) together with the class label in a smaller data frame

# Two-predictor subsets: sepal length, petal length and the class label
# (columns 1, 3 and 5 of the full data), for both partitions.
train.2fact <- train[c("sepal.length", "petal.length", "class")]
val.2fact <- val[c("sepal.length", "petal.length", "class")]

Fit a Naive Bayes classifier on the two predictors

# Fit a Naive Bayes model: the predictors are all columns of train.2fact
# except the third one, and the response is train.2fact$class.
NB.1<-naiveBayes(x=train.2fact[-3], y=train.2fact$class)
# Inspect the fitted object: its class, a summary of its components
# (apriori, tables, levels, call), and finally the full printout.
class(NB.1)
## [1] "naiveBayes"
summary(NB.1)
##         Length Class  Mode     
## apriori 3      table  numeric  
## tables  2      -none- list     
## levels  3      -none- character
## call    3      -none- call
# NOTE(review): in the conditional tables printed below, columns [,1] and [,2]
# are presumably the per-class mean and standard deviation of each numeric
# predictor -- confirm against the e1071 naiveBayes documentation.
print(NB.1)
## 
## Naive Bayes Classifier for Discrete Predictors
## 
## Call:
## naiveBayes.default(x = train.2fact[-3], y = train.2fact$class)
## 
## A-priori probabilities:
## train.2fact$class
##     setosa versicolor  virginica 
##  0.3454545  0.3272727  0.3272727 
## 
## Conditional probabilities:
##                  sepal.length
## train.2fact$class     [,1]      [,2]
##        setosa     5.007895 0.3096481
##        versicolor 5.841667 0.4912811
##        virginica  6.605556 0.6973191
## 
##                  petal.length
## train.2fact$class     [,1]      [,2]
##        setosa     1.465789 0.1863973
##        versicolor 4.177778 0.4691093
##        virginica  5.580556 0.5746151

Predict the category for the validation data

# Predict the class of every validation row from its two predictor columns
# (the third column, the true class label, is dropped before prediction).
y_pred <- predict(NB.1, newdata = val.2fact[-3])
val.2fact$SpeciesPredicted <- y_pred
# The true labels are stored in `class`. There is no `Species` column:
# `val.2fact$Species` is silently partially matched by `$` to
# `SpeciesPredicted`, which compares the predictions with themselves and
# always yields a perfect confusion matrix.
length(val.2fact$class)
## [1] 41
length(val.2fact$SpeciesPredicted)
## [1] 41
# Confusion matrix of predicted vs. actual class on the validation data.
# Predictions go in the rows and the actual classes in the columns, which is
# the layout confusionMatrix() expects when it is given a table.
# NOTE: confusion-matrix output recorded from the earlier version of this
# chunk shows the self-comparison (a perfect diagonal) and must be regenerated.
CofusionIris <- table(predictedclass = val.2fact$SpeciesPredicted, actualclass = val.2fact$class)
CofusionIrismatrix <- confusionMatrix(CofusionIris)
print(CofusionIrismatrix)
## Confusion Matrix and Statistics
## 
##             predictedclass
## actualclass  setosa versicolor virginica
##   setosa         13          0         0
##   versicolor      0         12         0
##   virginica       0          0        16
## 
## Overall Statistics
##                                     
##                Accuracy : 1         
##                  95% CI : (0.914, 1)
##     No Information Rate : 0.3902    
##     P-Value [Acc > NIR] : < 2.2e-16 
##                                     
##                   Kappa : 1         
##  Mcnemar's Test P-Value : NA        
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           1.0000
## Specificity                 1.0000            1.0000           1.0000
## Pos Pred Value              1.0000            1.0000           1.0000
## Neg Pred Value              1.0000            1.0000           1.0000
## Prevalence                  0.3171            0.2927           0.3902
## Detection Rate              0.3171            0.2927           0.3902
## Detection Prevalence        0.3171            0.2927           0.3902
## Balanced Accuracy           1.0000            1.0000           1.0000