# Work on the built-in iris data frame under the short name `d`.
d <- iris
library(caTools)
library(rpart)
library(rpart.plot)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Split the rows of `d` into training and test sets ----
set.seed(20) # fixed seed so the same split is drawn on every run
# sample.split() is driven by the Species labels with a 0.70 split ratio;
# its result is used below as a row mask (TRUE -> Train, FALSE -> Test).
sample <- sample.split(d$Species, SplitRatio = 0.70)
Train <- subset(d, subset = sample)
Test <- subset(d, subset = !sample)
# Train the decision-tree classifier ----
# `Species ~ .` models Species from every other column of Train.
# cp = 0.2 is rpart's complexity parameter (forwarded to its fitting controls).
Tree <- rpart(Species ~ ., data = Train, cp = 0.2)

# Predict a class label for every row of Test ----
# One call is enough: type = "class" returns the predicted factor level, and
# na.action = na.pass is forwarded so rows with missing values are not dropped.
Tree.Species.pred <- predict(
  Tree,
  Test,
  type = "class",
  na.action = na.pass
)
# Evaluate the model with a confusion matrix ----
# `data` receives the predicted classes, `reference` the actual classes.
confusionMatrix(data = Tree.Species.pred, reference = Test$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 2
## virginica 0 0 13
##
## Overall Statistics
##
## Accuracy : 0.9556
## 95% CI : (0.8485, 0.9946)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9333
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.8667
## Specificity 1.0000 0.9333 1.0000
## Pos Pred Value 1.0000 0.8824 1.0000
## Neg Pred Value 1.0000 1.0000 0.9375
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.2889
## Detection Prevalence 0.3333 0.3778 0.2889
## Balanced Accuracy 1.0000 0.9667 0.9333
# Visualizing the decision tree with rpart.plot() and custom colours ----
# NOTE(review): box.col is a plain colour vector; presumably it is recycled
# over the nodes rather than mapped to the three classes -- confirm against
# the rpart.plot documentation if per-class colouring is intended.
rpart.plot(
  Tree,
  main = "using rpart",
  border.col = "purple",
  box.col = c("green", "blue", "orange"),
  split.box.col = "yellow",
  split.border.col = "red",
  split.round = 1,
  round = TRUE, # spelled out; `T` is an ordinary variable and can be reassigned
  leaf.round = 1,
  lwd = 2
)

# Visualizing the same tree with prp(), using the same styling arguments ----
# NOTE(review): box.col is a plain colour vector; presumably it is recycled
# over the nodes rather than mapped to the three classes -- confirm against
# the prp documentation if per-class colouring is intended.
prp(
  Tree,
  main = "using prp",
  border.col = "purple",
  box.col = c("green", "blue", "orange"),
  split.box.col = "yellow",
  split.border.col = "red",
  split.round = 1,
  round = TRUE, # spelled out; `T` is an ordinary variable and can be reassigned
  leaf.round = 1,
  lwd = 2
)

# Draw the fitted tree once more with no styling arguments supplied.
rpart.plot(x = Tree)
