suppressMessages(library(caret))
## Warning: package 'caret' was built under R version 3.6.3
library(ggplot2)
# Load the built-in iris data and lower-case its column names
# (e.g. Species -> species) so later code can use one naming style.
data("iris")
names(iris) <- tolower(names(iris))

# Fix the RNG seed before any random step so the split below is repeatable.
set.seed(430)

# Class counts: the three species are equally represented.
table(iris$species)
##
## setosa versicolor virginica
## 50 50 50

# Hold out 30% of the rows for testing. createDataPartition() samples within
# each level of y; list = FALSE lets the result be used directly as a row index.
index <- createDataPartition(y = iris$species, p = 0.7, list = FALSE)
train.set <- iris[index, ]
test.set <- iris[-index, ]
dim(train.set)
## [1] 105 5
# Exploratory scatter plots of the training rows, coloured by species:
# g1 shows the sepal measurements, g2 the petal measurements.
g <- ggplot(data = train.set)
g1 <- g +
  geom_point(aes(x = sepal.length, y = sepal.width, color = species), pch = 19)
g2 <- g +
  geom_point(aes(x = petal.length, y = petal.width, color = species), pch = 19)

# Draw both plots in a single figure.
gridExtra::grid.arrange(g1, g2)
# Fit a classification tree (caret method "rpart") predicting species from
# all other columns. caret evaluates candidate values of the complexity
# parameter cp by cross-validation and keeps the most accurate one.
iris.tree <- train(
  species ~ .,
  data = train.set,
  method = "rpart",
  trControl = trainControl(method = "cv")
)
iris.tree
## CART
##
## 105 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 96, 94, 94, 94, 95, 95, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.0000000 0.9527273 0.9282660
## 0.4428571 0.7733333 0.6637422
## 0.5000000 0.3857576 0.1285714
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.

# Plot the selected tree; fancyRpartPlot() comes from the rattle package.
suppressMessages(library(rattle))
## Warning: package 'rattle' was built under R version 3.6.3
fancyRpartPlot(iris.tree$finalModel)
Error rate in the training set
# Training-set performance. Called without newdata, predict() returns the
# model's predictions for the rows it was trained on.
iris.pred <- predict(iris.tree)
table(iris.pred, train.set$species)
##
## iris.pred setosa versicolor virginica
## setosa 35 0 0
## versicolor 0 34 3
## virginica 0 1 32
# Training error rate: share of training rows whose predicted species differs
# from the true one (3 + 1 = 4 of 105 rows in the table above).
train.error.rate <- round(mean(iris.pred != train.set$species), 2)
train.error.rate

# Test-set performance. iris.pred is reassigned here and from this point on
# holds the predictions for the held-out rows.
iris.pred <- predict(iris.tree, newdata = test.set)
table(iris.pred, test.set$species)
##
## iris.pred setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 2
## virginica 0 0 13
# Test error rate: share of held-out rows that are misclassified
# (2 of 45 rows in the table above), rounded to two decimals.
error.rate <- round(mean(iris.pred != test.set$species), 2)
error.rate
## [1] 0.04
The test-set error rate is 4% (2 of 45 observations misclassified, i.e. 0.04 after rounding), using predictors such as petal length and petal width.