Example Iris Data
# Attach ggplot2 for the plots below, load the built-in iris data set,
# and list its column names.
library(ggplot2)
data(iris)
names(iris)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species"
# Count the observations in each Species level.
table(iris[["Species"]])
##
## setosa versicolor virginica
## 50 50 50
Create training and test sets
# createDataPartition() is provided by caret, so the package has to be
# attached before this point; without it the call fails with
# "could not find function".
library(caret)

# Put roughly 70% of the rows into the training set, sampled within each
# Species level. The sampling is random: call set.seed() beforehand if the
# split has to be reproducible.
inTrain <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]

# Report rows x columns of each subset.
dim(training)
dim(testing)
## [1] 105 5
## [1] 45 5
Iris petal widths/sepal width
# Scatter plot of petal width against sepal width for the training rows,
# coloured by species. Written with ggplot() + geom_point() because qplot()
# is deprecated in current ggplot2 releases.
ggplot(training, aes(x = Petal.Width, y = Sepal.Width, colour = Species)) +
  geom_point()

Iris petal widths/sepal width
# Fit a classification tree (rpart, via caret's train()) that predicts
# Species from every other column of the training set, then show the
# fitted tree's splits.
library(caret)
modFit <- train(
  Species ~ .,
  data = training,
  method = "rpart"
)
print(modFit$finalModel)
## n= 105
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 105 70 setosa (0.3333333 0.3333333 0.3333333)
## 2) Petal.Length< 2.6 35 0 setosa (1.0000000 0.0000000 0.0000000) *
## 3) Petal.Length>=2.6 70 35 versicolor (0.0000000 0.5000000 0.5000000)
## 6) Petal.Width< 1.65 38 4 versicolor (0.0000000 0.8947368 0.1052632) *
## 7) Petal.Width>=1.65 32 1 virginica (0.0000000 0.0312500 0.9687500) *
Plot tree
# Draw the fitted tree with base graphics, then label the nodes.
plot(
  modFit$finalModel,
  uniform = TRUE,
  main = "Classification Tree"
)
text(
  modFit$finalModel,
  use.n = TRUE,
  all = TRUE,
  cex = 0.8
)

Prettier plots
# rattle supplies fancyRpartPlot(), a more polished rendering of the tree.
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.1.0 Copyright (c) 2006-2017 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.

# Render the same fitted tree as in the base-graphics plot.
fancyRpartPlot(modFit$finalModel)

Predicting new values
# Predict the species of each held-out test row with the fitted model.
predict(modFit, newdata = testing)
## [1] setosa setosa setosa setosa setosa setosa
## [7] setosa setosa setosa setosa setosa setosa
## [13] setosa setosa setosa versicolor versicolor versicolor
## [19] versicolor versicolor versicolor versicolor versicolor virginica
## [25] versicolor versicolor versicolor versicolor versicolor versicolor
## [31] virginica virginica virginica virginica virginica virginica
## [37] virginica virginica virginica virginica virginica virginica
## [43] virginica virginica virginica
## Levels: setosa versicolor virginica