Random Forest Example - Iris Data
Iris data
# Load the iris data set and the packages this section relies on.
# caret has to be attached here: createDataPartition() below is a caret
# function and would otherwise fail with "could not find function".
data(iris)
library(ggplot2)
library(caret)

# Split on Species, putting 70% of the rows (p = 0.7) in the training set.
# With list = FALSE the selected row indices come back as a matrix rather
# than a list, so they can be used directly for row subsetting.
inTrain <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]
Random forests
library(caret)

# Fit a random forest ("rf") that predicts Species from every other column
# of the training set. proximity = TRUE is forwarded through train()'s ...
# to the underlying random forest fit; it is spelled out in full instead of
# relying on partial argument matching ("prox").
modFit <- train(
  Species ~ .,
  data = training,
  method = "rf",
  proximity = TRUE
)

# Print the resampling summary of the fitted model.
modFit
## Random Forest
##
## 105 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 105, 105, 105, 105, 105, 105, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9204943 0.8794656
## 3 0.9224275 0.8824119
## 4 0.9224560 0.8824836
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 4.
Getting a single tree
# Extract tree number 2 (k = 2) from the final fitted forest.
# getTree() lives in the randomForest package, which this script never
# attaches itself, so the call is namespace-qualified.
randomForest::getTree(modFit$finalModel, k = 2)
## left daughter right daughter split var split point status prediction
## 1 2 3 3 2.60 1 0
## 2 0 0 0 0.00 -1 1
## 3 4 5 4 1.75 1 0
## 4 6 7 3 5.45 1 0
## 5 8 9 1 5.95 1 0
## 6 10 11 1 4.95 1 0
## 7 0 0 0 0.00 -1 3
## 8 12 13 1 5.75 1 0
## 9 0 0 0 0.00 -1 3
## 10 0 0 0 0.00 -1 3
## 11 14 15 3 4.95 1 0
## 12 0 0 0 0.00 -1 3
## 13 0 0 0 0.00 -1 2
## 14 0 0 0 0.00 -1 2
## 15 16 17 4 1.55 1 0
## 16 0 0 0 0.00 -1 3
## 17 0 0 0 0.00 -1 2
Class “centers”
# Compute per-class "centers" for columns 3 and 4 of the training data
# (Petal.Length and Petal.Width in iris), using the proximity matrix stored
# on the final random forest. classCenter() is namespace-qualified because
# randomForest is not attached by this script, and the proximity component
# is named in full rather than via partial `$` matching.
irisP <- randomForest::classCenter(
  training[, c(3, 4)],
  training$Species,
  modFit$finalModel$proximity
)
irisP <- as.data.frame(irisP)
# The row names of the centers hold the class labels; expose them as a
# Species column so the plot can colour the centers like the data points.
irisP$Species <- rownames(irisP)

# Scatter plot of the training points, coloured by species. ggplot() is used
# in place of the deprecated qplot().
p <- ggplot(
  training,
  aes(x = Petal.Width, y = Petal.Length, colour = Species)
) +
  geom_point()

# Overlay the class centers as large crosses (shape 4).
p + geom_point(
  aes(x = Petal.Width, y = Petal.Length, colour = Species),
  size = 5,
  shape = 4,
  data = irisP
)

Predicting new values
# Predict the species of every held-out row with the fitted model.
pred <- predict(modFit, newdata = testing)

# Record, per test row, whether the prediction matches the true species.
testing[["predRight"]] <- pred == testing[["Species"]]

# Confusion table: predicted class (rows) against true class (columns).
table(pred, testing$Species)
##
## pred setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 0
## virginica 0 0 15
Predicting new values
# Plot the test rows by petal width and length, coloured by whether the
# prediction was correct (predRight). ggplot() replaces the deprecated
# qplot() and draws the same scatter plot with the same title.
ggplot(
  testing,
  aes(x = Petal.Width, y = Petal.Length, colour = predRight)
) +
  geom_point() +
  ggtitle("newdata Predictions")
