Random Forest Example - Iris Data

Iris data

# Load the iris data and attach the packages used by this section.
# caret is attached here (not later) because createDataPartition() below
# is a caret function and would otherwise be undefined in a fresh session.
data(iris)
library(ggplot2)
library(caret)

# Split the rows on Species with p = 0.7 for training; list = FALSE returns
# the selected row indices in a form that can index iris directly.
# NOTE(review): the split is random and no seed is set, so results will
# differ between runs -- add set.seed() if reproducibility is needed.
inTrain <- createDataPartition(y = iris$Species,
                               p = 0.7, list = FALSE)
training <- iris[inTrain, ]
# Negative indexing keeps every row that was not selected for training.
testing <- iris[-inTrain, ]

Random forests

library(caret)
# Fit a random forest (method = "rf") predicting Species from every other
# column of the training set. The proximity argument is spelled out in full
# rather than abbreviated as `prox`, so it no longer depends on partial
# argument matching after being forwarded through train()'s `...`.
modFit <- train(Species ~ ., data = training, method = "rf", proximity = TRUE)
# Print the resampling summary and the selected tuning parameter.
modFit
## Random Forest 
## 
## 105 samples
##   4 predictor
##   3 classes: 'setosa', 'versicolor', 'virginica' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 105, 105, 105, 105, 105, 105, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##   2     0.9204943  0.8794656
##   3     0.9224275  0.8824119
##   4     0.9224560  0.8824836
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 4.

Getting a single tree

# Show the node table of the second tree (k = 2) in the fitted forest.
# getTree() is namespace-qualified because the randomForest package is never
# attached with library() in this script.
randomForest::getTree(modFit$finalModel, k = 2)
##    left daughter right daughter split var split point status prediction
## 1              2              3         3        2.60      1          0
## 2              0              0         0        0.00     -1          1
## 3              4              5         4        1.75      1          0
## 4              6              7         3        5.45      1          0
## 5              8              9         1        5.95      1          0
## 6             10             11         1        4.95      1          0
## 7              0              0         0        0.00     -1          3
## 8             12             13         1        5.75      1          0
## 9              0              0         0        0.00     -1          3
## 10             0              0         0        0.00     -1          3
## 11            14             15         3        4.95      1          0
## 12             0              0         0        0.00     -1          3
## 13             0              0         0        0.00     -1          2
## 14             0              0         0        0.00     -1          2
## 15            16             17         4        1.55      1          0
## 16             0              0         0        0.00     -1          3
## 17             0              0         0        0.00     -1          2

Class “centers”

# Compute one "center" per species from columns 3 and 4 of the training set,
# using the proximity matrix stored on the final random forest model.
# classCenter() is namespace-qualified because randomForest is not attached,
# and `$proximity` is written in full instead of relying on `$` partial
# matching of the abbreviated `$prox`.
irisP <- randomForest::classCenter(training[, c(3, 4)], training$Species,
                                   modFit$finalModel$proximity)
# The row names of the result are reused as the Species column so the centers
# can be coloured by the same variable as the training points.
irisP <- as.data.frame(irisP)
irisP$Species <- rownames(irisP)
# Scatter plot of the training points (ggplot() replaces the deprecated
# qplot() shortcut), coloured by species.
p <- ggplot(training, aes(x = Petal.Width, y = Petal.Length, col = Species)) +
  geom_point()
# Overlay the class centers as large crosses (shape 4).
p + geom_point(aes(x = Petal.Width, y = Petal.Length, col = Species),
               size = 5, shape = 4, data = irisP)

Predicting new values

# Predict the species of every held-out row with the fitted model.
pred <- predict(modFit, newdata = testing)
# Flag each test row as correctly (TRUE) or incorrectly (FALSE) classified.
testing$predRight <- (pred == testing$Species)
# Cross-tabulate predicted species (rows) against actual species (columns).
table(pred, testing$Species)
##             
## pred         setosa versicolor virginica
##   setosa         15          0         0
##   versicolor      0         15         0
##   virginica       0          0        15

Plotting predictions on new values

# Scatter plot of the test rows, coloured by whether the prediction was
# correct (the predRight column of testing). ggplot() replaces the
# deprecated qplot() shortcut; the title text is unchanged.
ggplot(testing, aes(x = Petal.Width, y = Petal.Length, colour = predRight)) +
  geom_point() +
  ggtitle("newdata Predictions")