Using the ‘iris’ dataset, we create training and test datasets using the createDataPartition function in the caret package.
# Attach the plotting and modelling packages, then load the built-in iris data.
library(ggplot2)
library(caret)
data(iris)
## Loading required package: lattice
# createDataPartition() samples at random; fix the RNG seed so the same
# rows land in the training and testing sets on every run.
set.seed(1234)
# Row indices for roughly 70% of the data, sampled within each Species level.
# list = FALSE returns the indices as a matrix usable for row subsetting.
inTrain <- createDataPartition(y = iris$Species, p = 0.7, list = FALSE)
training <- iris[inTrain, ]
# Negative indexing keeps every row that was not selected for training.
testing <- iris[-inTrain, ]
We then fit a model on the training set using random forests.
# Fit a random forest predicting Species from all remaining columns.
# The argument is spelled `proximity` in full (it is forwarded to the
# underlying random forest fit) instead of relying on partial matching of
# `prox`; the proximity matrix is needed later to compute class centres.
modFit <- train(Species ~ ., data = training, method = "rf", proximity = TRUE)
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
# Show the resampling results and the tuning value chosen for the final model.
print(modFit)
## Random Forest
##
## 105 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 105, 105, 105, 105, 105, 105, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa Accuracy SD Kappa SD
## 2 0.9573069 0.9351308 0.02962483 0.04479868
## 3 0.9603108 0.9396813 0.03162919 0.04780683
## 4 0.9581454 0.9365125 0.03945046 0.05957527
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 3.
# Inspect the split structure of the second tree in the fitted forest.
getTree(rfobj = modFit$finalModel, k = 2)
## left daughter right daughter split var split point status prediction
## 1 2 3 4 0.80 1 0
## 2 0 0 0 0.00 -1 1
## 3 4 5 3 4.75 1 0
## 4 0 0 0 0.00 -1 2
## 5 6 7 3 4.95 1 0
## 6 8 9 2 3.05 1 0
## 7 10 11 1 6.05 1 0
## 8 12 13 1 6.50 1 0
## 9 0 0 0 0.00 -1 2
## 10 14 15 4 1.70 1 0
## 11 0 0 0 0.00 -1 3
## 12 0 0 0 0.00 -1 3
## 13 0 0 0 0.00 -1 2
## 14 0 0 0 0.00 -1 2
## 15 0 0 0 0.00 -1 3
# Class centres in (Petal.Length, Petal.Width) space, derived from the
# forest's proximity matrix. Columns are selected by name rather than by
# position, and `$proximity` is written out in full so the lookup does not
# depend on partial matching by `$`.
irisP <- classCenter(
  training[, c("Petal.Length", "Petal.Width")],
  training$Species,
  modFit$finalModel$proximity
)
irisP <- as.data.frame(irisP)
# The row names of the centres are the class labels; expose them as a column
# so the centres can be coloured by Species alongside the training points.
irisP$Species <- rownames(irisP)

# Scatter plot of the training data (ggplot() replaces the deprecated qplot()).
p <- ggplot(
  training,
  aes(x = Petal.Width, y = Petal.Length, colour = Species)
) +
  geom_point()
# Overlay the class centres as large crosses.
p + geom_point(
  aes(x = Petal.Width, y = Petal.Length, colour = Species),
  size = 5, shape = 4, data = irisP
)
# Predict the species of the held-out rows and record, per row, whether the
# prediction agrees with the true label.
pred <- predict(modFit, newdata = testing)
testing$predRight <- pred == testing$Species
# Cross-tabulate predicted (rows) against actual (columns) species.
table(pred, testing$Species)
##
## pred setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 15 2
## virginica 0 0 13
# Plot the test points coloured by whether they were classified correctly.
# ggplot() + geom_point() + ggtitle() replaces the deprecated qplot() call
# and draws the same scatter plot with the same title.
ggplot(testing, aes(x = Petal.Width, y = Petal.Length, colour = predRight)) +
  geom_point() +
  ggtitle("newdata Predictions")