Sample data: iris
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
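Another quick check worth running here is str(), which confirms the structure before modelling: 150 observations, four numeric measurements, and the Species factor with three levels.
str(iris)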
Split the data into training and testing sets (70% training, 30% testing)
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
trainData <- iris[ind==1,]
testData <- iris[ind==2,]
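Note that sample() draws at random, so the split (and every count below) will differ from run to run. For a reproducible split, fix the seed before sampling; a minimal sketch, where the seed value 42 is an arbitrary choice:
set.seed(42)  # any fixed value makes the 70/30 split reproducible
ind <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))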
Load the randomForest package
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
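If loading fails because the package is not installed yet, install it once from CRAN:
install.packages("randomForest")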
Train the random forest (100 trees) and inspect its confusion matrix
iris_rf <- randomForest(Species~.,data=trainData,ntree=100,proximity=TRUE)
table(predict(iris_rf),trainData$Species)
##
## setosa versicolor virginica
## setosa 34 0 0
## versicolor 0 35 3
## virginica 0 3 35
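Since predict() on a randomForest object without newdata returns the out-of-bag (OOB) predictions, the table above is an honest error estimate on the training data. It can be reduced to a single accuracy figure like this (the exact counts depend on the random split):
conf.train <- table(predict(iris_rf), trainData$Species)
sum(diag(conf.train)) / sum(conf.train)  # OOB accuracy = 1 - OOB error rate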
print(iris_rf)
##
## Call:
## randomForest(formula = Species ~ ., data = trainData, ntree = 100, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 100
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 5.45%
## Confusion matrix:
## setosa versicolor virginica class.error
## setosa 34 0 0 0.00000000
## versicolor 0 35 3 0.07894737
## virginica 0 3 35 0.07894737
plot(iris_rf)
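The plot draws one error curve per class plus the overall OOB error, but no legend. Assuming the default matplot colours and line types used by plot.randomForest, a legend can be added from the stored error-rate matrix:
legend("topright", colnames(iris_rf$err.rate), col = 1:4, lty = 1:4)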
importance(iris_rf)
## MeanDecreaseGini
## Sepal.Length 5.066282
## Sepal.Width 1.798066
## Petal.Length 34.160173
## Petal.Width 31.444874
varImpPlot(iris_rf)
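varImpPlot() sorts the variables visually; the same ranking can be extracted as a table, which confirms that the petal measurements dominate:
imp <- importance(iris_rf)
imp[order(imp[, "MeanDecreaseGini"], decreasing = TRUE), , drop = FALSE]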
Use the trained forest to predict the testing data
irisPred <- predict(iris_rf, newdata = testData)
table(irisPred, testData$Species)
##
## irisPred setosa versicolor virginica
## setosa 16 0 0
## versicolor 0 11 0
## virginica 0 1 12
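The test confusion matrix can likewise be summarised as a single accuracy figure (again, the counts vary with the random split):
mean(irisPred == testData$Species)  # test-set accuracy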
Plot the margins: a positive margin means the observation was classified correctly, a negative one means it was misclassified
plot(margin(iris_rf,testData$Species))
Tune the random forest: search for the best mtry (here run on the full iris data)
tune.rf <- tuneRF(iris[, -5], iris[, 5], stepFactor = 0.5)
## mtry = 2 OOB error = 4.67%
## Searching left ...
## mtry = 4 OOB error = 4.67%
## 0 0.05
## Searching right ...
## mtry = 1 OOB error = 4%
## 0.1428571 0.05
## Warning in randomForest.default(x, y, mtry = mtryCur, ntree = ntreeTry, :
## invalid mtry: reset to within valid range
## mtry = 0 OOB error = 5.33%
## -0.3333333 0.05
## Warning in xy.coords(x, y, xlabel, ylabel, log): 1 x value <= 0 omitted
## from logarithmic plot
print(tune.rf)
## mtry OOBError
## 0.OOB 0 0.05333333
## 1.OOB 1 0.04000000
## 2.OOB 2 0.04666667
## 4.OOB 4 0.04666667
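tuneRF() only reports the OOB error per mtry value; to use the result, pick the mtry with the lowest OOBError and refit. A sketch (iris_rf_tuned is a hypothetical name; the mtry = 0 row produced by the warning above is harmless here since it is not the minimum):
best.mtry <- tune.rf[which.min(tune.rf[, "OOBError"]), "mtry"]
iris_rf_tuned <- randomForest(Species ~ ., data = trainData, mtry = best.mtry, ntree = 100)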