# Load the built-in iris data set into the workspace and print a compact
# summary of its columns (four numeric measurements plus the Species factor).
data("iris")
str(object = iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(caTools)
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
# ---- Train/test split ----
# Seed before the split so that the partition, and everything that follows
# from the same RNG stream (including the forest), is reproducible.
set.seed(120)

# sample.split() expects the label vector, not the data frame. Given `iris`
# itself it returns one flag per column (length 5), and subsetting with that
# vector recycles the 5-element pattern down the rows: a fixed 60/40 split by
# row position rather than a random 70/30 one. Passing the Species column
# yields one flag per row and, per the caTools documentation, preserves the
# relative ratios of the labels in both partitions.
split <- sample.split(iris$Species, SplitRatio = 0.7)
table(split) # number of rows assigned to test (FALSE) and train (TRUE)

# `split` is already logical, so index with it directly instead of comparing
# against the strings "TRUE" / "FALSE".
train <- iris[split, ]
test <- iris[!split, ]

# Address columns by name so the script does not depend on Species being the
# fifth column.
predictors <- setdiff(names(iris), "Species")

# ---- Fit the random forest ----
classifier_RF <- randomForest(
  x = train[predictors],
  y = train$Species,
  ntree = 500
)
# Printing the model shows the call, the OOB error estimate and the OOB
# confusion matrix.
classifier_RF

# ---- Evaluate on the held-out rows ----
y_pred <- predict(classifier_RF, newdata = test[predictors])
# Rows are the true species, columns the predicted species.
confusion_mtx <- table(actual = test$Species, predicted = y_pred)
confusion_mtx

# ---- Diagnostics ----
# Error rates as a function of the number of trees grown.
plot(classifier_RF)

# Per-variable importance (MeanDecreaseGini), as a table and as a plot.
importance(classifier_RF)
varImpPlot(classifier_RF)

# NOTE: the console output previously pasted below these calls came from the
# recycled 5-flag split (90 train / 60 test rows) and no longer applies;
# re-run the script to regenerate it.