Load Libraries and Inspect data
# Load Libraries
library(stats)
library(caret)
library(randomForest)
library(mlbench)
# Load the built-in iris data set and preview its first six rows
data(list = "iris")
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##1 5.1 3.5 1.4 0.2 setosa
##2 4.9 3.0 1.4 0.2 setosa
##3 4.7 3.2 1.3 0.2 setosa
##4 4.6 3.1 1.5 0.2 setosa
##5 5.0 3.6 1.4 0.2 setosa
##6 5.4 3.9 1.7 0.4 setosa
# Compact summary of the data set: one line per column with its type and
# leading values
utils::str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
Split data into Training and Testing
# Fix the RNG seed so the random split (and anything fitted on it afterwards)
# is the same on every run.
set.seed(123)
# Assign every row to group 1 (training, probability 0.7) or group 2
# (testing, probability 0.3). Each row is drawn independently, so the
# realised split is only approximately 70/30.
index <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
# Training data: rows assigned to group 1
Training <- iris[index == 1, ]
# Testing data: rows assigned to group 2
Testing <- iris[index == 2, ]
Create Random Forest Model
# Random forests are built from bootstrap samples and random feature subsets,
# so seed the RNG to make the fitted model repeatable for a given Training set.
set.seed(123)
# Classify Species from all remaining columns using the package defaults.
rfm <- randomForest(Species ~ ., data = Training)
# Show the call, forest settings, OOB error estimate and OOB confusion matrix.
print(rfm)
## randomForest(formula = Species ~ ., data = Training)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
## OOB estimate of error rate: 2.97%
## Confusion matrix:
## setosa versicolor virginica class.error
## setosa 35 0 0 0.00000000
## versicolor 0 34 2 0.05555556
## virginica 0 1 29 0.03333333
Rank Variables
# Importance score of each predictor in the fitted forest
# (reported as MeanDecreaseGini for this classification model)
randomForest::importance(rfm)
# Plot of the same importance scores
randomForest::varImpPlot(rfm)
## MeanDecreaseGini
## Sepal.Length 6.064209
## Sepal.Width 1.479663
## Petal.Length 31.390687
## Petal.Width 27.506897
Prediction of Training data
# Predict the class of every training row with the fitted forest, then compare
# against the true labels. These are the rows the model was trained on, so the
# resulting accuracy is optimistic compared with the OOB / test-set estimates.
pre1 <- predict(rfm, newdata = Training)
confusionMatrix(data = pre1, reference = Training$Species)
## Confusion Matrix and Statistics
## Reference
## Prediction setosa versicolor virginica
## setosa 35 0 0
## versicolor 0 36 0
## virginica 0 0 30
## Overall Statistics
## Accuracy : 1
## 95% CI : (0.9641, 1)
## No Information Rate : 0.3564
## P-Value [Acc > NIR] : < 2.2e-16
## Kappa : 1
## Mcnemar's Test P-Value : NA
## Statistics by Class:
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 1.000
## Specificity 1.0000 1.0000 1.000
## Pos Pred Value 1.0000 1.0000 1.000
## Neg Pred Value 1.0000 1.0000 1.000
## Prevalence 0.3465 0.3564 0.297
## Detection Rate 0.3465 0.3564 0.297
## Detection Prevalence 0.3465 0.3564 0.297
## Balanced Accuracy 1.0000 1.0000 1.000
Observed vs. Predicted Classes on Testing data
# Predict classes for the held-out rows using only the four predictor columns
# (column 5, Species, is dropped), then cross-tabulate truth against prediction.
predicted_table <- predict(rfm, newdata = Testing[, -5])
table(
  observed = Testing[, 5],
  predicted = predicted_table
)
## predicted
## observed setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 12 2
## virginica 0 1 19
Prediction of Testing data
# Predict the class of every held-out row and summarise performance against
# the true labels (confusion matrix plus overall and per-class statistics).
pre2 <- predict(rfm, newdata = Testing)
confusionMatrix(data = pre2, reference = Testing$Species)
## Confusion Matrix and Statistics
## Reference
## Prediction setosa versicolor virginica
## setosa 15 0 0
## versicolor 0 12 1
## virginica 0 2 19
## Overall Statistics
## Accuracy : 0.9388
## 95% CI : (0.8313, 0.9872)
## No Information Rate : 0.4082
## P-Value [Acc > NIR] : 5.007e-15
## Kappa : 0.9066
## Mcnemar's Test P-Value : NA
## Statistics by Class:
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 0.8571 0.9500
## Specificity 1.0000 0.9714 0.9310
## Pos Pred Value 1.0000 0.9231 0.9048
## Neg Pred Value 1.0000 0.9444 0.9643
## Prevalence 0.3061 0.2857 0.4082
## Detection Rate 0.3061 0.2449 0.3878
## Detection Prevalence 0.3061 0.2653 0.4286
## Balanced Accuracy 1.0000 0.9143 0.9405
Conclusion