# Using Random Forest
library(randomForest)
## Warning: package 'randomForest' was built under R version 3.4.4
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
data(iris)
# View(iris)
# Stratified train/test split: within each species the first 35 rows go to
# training and the remaining 15 (rows 36-50 of that species) go to testing.
by_species <- split(iris, iris$Species)
iris_setosa <- by_species[["setosa"]] # 50 rows
iris_versicolor <- by_species[["versicolor"]] # 50 rows
iris_virginica <- by_species[["virginica"]] # 50 rows
train_rows <- seq_len(35)
test_rows <- 36:50
iris_train <- rbind(
  iris_setosa[train_rows, ],
  iris_versicolor[train_rows, ],
  iris_virginica[train_rows, ]
)
iris_test <- rbind(
  iris_setosa[test_rows, ],
  iris_versicolor[test_rows, ],
  iris_virginica[test_rows, ]
)
# Building a random forest model on training data.
# Seed the RNG first: randomForest() draws random numbers while fitting, so
# without a fixed seed the fitted model and every number reported from it
# change from run to run.
set.seed(42)
# The response must be written as the bare column name. With
# `iris_train$Species ~ .` the left-hand side does not match any column name
# in `data`, so `.` expands to ALL columns -- Species included -- and the
# label leaks into the predictors.
fit.forest <- randomForest(Species ~ ., data = iris_train)
# Predict the species of the held-out rows once and reuse the result for both
# the printed predictions and the accuracy figure below.
pred_test <- predict(fit.forest, newdata = iris_test)
pred_test # recorded run below: 44 of 45 correct (row 139 predicted as versicolor)
## 36 37 38 39 40 41
## setosa setosa setosa setosa setosa setosa
## 42 43 44 45 46 47
## setosa setosa setosa setosa setosa setosa
## 48 49 50 86 87 88
## setosa setosa setosa versicolor versicolor versicolor
## 89 90 91 92 93 94
## versicolor versicolor versicolor versicolor versicolor versicolor
## 95 96 97 98 99 100
## versicolor versicolor versicolor versicolor versicolor versicolor
## 136 137 138 139 140 141
## virginica virginica virginica versicolor virginica virginica
## 142 143 144 145 146 147
## virginica virginica virginica virginica virginica virginica
## 148 149 150
## virginica virginica virginica
## Levels: setosa versicolor virginica
# Confusion table: rows are predicted classes, columns are the true classes.
table(predicted = pred_test, actual = iris_test$Species)
# Training accuracy: the model is scored on the same rows it was fitted on, so
# treat this as an optimistic figure rather than an estimate of real accuracy.
mean(iris_train$Species == predict(fit.forest, iris_train)) # 1 in the recorded run
## [1] 1
# Test accuracy: share of held-out rows whose predicted species is correct.
mean(pred_test == iris_test$Species) # 44/45 = 97.8% in the recorded run, not 100%
## [1] 0.9777778