library(C50)
# Split iris into one data frame per species.
iris_setosa <- subset(iris, Species == "setosa")
iris_versicolor <- subset(iris, Species == "versicolor")
iris_virginica <- subset(iris, Species == "virginica")
# Training set: rows 1-30 of each species, stacked in the order
# setosa, versicolor, virginica (original row names are kept).
iris_train <- do.call(
  rbind,
  lapply(
    list(iris_setosa, iris_versicolor, iris_virginica),
    function(species_rows) species_rows[1:30, ]
  )
)
# Test set: rows 31-50 of each species, stacked in the same order.
iris_test <- do.call(
  rbind,
  lapply(
    list(iris_setosa, iris_versicolor, iris_virginica),
    function(species_rows) species_rows[31:50, ]
  )
)
# Fit a C5.0 decision tree: every column of the training data except the
# 5th is used as a predictor, and Species is the class label.
iris_c5.0 <- C5.0(x = iris_train[, -5], y = iris_train$Species)
# Plot the fitted decision tree.
plot(iris_c5.0)

# Training accuracy: share of training rows whose predicted class equals
# the true species.
mean(predict(iris_c5.0, newdata = iris_train) == iris_train$Species)
## [1] 0.9777778
# Test accuracy; the predictions are kept so they can be inspected later.
predict_test_5.0 <- predict(iris_c5.0, newdata = iris_test)
mean(predict_test_5.0 == iris_test$Species)
## [1] 0.9333333
library(gmodels)
# Cross-tabulate the actual test-set species (rows) against the C5.0
# predictions (columns) to see which classes get confused with each other.
# In the pasted output below, the row/column headings match the argument
# expressions exactly as written here, so the call is kept verbatim.
CrossTable(iris_test$Species, predict_test_5.0)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 60
##
##
## | predict_test_5.0
## iris_test$Species | setosa | versicolor | virginica | Row Total |
## ------------------|------------|------------|------------|------------|
## setosa | 20 | 0 | 0 | 20 |
## | 26.667 | 7.333 | 6.000 | |
## | 1.000 | 0.000 | 0.000 | 0.333 |
## | 1.000 | 0.000 | 0.000 | |
## | 0.333 | 0.000 | 0.000 | |
## ------------------|------------|------------|------------|------------|
## versicolor | 0 | 19 | 1 | 20 |
## | 6.667 | 18.561 | 4.167 | |
## | 0.000 | 0.950 | 0.050 | 0.333 |
## | 0.000 | 0.864 | 0.056 | |
## | 0.000 | 0.317 | 0.017 | |
## ------------------|------------|------------|------------|------------|
## virginica | 0 | 3 | 17 | 20 |
## | 6.667 | 2.561 | 20.167 | |
## | 0.000 | 0.150 | 0.850 | 0.333 |
## | 0.000 | 0.136 | 0.944 | |
## | 0.000 | 0.050 | 0.283 | |
## ------------------|------------|------------|------------|------------|
## Column Total | 20 | 22 | 18 | 60 |
## | 0.333 | 0.367 | 0.300 | |
## ------------------|------------|------------|------------|------------|
##
##