# Data Mining: Supervised Learning

# Decision Trees

# Iris Classification

#install.packages("C50")
#install.packages("tree")

library(C50)
## Warning: package 'C50' was built under R version 3.5.1
library(tree)
## Warning: package 'tree' was built under R version 3.5.1
#data()
data("iris")
#iris <- data(iris)


# Split the iris data into one data frame per species.
iris_setosa <- iris[iris[["Species"]] == "setosa", ]
iris_versicolor <- iris[iris[["Species"]] == "versicolor", ]
iris_virginica <- iris[iris[["Species"]] == "virginica", ]

# Deterministic hold-out split: rows 1-25 of each species form the training
# set and rows 26-50 of each species form the test set, so every species
# contributes the same number of rows to both sets.
iris_train <- rbind(
  iris_setosa[seq_len(25), ],
  iris_versicolor[seq_len(25), ],
  iris_virginica[seq_len(25), ]
)
iris_test <- rbind(
  iris_setosa[seq.int(26, 50), ],
  iris_versicolor[seq.int(26, 50), ],
  iris_virginica[seq.int(26, 50), ]
)

# Fit a C5.0 decision tree on the training set: every column except the fifth
# is a predictor, and Species is the class label.
irisc5.0_train <- C5.0(x = iris_train[, -5], y = iris_train$Species)
plot(irisc5.0_train)

# Training accuracy: share of training rows whose predicted species matches
# the recorded species.
mean(predict(irisc5.0_train, newdata = iris_train) == iris_train$Species)
## [1] 0.9733333
# Predict the held-out rows; the predictions are reused for the cross table.
predc5.0_test <- predict(irisc5.0_train, iris_test)

# Test accuracy on the held-out rows.
mean(iris_test$Species == predc5.0_test)
## [1] 0.9466667
#install.packages("gmodels")

library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
# Confusion matrix for the test set: actual species in rows, C5.0 predictions
# in columns. Each cell also reports its chi-square contribution and its share
# of the row, column, and table totals (see "Cell Contents" in the output).
CrossTable(iris_test$Species, predc5.0_test)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  75 
## 
##  
##                   | predc5.0_test 
## iris_test$Species |     setosa | versicolor |  virginica |  Row Total | 
## ------------------|------------|------------|------------|------------|
##            setosa |         25 |          0 |          0 |         25 | 
##                   |     33.333 |      9.000 |      7.667 |            | 
##                   |      1.000 |      0.000 |      0.000 |      0.333 | 
##                   |      1.000 |      0.000 |      0.000 |            | 
##                   |      0.333 |      0.000 |      0.000 |            | 
## ------------------|------------|------------|------------|------------|
##        versicolor |          0 |         24 |          1 |         25 | 
##                   |      8.333 |     25.000 |      5.797 |            | 
##                   |      0.000 |      0.960 |      0.040 |      0.333 | 
##                   |      0.000 |      0.889 |      0.043 |            | 
##                   |      0.000 |      0.320 |      0.013 |            | 
## ------------------|------------|------------|------------|------------|
##         virginica |          0 |          3 |         22 |         25 | 
##                   |      8.333 |      4.000 |     26.797 |            | 
##                   |      0.000 |      0.120 |      0.880 |      0.333 | 
##                   |      0.000 |      0.111 |      0.957 |            | 
##                   |      0.000 |      0.040 |      0.293 |            | 
## ------------------|------------|------------|------------|------------|
##      Column Total |         25 |         27 |         23 |         75 | 
##                   |      0.333 |      0.360 |      0.307 |            | 
## ------------------|------------|------------|------------|------------|
## 
##