Data mining: supervised learning

Decision trees

Iris Classification

#install.packages("C50")
#install.packages("tree")
#install.packages("gmodels")
library(C50)
library(tree)
library(gmodels)
# Partition the iris data by species: one data frame per class label.
iris_setosa <- subset(iris, Species == "setosa")
iris_versicolor <- subset(iris, Species == "versicolor")
iris_virginica <- subset(iris, Species == "virginica")

# Stratified hold-out split: from each per-species data frame, rows 1-25 go
# to the training set and rows 26-50 go to the test set. The pieces are
# stacked in the order setosa, versicolor, virginica.
iris_train <- do.call(
  rbind,
  lapply(
    list(iris_setosa, iris_versicolor, iris_virginica),
    function(species_rows) species_rows[1:25, ]
  )
)
iris_test <- do.call(
  rbind,
  lapply(
    list(iris_setosa, iris_versicolor, iris_virginica),
    function(species_rows) species_rows[26:50, ]
  )
)
# Fit a C5.0 decision tree on the training rows. The predictors are every
# column except the fifth; the Species column is passed separately as the
# class label.
irisC5.0_train <- C5.0(x = iris_train[, -5], y = iris_train$Species)
plot(irisC5.0_train)

#View(iris_train)
# Training accuracy: share of training rows whose predicted class equals
# the recorded species.
mean(predict(irisC5.0_train, newdata = iris_train) == iris_train$Species)
## [1] 0.9733333
# Predict the held-out rows and compute the test accuracy the same way.
predC5.0_test <- predict(irisC5.0_train, newdata = iris_test)
mean(iris_test$Species == predC5.0_test)
## [1] 0.9466667
# Cross-tabulate the actual species (rows) against the predictions (columns).
CrossTable(x = iris_test$Species, y = predC5.0_test)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  75 
## 
##  
##                   | predC5.0_test 
## iris_test$Species |     setosa | versicolor |  virginica |  Row Total | 
## ------------------|------------|------------|------------|------------|
##            setosa |         25 |          0 |          0 |         25 | 
##                   |     33.333 |      9.000 |      7.667 |            | 
##                   |      1.000 |      0.000 |      0.000 |      0.333 | 
##                   |      1.000 |      0.000 |      0.000 |            | 
##                   |      0.333 |      0.000 |      0.000 |            | 
## ------------------|------------|------------|------------|------------|
##        versicolor |          0 |         24 |          1 |         25 | 
##                   |      8.333 |     25.000 |      5.797 |            | 
##                   |      0.000 |      0.960 |      0.040 |      0.333 | 
##                   |      0.000 |      0.889 |      0.043 |            | 
##                   |      0.000 |      0.320 |      0.013 |            | 
## ------------------|------------|------------|------------|------------|
##         virginica |          0 |          3 |         22 |         25 | 
##                   |      8.333 |      4.000 |     26.797 |            | 
##                   |      0.000 |      0.120 |      0.880 |      0.333 | 
##                   |      0.000 |      0.111 |      0.957 |            | 
##                   |      0.000 |      0.040 |      0.293 |            | 
## ------------------|------------|------------|------------|------------|
##      Column Total |         25 |         27 |         23 |         75 | 
##                   |      0.333 |      0.360 |      0.307 |            | 
## ------------------|------------|------------|------------|------------|
## 
## 
#iris_tree <- tree(Species~.,data=iris_train)