Iris Data - ANNetwork
- Debora Drucker,
- Jorge Gomez,
- Henrique Mello,
- Juliano Melis
Objective
- Improve an artificial neural network model that classifies Iris species
First Step
- Encode each species as its own TRUE/FALSE indicator column; the network uses these columns as its output targets
# Load the built-in iris data and append one logical indicator column per
# species (TRUE where the row belongs to that species). The columns are added
# in the order setosa, virginica, versicolor.
data(iris)
indicator_cols <- c("setosa", "virginica", "versicolor")
iris[indicator_cols] <- lapply(indicator_cols, function(sp) iris$Species == sp)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species setosa
## 1 5.1 3.5 1.4 0.2 setosa TRUE
## 2 4.9 3.0 1.4 0.2 setosa TRUE
## 3 4.7 3.2 1.3 0.2 setosa TRUE
## 4 4.6 3.1 1.5 0.2 setosa TRUE
## 5 5.0 3.6 1.4 0.2 setosa TRUE
## 6 5.4 3.9 1.7 0.4 setosa TRUE
## virginica versicolor
## 1 FALSE FALSE
## 2 FALSE FALSE
## 3 FALSE FALSE
## 4 FALSE FALSE
## 5 FALSE FALSE
## 6 FALSE FALSE
Second Step
- Split the data into a training set and a test set
# Randomly pick half of the row indices for training; every remaining row
# goes to the test set.
id_train <- sample(nrow(iris), size = nrow(iris) / 2)
iris_train <- iris[id_train, ]
iris_test <- iris[-id_train, ]
# Species counts in each half (the draw is not stratified by species).
table(iris_test$Species)
##
## setosa versicolor virginica
## 24 25 26
table(iris_train$Species)
##
## setosa versicolor virginica
## 26 25 24
Third step
- Fit the network as Raphael demonstrated in class
# List the columns available in the test set before writing the model formula.
colnames(iris_test)
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
## [5] "Species" "setosa" "virginica" "versicolor"
# Attach neuralnet, installing it first when it is missing. require() returns
# FALSE without attaching anything when the package is absent, so the package
# must be attached explicitly after a fresh install; otherwise neuralnet()
# would not be found on the first run.
if (!require(neuralnet)) {
  install.packages("neuralnet")
  library(neuralnet)
}
## Loading required package: neuralnet
## Warning: package 'neuralnet' was built under R version 3.3.3
# First model: predict the three species indicators from the two sepal
# measurements only.
#   hidden = 3            one hidden layer with 3 neurons
#   err.fct = "ce"        cross-entropy error
#   linear.output = FALSE apply the activation function to the output neurons
#                         (classification rather than regression)
#   lifesign = "minimal"  amount of progress output: "none", "minimal" or "full"
#   rep = 2               train twice from different random starting weights
#   stepmax = 1e+06       maximum number of training steps per repetition
iris_net <- neuralnet(
  setosa + versicolor + virginica ~ Sepal.Width + Sepal.Length,
  data = iris_train,
  hidden = 3,
  err.fct = "ce",
  linear.output = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
  lifesign = "minimal",
  rep = 2,
  stepmax = 1e+06
)
## hidden: 3 thresh: 0.01 rep: 1/2 steps: 192353 error: 18.92344 time: 28.19 secs
## hidden: 3 thresh: 0.01 rep: 2/2 steps: 231344 error: 24.67674 time: 34.16 secs
# Draw the repetition with the smallest error.
plot(iris_net, rep = "best")

Checking the model
- Build a confusion matrix from the test-set predictions
# Score the test rows with the two-feature network. The covariate columns are
# passed in the same order as in the model formula.
iris_prediction <- compute(
  iris_net,
  iris_test[, c("Sepal.Width", "Sepal.Length")]
)
# For every row, take the output neuron with the largest activation.
idx <- apply(iris_prediction$net.result, 1, FUN = which.max)
# The output columns follow the left-hand side of the formula
# (setosa + versicolor + virginica), so the labels must be listed in that
# order. Listing them as setosa, virginica, versicolor swaps the versicolor and
# virginica predictions in the confusion matrix.
predicted <- c("setosa", "versicolor", "virginica")[idx]
table(predicted, iris_test$Species)
##
## predicted setosa versicolor virginica
## setosa 23 1 0
## versicolor 1 13 10
## virginica 0 11 16
Verifying the dataset
## Loading required package: ggplot2

Verifying the dataset

Verifying the dataset

Verifying the dataset

Improving the model
# Second model: same architecture as before (one hidden layer, 3 neurons), but
# trained on all four measurements instead of the sepal ones only.
#   err.fct = "ce"        cross-entropy error
#   linear.output = FALSE apply the activation function to the output neurons
#   lifesign = "minimal"  amount of progress output: "none", "minimal" or "full"
#   rep = 2               train twice from different random starting weights
#   stepmax = 1e+06       maximum number of training steps per repetition
iris_net1 <- neuralnet(
  setosa + versicolor + virginica ~
    Sepal.Width + Sepal.Length + Petal.Width + Petal.Length,
  data = iris_train,
  hidden = 3,
  err.fct = "ce",
  linear.output = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
  lifesign = "minimal",
  rep = 2,
  stepmax = 1e+06
)
## hidden: 3 thresh: 0.01 rep: 1/2 steps: 13671 error: 0.00109 time: 1.99 secs
## hidden: 3 thresh: 0.01 rep: 2/2 steps: 110721 error: 0.00418 time: 15.86 secs
Improving the model
# Score the test rows with the four-feature network; the covariate columns are
# passed in the same order as in the model formula.
iris_prediction <- compute(
  iris_net1,
  iris_test[, c("Sepal.Width", "Sepal.Length", "Petal.Width", "Petal.Length")]
)
# Index of the strongest output neuron per row, mapped to the species name
# (labels listed in formula order).
idx <- apply(iris_prediction$net.result, MARGIN = 1, FUN = which.max)
predicted <- c("setosa", "versicolor", "virginica")[idx]
# Confusion matrix: predicted species in rows, true species in columns.
table(predicted, iris_test$Species)
##
## predicted setosa versicolor virginica
## setosa 24 0 0
## versicolor 0 22 12
## virginica 0 3 14
Improving the model
# Draw the four-feature network, using the repetition with the smallest error.
plot(iris_net1, rep='best')

Improving the model
# Third model: all four measurements, with a deeper architecture.
#   hidden = c(2, 10)     two hidden layers with 2 and 10 neurons
#   err.fct = "ce"        cross-entropy error
#   linear.output = FALSE apply the activation function to the output neurons
#   lifesign = "minimal"  amount of progress output: "none", "minimal" or "full"
#   rep = 2               train twice from different random starting weights
#   stepmax = 1e+06       maximum number of training steps per repetition
iris_net2 <- neuralnet(
  setosa + versicolor + virginica ~
    Sepal.Width + Sepal.Length + Petal.Width + Petal.Length,
  data = iris_train,
  hidden = c(2, 10),
  err.fct = "ce",
  linear.output = FALSE, # spelled out: `F` is an ordinary variable and can be reassigned
  lifesign = "minimal",
  rep = 2,
  stepmax = 1e+06
)
## hidden: 2, 10 thresh: 0.01 rep: 1/2 steps: 25582 error: 0.00017 time: 6.33 secs
## hidden: 2, 10 thresh: 0.01 rep: 2/2 steps: 9086 error: 0.00073 time: 2.26 secs
Improving the model
# Score the test rows with the two-hidden-layer network; the covariate columns
# are passed in the same order as in the model formula.
iris_prediction <- compute(
  iris_net2,
  iris_test[, c("Sepal.Width", "Sepal.Length", "Petal.Width", "Petal.Length")]
)
# Index of the strongest output neuron per row, mapped to the species name
# (labels listed in formula order).
idx <- apply(iris_prediction$net.result, MARGIN = 1, FUN = which.max)
predicted <- c("setosa", "versicolor", "virginica")[idx]
# Confusion matrix: predicted species in rows, true species in columns.
table(predicted, iris_test$Species)
##
## predicted setosa versicolor virginica
## setosa 24 1 0
## versicolor 0 22 10
## virginica 0 2 16
Improving the model
# Draw the two-hidden-layer network, using the repetition with the smallest error.
plot(iris_net2, rep='best')

Improving the model
- Giving up. Let’s try LDA!
# library() stops with an error if MASS is missing; require() would only return
# FALSE and let the script fail later at lda().
library(MASS)
# Linear discriminant analysis on the same four measurements and training rows.
iris_lda <- lda(
  Species ~ Sepal.Width + Sepal.Length + Petal.Width + Petal.Length,
  data = iris_train
)
# Posterior class probabilities for the test rows (one column per species).
iris_lda_prediction <- predict(iris_lda, iris_test)$posterior
# Most probable class per row. The label is read from the posterior matrix's
# own column names, so it cannot fall out of step with the column order.
id_lda <- apply(iris_lda_prediction, 1, FUN = which.max)
predicted_lda <- colnames(iris_lda_prediction)[id_lda]
Improving the model
# Confusion matrix for LDA: predicted species in rows, true species in columns.
table(predicted_lda, iris_test$Species)
##
## predicted_lda setosa versicolor virginica
## setosa 24 0 0
## versicolor 0 25 5
## virginica 0 0 21
Conclusion
- Old methods might be useful sometimes…