Rede Neural com o banco de dados IRIS usando o pacote Neuralnet

library('neuralnet')

Primeiro vamos criar uma cópia dos dados iris e fazer a binarização da coluna Species, criando uma coluna lógica para cada espécie.

# Work on a copy so the built-in iris data set is left untouched.
myiris <- iris
# Append one logical indicator column per species in a single cbind() call.
# The columns are unnamed here, so each takes the text of its expression as
# its column name.
myiris <- cbind(
  myiris,
  myiris$Species == "setosa",
  myiris$Species == "versicolor",
  myiris$Species == "virginica"
)
summary(myiris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species   myiris$Species == "setosa" myiris$Species == "versicolor"
##  setosa    :50   Mode :logical              Mode :logical                 
##  versicolor:50   FALSE:100                  FALSE:100                     
##  virginica :50   TRUE :50                   TRUE :50                      
##                                                                           
##                                                                           
##                                                                           
##  myiris$Species == "virginica"
##  Mode :logical                
##  FALSE:100                    
##  TRUE :50                     
##                               
##                               
## 
# Preview the first rows to check the three logical columns appended to myiris.
head(myiris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
##   myiris$Species == "setosa" myiris$Species == "versicolor"
## 1                       TRUE                          FALSE
## 2                       TRUE                          FALSE
## 3                       TRUE                          FALSE
## 4                       TRUE                          FALSE
## 5                       TRUE                          FALSE
## 6                       TRUE                          FALSE
##   myiris$Species == "virginica"
## 1                         FALSE
## 2                         FALSE
## 3                         FALSE
## 4                         FALSE
## 5                         FALSE
## 6                         FALSE

Em seguida vamos renomear as colunas lógicas criadas

# Columns 6 to 8 hold the logical species indicators; give them short names
# in one vectorised assignment.
names(myiris)[6:8] <- c("setosa", "versicolor", "virginica")
head(myiris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species setosa versicolor
## 1          5.1         3.5          1.4         0.2  setosa   TRUE      FALSE
## 2          4.9         3.0          1.4         0.2  setosa   TRUE      FALSE
## 3          4.7         3.2          1.3         0.2  setosa   TRUE      FALSE
## 4          4.6         3.1          1.5         0.2  setosa   TRUE      FALSE
## 5          5.0         3.6          1.4         0.2  setosa   TRUE      FALSE
## 6          5.4         3.9          1.7         0.4  setosa   TRUE      FALSE
##   virginica
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
# Summarise every column, including the renamed logical indicator columns.
summary(myiris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species     setosa        versicolor      virginica      
##  setosa    :50   Mode :logical   Mode :logical   Mode :logical  
##  versicolor:50   FALSE:100       FALSE:100       FALSE:100      
##  virginica :50   TRUE :50        TRUE :50        TRUE :50       
##                                                                 
##                                                                 
## 

Vamos criar os conjuntos de teste e de treino com aproximadamente 30% e 70% dos dados, respectivamente (a amostragem é aleatória, portanto as proporções não são exatas).

# Reproducible random split: each row is independently labelled 1 (training,
# probability 0.7) or 2 (test, probability 0.3), so the partition sizes are
# only approximately 70% / 30%.
set.seed(55)
# Draw one label per row of myiris instead of hard-coding the row count.
amostra <- sample(2, nrow(myiris), replace = TRUE, prob = c(0.7, 0.3))
myiristeste <- myiris[amostra == 2, ]
myiristreino <- myiris[amostra == 1, ]
dim(myiristreino)
## [1] 96  8
# Number of rows and columns in the test partition.
dim(myiristeste)
## [1] 54  8

Agora vamos criar o modelo

# Fit a network with the three logical species indicators as outputs and the
# four flower measurements as inputs, using two hidden layers of 5 and 4 units.
modelo <- neuralnet(
  setosa + versicolor + virginica ~
    Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = myiristreino,
  hidden = c(5, 4)
)

Parâmetros do modelo

# Plot the fitted model object.
plot(modelo)

Testando o modelo

O resultado apresenta a saída da rede para cada classe (uma coluna por espécie, na ordem setosa, versicolor, virginica); a coluna com o maior valor indica a classe prevista para cada observação.

# Run the test rows through the trained network.
# - The call is namespace-qualified so that another attached package exporting
#   a function named compute() (e.g. dplyr) cannot mask it.
# - The covariates are selected by name, in the same order as in the model
#   formula, rather than by column position.
# NOTE(review): the neuralnet documentation marks compute() as deprecated in
# favour of predict(); it is kept here so `teste` stays a list with a
# `net.result` element — confirm before migrating.
teste <- neuralnet::compute(
  modelo,
  myiristeste[
    ,
    c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
  ]
)
# One row per test observation, one column per output of the network.
teste$net.result
##              [,1]          [,2]          [,3]
## 4    9.990730e-01  1.365199e-03  4.681509e-05
## 11   1.001480e+00 -2.219619e-03  8.855900e-04
## 16   9.992308e-01  1.657043e-03 -3.554142e-04
## 18   1.000410e+00 -1.725989e-03 -5.384136e-04
## 22   9.987055e-01 -1.794166e-04 -2.222609e-03
## 28   1.001024e+00 -1.392768e-03  8.420464e-04
## 32   9.997729e-01  1.959120e-03  1.871703e-03
## 33   1.000150e+00  1.090337e-03  7.588675e-04
## 35   9.997873e-01  2.128347e-03  2.003279e-03
## 36   1.001383e+00 -2.203680e-03 -5.339576e-05
## 38   1.001021e+00 -2.646063e-03 -5.132597e-04
## 39   9.993140e-01 -7.059004e-04 -1.254017e-03
## 42   9.923155e-01  2.521667e-02  1.218037e-02
## 43   9.993098e-01 -2.426240e-03 -2.981554e-03
## 44   9.952871e-01  2.950440e-03 -3.381167e-03
## 45   9.958149e-01  6.127366e-03 -1.276459e-03
## 46   9.987478e-01  3.656745e-03  1.838186e-03
## 47   9.996807e-01  7.869665e-04 -1.070227e-04
## 49   1.001183e+00 -1.873562e-03  5.627091e-04
## 51  -3.763370e-04  9.988349e-01  1.099130e-03
## 52  -2.762307e-04  1.001414e+00 -1.408183e-03
## 54  -9.554934e-05  1.004405e+00 -4.323195e-03
## 55  -5.699568e-05  1.005338e+00 -5.230351e-03
## 58   4.565600e-04  9.995910e-01  2.633049e-04
## 60  -2.083388e-04  1.002506e+00 -2.472786e-03
## 61  -8.598424e-05  1.000514e+00 -5.609538e-04
## 64  -1.577308e-05  1.005488e+00 -5.381433e-03
## 66  -3.712460e-04  9.985182e-01  1.404755e-03
## 70  -1.011954e-04  9.994821e-01  4.386073e-04
## 74  -1.745247e-04  1.002140e+00 -2.123107e-03
## 78  -6.830261e-04  2.074590e-01  7.923998e-01
## 79   3.028198e-05  1.006274e+00 -6.147448e-03
## 81  -1.284744e-04  9.996328e-01  2.963328e-04
## 84  -5.932406e-04 -8.863543e-03  1.008719e+00
## 87  -2.001486e-04  1.002976e+00 -2.928272e-03
## 88  -2.999758e-04  1.002082e+00 -2.051019e-03
## 89  -2.900384e-04  9.988173e-01  1.105286e-03
## 91  -9.152633e-05  1.003708e+00 -3.649536e-03
## 92  -1.455232e-04  1.003406e+00 -3.351150e-03
## 96  -1.461193e-04  9.986999e-01  1.200768e-03
## 100 -2.527700e-04  1.000548e+00 -5.728662e-04
## 104 -7.254470e-05 -2.754730e-03  1.002746e+00
## 106  4.701378e-04  3.508095e-03  9.966201e-01
## 108 -7.992835e-05 -2.759759e-03  1.002751e+00
## 112 -1.705329e-05 -2.078190e-03  1.002084e+00
## 113  1.689799e-04  4.799085e-05  1.000005e+00
## 121  4.348533e-04  3.064279e-03  9.970538e-01
## 125  4.088910e-05 -1.455186e-03  1.001474e+00
## 128 -6.939103e-04 -9.436565e-03  1.009265e+00
## 130 -8.722329e-04 -1.209944e-02  1.011884e+00
## 133  5.785414e-04  4.699080e-03  9.954546e-01
## 140  5.237609e-06 -1.833493e-03  1.001845e+00
## 144  5.449598e-04  4.308112e-03  9.958370e-01
## 150 -5.533607e-04 -8.413872e-03  1.008279e+00

Criação do dataframe

Vamos criar um dataframe com uma coluna adicional (class) que indica, para cada observação, a classe com o maior valor de saída da rede.

# Turn the network outputs into a data frame with one named score column per
# species.
resultado <- as.data.frame(teste$net.result)
names(resultado)[1:3] <- c("setosa", "versicolor", "virginica")
# For every row, pick the name of the column holding the largest score;
# ties are resolved in favour of the first such column.
resultado$class <- names(resultado)[1:3][
  max.col(resultado[, 1:3], ties.method = "first")
]
head(resultado)
##       setosa    versicolor     virginica  class
## 4  0.9990730  0.0013651988  4.681509e-05 setosa
## 11 1.0014803 -0.0022196195  8.855900e-04 setosa
## 16 0.9992308  0.0016570435 -3.554142e-04 setosa
## 18 1.0004096 -0.0017259893 -5.384136e-04 setosa
## 22 0.9987055 -0.0001794166 -2.222609e-03 setosa
## 28 1.0010235 -0.0013927681  8.420464e-04 setosa

Criando a matriz de confusão para saber o percentual de acerto

Observamos que a classificação alcançou cerca de 96% de acerto no conjunto de teste (52 de 54 observações).

# Confusion table: rows are the predicted classes, columns the true species.
# The predictions are a character vector, so they are converted to a factor
# with the same levels as Species. Without this, a species that is never
# predicted would be missing from the rows, the table would not be square and
# diag() would no longer line up predicted and true classes.
confusao <- table(
  factor(resultado$class, levels = levels(myiristeste$Species)),
  myiristeste$Species
)
# Overall accuracy in percent: correctly classified rows (the diagonal)
# divided by all test rows.
sum(diag(confusao)) * 100 / sum(confusao)
## [1] 96.2963

Observando a Matriz de confusão

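A matriz de confusão mostra os acertos e erros por classe: as linhas são as classes previstas e as colunas são as espécies reais. Neste caso, os únicos erros foram duas flores versicolor classificadas como virginica, o que resulta em aproximadamente 96% de acerto.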

# Print the confusion table (first argument of table() in rows, second in columns).
confusao
##             
##              setosa versicolor virginica
##   setosa         19          0         0
##   versicolor      0         20         0
##   virginica       0          2        13