library('neuralnet')
# Work on a copy so the built-in `iris` data set itself is never modified.
myiris <- iris
# Append one logical indicator column per species (one-hot encoding of
# `Species`). The columns are left unnamed on purpose, so each one receives
# the deparsed comparison expression as its column name.
myiris <- cbind(
  myiris,
  myiris$Species == "setosa",
  myiris$Species == "versicolor",
  myiris$Species == "virginica"
)
# Per-column overview of the extended data frame.
summary(myiris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species myiris$Species == "setosa" myiris$Species == "versicolor"
## setosa :50 Mode :logical Mode :logical
## versicolor:50 FALSE:100 FALSE:100
## virginica :50 TRUE :50 TRUE :50
##
##
##
## myiris$Species == "virginica"
## Mode :logical
## FALSE:100
## TRUE :50
##
##
##
# Preview the first rows, including the three appended logical columns
# (which still carry their auto-generated expression names at this point).
head(myiris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## myiris$Species == "setosa" myiris$Species == "versicolor"
## 1 TRUE FALSE
## 2 TRUE FALSE
## 3 TRUE FALSE
## 4 TRUE FALSE
## 5 TRUE FALSE
## 6 TRUE FALSE
## myiris$Species == "virginica"
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
# Give the three indicator columns (positions 6 to 8) short names matching
# the species they flag, then preview the result.
names(myiris)[6:8] <- c("setosa", "versicolor", "virginica")
head(myiris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species setosa versicolor
## 1 5.1 3.5 1.4 0.2 setosa TRUE FALSE
## 2 4.9 3.0 1.4 0.2 setosa TRUE FALSE
## 3 4.7 3.2 1.3 0.2 setosa TRUE FALSE
## 4 4.6 3.1 1.5 0.2 setosa TRUE FALSE
## 5 5.0 3.6 1.4 0.2 setosa TRUE FALSE
## 6 5.4 3.9 1.7 0.4 setosa TRUE FALSE
## virginica
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
# Per-column overview again, now that the indicator columns are renamed.
summary(myiris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species setosa versicolor virginica
## setosa :50 Mode :logical Mode :logical Mode :logical
## versicolor:50 FALSE:100 FALSE:100 FALSE:100
## virginica :50 TRUE :50 TRUE :50 TRUE :50
##
##
##
# Fix the RNG seed so the random split below is reproducible.
set.seed(55)
# Draw one label per row: 1 (probability 0.7) marks a training row and
# 2 (probability 0.3) a test row. The number of labels is taken from
# nrow(myiris) rather than a literal, so the logical masks below always
# line up with the rows of the data frame.
amostra <- sample(2, nrow(myiris), replace = TRUE, prob = c(0.7, 0.3))
myiristreino <- myiris[amostra == 1, ]
myiristeste <- myiris[amostra == 2, ]
# Show the size (rows, columns) of each partition.
dim(myiristreino)
## [1] 96 8
dim(myiristeste)
## [1] 54 8
# Train a neural network on the training partition: the three species
# indicator columns are the responses and the four flower measurements are
# the predictors. `hidden = c(5, 4)` requests two hidden layers with 5 and
# 4 neurons respectively.
modelo <- neuralnet(
  formula = setosa + versicolor + virginica ~
    Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = myiristreino,
  hidden = c(5, 4)
)
# Draw the fitted network.
plot(modelo)
O resultado apresenta, para cada observação de teste, o valor de saída da rede (peso) para cada classe; o maior valor indica a classe prevista.
# Run the trained network on the test partition, feeding it only the four
# measurement columns (the same columns used as predictors during training).
teste <- compute(
  modelo,
  covariate = myiristeste[
    ,
    c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width")
  ]
)
# One row per test observation, one column per output of the network.
teste$net.result
## [,1] [,2] [,3]
## 4 9.990730e-01 1.365199e-03 4.681509e-05
## 11 1.001480e+00 -2.219619e-03 8.855900e-04
## 16 9.992308e-01 1.657043e-03 -3.554142e-04
## 18 1.000410e+00 -1.725989e-03 -5.384136e-04
## 22 9.987055e-01 -1.794166e-04 -2.222609e-03
## 28 1.001024e+00 -1.392768e-03 8.420464e-04
## 32 9.997729e-01 1.959120e-03 1.871703e-03
## 33 1.000150e+00 1.090337e-03 7.588675e-04
## 35 9.997873e-01 2.128347e-03 2.003279e-03
## 36 1.001383e+00 -2.203680e-03 -5.339576e-05
## 38 1.001021e+00 -2.646063e-03 -5.132597e-04
## 39 9.993140e-01 -7.059004e-04 -1.254017e-03
## 42 9.923155e-01 2.521667e-02 1.218037e-02
## 43 9.993098e-01 -2.426240e-03 -2.981554e-03
## 44 9.952871e-01 2.950440e-03 -3.381167e-03
## 45 9.958149e-01 6.127366e-03 -1.276459e-03
## 46 9.987478e-01 3.656745e-03 1.838186e-03
## 47 9.996807e-01 7.869665e-04 -1.070227e-04
## 49 1.001183e+00 -1.873562e-03 5.627091e-04
## 51 -3.763370e-04 9.988349e-01 1.099130e-03
## 52 -2.762307e-04 1.001414e+00 -1.408183e-03
## 54 -9.554934e-05 1.004405e+00 -4.323195e-03
## 55 -5.699568e-05 1.005338e+00 -5.230351e-03
## 58 4.565600e-04 9.995910e-01 2.633049e-04
## 60 -2.083388e-04 1.002506e+00 -2.472786e-03
## 61 -8.598424e-05 1.000514e+00 -5.609538e-04
## 64 -1.577308e-05 1.005488e+00 -5.381433e-03
## 66 -3.712460e-04 9.985182e-01 1.404755e-03
## 70 -1.011954e-04 9.994821e-01 4.386073e-04
## 74 -1.745247e-04 1.002140e+00 -2.123107e-03
## 78 -6.830261e-04 2.074590e-01 7.923998e-01
## 79 3.028198e-05 1.006274e+00 -6.147448e-03
## 81 -1.284744e-04 9.996328e-01 2.963328e-04
## 84 -5.932406e-04 -8.863543e-03 1.008719e+00
## 87 -2.001486e-04 1.002976e+00 -2.928272e-03
## 88 -2.999758e-04 1.002082e+00 -2.051019e-03
## 89 -2.900384e-04 9.988173e-01 1.105286e-03
## 91 -9.152633e-05 1.003708e+00 -3.649536e-03
## 92 -1.455232e-04 1.003406e+00 -3.351150e-03
## 96 -1.461193e-04 9.986999e-01 1.200768e-03
## 100 -2.527700e-04 1.000548e+00 -5.728662e-04
## 104 -7.254470e-05 -2.754730e-03 1.002746e+00
## 106 4.701378e-04 3.508095e-03 9.966201e-01
## 108 -7.992835e-05 -2.759759e-03 1.002751e+00
## 112 -1.705329e-05 -2.078190e-03 1.002084e+00
## 113 1.689799e-04 4.799085e-05 1.000005e+00
## 121 4.348533e-04 3.064279e-03 9.970538e-01
## 125 4.088910e-05 -1.455186e-03 1.001474e+00
## 128 -6.939103e-04 -9.436565e-03 1.009265e+00
## 130 -8.722329e-04 -1.209944e-02 1.011884e+00
## 133 5.785414e-04 4.699080e-03 9.954546e-01
## 140 5.237609e-06 -1.833493e-03 1.001845e+00
## 144 5.449598e-04 4.308112e-03 9.958370e-01
## 150 -5.533607e-04 -8.413872e-03 1.008279e+00
Vamos criar um dataframe com uma coluna que indica a classe de maior peso em cada classificação.
# Turn the network outputs into a data frame with one score column per
# species, in the same order as the responses in the model formula.
resultado <- as.data.frame(teste$net.result)
names(resultado) <- c("setosa", "versicolor", "virginica")
# Predicted class = name of the column holding the highest score in each
# row (first column wins on ties). The right-hand side is evaluated while
# `resultado` still contains only the three score columns.
resultado$class <- names(resultado)[max.col(resultado, ties.method = "first")]
head(resultado)
## setosa versicolor virginica class
## 4 0.9990730 0.0013651988 4.681509e-05 setosa
## 11 1.0014803 -0.0022196195 8.855900e-04 setosa
## 16 0.9992308 0.0016570435 -3.554142e-04 setosa
## 18 1.0004096 -0.0017259893 -5.384136e-04 setosa
## 22 0.9987055 -0.0001794166 -2.222609e-03 setosa
## 28 1.0010235 -0.0013927681 8.420464e-04 setosa
Observamos que nossa classificação alcançou quase 100% de acerto (96,3%).
# Confusion matrix: predicted class in the rows, true species in the columns.
# The predictions are a character vector, so they are converted to a factor
# with the same levels as `Species`. This keeps the table square and its
# rows aligned with its columns even when some species is never predicted;
# without it, table() would drop that row and diag() would read the wrong cells.
confusao <- table(
  factor(resultado$class, levels = levels(myiristeste$Species)),
  myiristeste$Species
)
# Overall accuracy in percent: correctly classified (diagonal) over total.
100 * sum(diag(confusao)) / sum(confusao)
## [1] 96.2963
A matriz de confusão mostra os acertos e erros para cada classe (linhas: classe prevista; colunas: classe real). Neste caso, chegamos próximo de 100% de acerto.
# Print the confusion matrix built by table() above: its first argument
# (predicted class) forms the rows and its second (true species) the columns.
confusao
##
## setosa versicolor virginica
## setosa 19 0 0
## versicolor 0 20 0
## virginica 0 2 13