Why multiple layers?

Preparing the data

The split into training and test sets is done separately for each species, so both sets stay balanced across the three classes.

aggregate(cbind(n.cases = Sepal.Length) ~ Species, iris, length)
##      Species n.cases
## 1     setosa      50
## 2 versicolor      50
## 3  virginica      50
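The same per-species counts can be read off more directly with table(); this check is an aside, not part of the original preparation code.

table(iris$Species)
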
set.seed(18121842)
iris.setosa <- iris[iris$Species == 'setosa',]
iris.versicolor <- iris[iris$Species == 'versicolor',]
iris.virginica <- iris[iris$Species == 'virginica',]

split_data <- function(N, p) {
  # Draw a random training set of floor(N * p) row indices out of 1:N;
  # the remaining indices form the test set.
  stopifnot(p > 0 & p < 1)
  n <- floor(N * p)
  trn.index <- sample.int(N, n, replace = FALSE)
  test.index <- setdiff(1:N, trn.index)
  list(train = trn.index, test = test.index)
}

setosa.split <- split_data(nrow(iris.setosa), 0.2)
setosa.train <- iris.setosa[setosa.split[["train"]],]
setosa.test  <- iris.setosa[setosa.split[["test"]],]

versicolor.split <- split_data(nrow(iris.versicolor), 0.2)
versicolor.train <- iris.versicolor[versicolor.split[["train"]],]
versicolor.test  <- iris.versicolor[versicolor.split[["test"]],]

virginica.split <- split_data(nrow(iris.virginica), 0.2)
virginica.train <- iris.virginica[virginica.split[["train"]],]
virginica.test  <- iris.virginica[virginica.split[["test"]],]

train.data <- rbind(setosa.train, versicolor.train, virginica.train)
test.data  <- rbind(setosa.test, versicolor.test, virginica.test)

rm(
  setosa.split,
  versicolor.split,
  virginica.split,
  iris.setosa,
  iris.versicolor,
  iris.virginica
)
rm(setosa.train, versicolor.train, virginica.train)
rm(setosa.test, versicolor.test, virginica.test)
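
A quick sanity check, not part of the original code: with p = 0.2 and 50 flowers per species, each species should contribute 10 rows to the training set and 40 rows to the test set.

table(train.data$Species)
table(test.data$Species)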

Training the network

The model is fitted with nnet() from the nnet package: size sets the number of units in the single hidden layer, decay is the weight-decay penalty, and maxit caps the number of optimisation iterations.

library(nnet)

nn.1 <-
  nnet(
    Species ~ .,
    data = train.data,
    size = 2,
    decay = 1e-5,
    maxit = 50
  )
## # weights:  19
## initial  value 35.411445 
## iter  10 value 32.955229
## iter  20 value 14.638913
## iter  30 value 13.888475
## iter  40 value 13.869530
## final  value 13.868609 
## converged
summary(nn.1)
## a 4-2-3 network with 19 weights
## options were - softmax modelling  decay=1e-05
##  b->h1 i1->h1 i2->h1 i3->h1 i4->h1 
##   0.02   1.00   4.49  -8.03  -3.67 
##  b->h2 i1->h2 i2->h2 i3->h2 i4->h2 
##  -0.20  -1.34  -0.28  -0.83  -0.53 
##  b->o1 h1->o1 h2->o1 
##  -6.14  14.48   0.09 
##  b->o2 h1->o2 h2->o2 
##   3.48  -5.69  -0.12 
##  b->o3 h1->o3 h2->o3 
##   3.48  -7.95   0.30
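
The 19 weights reported above match the 4-2-3 architecture; a quick check of the arithmetic (not in the original):

# (4 inputs + 1 bias) feeding 2 hidden units, plus (2 hidden units + 1 bias)
# feeding 3 output units
(4 + 1) * 2 + (2 + 1) * 3  # = 19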

Testing the network

predicted.species <- predict(nn.1, test.data, type = "class")
comparison <- data.frame(actual = test.data$Species, predicted = predicted.species)

# How did we do?
table(comparison)
##             predicted
## actual       setosa versicolor
##   setosa         40          0
##   versicolor      0         40
##   virginica       0         40
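
The confusion matrix can be condensed into a single accuracy figure; a small addition, not in the original code:

# Proportion of test flowers whose predicted species matches the true species.
mean(predicted.species == test.data$Species)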

Model with more iterations.

The first network classified every virginica flower in the test set as versicolor, so the first thing to try is giving the optimiser more iterations.

nn.2 <-
  nnet(
    Species ~ .,
    data = train.data,
    size = 2,
    decay = 1e-5,
    maxit = 100
  )
## # weights:  19
## initial  value 37.118395 
## iter  10 value 3.354185
## iter  20 value 0.356276
## iter  30 value 0.070115
## iter  40 value 0.062318
## iter  50 value 0.055942
## iter  60 value 0.054585
## iter  70 value 0.052726
## iter  80 value 0.040042
## iter  90 value 0.033890
## iter 100 value 0.031711
## final  value 0.031711 
## stopped after 100 iterations
predicted.species <- predict(nn.2, test.data, type = "class")
comparison <- data.frame(actual = test.data$Species, predicted = predicted.species)

# How did we do?
table(comparison)
##             predicted
## actual       setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0         12        28
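
To see where the remaining errors come from, the raw softmax outputs can be inspected; a sketch, not part of the original code:

# type = "raw" returns the per-class probabilities instead of the winning label;
# the virginica test rows show which flowers the network assigns to versicolor.
probs <- predict(nn.2, test.data, type = "raw")
head(probs[test.data$Species == "virginica", ])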

Model with more units in the hidden layer.

nn.3 <-
  nnet(
    Species ~ .,
    data = train.data,
    size = 4,
    decay = 1e-5,
    maxit = 50
  )
## # weights:  35
## initial  value 39.909614 
## iter  10 value 13.743053
## iter  20 value 2.588907
## iter  30 value 0.436368
## iter  40 value 0.027481
## iter  50 value 0.022585
## final  value 0.022585 
## stopped after 50 iterations
predicted.species <- predict(nn.3, test.data, type = "class")
comparison <- data.frame(actual = test.data$Species, predicted = predicted.species)

# How did we do?
table(comparison)
##             predicted
## actual       setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0         11        29

A second fit of the model with more units in the hidden layer (same settings, different random starting weights).

nn.4 <-
  nnet(
    Species ~ .,
    data = train.data,
    size = 4,
    decay = 1e-5,
    maxit = 50
  )
## # weights:  35
## initial  value 35.989656 
## iter  10 value 9.590075
## iter  20 value 0.263699
## iter  30 value 0.072545
## iter  40 value 0.065118
## iter  50 value 0.054472
## final  value 0.054472 
## stopped after 50 iterations
predicted.species <- predict(nn.4, test.data, type = "class")
comparison <- data.frame(actual = test.data$Species, predicted = predicted.species)

# How did we do?
table(comparison)
##             predicted
## actual       setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0         11        29

Model with fewer features.

nn.5 <-
  nnet(
    Species ~ Petal.Length + Petal.Width,
    data = train.data,
    size = 4,
    decay = 1e-5,
    maxit = 50
  )
## # weights:  27
## initial  value 36.393243 
## iter  10 value 11.117675
## iter  20 value 0.566856
## iter  30 value 0.127323
## iter  40 value 0.065351
## iter  50 value 0.055558
## final  value 0.055558 
## stopped after 50 iterations
predicted.species <- predict(nn.5, test.data, type = "class")
comparison <- data.frame(actual = test.data$Species, predicted = predicted.species)
table(comparison)
##             predicted
## actual       setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0          7        33

Model with fewer features and more iterations.

nn.6 <-
  nnet(
    Species ~ Petal.Length + Petal.Width,
    data = train.data,
    size = 4,
    decay = 1e-5,
    maxit = 100
  )
## # weights:  27
## initial  value 39.684126 
## iter  10 value 8.099589
## iter  20 value 0.208046
## iter  30 value 0.082138
## iter  40 value 0.071433
## iter  50 value 0.066694
## iter  60 value 0.050155
## iter  70 value 0.047392
## iter  80 value 0.040535
## iter  90 value 0.036129
## iter 100 value 0.034321
## final  value 0.034321 
## stopped after 100 iterations
predicted.species <- predict(nn.6, test.data, type = "class")
comparison <- data.frame(actual = test.data$Species, predicted = predicted.species)
table(comparison)
##             predicted
## actual       setosa versicolor virginica
##   setosa         40          0         0
##   versicolor      0         40         0
##   virginica       0          7        33
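
A compact way to compare all six fits on the same test set (an addition, not part of the original code):

# Test-set accuracy for each fitted network.
fits <- list(nn.1 = nn.1, nn.2 = nn.2, nn.3 = nn.3,
             nn.4 = nn.4, nn.5 = nn.5, nn.6 = nn.6)
sapply(fits, function(m)
  mean(predict(m, test.data, type = "class") == test.data$Species))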

Closing remarks