The goal of this tutorial is to build our first neural network predictive model.
library(ggplot2)
library(caret)
## Loading required package: lattice
library(nnet)
# In this tutorial we are going to use the iris dataset
data("iris")
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# We plot petal length against petal width, coloured by species
ggplot(data = iris, aes(x = Petal.Width, y = Petal.Length)) + geom_point(aes(color = Species))
# It's very important to normalize the data before training a neural network
# We also have to convert the Species factor to a numeric code so that it can be normalized along with the other columns
iris_norm <- iris
iris_norm$Species <- as.numeric(iris$Species)
iris_norm <- as.data.frame(apply(iris_norm, 2, function(x) (x - min(x))/(max(x)-min(x))))
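As a quick sanity check (a sketch, not part of the original analysis), we can confirm that every column of the normalized data now lies between 0 and 1:
# Each column should now range from exactly 0 to 1
sapply(iris_norm, range)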
# We split the data into a trainset and a testset (seeding for reproducibility)
set.seed(5)
index <- createDataPartition(iris_norm$Species, p = 0.7, list = FALSE)
trainset <- iris_norm[index,]
testset <- iris_norm[-index,]
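To verify the 70/30 split, a quick check (a minimal sketch) should show about 105 training rows and 45 test rows:
# Sizes of the two partitions
nrow(trainset)
nrow(testset)
# How the normalized species codes (0, 0.5, 1) are distributed in each set
table(trainset$Species)
table(testset$Species)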
# We fit a single-hidden-layer network with the nnet package
set.seed(5)
# linout = TRUE gives a linear output unit (we are regressing on the
# normalized species code); size = 20 sets the number of hidden units
fit <- nnet(Species ~ ., data = trainset, linout = TRUE, size = 20)
## # weights: 121
## initial value 118.708064
## iter 10 value 1.401606
## iter 20 value 1.183145
## iter 30 value 0.942476
## iter 40 value 0.881153
## iter 50 value 0.819554
## iter 60 value 0.725504
## iter 70 value 0.586246
## iter 80 value 0.536380
## iter 90 value 0.498840
## iter 100 value 0.467937
## final value 0.467937
## stopped after 100 iterations
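As an aside, nnet can also treat Species directly as a factor, in which case it builds a classification network with softmax outputs and predict(..., type = "class") returns labels. A minimal sketch of that alternative, reusing the same index split (the object names here are illustrative):
# Alternative: classification on the factor response, normalizing only the
# four numeric predictors
iris_cls <- iris
iris_cls[, 1:4] <- apply(iris_cls[, 1:4], 2, function(x) (x - min(x)) / (max(x) - min(x)))
set.seed(5)
fit_cls <- nnet(Species ~ ., data = iris_cls[index, ], size = 20, maxit = 200)
# type = "class" returns the predicted species labels directly
pred_cls <- predict(fit_cls, newdata = iris_cls[-index, ], type = "class")
mean(pred_cls == as.character(iris_cls$Species[-index]))  # test-set accuracy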
# The network predicts the normalized species code (0, 0.5 or 1), so we map
# both the fitted values and the real values back to the original codes 1, 2, 3
# We check the accuracy on the trainset
prediction <- factor(round(fit$fitted.values * 2 + 1))
real_value <- factor(trainset$Species * 2 + 1)
# We measure the accuracy of our prediction
postResample(prediction, real_value)
## Accuracy Kappa
## 0.9714286 0.9571078
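Accuracy alone can hide class-specific mistakes; caret's confusionMatrix gives a per-class breakdown. A sketch (it assumes `prediction` contains all three codes, so its factor levels match those of `real_value`):
# Cross-tabulate predicted vs. real species codes on the trainset,
# with per-class sensitivity and specificity
confusionMatrix(prediction, real_value)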
# We predict the species on the testset
prediction_test <- predict(fit, newdata = testset)
# Again we map the normalized values back to factors with levels 1, 2, 3
prediction_test <- factor(round(prediction_test * 2 + 1))
real_value_test <- factor(testset$Species * 2 + 1)
# We measure the accuracy on the testset (with only 45 test rows, a perfect
# score is not unusual)
postResample(prediction_test, real_value_test)
## Accuracy Kappa
## 1 1
# Now we can compare this result with a k-nearest neighbours (knn) model
set.seed(5)
# By default caret evaluates three candidate values of k
my_knn_model <- train(Species ~ Petal.Width + Petal.Length, data = trainset, method = "knn")
my_knn_model
## k-Nearest Neighbors
##
## 105 samples
## 2 predictor
##
## No pre-processing
## Resampling: Bootstrapped (25 reps)
## Summary of sample sizes: 105, 105, 105, 105, 105, 105, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared
## 5 0.09440555 0.9427159
## 7 0.09057818 0.9461980
## 9 0.08756356 0.9490332
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 9.
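caret's tuneLength argument controls how many candidate values of k are evaluated, and trainControl lets us swap the default bootstrap for cross-validation. A minimal sketch (the settings here are illustrative):
# Sketch: 10-fold cross-validation over five candidate values of k
set.seed(5)
knn_cv <- train(Species ~ Petal.Width + Petal.Length, data = trainset,
                method = "knn",
                tuneLength = 5,
                trControl = trainControl(method = "cv", number = 10))
knn_cv$bestTune  # the value of k selected by cross-validation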
# We repeat the same mapping for the knn predictions
prediction_knn <- predict(my_knn_model, newdata = testset)
prediction_knn <- factor(round(prediction_knn * 2 + 1))
# The accuracy is very high, but slightly worse than the neural network's
postResample(prediction_knn, real_value_test)
## Accuracy Kappa
## 0.9777778 0.9665179
In this tutorial we have learned how to train our first neural network. Further experimentation, for example with the number of hidden units, would be needed to get the most out of the model.