2 Data preparation

library(ggplot2)
library(caret)

## Loading required package: lattice

library(nnet)

# Load the built-in iris measurements and take a first look at their structure
data(iris)
str(iris)

## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

# Scatter plot of petal length (y) against petal width (x), one colour per species
ggplot(data = iris, mapping = aes(x = Petal.Width, y = Petal.Length)) +
  geom_point(mapping = aes(color = Species))

3 Prediction of species using neural networks

# It's very important to normalize the data before using neural networks:
# every column is min-max rescaled to the interval [0, 1].
# Species is first replaced by its integer level code (1, 2, 3), which the
# rescaling then maps to 0, 0.5 and 1.
rescale_01 <- function(x) {
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}

iris_norm <- iris
iris_norm$Species <- as.numeric(iris$Species)
# lapply() works column by column and keeps the data frame intact, whereas
# apply() would first coerce the whole data frame to a matrix.
iris_norm[] <- lapply(iris_norm, rescale_01)


# We create a trainset (70 % of the rows) and a testset (the remaining rows).
# The seed is fixed before the random partition so the split is reproducible.
set.seed(5)
index <- createDataPartition(iris_norm$Species, p = 0.7, list = FALSE)
trainset <- iris_norm[index, ]
testset <- iris_norm[-index, ]


# Fit a neural network with 20 hidden units using the nnet package.
# Species is modelled from all other columns; linout = TRUE requests a linear
# output unit, so the rescaled class code is fitted as a numeric target.
set.seed(5)
fit <- nnet(
  Species ~ .,
  data = trainset,
  size = 20,
  linout = TRUE
)

## # weights:  121
## initial  value 115.499336 
## iter  10 value 1.126433
## iter  20 value 1.009238
## iter  30 value 0.874009
## iter  40 value 0.766632
## iter  50 value 0.694468
## iter  60 value 0.595637
## iter  70 value 0.460404
## iter  80 value 0.353221
## iter  90 value 0.310521
## iter 100 value 0.288040
## final  value 0.288040 
## stopped after 100 iterations

# Map the fitted values from the 0 / 0.5 / 1 scale back to class codes 1, 2, 3
# (rounding to the nearest code) and do the same for the true training labels.
prediction <- factor(round(1 + 2 * fit$fitted.values))
real_value <- factor(1 + 2 * trainset$Species)

# Accuracy and Kappa on the training set
postResample(pred = prediction, obs = real_value)

##  Accuracy     Kappa 
## 0.9904762 0.9856948

# Predict the held-out rows and convert the numeric outputs back to class
# codes 1, 2, 3 in the same way as for the training set.
prediction_test <- factor(round(1 + 2 * predict(fit, newdata = testset)))
real_value_test <- factor(1 + 2 * testset$Species)

# Accuracy and Kappa on the test set
postResample(pred = prediction_test, obs = real_value_test)

##  Accuracy     Kappa 
## 0.9333333 0.8995536

# Now we can compare this result with a knn model that uses only the two
# petal measurements as predictors.
# The argument is spelled tuneLength (capital "L"): it sets how many candidate
# values of k train() evaluates during resampling.
set.seed(5)
my_knn_model <- train(
  Species ~ Petal.Width + Petal.Length,
  data = trainset,
  method = "knn",
  tuneLength = 5
)
my_knn_model

## k-Nearest Neighbors 
## 
## 105 samples
##   2 predictor
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 105, 105, 105, 105, 105, 105, ... 
## Resampling results across tuning parameters:
## 
##   k  RMSE        Rsquared 
##   5  0.05606157  0.9748142
##   7  0.06006082  0.9739011
##   9  0.06335264  0.9731922
## 
## RMSE was used to select the optimal model using  the smallest value.
## The final value used for the model was k = 5.

# Predict the held-out rows with the knn model and convert the numeric outputs
# back to class codes 1, 2, 3 before scoring them against the true test labels.
prediction_knn <- factor(round(1 + 2 * predict(my_knn_model, newdata = testset)))

# Accuracy and Kappa of the knn model on the test set
postResample(pred = prediction_knn, obs = real_value_test)

##  Accuracy     Kappa 
## 0.9333333 0.8995536

My first neural network

Luis Serra @ Ubiqum Code Academy

1 Goal

2 Data preparation

3 Prediction of species using neural networks

4 Conclusion