# load library
library(neuralnet)
library(tidyverse)
library(caret)

MNIST

MNIST is the dataset that we will classify with a neural network. In this LBB, let’s do image classification using deep learning with the Keras framework. The MNIST data contains images of handwritten digits. We will try to classify every handwritten image to its correct digit label.

Data Exploration

First, we should load the ‘keras’ library:

library(keras)

Read Data

  • train.csv -> for cross validation (train-test)
  • test.csv -> for data validation
# Read the labelled MNIST training table; it is split into train/test below.
mnist <- read.csv("mnist/train.csv")
# Keep the validation file under its own name: the cross-validation step
# later assigns the hold-out split to `test`, which would otherwise silently
# discard this data.
mnist_validation <- read.csv("mnist/test.csv")

Exploratory Data

  1. Check dimension of the data
# Size of the training table as (rows, columns)
dim(mnist)
## [1] 42000   785
# Preview the first six rows
head(mnist, n = 6)
# Smallest and largest value over every column except the first
range(mnist[, -1])
## [1]   0 255

Data:

  • There are 42000 images available
  • Target variable: the digit label
  • Predictor variables: 784 pixels
  • Image size: 28x28
  1. Check the range of values of every pixel in the mnist data
# Value range over every column except the first (the pixel columns)
range(mnist[, -1])
## [1]   0 255

pixel value: 0 (black) ~ 255 (white)

  1. Check target label:
# Distinct label values, in order of first appearance
unique(mnist[["label"]])
##  [1] 1 0 4 7 3 5 8 9 2 6
  1. Check proportion of target variable
# Share of rows carrying each label value
prop.table(table(mnist[["label"]]))
## 
##          0          1          2          3          4          5          6 
## 0.09838095 0.11152381 0.09945238 0.10359524 0.09695238 0.09035714 0.09850000 
##          7          8          9 
## 0.10478571 0.09673810 0.09971429
  1. Image visualization
# Digit names, looked up as categories[label + 1]
# (label 0 -> "zero", ..., label 9 -> "nine")
categories <- c(
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
)
#' Plot a grid of digit images titled with their labels
#'
#' Draws every row of `input` as a square grayscale image on a near-square
#' grid of panels. The first column of `input` is read as the digit label and
#' the remaining columns as the pixel values of one image, stored row by row.
#' Titles are looked up in the global `categories` vector.
#'
#' @param input A data frame or matrix: first column is the label (0-9), the
#'   remaining columns are the pixels of one square image per row.
#' @param idx Kept for backward compatibility and not used: titles are read
#'   from the label column of `input`, so they always match the drawn rows.
#' @return `NULL`, invisibly. Called for its plotting side effect.
vizTrain <- function(input, idx) {
  n_img <- nrow(input)
  if (n_img == 0) {
    # Nothing to draw; par(mfrow = c(0, 0)) would be an error
    return(invisible(NULL))
  }

  # Image side length is derived from the data being drawn, not from a global
  dimmax <- sqrt(ncol(input) - 1)

  # Smallest square grid that fits every image
  dimn <- ceiling(sqrt(n_img))

  # Change the layout, and restore the caller's graphics settings on exit
  old_par <- par(mfrow = c(dimn, dimn), mar = c(0, 0, 1.5, 0))
  on.exit(par(old_par), add = TRUE)

  labels <- as.numeric(unlist(input[, 1], use.names = FALSE))

  for (i in seq_len(n_img)) {
    # Flatten the row to a plain numeric vector (works for data frames and
    # matrices alike) before shaping it into the image matrix
    pixels <- as.numeric(unlist(input[i, -1], use.names = FALSE))
    m1 <- matrix(pixels, nrow = dimmax, byrow = TRUE)
    # Reverse each column and transpose to get the orientation image() needs
    m1 <- t(apply(m1, 2, rev))
    image(1:dimmax,
          1:dimmax,
          m1, col = gray((0:255) / 255),
          xaxt = "n",
          yaxt = "n",
          main = paste(categories[labels[i] + 1]))
  }

  invisible(NULL)
}

# Visualization: draw the first 25 training images

vizTrain(input = head(mnist, n = 25), idx = 1)

head(mnist, n = 6)

Cross Validation

Split the data:

  • train: 80%
  • test: 20%

Use initial_split() from package rsample:

library(rsample)
## Warning: package 'rsample' was built under R version 4.0.5
# Fix the RNG so the split is reproducible
set.seed(100)

# 80/20 split, stratified on the label column
idx <- mnist %>%
  initial_split(prop = 0.8, strata = "label")
train <- training(idx)
test <- testing(idx)

Data pre-processing

Before using a model with Keras, some preparation is needed:

  1. Split the predictors from the target variable
  2. Change the data format into an array: data.frame -> matrix -> array
  3. Do one-hot encoding if the target variable is categorical
  4. Scale the data

Split target and predictors, and scale the data

Scale by dividing by 255 so that the data range becomes 0 to 1.

# Convert both splits to matrices

train <- as.matrix(train)
test <- as.matrix(test)

# Target: the first column

train_y <- train[, 1]
test_y <- test[, 1]

# Predictors: every column except the first

train_x <- train[, -1]
test_x <- test[, -1]

Processing predictor: Change matrix into array

The Keras framework accepts data in array format, so the predictor data in matrix format should be changed into an array using array_reshape().

# Predictors as 2D arrays, keeping the dimensions of the source matrices
train_x_keras <- array_reshape(train_x, dim = dim(train_x))
## Warning in normalizePath(path.expand(path), winslash, mustWork): path[1]="C:
## \Users\HP\anaconda3\envs\venv-1/python.exe": The system cannot find the file
## specified
test_x_keras <- array_reshape(test_x, dim = dim(test_x))

Processing target: One Hot Encoding

Change the (categorical) target into one-hot encoded variables using the function to_categorical():

# One-hot encode the target (y). num_classes is fixed at 10 (digits 0-9) so
# the train and test matrices always have the same 10 columns, even if one
# split happens not to contain the highest digit.

train_y_keras <- to_categorical(train_y, num_classes = 10)
test_y_keras <- to_categorical(test_y, num_classes = 10)
head(train_y)
##  1  2  5  6 13 16 
##  1  0  0  0  1  1
head(train_y_keras)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    0    1    0    0    0    0    0    0    0     0
## [2,]    1    0    0    0    0    0    0    0    0     0
## [3,]    1    0    0    0    0    0    0    0    0     0
## [4,]    1    0    0    0    0    0    0    0    0     0
## [5,]    0    1    0    0    0    0    0    0    0     0
## [6,]    0    1    0    0    0    0    0    0    0     0
# Pixel range of the raw data, used for the min-max scaling that follows
range(mnist[, -1])
## [1]   0 255

min-max normalization -> (x - 0)/(255 - 0) = x/255

min: 0 max: 255

Result of min max normalization:

# Min-max normalization: with min 0 and max 255 this is a division by 255


train_x_keras <- train_x_keras / 255
test_x_keras <- test_x_keras / 255

Model Building Keras

Steps for building a neural network/deep learning model in Keras:

  1. Define Architecture
  2. Compile with training parameters
  3. Fitting (training) Model

Define Architecture

In this step we will build the model architecture, including defining the layers and nodes as well as the activation functions inside them.

# Keras initialization: create the sequential model object that the
# layers are piped into later
model <- keras_model_sequential()

Note: - keras_model_sequential() is the first initialization step in the process of building a model - If we want to change even a small parameter in the model, we should rerun keras_model_sequential()

The Keras sequential model builds the architecture layer by layer. Here are some functions and arguments that we can use:

  • layer_dense: creates a fully connected layer; used for the input, hidden, and output layers.
  • input_shape: defines the number of nodes in the input layer; set in the first layer_dense only
  • units: defines the number of nodes in the layer
  • activation: defines the activation function to be used
  • name: names the layer
# Lock the random initial biases and weights
set.seed(100)
initializer <- initializer_random_normal(seed = 100)


model %>%
  # hidden layer 1; also declares the input width
  layer_dense(
    units = 128,
    activation = "relu",
    input_shape = ncol(train_x_keras),
    kernel_initializer = initializer,
    bias_initializer = initializer,
    name = "hidden_1"
  ) %>%
  # hidden layer 2
  layer_dense(
    units = 64,
    activation = "relu",
    kernel_initializer = initializer,
    bias_initializer = initializer,
    name = "hidden_2"
  ) %>%
  # output layer: one softmax unit per class
  layer_dense(
    units = 10,
    activation = "softmax",
    name = "output"
  )
summary(model)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type)                        Output Shape                    Param #     
## ================================================================================
## hidden_1 (Dense)                    (None, 128)                     100480      
## ________________________________________________________________________________
## hidden_2 (Dense)                    (None, 64)                      8256        
## ________________________________________________________________________________
## output (Dense)                      (None, 10)                      650         
## ================================================================================
## Total params: 109,386
## Trainable params: 109,386
## Non-trainable params: 0
## ________________________________________________________________________________

Compile a Model

In this step we combine the architecture we have built with the other parameters needed to produce the desired model, using the function compile():

  • loss: the cost function/error to use:
    • multiclass classification: categorical_crossentropy
    • binary classification: binary_crossentropy
    • regression: mean_squared_error
  • optimizer: the back-propagation optimization method
    • optimizer_sgd
    • optimizer_adam
  • metrics: the model performance metrics to use; for multiclass classification, use accuracy
# Compile the model: categorical cross-entropy loss, SGD optimizer, and
# accuracy as the reported metric. `metrics` is spelled out in full instead
# of relying on partial argument matching of `metric`.
# NOTE(review): newer keras releases name the learning-rate argument
# `learning_rate` instead of `lr` - confirm against the installed version.
model %>%
  compile(
    loss = "categorical_crossentropy",
    optimizer = optimizer_sgd(lr = 0.001),
    metrics = "accuracy"
  )

Fit (Training Model)

After building the model, we train it on the training data. We can use the function fit() with these parameters:

  • the training predictor variables
  • the training target variable
  • batch_size: the number of observations per batch used to train the model
  • epochs: the number of epochs; one epoch is one full training cycle over all the data
# Fit the model: 30 epochs with batches of 128 observations. `epochs` is
# spelled out in full instead of relying on partial argument matching of
# `epoch`.
history <- model %>%
  fit(
    x = train_x_keras,
    y = train_y_keras,
    epochs = 30,
    batch_size = 128
  )

Plotting Model:

# Plot the training history object returned by fit()
plot(history)
## `geom_smooth()` using formula 'y ~ x'

Check the information from the last epoch (30) to see the accuracy on the training data

# Training accuracy of the final epoch; taking the last element avoids
# hard-coding the number of epochs a second time
tail(history$metrics$accuracy, n = 1)
## [1] 0.8634145

Predict

Doing prediction using data test test_x_keras by using function predict_classes

# Predict the class of every test image.
# predict() returns one row of class probabilities per image; the predicted
# digit is the position of the largest probability minus 1 (labels start at
# 0). This replaces predict_classes(), which is no longer available in newer
# Keras/TensorFlow releases.

prediction <- apply(predict(model, test_x_keras), 1, which.max) - 1L

# See the prediction results for the first 5 rows

prediction[1:5]
## [1] 1 3 9 3 7

Evaluation

Evaluate how good the predictions are using confusionMatrix(). *For multiclass classification, we can simply use the accuracy metric to evaluate the model.

library(caret)

# Give both factors the same fixed level set (digits 0-9) so the comparison
# still lines up when some digit is never predicted.
confusionMatrix(factor(prediction, levels = 0:9),
                reference = factor(test_y, levels = 0:9))
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1   2   3   4   5   6   7   8   9
##          0 785   0   9   5   2  14  10  13   5   7
##          1   0 891  18   6   7  20  12  16  27  13
##          2   2   6 701  24   7  13  10  15  15   5
##          3   5   4  28 736   0  56   0   2  41  15
##          4   0   0  19   0 695  22   6  14   9  45
##          5  13   0   4  39   0 569  14   2  37   9
##          6  10   1  27   7  14  21 752   0   7   0
##          7   2   0  18  11   1   5   0 804   5  36
##          8   9  20  18  31  12  26   9   6 634  13
##          9   2   0   7  13  75   8   0  28  22 706
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8656          
##                  95% CI : (0.8581, 0.8729)
##     No Information Rate : 0.1097          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8506          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity           0.94807   0.9664  0.82568   0.8440  0.85486  0.75464
## Specificity           0.99142   0.9841  0.98716   0.9799  0.98485  0.98457
## Pos Pred Value        0.92353   0.8822  0.87845   0.8298  0.85802  0.82824
## Neg Pred Value        0.99431   0.9958  0.98054   0.9819  0.98446  0.97602
## Prevalence            0.09855   0.1097  0.10105   0.1038  0.09676  0.08974
## Detection Rate        0.09343   0.1060  0.08343   0.0876  0.08272  0.06772
## Detection Prevalence  0.10117   0.1202  0.09498   0.1056  0.09641  0.08177
## Balanced Accuracy     0.96974   0.9752  0.90642   0.9120  0.91985  0.86961
##                      Class: 6 Class: 7 Class: 8 Class: 9
## Sensitivity           0.92497  0.89333  0.79052  0.83157
## Specificity           0.98854  0.98960  0.98105  0.97948
## Pos Pred Value        0.89631  0.91156  0.81491  0.81998
## Neg Pred Value        0.99193  0.98723  0.97796  0.98104
## Prevalence            0.09676  0.10712  0.09545  0.10105
## Detection Rate        0.08950  0.09569  0.07546  0.08403
## Detection Prevalence  0.09986  0.10498  0.09260  0.10248
## Balanced Accuracy     0.95675  0.94147  0.88579  0.90552
# Loss and accuracy on the test split, without the progress output
evaluate(model, test_x_keras, test_y_keras, verbose = 0)
##      loss  accuracy 
## 0.5383357 0.8656272

Accuracy on the test data is 86%

Evaluate the model by visualizing the results:

#' Plot test images titled with predicted and true digit names
#'
#' For each index in `images`, draws that row of the global matrix `test`
#' (first column dropped) as a 28x28 grayscale image. The title reads
#' "predicted (true)", using the global `categories` and `test_y`, and is
#' dark green when the two labels agree and red otherwise.
#'
#' @param images Vector of row indices into `test` / `test_y`.
#' @param preds Predicted labels (0-9), indexed the same way as `test_y`.
#' @return `NULL`, invisibly. Called for its plotting side effect.
plotResults <- function(images, preds) {
  if (length(images) == 0) {
    # Nothing to draw; par(mfrow = c(0, 0)) would be an error
    return(invisible(NULL))
  }

  # Smallest square grid that fits every requested image
  x <- ceiling(sqrt(length(images)))
  # Change the layout, and restore the caller's graphics settings on exit
  old_par <- par(mfrow = c(x, x), mar = c(0, 0, 1.5, 0))
  on.exit(par(old_par), add = TRUE)

  for (i in images) {
    m <- matrix(test[i, -1], nrow = 28, byrow = TRUE)
    m <- apply(m, 2, rev)

    # Read the prediction from the `preds` argument, not from a global
    predicted_label <- preds[i]
    true_label <- test_y[i]
    color <- if (predicted_label == true_label) "darkgreen" else "red"

    image(t(m), col = gray((0:255) / 255), axes = FALSE,
          main = paste0(categories[predicted_label + 1], " (",
                        categories[true_label + 1], ")"),
          col.main = color)
  }

  invisible(NULL)
}
# Show the first 49 test images with their predicted and true labels
plotResults(images = seq_len(49), preds = prediction)

  • accuracy on the training data = 86%

  • accuracy on the test data = 86%

The results are reasonably accurate and well balanced between the training data and the test data.