library(keras)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(imager)
## Loading required package: magrittr
## 
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
## 
##     add
## The following object is masked from 'package:dplyr':
## 
##     where
## The following objects are masked from 'package:stats':
## 
##     convolve, spectrum
## The following object is masked from 'package:graphics':
## 
##     frame
## The following object is masked from 'package:base':
## 
##     save.image
library("EBImage")
## 
## Attaching package: 'EBImage'
## The following objects are masked from 'package:imager':
## 
##     channel, dilate, display, erode, resize, watershed
library(foreach)
library(tensorflow)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:tensorflow':
## 
##     train
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ imager::add()       masks magrittr::add()
## ✖ stringr::boundary() masks imager::boundary()
## ✖ EBImage::combine()  masks dplyr::combine()
## ✖ tidyr::extract()    masks magrittr::extract()
## ✖ tidyr::fill()       masks imager::fill()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ purrr::lift()       masks caret::lift()
## ✖ purrr::set_names()  masks magrittr::set_names()
## ✖ purrr::transpose()  masks EBImage::transpose()
## ✖ purrr::when()       masks foreach::when()
## ✖ imager::where()     masks dplyr::where()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Write `model` to `file_path` in HDF5 format by delegating to
# keras::save_model_hdf5().
save_keras_model <- function(model, file_path) {
  save_model_hdf5(object = model, filepath = file_path)
}

# Read a model back from the HDF5 file at `file_path` and return it.
load_keras_model <- function(file_path) {
  load_model_hdf5(filepath = file_path)
}

Introduction

This dataset was obtained from Kaggle.

The dataset contains images of natural scenes that are distributed across 6 categories/classes: - Buildings - Forest - Glacier - Mountain - Sea - Street

I will build a neural network model to classify these images using the keras library.

EDA

Setting up global variables

# Root folder of the dataset (the trailing slash is part of the value).
BASE.DIR <- "dataset/"
# One sub-folder per split. file.path() inserts its own "/" separator, so the
# resulting values contain a double slash, e.g. "dataset//seg_train/".
TRAIN.DIR <- file.path(BASE.DIR, "seg_train/")
TEST.DIR <- file.path(BASE.DIR, "seg_test/")
PRED.DIR <- file.path(BASE.DIR, "seg_pred/")

Check image distribution across all classes

Total classes

# List the class folders of the training split, one per line.
cat("train: ", list.files(TRAIN.DIR), sep = "\n\t- ")
## train: 
##  - buildings
##  - forest
##  - glacier
##  - mountain
##  - sea
##  - street
# List the class folders of the test split, one per line.
cat("test: ", list.files(TEST.DIR), sep = "\n\t- ")
## test: 
##  - buildings
##  - forest
##  - glacier
##  - mountain
##  - sea
##  - street

Train

# Number of image files in each class folder of the training split.
# vapply() (instead of sapply()) guarantees an integer vector named by class
# folder, even when TRAIN.DIR contains no class folders at all.
vapply(
  list.files(TRAIN.DIR),
  function(class_dir) {
    length(list.files(file.path(TRAIN.DIR, class_dir)))
  },
  integer(1)
)
## buildings    forest   glacier  mountain       sea    street 
##      2191      2271      2404      2512      2274      2382

Test

# Number of image files in each class folder of the test split.
# vapply() (instead of sapply()) guarantees an integer vector named by class
# folder, even when TEST.DIR contains no class folders at all.
vapply(
  list.files(TEST.DIR),
  function(class_dir) {
    length(list.files(file.path(TEST.DIR, class_dir)))
  },
  integer(1)
)
## buildings    forest   glacier  mountain       sea    street 
##       437       474       553       525       510       501

The data is distributed in a balanced way across all classes in both the Train and Test sets.

Preprocessing

Normalize image pixel

# Training generator: rescale pixels to [0, 1] and apply random augmentation.
# A scalar zoom_range z samples zoom factors from [1 - z, 1 + z], so the
# previous value of 20 produced the range [-19, 21]; 0.2 gives the intended
# +/- 20% zoom.
train_data_gen <- image_data_generator(
  rescale = 1 / 255,
  horizontal_flip = TRUE,
  vertical_flip = TRUE,
  rotation_range = 30,
  zoom_range = 0.2
)

# Test generator: rescale only. Random flips/rotations/zooms on the evaluation
# data would make validation metrics noisy and inconsistent with predictions
# made on the un-augmented images.
test_data_gen <- image_data_generator(rescale = 1 / 255)

The purpose of normalizing the pixel values is to speed up the computation, because every pixel value then lies in the range 0-1.

Batch image

# Batched iterator over the training images: resized to 150x150, 32 images per
# batch, reshuffled, with categorical class labels.
train_data <- flow_images_from_directory(
  directory = TRAIN.DIR,
  generator = train_data_gen,
  target_size = c(150, 150),
  class_mode = "categorical",
  batch_size = 32,
  shuffle = TRUE
)

# Batched iterator over the test images: same settings, but not shuffled so
# that the file order stays fixed.
test_data <- flow_images_from_directory(
  directory = TEST.DIR,
  generator = test_data_gen,
  target_size = c(150, 150),
  class_mode = "categorical",
  batch_size = 32,
  shuffle = FALSE
)

Batching the images lets the deep learning model process a small sample of images at a time instead of processing all of the images at once.

Modelling

Model 1

# model1 <- keras_model_sequential(name = "model1") %>% 
#   layer_conv_2d(input_shape = c(150, 150, 3),
#                 filters = 32,
#                 kernel_size = c(3, 3),
#                 activation = "relu", 
#                 padding = "same") %>% 
#   layer_max_pooling_2d(pool_size = c(2, 2),
#                        strides = 2) %>% 
#   layer_conv_2d(filters = 64,
#                 kernel_size = c(3, 3),
#                 padding = "same",
#                 activation = "relu") %>% 
#   layer_max_pooling_2d(pool_size = c(2, 2),
#                        strides = 2) %>% 
#   layer_flatten() %>% 
#   layer_dense(128, 
#               activation = "relu") %>% 
#   layer_dense(6,
#               activation = "softmax")
# summary(model1)

I made model 1 with 7 layers: - First layer is the input convolution layer: the input is a 150x150 RGB image, with 32 filters - Second layer is a pooling layer to prevent overfitting, with pool size 2x2 and stride 2 - Third layer is a convolution layer with 64 filters - Fourth layer is the same as the second layer, to prevent overfitting - Fifth layer is a flatten layer that turns the feature maps into a 1D vector - Sixth layer is a dense layer with 128 units - Last layer is the output layer with a softmax activation function to classify the 6 categories

# model1 %>% compile(loss = "categorical_crossentropy",
#                    optimizer = optimizer_adam(),
#                    metrics = "accuracy")
# history1 <- model1 %>%  fit(x = train_data,
#                             validation_data = test_data,
#                             epochs = 5,
#                             verbose = 1,
#                             view_metrics = 0)
# 
# plot(history1)

Evaluation

# One row per test image: full path plus its true class.
# The class is taken from the name of the folder that directly contains the
# image. Unlike a regex search over the whole path, this cannot be fooled by a
# class word appearing in a parent directory or in the file name.
test_img_df <- data.frame(
  file_name = file.path(TEST.DIR, test_data$filenames)
) %>%
  mutate(class = basename(dirname(file_name)))
# Load the image files listed in `x` and stack them into a single array.
# Each image is loaded at 150x150 in colour, converted to an array, given a
# leading dimension of length 1 and divided by 255; the per-image arrays are
# then bound together along the first dimension.
image_prep <- function(x) {
  load_one <- function(img_path) {
    loaded <- image_load(img_path, target_size = c(150, 150), grayscale = FALSE)
    pixels <- image_to_array(loaded)
    batch_of_one <- array_reshape(pixels, c(1, dim(pixels)))
    batch_of_one / 255 # rescale pixels
  }

  per_image <- lapply(x, load_one)
  do.call(abind::abind, c(per_image, list(along = 1)))
}

# Array holding every test image, in the row order of test_img_df.
test_img <- image_prep(test_img_df[["file_name"]])
# Translate a numeric class index (0-5) into its class name.
# NOTE(review): the index order is presumably the one assigned by
# flow_images_from_directory() (class folders in alphabetical order) --
# verify against test_data$class_indices.
decode <- function(x){
  # One branch per class; an index outside 0-5 matches no branch.
  case_when(x == 0 ~ "buildings",
            x == 1 ~ "forest",
            x == 2 ~ "glacier",
            x == 3 ~ "mountain",
            x == 4 ~ "sea",
            x == 5 ~ "street"
            )
}
# save_keras_model(model1, "model1.h5")
# Restore the previously saved first model.
model1_load <- load_keras_model("model1.h5")

# Model output for every test image.
pred_res <- model1_load %>% predict(test_img)

# Index of the largest output per image, translated to a class name.
pred_res_cls <- k_argmax(pred_res)
pred_res_cls <- sapply(pred_res_cls, decode)

# Compare the predicted labels with the labels taken from the file paths.
confusionMatrix(
  data = as.factor(pred_res_cls),
  reference = as.factor(test_img_df[["class"]])
)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  buildings forest glacier mountain sea street
##   buildings        15      0      23       11   9     14
##   forest          270    465      82       89  88    323
##   glacier           1      0      97       30  29      0
##   mountain         79      7     262      368 340     46
##   sea               1      0      13       10  10      0
##   street           71      2      76       17  34    118
## 
## Overall Statistics
##                                           
##                Accuracy : 0.3577          
##                  95% CI : (0.3405, 0.3751)
##     No Information Rate : 0.1843          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.2294          
##                                           
##  Mcnemar's Test P-Value : < 2.2e-16       
## 
## Statistics by Class:
## 
##                      Class: buildings Class: forest Class: glacier
## Sensitivity                   0.03432        0.9810        0.17541
## Specificity                   0.97776        0.6627        0.97548
## Pos Pred Value                0.20833        0.3531        0.61783
## Neg Pred Value                0.85587        0.9947        0.83961
## Prevalence                    0.14567        0.1580        0.18433
## Detection Rate                0.00500        0.1550        0.03233
## Detection Prevalence          0.02400        0.4390        0.05233
## Balanced Accuracy             0.50604        0.8219        0.57544
##                      Class: mountain Class: sea Class: street
## Sensitivity                   0.7010   0.019608       0.23553
## Specificity                   0.7034   0.990361       0.91997
## Pos Pred Value                0.3339   0.294118       0.37107
## Neg Pred Value                0.9173   0.831423       0.85720
## Prevalence                    0.1750   0.170000       0.16700
## Detection Rate                0.1227   0.003333       0.03933
## Detection Prevalence          0.3673   0.011333       0.10600
## Balanced Accuracy             0.7022   0.504985       0.57775

The accuracy of model 1 is poor, at about 36% (0.3577). This might be caused by the low number of epochs, and the model may need more layers to learn the pixel patterns in more detail, because it classified every class poorly except for forest.

Model with Hyperparameter Tuning

# set_random_seed(76)
# 
# model2 <- keras_model_sequential(name = "model5") %>% 
#   layer_conv_2d(input_shape = c(150, 150, 3),
#                 filters = 32,
#                 kernel_size = c(5, 5),
#                 activation = "relu", 
#                 padding = "same") %>% 
#   
#   layer_max_pooling_2d(pool_size = c(5, 5),
#                        strides = 2) %>% 
#   
#   layer_conv_2d(filters = 64,
#                 kernel_size = c(3, 3),
#                 activation = "relu", 
#                 padding = "same") %>% 
#   
#   layer_max_pooling_2d(pool_size = c(5, 5),
#                        strides = 2) %>% 
#   
#   layer_conv_2d(filters = 128,
#                 kernel_size = c(3, 3),
#                 padding = "same",
#                 activation = "relu") %>% 
#   
#   layer_max_pooling_2d(pool_size = c(7, 7),
#                        strides = 2) %>% 
#   
#   layer_conv_2d(filters = 256,
#                 kernel_size = c(3, 3),
#                 padding = "same",
#                 activation = "relu") %>% 
#   
#   layer_flatten() %>% 
#   
#   layer_dense(512, 
#               activation = "relu") %>% 
#   
#   layer_dense(6,
#               activation = "softmax")
# 
# summary(model2)

Model 2 is a modification of model 1 with 10 layers: - First layer is the input convolution layer: the input is a 150x150 RGB image, with 32 filters - Second layer is a pooling layer to prevent overfitting, with pool size 5x5 and stride 2 - Third layer is a convolution layer with 64 filters - Fourth layer is a pooling layer to prevent overfitting, with pool size 5x5 and stride 2 - Fifth layer is a convolution layer with 128 filters - Sixth layer is a max pooling layer with 7x7 pool size and stride 2 - Seventh layer is a convolution layer with 256 filters - Eighth layer is a flatten layer that turns the feature maps into a 1D vector - Ninth layer is a dense layer with 512 units - Last layer is the output layer with a softmax activation function to classify the 6 categories

# model2 %>% compile(loss = "categorical_crossentropy",
#                    optimizer = optimizer_adam(),
#                    metrics = "accuracy")
# 
# history2 <- model2 %>%  fit(x = train_data,
#                             validation_data = test_data,
#                             epochs = 25,
#                             verbose = 1,
#                             view_metrics = 0)
# 
# plot(history2)

Evaluation

# save_keras_model(model2, "model2.h5")
# Restore the previously saved tuned model.
model2_load <- load_keras_model("model2.h5")

# Model output for every test image.
pred_res <- model2_load %>% predict(test_img)

# Index of the largest output per image, translated to a class name.
pred_res_cls <- k_argmax(pred_res)
pred_res_cls <- sapply(pred_res_cls, decode)

# Compare the predicted labels with the labels taken from the file paths.
confusionMatrix(
  data = as.factor(pred_res_cls),
  reference = as.factor(test_img_df[["class"]])
)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  buildings forest glacier mountain sea street
##   buildings       199      5      84       31  54    130
##   forest          153    462      17       54  55    217
##   glacier          17      0     279       59  49      0
##   mountain         22      3      73      316 105     16
##   sea              27      4      82       60 225     17
##   street           19      0      18        5  22    121
## 
## Overall Statistics
##                                         
##                Accuracy : 0.534         
##                  95% CI : (0.516, 0.552)
##     No Information Rate : 0.1843        
##     P-Value [Acc > NIR] : < 2.2e-16     
##                                         
##                   Kappa : 0.4421        
##                                         
##  Mcnemar's Test P-Value : < 2.2e-16     
## 
## Statistics by Class:
## 
##                      Class: buildings Class: forest Class: glacier
## Sensitivity                   0.45538        0.9747         0.5045
## Specificity                   0.88139        0.8036         0.9489
## Pos Pred Value                0.39563        0.4823         0.6906
## Neg Pred Value                0.90469        0.9941         0.8945
## Prevalence                    0.14567        0.1580         0.1843
## Detection Rate                0.06633        0.1540         0.0930
## Detection Prevalence          0.16767        0.3193         0.1347
## Balanced Accuracy             0.66838        0.8892         0.7267
##                      Class: mountain Class: sea Class: street
## Sensitivity                   0.6019     0.4412       0.24152
## Specificity                   0.9115     0.9237       0.97439
## Pos Pred Value                0.5907     0.5422       0.65405
## Neg Pred Value                0.9152     0.8897       0.86501
## Prevalence                    0.1750     0.1700       0.16700
## Detection Rate                0.1053     0.0750       0.04033
## Detection Prevalence          0.1783     0.1383       0.06167
## Balanced Accuracy             0.7567     0.6824       0.60795

The tuned model's accuracy improved, but not by much, reaching 53%.