library(keras)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(imager)
## Loading required package: magrittr
##
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
##
## add
## The following object is masked from 'package:dplyr':
##
## where
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
## The following object is masked from 'package:graphics':
##
## frame
## The following object is masked from 'package:base':
##
## save.image
library("EBImage")
##
## Attaching package: 'EBImage'
## The following objects are masked from 'package:imager':
##
## channel, dilate, display, erode, resize, watershed
library(foreach)
library(tensorflow)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:tensorflow':
##
## train
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ imager::add() masks magrittr::add()
## ✖ stringr::boundary() masks imager::boundary()
## ✖ EBImage::combine() masks dplyr::combine()
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ tidyr::fill() masks imager::fill()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ purrr::set_names() masks magrittr::set_names()
## ✖ purrr::transpose() masks EBImage::transpose()
## ✖ purrr::when() masks foreach::when()
## ✖ imager::where() masks dplyr::where()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#' Persist a Keras model to disk in HDF5 format.
#'
#' @param model A Keras model object.
#' @param file_path Destination path of the HDF5 file (e.g. "model1.h5").
#' @return Whatever `save_model_hdf5()` returns.
save_keras_model <- function(model, file_path) {
  save_model_hdf5(object = model, filepath = file_path)
}
#' Restore a Keras model that was previously saved in HDF5 format.
#'
#' @param file_path Path of the HDF5 file to read.
#' @return The model object produced by `load_model_hdf5()`.
load_keras_model <- function(file_path) {
  load_model_hdf5(filepath = file_path)
}
This dataset was obtained from Kaggle.
The dataset contains images of natural scenes distributed across 6 categories/classes: - Buildings - Forest - Glacier - Mountain - Sea - Street
I will build a neural network model to classify these images using the keras library.
# Root folder of the dataset; the sub-folders below are resolved against it.
BASE.DIR <- "dataset/"
# Training split (the listing below shows one sub-folder per class).
TRAIN.DIR <- file.path(BASE.DIR, "seg_train/")
# Test split, used here for validation and for the confusion matrices.
TEST.DIR <- file.path(BASE.DIR, "seg_test/")
# Prediction split; presumably unlabeled images -- TODO confirm, it is not used in this section.
PRED.DIR <- file.path(BASE.DIR, "seg_pred/")
- Total classes
# Print the class folders found in the training split as a bulleted list.
cat(paste(c("train: ", list.files(TRAIN.DIR)), collapse = "\n\t- "))
## train:
## - buildings
## - forest
## - glacier
## - mountain
## - sea
## - street
# Print the class folders found in the test split as a bulleted list.
cat(paste(c("test: ", list.files(TEST.DIR)), collapse = "\n\t- "))
## test:
## - buildings
## - forest
## - glacier
## - mountain
## - sea
## - street
# Count the files in each class folder of the training split.
# vapply() guarantees a named integer vector even if the folder is empty,
# whereas sapply() would silently return a list in that case.
vapply(
  list.files(TRAIN.DIR),
  function(class_dir) length(list.files(file.path(TRAIN.DIR, class_dir))),
  integer(1)
)
## buildings forest glacier mountain sea street
## 2191 2271 2404 2512 2274 2382
# Count the files in each class folder of the test split.
# vapply() guarantees a named integer vector even if the folder is empty,
# whereas sapply() would silently return a list in that case.
vapply(
  list.files(TEST.DIR),
  function(class_dir) length(list.files(file.path(TEST.DIR, class_dir))),
  integer(1)
)
## buildings forest glacier mountain sea street
## 437 474 553 525 510 501
The class distribution in both the train and test sets is roughly balanced.
# Augmenting generator for the training images.
# - rescale maps the 0-255 pixel values into the 0-1 range.
# - flips and rotations (up to 30 degrees) add random variation per epoch.
# - zoom_range is a fraction: 0.2 samples zoom factors from [0.8, 1.2].
#   The previous value of 20 meant [1 - 20, 1 + 20] = [-19, 21], i.e.
#   negative and extreme zoom factors that destroy the image content.
train_data_gen <- image_data_generator(
  rescale = 1 / 255,
  horizontal_flip = TRUE,
  vertical_flip = TRUE,
  rotation_range = 30,
  zoom_range = 0.2
)
# Generator for the test images.
# Only the pixel rescaling is applied: evaluation data must not be randomly
# flipped, rotated or zoomed, otherwise validation metrics become noisy and
# are not comparable with predictions made on the unmodified image files.
test_data_gen <- image_data_generator(rescale = 1 / 255)
The purpose of normalizing the pixel values is to make the computation faster, because every pixel value then lies in the range 0-1.
# Stream the training images from disk in batches of 32.
# Images are resized to 150x150, labels are one-hot encoded ("categorical"),
# and the order is reshuffled so batches differ between epochs.
train_data <- flow_images_from_directory(
  directory = TRAIN.DIR,
  generator = train_data_gen,
  target_size = c(150, 150),
  batch_size = 32,
  shuffle = TRUE,
  class_mode = "categorical"
)
# Stream the test images from disk in batches of 32.
# Images are resized to 150x150 and labels are one-hot encoded.
# shuffle = FALSE keeps the iteration order fixed.
test_data <- flow_images_from_directory(
  directory = TEST.DIR,
  generator = test_data_gen,
  target_size = c(150, 150),
  batch_size = 32,
  shuffle = FALSE,
  class_mode = "categorical"
)
Batching the images lets the network process a small sample of images at a time instead of processing all images at once.
# model1 <- keras_model_sequential(name = "model1") %>%
# layer_conv_2d(input_shape = c(150, 150, 3),
# filters = 32,
# kernel_size = c(3, 3),
# activation = "relu",
# padding = "same") %>%
# layer_max_pooling_2d(pool_size = c(2, 2),
# strides = 2) %>%
# layer_conv_2d(filters = 64,
# kernel_size = c(3, 3),
# padding = "same",
# activation = "relu") %>%
# layer_max_pooling_2d(pool_size = c(2, 2),
# strides = 2) %>%
# layer_flatten() %>%
# layer_dense(128,
# activation = "relu") %>%
# layer_dense(6,
# activation = "softmax")
# summary(model1)
I built model 1 with 7 layers: - The first layer is a convolution layer that takes the 150x150 RGB input image and applies 32 filters - The second layer is a max-pooling layer with a 2x2 pool size and a stride of 2, which downsamples the feature maps and helps to limit overfitting - The third layer is a convolution layer with 64 filters - The fourth layer is a max-pooling layer identical to the second one - The fifth layer is a flatten layer that turns the feature maps into a 1D vector - The sixth layer is a dense layer with 128 units - The last layer is the output layer with a softmax activation function to classify the 6 categories
# model1 %>% compile(loss = "categorical_crossentropy",
# optimizer = optimizer_adam(),
# metrics = "accuracy")
# history1 <- model1 %>% fit(x = train_data,
# validation_data = test_data,
# epochs = 5,
# verbose = 1,
# view_metrics = 0)
#
# plot(history1)
# Table of every test image: its path on disk and its true class.
# The class is the name of the folder that directly contains the file.
# Taking it from the folder name (instead of regex-matching the whole path)
# avoids false matches when a parent directory happens to contain a class
# name, e.g. ".../research/..." contains "sea".
test_img_df <- data.frame(
  file_name = file.path(TEST.DIR, test_data$filenames)
) %>%
  mutate(class = basename(dirname(file_name)))
#' Load image files into one array suitable for `predict()`.
#'
#' Each file is loaded as a colour image resized to 150x150, converted to an
#' array, given a leading batch dimension of 1 and rescaled by 1/255. The
#' per-image arrays are then bound together along that first dimension.
#'
#' @param x Character vector of image file paths.
#' @return The array produced by binding all images along dimension 1.
image_prep <- function(x) {
  arrays <- lapply(x, function(path) {
    img <- image_load(path, target_size = c(150, 150), grayscale = FALSE)
    pixels <- image_to_array(img)
    # Prepend a batch dimension so the images can be stacked along it.
    pixels <- array_reshape(pixels, c(1, dim(pixels)))
    # Rescale pixel values from 0-255 to 0-1.
    pixels / 255
  })
  do.call(abind::abind, c(arrays, list(along = 1)))
}
# Load every test image listed in test_img_df into a single array.
test_img <- image_prep(test_img_df[["file_name"]])
#' Translate zero-based class indices into class names.
#'
#' @param x Class index or indices (0 = buildings, ..., 5 = street).
#' @return The matching class name(s); values outside 0-5 match no branch
#'   of the `case_when()`.
decode <- function(x) {
  # Comparison of the input against a single class index.
  is_index <- function(idx) x == idx

  case_when(
    is_index(0) ~ "buildings",
    is_index(1) ~ "forest",
    is_index(2) ~ "glacier",
    is_index(3) ~ "mountain",
    is_index(4) ~ "sea",
    is_index(5) ~ "street"
  )
}
# save_keras_model(model1, "model1.h5")
# Load the previously trained model 1 from disk instead of retraining it.
model1_load <- load_keras_model("model1.h5")

# Predicted scores for the test images; expected to be a matrix with one
# row per image and one column per class.
pred_res <- predict(object = model1_load, test_img)

# Zero-based index of the highest-scoring class in every row. Computed in R
# on the whole matrix at once; ties resolve to the first column.
pred_res_idx <- max.col(pred_res, ties.method = "first") - 1L
pred_res_cls <- decode(pred_res_idx)

# Use the same, complete set of levels on both factors so the confusion
# matrix can still be built when some class is never predicted.
class_levels <- decode(0:5)
confusionMatrix(
  data = factor(pred_res_cls, levels = class_levels),
  reference = factor(test_img_df$class, levels = class_levels)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction buildings forest glacier mountain sea street
## buildings 15 0 23 11 9 14
## forest 270 465 82 89 88 323
## glacier 1 0 97 30 29 0
## mountain 79 7 262 368 340 46
## sea 1 0 13 10 10 0
## street 71 2 76 17 34 118
##
## Overall Statistics
##
## Accuracy : 0.3577
## 95% CI : (0.3405, 0.3751)
## No Information Rate : 0.1843
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.2294
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: buildings Class: forest Class: glacier
## Sensitivity 0.03432 0.9810 0.17541
## Specificity 0.97776 0.6627 0.97548
## Pos Pred Value 0.20833 0.3531 0.61783
## Neg Pred Value 0.85587 0.9947 0.83961
## Prevalence 0.14567 0.1580 0.18433
## Detection Rate 0.00500 0.1550 0.03233
## Detection Prevalence 0.02400 0.4390 0.05233
## Balanced Accuracy 0.50604 0.8219 0.57544
## Class: mountain Class: sea Class: street
## Sensitivity 0.7010 0.019608 0.23553
## Specificity 0.7034 0.990361 0.91997
## Pos Pred Value 0.3339 0.294118 0.37107
## Neg Pred Value 0.9173 0.831423 0.85720
## Prevalence 0.1750 0.170000 0.16700
## Detection Rate 0.1227 0.003333 0.03933
## Detection Prevalence 0.3673 0.011333 0.10600
## Balanced Accuracy 0.7022 0.504985 0.57775
The accuracy of model 1 is poor, at about 36%. This might be caused by the low number of epochs, and the model probably needs more layers to learn the image details, because it classified every class poorly except forest.
# set_random_seed(76)
#
# model2 <- keras_model_sequential(name = "model5") %>%
# layer_conv_2d(input_shape = c(150, 150, 3),
# filters = 32,
# kernel_size = c(5, 5),
# activation = "relu",
# padding = "same") %>%
#
# layer_max_pooling_2d(pool_size = c(5, 5),
# strides = 2) %>%
#
# layer_conv_2d(filters = 64,
# kernel_size = c(3, 3),
# activation = "relu",
# padding = "same") %>%
#
# layer_max_pooling_2d(pool_size = c(5, 5),
# strides = 2) %>%
#
# layer_conv_2d(filters = 128,
# kernel_size = c(3, 3),
# padding = "same",
# activation = "relu") %>%
#
# layer_max_pooling_2d(pool_size = c(7, 7),
# strides = 2) %>%
#
# layer_conv_2d(filters = 256,
# kernel_size = c(3, 3),
# padding = "same",
# activation = "relu") %>%
#
# layer_flatten() %>%
#
# layer_dense(512,
# activation = "relu") %>%
#
# layer_dense(6,
# activation = "softmax")
#
# summary(model2)
Model 2 is a modification of model 1 with 10 layers: - The first layer is a convolution layer that takes the 150x150 RGB input image and applies 32 filters - The second layer is a max-pooling layer with a 5x5 pool size and a stride of 2, which downsamples the feature maps and helps to limit overfitting - The third layer is a convolution layer with 64 filters - The fourth layer is a max-pooling layer with a 5x5 pool size and a stride of 2 - The fifth layer is a convolution layer with 128 filters - The sixth layer is a max-pooling layer with a 7x7 pool size and a stride of 2 - The seventh layer is a convolution layer with 256 filters - The eighth layer is a flatten layer that turns the feature maps into a 1D vector - The ninth layer is a dense layer with 512 units - The last layer is the output layer with a softmax activation function to classify the 6 categories
# model2 %>% compile(loss = "categorical_crossentropy",
# optimizer = optimizer_adam(),
# metrics = "accuracy")
#
# history2 <- model2 %>% fit(x = train_data,
# validation_data = test_data,
# epochs = 25,
# verbose = 1,
# view_metrics = 0)
#
# plot(history2)
# save_keras_model(model2, "model2.h5")
# Load the previously trained model 2 from disk instead of retraining it.
model2_load <- load_keras_model("model2.h5")

# Predicted scores for the test images; expected to be a matrix with one
# row per image and one column per class.
pred_res <- predict(object = model2_load, test_img)

# Zero-based index of the highest-scoring class in every row. Computed in R
# on the whole matrix at once; ties resolve to the first column.
pred_res_idx <- max.col(pred_res, ties.method = "first") - 1L
pred_res_cls <- decode(pred_res_idx)

# Use the same, complete set of levels on both factors so the confusion
# matrix can still be built when some class is never predicted.
class_levels <- decode(0:5)
confusionMatrix(
  data = factor(pred_res_cls, levels = class_levels),
  reference = factor(test_img_df$class, levels = class_levels)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction buildings forest glacier mountain sea street
## buildings 199 5 84 31 54 130
## forest 153 462 17 54 55 217
## glacier 17 0 279 59 49 0
## mountain 22 3 73 316 105 16
## sea 27 4 82 60 225 17
## street 19 0 18 5 22 121
##
## Overall Statistics
##
## Accuracy : 0.534
## 95% CI : (0.516, 0.552)
## No Information Rate : 0.1843
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4421
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: buildings Class: forest Class: glacier
## Sensitivity 0.45538 0.9747 0.5045
## Specificity 0.88139 0.8036 0.9489
## Pos Pred Value 0.39563 0.4823 0.6906
## Neg Pred Value 0.90469 0.9941 0.8945
## Prevalence 0.14567 0.1580 0.1843
## Detection Rate 0.06633 0.1540 0.0930
## Detection Prevalence 0.16767 0.3193 0.1347
## Balanced Accuracy 0.66838 0.8892 0.7267
## Class: mountain Class: sea Class: street
## Sensitivity 0.6019 0.4412 0.24152
## Specificity 0.9115 0.9237 0.97439
## Pos Pred Value 0.5907 0.5422 0.65405
## Neg Pred Value 0.9152 0.8897 0.86501
## Prevalence 0.1750 0.1700 0.16700
## Detection Rate 0.1053 0.0750 0.04033
## Detection Prevalence 0.1783 0.1383 0.06167
## Balanced Accuracy 0.7567 0.6824 0.60795
The accuracy of the tuned model improves to about 53%, which is better but still modest.