# Setup ----
# Point reticulate at the Python installation that provides Keras/TensorFlow.
# NOTE(review): the path looks like a conda environment directory rather than
# a Python binary -- confirm it resolves on the target machine.
reticulate::use_python("C:/Users/iwky_/.conda/envs/lbbnn/", required = TRUE)
# reticulate::conda_install("pandas", envname = "r-tensorflow")

# tidyverse supplies map(), str_extract() and the pipe used throughout; it was
# previously swallowed by the commented-out line above and never attached.
library(tidyverse)
library(dplyr)
library(rsample)
library(keras)
library(caret)
library(tensorflow)
library(yardstick)
library(sjPlot)
library(reticulate)

# This report studies a machine-learning / computer-vision detection workflow
# built with Keras and TensorFlow. The goal is to decide whether each image in
# the dataset can be categorized as a vehicle or as something else
# (non-vehicle).
# Keras is a deep learning API written in Python that runs on top of the
# machine learning platform TensorFlow.
# The dataset used in this report has two label categories:
# * Non-Vehicles
# * Vehicles
# Get the list of image folders under data/train/
folder_list <- list.files("data/train/")
folder_list
#> [1] "non-vehicles" "vehicles"

# Build the path of each folder, with a trailing slash so file names can be
# appended directly
folder_path <- paste0("data/train/", folder_list, "/")
folder_path
#> [1] "data/train/non-vehicles/" "data/train/vehicles/"

# Use map() to iterate over the folders automatically.
# Get file name
file_name <- map(
  folder_path,
  function(x) paste0(x, list.files(x))
) %>%
  unlist()

# first 6 file name
head(file_name)
#> [1] "data/train/non-vehicles/extra1.png"
#> [2] "data/train/non-vehicles/extra2.png"
#> [3] "data/train/non-vehicles/extra2071.png"
#> [4] "data/train/non-vehicles/extra2072.png"
#> [5] "data/train/non-vehicles/extra2073.png"
#> [6] "data/train/non-vehicles/extra2074.png"
library(imager)

# Randomly select six training images (seeded so the selection is repeatable)
set.seed(99)
sample_image <- sample(file_name, 6)

# Load the sampled images into R
img <- map(sample_image, load.image)

# Plot the images on a 2 x 3 grid, then restore the previous graphics
# settings so later base plots are not confined to the grid.
old_par <- par(mfrow = c(2, 3))
walk(img, plot) # plotting is a side effect; walk() avoids printing a list
par(old_par)
# In the recorded run every sampled image was reported as
# "Width: 64 pix Height: 64 pix Depth: 1 Colour channels: 3".

# Inspect a single image; dim() agrees with the printed width, height,
# depth and colour-channel counts
img <- load.image(file_name[1])
dim(img)
#> [1] 64 64 1 3
img
#> Image. Width: 64 pix Height: 64 pix Depth: 1 Colour channels: 3
# List every image path under data/<split>/<class folder>/, one class folder
# after another in the order given by `folders`. Using full.names = TRUE
# (rather than pasting the folder onto list.files()) means an empty folder
# contributes no entries instead of its own path.
list_split_files <- function(split, folders = folder_list) {
  dirs <- file.path("data", split, folders)
  unlist(lapply(dirs, list.files, full.names = TRUE))
}

# Build the image index for a vector of image paths:
#   class    - "non-vehicles" or "vehicles", extracted from the path
#   name     - file name without its directory
#   filename - the path as given
# Deriving `name` from the paths (instead of listing the directories again)
# keeps all three columns aligned row by row.
build_image_frame <- function(files) {
  data.frame(
    class = str_extract(files, "non-vehicles|vehicles"),
    name = basename(files),
    filename = files
  )
}

train <- build_image_frame(file_name)
head(train)

# `test_file` was previously fused with a stray `train` token into
# `traintest_file`, leaving `test_file` undefined where it is used below.
test_file <- list_split_files("test")
val_file <- list_split_files("validation")

test <- build_image_frame(test_file)
val <- build_image_frame(val_file)

# save into csv
# write_csv(train, "data/train_img.csv")
# write_csv(test, "data/test_img.csv")
# write_csv(val, "data/val_img.csv")

# Next, define the size of the images and the batch size up front, to make
# the later pre-processing steps easier.
# set data parameter
image_size <- c(64, 64)
batch_size <- 32

# get all classes
classes <- unique(train$class)

# image generator for data the model trains on: rescales pixel values by
# 1 / 255 and applies random rotation, shift, shear and zoom
seen_image_gen <- image_data_generator(
  rescale = 1 / 255,
  rotation_range = 15,
  width_shift_range = 0.1,
  height_shift_range = 0.1,
  shear_range = 0.1,
  zoom_range = 0.1,
  fill_mode = "nearest"
)

# image generator for validation/test data: rescaling only, no augmentation
unseen_image_gen <- image_data_generator(rescale = 1 / 255)

# To apply augmentation to the images, we use the
# flow_images_from_dataframe() function. Because the images we have are
# stored in the train folder, the augmentation results will also be stored in
# the data/train folder. This step is repeated for each of the train, test
# and validation data sets.
# NOTE(review): the calls that follow pass no `save_to_dir`, so confirm
# whether anything is actually written to disk; only the training generator
# uses the augmenting `seen_image_gen`.
# data generator
# Every split is read the same way (grayscale, `image_size`, categorical
# labels, `batch_size` images per batch); only the source data frame, the
# image generator, shuffling and the seed differ between splits.
make_flow <- function(frame, image_gen, shuffle = FALSE, seed = NULL) {
  flow_images_from_dataframe(
    dataframe = frame,
    x_col = "filename",
    y_col = "class",
    generator = image_gen,
    target_size = image_size,
    color_mode = "grayscale",
    class_mode = "categorical",
    batch_size = batch_size,
    shuffle = shuffle,
    seed = seed
  )
}

# Training batches: augmented and shuffled with a fixed seed
train_gen <- make_flow(train, seen_image_gen, shuffle = TRUE, seed = 100)

# Validation and test batches: rescaled only, kept in data-frame order
val_gen <- make_flow(val, unseen_image_gen)
test_gen <- make_flow(test, unseen_image_gen)

# Number of training samples
train_samples <- train_gen$n
val_samples <- val_gen$n # validation set size
test_samples <- test_gen$n # test set size

# How many distinct target classes the training generator assigned
output_n <- length(unique(train_gen$classes))

# Share of training images per class index
prop.table(table("\nFrequency" = factor(train_gen$classes)))
#>
#> Frequency
#> 0 1
#> 0.5228654 0.4771346
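# In this model architecture we will create several layers, including: a 2D
# convolution layer, a max-pooling layer, a flattening layer, a dense hidden
# layer, and a dense softmax output layer.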
# Fix the random state so weight initialisation is repeatable
RNGkind(sample.kind = "Rounding")
set.seed(100)
initializer <- initializer_random_normal(seed = 100)

# Build the network one layer at a time; each layer_*() call appends to the
# sequential model and hands it back.
model <- keras_model_sequential()

# Convolution over the single-channel input image
model <- layer_conv_2d(
  model,
  filters = 14,
  kernel_size = c(3, 3),
  padding = "same",
  activation = "relu",
  input_shape = c(image_size, 1),
  name = "convo_layer",
  kernel_initializer = initializer,
  bias_initializer = initializer
)

# 2 x 2 max pooling
model <- layer_max_pooling_2d(model, pool_size = c(2, 2))

# Flatten the feature maps into one vector per image
model <- layer_flatten(model, name = "dense_flatten")

# Fully connected hidden layer
model <- layer_dense(
  model,
  units = 32,
  name = "hidden_1",
  activation = "relu",
  kernel_initializer = initializer,
  bias_initializer = initializer
)

# Output layer: one softmax unit per class
model <- layer_dense(
  model,
  units = length(classes),
  name = "output",
  activation = "softmax",
  kernel_initializer = initializer,
  bias_initializer = initializer
)

# Compile with Adam, categorical cross-entropy loss and accuracy tracking.
# NOTE(review): newer keras releases name this argument `learning_rate`;
# `lr` is kept to match the installed version -- confirm before upgrading.
compile(
  model,
  optimizer = optimizer_adam(lr = 0.01),
  metrics = "accuracy",
  loss = "categorical_crossentropy"
)

# Print the layer-by-layer summary
summary(model)
#> Model: "sequential"
#> ________________________________________________________________________________
#> Layer (type) Output Shape Param #
#> ================================================================================
#> convo_layer (Conv2D) (None, 64, 64, 14) 140
#> ________________________________________________________________________________
#> max_pooling2d (MaxPooling2D) (None, 32, 32, 14) 0
#> ________________________________________________________________________________
#> dense_flatten (Flatten) (None, 14336) 0
#> ________________________________________________________________________________
#> hidden_1 (Dense) (None, 32) 458784
#> ________________________________________________________________________________
#> output (Dense) (None, 2) 66
#> ================================================================================
#> Total params: 458,990
#> Trainable params: 458,990
#> Non-trainable params: 0
#> ________________________________________________________________________________
# callbacks: write TensorBoard logs to logs/run_a
callbacks <- callback_tensorboard("logs/run_a")

# meta: number of batches needed to cover each data set once per epoch
steps_per_epoch <- ceiling(nrow(train) / batch_size)
validation_steps <- ceiling(nrow(val) / batch_size)

# fit the model for 30 epochs, validating on `val_gen` after each epoch
# NOTE(review): fit_generator() is deprecated in newer keras releases in
# favour of fit(); kept to match the installed version -- confirm before
# upgrading.
history <- model %>% fit_generator(
  generator = train_gen,
  steps_per_epoch = steps_per_epoch,
  epochs = 30,
  validation_data = val_gen,
  validation_steps = validation_steps,
  callbacks = callbacks
)

# save the model
save_model_hdf5(model, "final-model.hdf5")

# plot history
plot(history)

# Test Data Prediction ----
library(dplyr)

# predict on test: a matrix of class probabilities, expected to hold one row
# per test image and one column per class
pred_prob <- model %>%
  keras::predict_generator(
    generator = test_gen,
    steps = ceiling(nrow(test) / batch_size)
  )

# Label the probability columns .pred_<class>.
# NOTE(review): this assumes the generator's class indices follow the order
# of `classes` -- confirm against test_gen$class_indices.
colnames(pred_prob) <- paste0(".pred_", classes)

# Predicted class = column with the highest probability (first one on ties,
# matching which.max()); max.col() does this without row-wise apply().
pred_test <- pred_prob %>%
  as_tibble() %>%
  mutate(
    .pred_class = factor(
      classes[max.col(pred_prob, ties.method = "first")],
      levels = classes
    )
  )

# combine with test dataset: true class first, then the predictions
pred_test <- test %>%
  select(class) %>%
  mutate(class = factor(class, levels = classes)) %>%
  bind_cols(pred_test)

# quick check
head(pred_test, 10)

# confusion matrix of true vs. predicted class, drawn as a heatmap
pred_test %>%
  yardstick::conf_mat(class, .pred_class) %>%
  autoplot(type = "heatmap")

# metrics summary
pred_test %>%
  summarise(
    accuracy = accuracy_vec(class, .pred_class),
    sensitivity = sens_vec(class, .pred_class),
    specificity = spec_vec(class, .pred_class),
    precision = precision_vec(class, .pred_class)
  )

# It can be concluded from the metrics above that the model achieves
# approximately 87.5% accuracy in detecting whether a presented image is
# categorized as a vehicle or a non-vehicle.