library(keras)
library(tensorflow)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.3 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(imager)
## Warning: package 'imager' was built under R version 4.1.3
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
##
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
##
## add
## The following object is masked from 'package:stringr':
##
## boundary
## The following object is masked from 'package:tidyr':
##
## fill
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
## The following object is masked from 'package:graphics':
##
## frame
## The following object is masked from 'package:base':
##
## save.image
library(caret)
## Warning: package 'caret' was built under R version 4.1.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
## The following object is masked from 'package:tensorflow':
##
## train
There are 5 directories in this dataset, one for each kind of insect, and each directory contains roughly 1,000 images of that insect type.
The insects are: Butterfly, Dragonfly, Grasshopper, Ladybird and Mosquito.
The link to the dataset is https://www.kaggle.com/datasets/hammaadali/insects-recognition
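The generators further below expect the images laid out in train/validation/test sub-folders. A minimal sketch of one way to build that layout from the raw Kaggle download follows; the raw folder name and the 70/15/15 split are assumptions, not part of the original analysis:
set.seed(123)
classes <- c("Butterfly", "Dragonfly", "Grasshopper", "Ladybird", "Mosquito")
for (cls in classes) {
  # Shuffle the files for one class, then cut them into train/validation/test
  files <- sample(list.files(file.path("insects_recognition_raw", cls), full.names = TRUE))
  n     <- length(files)
  split <- cut(seq_len(n), breaks = c(0, 0.70 * n, 0.85 * n, n),
               labels = c("train", "validation", "test"))
  for (part in levels(split)) {
    dest <- file.path("insects_recognition", part, cls)
    dir.create(dest, recursive = TRUE, showWarnings = FALSE)
    file.copy(files[split == part], dest)
  }
}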
The pixel values range from 0 to 255, where each number encodes a colour intensity. Feeding these raw values straight into a deep neural network makes the computation on large numeric values more expensive, so we normalize the values to the range 0 to 1.
With smaller numbers the computation becomes easier and faster. Since the maximum pixel value is 255, dividing every value by 255 rescales the data to the range 0 to 1.
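For example (a toy illustration, not part of the original pipeline), dividing a small matrix of raw 8-bit pixel values by 255 maps them into [0, 1]:
raw_pixels <- matrix(c(0, 64, 128, 192, 250, 255), nrow = 2)
raw_pixels / 255
##           [,1]      [,2]      [,3]
## [1,] 0.0000000 0.5019608 0.9803922
## [2,] 0.2509804 0.7529412 1.0000000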
train_datagen <- image_data_generator(rescale = 1/255)
validation_datagen <- image_data_generator(rescale = 1/255)
test_datagen <- image_data_generator(rescale = 1/255)
train_generator <- flow_images_from_directory(
"insects_recognition/train/", # Target directory
train_datagen, # Training data generator
target_size = c(150, 150), # Resizes all images to 150 × 150
batch_size = 20, # 20 samples in one batch
class_mode = "categorical" # Because we use categocal_crossentropy loss,
# we need categorical labels.
)
validation_generator <- flow_images_from_directory(
"insects_recognition/validation/",
validation_datagen,
target_size = c(150, 150),
batch_size = 20,
class_mode = "categorical"
)
test_generator <- flow_images_from_directory(
"insects_recognition/test/",
test_datagen,
target_size = c(150, 150),
batch_size = 20,
class_mode = "categorical"
)
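As a quick sanity check (a minimal sketch, not part of the original analysis), pulling a single batch confirms the shapes and the [0, 1] rescaling:
batch <- generator_next(train_generator)   # list: [[1]] = images, [[2]] = one-hot labels
dim(batch[[1]])    # expected: 20 150 150 3
dim(batch[[2]])    # expected: 20 5 (one column per insect class)
range(batch[[1]])  # should lie within [0, 1] after rescaling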
model_file = "final_model.h5"
history_file = "final_model.rds"
model_v2 <- load_model_hdf5(model_file)
history_v2 <- read_rds(history_file)
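These two files are assumed to have been written at the end of an earlier training run, along these lines (a hedged sketch; model and history denote the objects returned by that run):
save_model_hdf5(model, model_file)   # serialize the fitted keras model to HDF5
write_rds(history, history_file)     # serialize the fit() history with readr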
Whenever the training and validation performances diverge, it is an indicator of over-fitting. This is very common when applying a complex model to a small dataset, so that noise is mis-characterized as signal.
# Plotting the accuracy and loss for training and validation data
plot(history_v2)
## `geom_smooth()` using formula 'y ~ x'
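One common way to narrow this training/validation gap on a small dataset is data augmentation. A hedged sketch of an augmented training generator follows; the parameter values are illustrative assumptions, not the settings used to train final_model.h5:
augmented_datagen <- image_data_generator(
  rescale            = 1/255,
  rotation_range     = 40,    # random rotations up to 40 degrees
  width_shift_range  = 0.2,   # random horizontal shifts (fraction of width)
  height_shift_range = 0.2,   # random vertical shifts (fraction of height)
  zoom_range         = 0.2,   # random zoom in/out
  horizontal_flip    = TRUE   # random left/right flips
)
# This generator could replace train_datagen in flow_images_from_directory()
# above; validation and test data are left un-augmented.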
# Evaluating the model on test data
model_v2 %>%
evaluate_generator(test_generator, steps = 50)
## $loss
## [1] 5.920262
##
## $acc
## [1] 0.4897959
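For per-class detail beyond the overall accuracy, a hedged sketch using caret's confusionMatrix(); a non-shuffled generator is needed so the predictions line up with the stored labels (this was not part of the original analysis):
eval_generator <- flow_images_from_directory(
  "insects_recognition/test/",
  test_datagen,
  target_size = c(150, 150),
  batch_size  = 20,
  class_mode  = "categorical",
  shuffle     = FALSE            # keep file order so labels match predictions
)
pred_prob  <- model_v2 %>%
  predict_generator(eval_generator, steps = ceiling(eval_generator$n / 20))
pred_class <- apply(pred_prob, 1, which.max) - 1   # 0-based predicted class index
true_class <- eval_generator$classes               # 0-based true labels, in file order
class_names <- names(eval_generator$class_indices)
caret::confusionMatrix(
  factor(class_names[pred_class + 1], levels = class_names),
  factor(class_names[true_class + 1], levels = class_names)
)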
To visualize a few sample images we: 1. get the list of class folders in the training directory, 2. build the full file names, and 3. randomly select images to load and plot.
folder_list <- list.files("insects_recognition/train/")
folder_list
## [1] "Butterfly" "Dragonfly" "Grasshopper" "Ladybird" "Mosquito"
folder_path <- paste0("insects_recognition/train/", folder_list, "/")
file_name <- map(folder_path,
function(x) paste0(x, list.files(x))) %>%
unlist()
sample_image <- sample(file_name, 10)
# Load image into R
img <- map(sample_image, load.image)
# Plot image
par(mfrow = c(2, 5)) # Create 2 x 5 image grid
map(img, plot)
## [[1]]
## Image. Width: 276 pix Height: 183 pix Depth: 1 Colour channels: 3
##
## [[2]]
## Image. Width: 311 pix Height: 162 pix Depth: 1 Colour channels: 3
##
## [[3]]
## Image. Width: 244 pix Height: 207 pix Depth: 1 Colour channels: 3
##
## [[4]]
## Image. Width: 224 pix Height: 225 pix Depth: 1 Colour channels: 3
##
## [[5]]
## Image. Width: 276 pix Height: 183 pix Depth: 1 Colour channels: 3
##
## [[6]]
## Image. Width: 300 pix Height: 168 pix Depth: 1 Colour channels: 3
##
## [[7]]
## Image. Width: 275 pix Height: 183 pix Depth: 1 Colour channels: 3
##
## [[8]]
## Image. Width: 287 pix Height: 175 pix Depth: 1 Colour channels: 3
##
## [[9]]
## Image. Width: 251 pix Height: 201 pix Depth: 1 Colour channels: 3
##
## [[10]]
## Image. Width: 295 pix Height: 171 pix Depth: 1 Colour channels: 3
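The printed dimensions above vary from image to image, which is why the generators resize everything to a fixed target_size of 150 × 150. A hedged sketch summarising the raw sizes over a random sample of training files (the sample size of 200 is an arbitrary choice to keep this fast):
sample_dims <- map_dfr(sample(file_name, 200), function(f) {
  im <- load.image(f)                               # read the file with imager
  tibble(width = width(im), height = height(im))
})
summary(sample_dims)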
url <- "https://www.linkpicture.com/q/Proof_1.png"
knitr::include_url(url)