Masks play a crucial role in protecting people against respiratory diseases, and they were one of the few precautions available for COVID-19 in the absence of immunization. With this dataset, it is possible to build a model that detects whether or not a person is wearing a mask.
Link to dataset: https://www.kaggle.com/prithwirajmitra/covid-face-mask-detection-dataset
This dataset contains 1006 images belonging to two classes, ‘Mask’ and ‘Non Mask’, and is already split into Train, Validation and Test folders.
library(tensorflow)
library(keras)
library(raster)
# path to image folders
train_dir <- "New Masks Dataset/Train"
validation_dir <- "New Masks Dataset/Validation"
test_dir <- "New Masks Dataset/Test"
# Plotting people wearing masks
fnames_mask <- list.files("New Masks Dataset/Train/Mask", full.names = TRUE)
par(mfrow = c(2, 4))
for (i in 1:8) {
  plotRGB(stack(fnames_mask[i]))
}
mtext("Images of people wearing mask", side = 3, line = -19, outer = TRUE)
# Plotting people without masks
fnames_nonmask <- list.files("New Masks Dataset/Train/Non Mask", full.names = TRUE)
par(mfrow = c(2, 5))
for (i in 1:10) {
  plotRGB(stack(fnames_nonmask[i]))
}
mtext("Images of people without mask", side = 3, line = -20, outer = TRUE)
# Image augmentation for better model fit
train_data_gen <- image_data_generator(
  rescale = 1/255,        # re-scale the pixel values to [0, 1]
  zoom_range = 0.2,       # randomly zoom inside pictures
  horizontal_flip = TRUE, # randomly flip half the images horizontally
  fill_mode = "nearest"   # fill in newly created pixels
)
# Validation images are only re-scaled, never augmented
valid_data_gen <- image_data_generator(
  rescale = 1/255
)
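To see what the augmentation actually does, a few augmented variants of a single training image can be drawn from train_data_gen (a sketch that reuses fnames_mask from the plotting step above; flow_images_from_data feeds one in-memory image through the generator):
# Visualize a few augmented versions of one training image
img <- image_load(fnames_mask[1], target_size = c(150, 150))
img_array <- array_reshape(image_to_array(img), c(1, 150, 150, 3))
aug_gen <- flow_images_from_data(img_array, generator = train_data_gen, batch_size = 1)
par(mfrow = c(2, 2), mar = c(0.5, 0.5, 0.5, 0.5))
for (i in 1:4) {
  batch <- generator_next(aug_gen)
  plot(as.raster(batch[1, , , ]))  # pixel values are already in [0, 1] after rescaling
}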
# training images
train_generator <- flow_images_from_directory(
  train_dir,                  # target directory
  train_data_gen,             # training data generator
  target_size = c(150, 150),  # resize all images to 150 × 150
  batch_size = 20,            # 20 samples in one batch
  class_mode = "binary"
)
# Number of images per class:
table(factor(train_generator$classes))
##
## 0 1
## 300 300
# Class label vs index mapping:
train_generator$class_indices
## $Mask
## [1] 0
##
## $`Non Mask`
## [1] 1
# validation images
validation_generator <- flow_images_from_directory(
  validation_dir,
  valid_data_gen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
# Number of images per class:
table(factor(validation_generator$classes))
##
## 0 1
## 153 153
# Class label vs index mapping:
validation_generator$class_indices
## $Mask
## [1] 0
##
## $`Non Mask`
## [1] 1
The model is a convolutional neural network with four convolution + max-pooling blocks as hidden layers, followed by a flatten layer, dropout, a 512-unit dense layer and a single sigmoid output unit.
model_v1 <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(150, 150, 3)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dropout(rate = 0.5) %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
model_v1 %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(),
  metrics = c("acc")
)
summary(model_v1)
## Model: "sequential"
## ________________________________________________________________________________
## Layer (type) Output Shape Param #
## ================================================================================
## conv2d (Conv2D) (None, 148, 148, 32) 896
## ________________________________________________________________________________
## max_pooling2d (MaxPooling2D) (None, 74, 74, 32) 0
## ________________________________________________________________________________
## conv2d_1 (Conv2D) (None, 72, 72, 64) 18496
## ________________________________________________________________________________
## max_pooling2d_1 (MaxPooling2D) (None, 36, 36, 64) 0
## ________________________________________________________________________________
## conv2d_2 (Conv2D) (None, 34, 34, 128) 73856
## ________________________________________________________________________________
## max_pooling2d_2 (MaxPooling2D) (None, 17, 17, 128) 0
## ________________________________________________________________________________
## conv2d_3 (Conv2D) (None, 15, 15, 128) 147584
## ________________________________________________________________________________
## max_pooling2d_3 (MaxPooling2D) (None, 7, 7, 128) 0
## ________________________________________________________________________________
## flatten (Flatten) (None, 6272) 0
## ________________________________________________________________________________
## dropout (Dropout) (None, 6272) 0
## ________________________________________________________________________________
## dense (Dense) (None, 512) 3211776
## ________________________________________________________________________________
## dense_1 (Dense) (None, 1) 513
## ================================================================================
## Total params: 3,453,121
## Trainable params: 3,453,121
## Non-trainable params: 0
## ________________________________________________________________________________
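The parameter counts in the summary can be reproduced by hand: a convolution layer has (kernel height × kernel width × input channels + 1) × filters parameters, and a dense layer has (inputs + 1) × units:
(3 * 3 * 3 + 1) * 32    # first conv layer: 896
(3 * 3 * 32 + 1) * 64   # second conv layer: 18496
(6272 + 1) * 512        # first dense layer: 3211776
(512 + 1) * 1           # output layer: 513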
history_v1 <- model_v1 %>%
  fit_generator(
    train_generator,
    steps_per_epoch = 80,
    epochs = 15,
    validation_data = validation_generator
  )
plot(history_v1)
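The numbers behind the plot are also stored in the history object, so the last-epoch validation accuracy can be read off directly (a small sketch; with metrics = c("acc") the metric names are typically acc and val_acc, though they can differ between keras versions):
# Last-epoch validation accuracy from the training history
last_epoch <- length(history_v1$metrics$val_acc)
history_v1$metrics$val_acc[last_epoch]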
The model looks quite accurate on the validation data. Let’s evaluate it on the test dataset.
test_data_gen <- image_data_generator(
  rescale = 1/255
)
test_generator <- flow_images_from_directory(
  test_dir,
  test_data_gen,
  target_size = c(150, 150),
  batch_size = 20,
  class_mode = "binary"
)
eval <- model_v1 %>% evaluate_generator(test_generator, steps = 40)
sprintf("We got an impressive %d percent accuracy on our model", round(100*eval$acc))
## [1] "We got an impressive 97 percent accuracy on our model"