gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 480274 25.7 1051414 56.2 641594 34.3
## Vcells 901650 6.9 8388608 64.0 1752684 13.4
rm(list=ls())
library(keras)
library(imager)
## Loading required package: magrittr
##
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
##
## add
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
## The following object is masked from 'package:graphics':
##
## frame
## The following object is masked from 'package:base':
##
## save.image
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:imager':
##
## fill
## The following object is masked from 'package:magrittr':
##
## extract
library(ggplot2)
setwd('/home/sumit/Data_Science/R/Projects/Cat_and_Dog')
file = list.files("./images")
file = paste("./images/", file, sep = '')
# "./images/" is 9 characters, so characters 10-12 hold the "cat"/"dog" prefix
table(substr(file, 10, 12))
##
## cat dog
## 12500 12500
Sys.time()
## [1] "2020-01-10 12:47:24 CST"
number_of_images = length(file)
width = 80
height = 80
channel = 3
images = array(NA, c(number_of_images, width, height, channel))
image_labels = c()
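# A rough memory estimate (a sanity check added here, not in the original
# script): R numeric arrays hold doubles at 8 bytes per element, so this
# preallocation alone is roughly 3.6 GiB -- which is why gc() was run above.
number_of_images * width * height * channel * 8 / 1024^3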
for (i in 1:number_of_images) {
  img = resize(load.image(file[i]), size_x = width, size_y = height, interpolation_type = 3L)
  images[i,,,] = img[,,1,]  # drop the singleton depth dimension, keep all 3 channels
  # label encoding: cat = 0, dog = 1
  image_labels[i] = ifelse(substr(file[i], 10, 12) == "cat", 0, 1)
}
Sys.time()
## [1] "2020-01-10 12:55:15 CST"
table(image_labels)
## image_labels
## 0 1
## 12500 12500
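# imager::load.image() typically returns pixel intensities already scaled to
# [0,1], so no extra normalization step is needed before training. A quick
# check on one image (a sketch added here, not in the original run):
range(images[1,,,])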
# Let's look at some of the loaded images
class_names = c("Cat","Dog")
par(mfrow=c(5,5))
par(mar=c(0,0,1.5,0), xaxs='i', yaxs='i')
for (i in 1:25) {
  img = t(apply(t(images[i,,,1]), 2, rev))  # rotate so image() displays it upright
  image(1:width, 1:height, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
        main = class_names[image_labels[i] + 1])
}
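# par() settings persist for the session; restoring the default single-panel
# layout afterwards (an added housekeeping step, not in the original) keeps
# later plots from inheriting the 5x5 grid.
par(mfrow = c(1, 1))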
set.seed(1)
train = sample(1:number_of_images, number_of_images*0.8)
test = -train
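# sample() draws uniformly, so the 80/20 split should keep the two classes
# roughly balanced; a quick check (added here, not in the original run):
table(image_labels[train])
table(image_labels[test])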
model <- keras_model_sequential() %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
                input_shape = c(width, height, channel)) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dense(units = 512, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
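# summary() prints the layer-by-layer output shapes and parameter counts,
# which makes the four conv/pool stages easy to follow (output omitted):
summary(model)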
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = optimizer_rmsprop(lr = 0.001),
  metrics = c('accuracy')
)
model %>% fit(images[train,,,], image_labels[train], epochs = 5)
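# fit() above reports training metrics only; a hedged variant (not what was
# run here) could pass validation_split to hold out part of the training set
# and watch for overfitting:
# model %>% fit(images[train,,,], image_labels[train],
#               epochs = 5, validation_split = 0.2)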
prediction = model %>% predict_classes(images[test,,,])
mean(prediction == image_labels[test])
## [1] 0.8634
The model correctly classifies 86.34% of the test images.
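Accuracy alone does not show which class drives the remaining errors. A confusion matrix (a small sketch added here, not part of the original run) splits the misclassifications between cats and dogs:

# Rows are predicted labels, columns are true labels
table(Predicted = as.vector(prediction), Actual = image_labels[test])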