# Rebuild the working directories from scratch and unpack the dataset archive.
# Abort if the project directory is missing: otherwise the recursive deletes
# below would run in whatever directory the shell happened to start in.
cd ~/Data_Science/R/Projects/animal_classification/ || exit 1
# Remove the output of any previous run.
rm -rf raw-img animals animals10
# Flat destination folder that receives the renamed images.
mkdir animals
# Quiet extraction into animals10/.
unzip -q animals10.zip -d animals10
# Clear every object from the current environment, then run the garbage
# collector; gc() also prints the memory usage summary.
rm(list = ls())
gc()
##          used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 494303 26.4    1091497 58.3   641594 34.3
## Vcells 910691  7.0    8388608 64.0  1752684 13.4
# Rename every raw image to "<english label>_<n>.jpg" inside its source folder
# and copy the renamed files into the flat animals/ directory.
# Full paths are used throughout, so the working directory is never changed.
raw_img_dir <- "~/Data_Science/R/Projects/animal_classification/animals10/raw-img"
animals_dir <- "~/Data_Science/R/Projects/animal_classification/animals/"

# Italian folder name -> English label used as the file-name prefix.
folder_labels <- c(
  cane       = "dog",
  cavallo    = "horse",
  elefante   = "elephant",
  farfalla   = "butterfly",
  gallina    = "chicken",
  gatto      = "cat",
  mucca      = "cow",
  pecora     = "sheep",
  ragno      = "spider",
  scoiattolo = "squirrel"
)

folders <- list.files(raw_img_dir)

for (i in seq_along(folders)) {
  # A folder without a mapping used to inherit the label of the previous
  # folder (or fail on the first iteration); skip it explicitly instead.
  if (!folders[i] %in% names(folder_labels)) {
    warning("Skipping folder with no label mapping: ", folders[i],
            call. = FALSE)
    next
  }
  animal <- folder_labels[[folders[i]]]

  folder_dir <- file.path(raw_img_dir, folders[i])
  old_paths <- list.files(folder_dir, full.names = TRUE)
  if (length(old_paths) == 0) {
    next
  }

  # seq_along() keeps the numbering in step with the number of files;
  # 1:length() would produce c(1, 0) for an empty folder.
  new_paths <- file.path(
    folder_dir,
    paste0(animal, "_", seq_along(old_paths), ".jpg")
  )

  # NOTE: the rename happens in place, so this step is meant to run once on a
  # freshly extracted archive.
  renamed <- file.rename(old_paths, new_paths)
  copied <- file.copy(new_paths[renamed], animals_dir)

  if (!all(renamed) || !all(copied)) {
    warning("Some files in ", folder_dir, " could not be renamed or copied",
            call. = FALSE)
  }
}
library(imager)
## Loading required package: magrittr
## 
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
## 
##     add
## The following objects are masked from 'package:stats':
## 
##     convolve, spectrum
## The following object is masked from 'package:graphics':
## 
##     frame
## The following object is masked from 'package:base':
## 
##     save.image
# Target size of every image fed to the network.
width <- 120
height <- 120
channel <- 3
Sys.time()
## [1] "2020-01-14 16:13:00 CST"
# Read the images from the same directory the renamed files were copied to.
# An absolute path is used so the result does not depend on the current
# working directory.
image_dir <- path.expand(
  "~/Data_Science/R/Projects/animal_classification/animals"
)
files <- list.files(image_dir, full.names = TRUE)
number_of_images <- length(files)
if (number_of_images == 0) {
  stop("No images found in ", image_dir, call. = FALSE)
}

# Preallocate the image array (image, x, y, colour channel).
images <- array(NA_real_, c(number_of_images, width, height, channel))

# The label is the part of the file name before the first underscore,
# e.g. "dog_12.jpg" -> "dog".
image_labels <- vapply(
  strsplit(basename(files), "_"),
  function(parts) parts[1],
  character(1)
)

for (i in seq_len(number_of_images)) {
  # Resize to width x height, drop any alpha channel, and keep the single
  # depth slice.
  resized <- resize(load.image(files[i]), size_x = width, size_y = height)
  images[i, , , ] <- rm.alpha(resized)[, , 1, ]
}
Sys.time()
## [1] "2020-01-14 16:18:43 CST"
# Preview the first 30 loaded images on a 6 x 5 grid, drawing only the first
# colour channel in gray scale.
par(mfrow = c(6, 5), mar = c(0, 0, 1.5, 0), xaxs = "i", yaxs = "i")
for (k in seq_len(30)) {
  first_channel <- images[k, , , 1]
  # Reverse the column order so the picture is drawn upright by image().
  flipped <- first_channel[, rev(seq_len(ncol(first_channel)))]
  image(
    1:width, 1:height, flipped,
    col = gray((1:255) / 255), xaxt = "n", yaxt = "n"
  )
}

# Reproducible random split: 80% of the image indices go to training.
set.seed(1)
train <- sample.int(number_of_images, number_of_images * 0.80)
# Negative indices select every image that is not in the training set.
test <- -train
# Share of each class that ended up in the training set.
table(image_labels[train]) / table(image_labels)
## 
## butterfly       cat   chicken       cow       dog  elephant     horse 
## 0.8082386 0.7973621 0.8021304 0.7893891 0.7912811 0.7966805 0.8013725 
##     sheep    spider  squirrel 
## 0.7928571 0.8108276 0.8023631

# This looks pretty good: roughly 80% of the observations in each category were selected for training.

# Encode the class names as the numeric codes 0-9 expected by
# sparse_categorical_crossentropy.
label_codes <- c(
  dog       = 0,
  horse     = 1,
  elephant  = 2,
  butterfly = 3,
  chicken   = 4,
  cat       = 5,
  cow       = 6,
  sheep     = 7,
  spider    = 8,
  squirrel  = 9
)

# Fail loudly on a label without a code instead of letting it turn into NA.
unknown_labels <- setdiff(image_labels, names(label_codes))
if (length(unknown_labels) > 0) {
  stop("Labels without a numeric code: ",
       paste(unknown_labels, collapse = ", "), call. = FALSE)
}

# A single lookup yields a numeric vector directly, so no character
# intermediate and no as.numeric() conversion is needed.
image_labels_numeric <- unname(label_codes[image_labels])

table(image_labels_numeric)
## image_labels_numeric
##    0    1    2    3    4    5    6    7    8    9 
## 4863 2623 1446 2112 3098 1668 1866 1820 4821 1862
library(keras)
# Convolutional classifier: three conv + max-pool stages with 32, 64 and 128
# filters, then a 128-unit sigmoid dense layer and a 10-way softmax output.
input_dims <- c(width, height, channel)

model <- keras_model_sequential() %>%
  layer_conv_2d(
    filters = 32,
    kernel_size = c(3, 3),
    activation = "relu",
    input_shape = input_dims
  ) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(
    filters = 64,
    kernel_size = c(3, 3),
    activation = "relu"
  ) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_conv_2d(
    filters = 128,
    kernel_size = c(3, 3),
    activation = "relu"
  ) %>%
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_flatten() %>%
  layer_dense(units = 128, activation = "sigmoid") %>%
  layer_dense(units = 10, activation = "softmax")

# The sparse loss takes the class codes directly, without one-hot encoding.
compile(
  model,
  loss = "sparse_categorical_crossentropy",
  optimizer = optimizer_rmsprop(lr = 0.001),
  #optimizer = "adam",
  metrics = "accuracy"
)
# Train for five epochs on the training split; the Sys.time() calls bracket
# the fit to show how long it took.
Sys.time()
## [1] "2020-01-14 16:18:48 CST"
# `epochs` is spelled out in full rather than relying on partial argument
# matching; drop = FALSE keeps the sample dimension for a one-image subset.
model %>% fit(
  images[train, , , , drop = FALSE],
  image_labels_numeric[train],
  epochs = 5
)
Sys.time()
## [1] "2020-01-14 16:42:37 CST"
# Score the model on the held-out images.
score <- model %>% evaluate(
  images[test, , , , drop = FALSE],
  image_labels_numeric[test],
  verbose = 0
)
# [[ ]] works whether evaluate() returns a named list or a named numeric
# vector; `$` fails on the latter.
print(paste0('This model achieves ', round(score[["accuracy"]]*100, digits = 2), '% accuracy in classifying images of 10 different animals'))
## [1] "This model achieves 67.46% accuracy in classifying images of 10 different animals"