Tutorial: Basic Classification w Keras

The Fashion MNIST dataset

library(keras)
fashion_mnist <- dataset_fashion_mnist()

c(train_images, train_labels) %<-% fashion_mnist$train
c(test_images, test_labels) %<-% fashion_mnist$test

class_names = c('Tee/top',
                'Pants',
                'Sweater',
                'Dress',
                'Coat', 
                'Sandal',
                'Shirt',
                'keds',
                'Bag',
                'Dirty boots')

dim(train_images)

## [1] 60000    28    28

dim(train_labels)

## [1] 60000

train_labels[1:20]

##  [1] 9 0 0 3 0 2 7 2 5 5 0 9 5 5 7 9 1 0 6 4

dim(test_images)

## [1] 10000    28    28

dim(test_labels)

## [1] 10000

Preprocess the data

The data must be preprocessed before training the network. If you inspect the first image in the training set, you will see that the pixel values fall in the range of 0 to 255:

library(tidyr)
library(ggplot2)

image_1 <- as.data.frame(train_images[1, , ])
colnames(image_1) <- seq_len(ncol(image_1))
image_1$y <- seq_len(nrow(image_1))
image_1 <- gather(image_1, "x", "value", -y)
image_1$x <- as.integer(image_1$x)

ggplot(image_1, aes(x = x, y = y, fill = value)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "black", na.value = NA) +
  scale_y_reverse() +
  theme_minimal() +
  theme(panel.grid = element_blank())   +
  theme(aspect.ratio = 1) +
  xlab("") +
  ylab("")

train_images <- train_images / 255
test_images <- test_images / 255

par(mfcol = c(5,5))
par(mar = c(0, 0, 1.5, 0), xaxs  ='i', yaxs = 'i')
for (i in 1:25) { 
  img <- train_images[i, , ]
  img <- t(apply(img, 2, rev)) 
  image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
        main = paste(class_names[train_labels[i] + 1]))
}

Setup the layers

model <- keras_model_sequential()
model %>%
  layer_flatten(input_shape = c(28, 28)) %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dense(units = 10, activation = 'softmax')

Compile the model

model %>% compile(
  optimizer = 'adam', 
  loss = 'sparse_categorical_crossentropy',
  metrics = c('accuracy')
)

Train the model

model %>% fit(train_images, train_labels, epochs = 5)

Evaluate accuracy

score <- model %>% evaluate(test_images, test_labels)

cat('Test loss:', score$loss, "\n")

## Test loss: 0.3553635

cat('Test accuracy:', score$acc, "\n")

## Test accuracy: 0.8692

Make predictions

predictions <- model %>% predict(test_images)

predictions[1, ]

##  [1] 3.382487e-06 1.677899e-06 1.413251e-06 6.706657e-08 4.638115e-06
##  [6] 8.495172e-03 3.923636e-05 3.834865e-02 2.688085e-05 9.530788e-01

which.max(predictions[1, ])

## [1] 10

class_pred <- model %>% predict_classes(test_images)
class_pred[1:20]

##  [1] 9 2 1 1 6 1 4 6 5 7 4 5 7 3 4 1 2 2 8 0

test_labels[1]

## [1] 9

par(mfcol=c(5,5))
par(mar=c(0, 0, 1.5, 0), xaxs='i', yaxs='i')
for (i in 1:25) { 
  img <- test_images[i, , ]
  img <- t(apply(img, 2, rev)) 
  # subtract 1 as labels go from 0 to 9
  predicted_label <- which.max(predictions[i, ]) - 1
  true_label <- test_labels[i]
  if (predicted_label == true_label) {
    color <- '#008800' 
  } else {
    color <- '#bb0000'
  }
  image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
        main = paste0(class_names[predicted_label + 1], " (",
                      class_names[true_label + 1], ")"),
        col.main = color)
}

# Grab an image from the test dataset
# take care to keep the batch dimension, as this is expected by the model
img <- test_images[1, , , drop = FALSE]
dim(img)

## [1]  1 28 28

predictions <- model %>% predict(img)
predictions

##              [,1]         [,2]         [,3]         [,4]         [,5]
## [1,] 3.382481e-06 1.677896e-06 1.413248e-06 6.706631e-08 4.638115e-06
##             [,6]         [,7]       [,8]         [,9]     [,10]
## [1,] 0.008495172 3.923636e-05 0.03834865 2.688087e-05 0.9530788

# subtract 1 as labels are 0-based
prediction <- predictions[1, ] - 1
which.max(prediction)

## [1] 10

class_pred <- model %>% predict_classes(img)
class_pred

## [1] 9