rm(list=ls(all=T))
options(digits=4, scipen=40)
library(dplyr)
library(keras)
Reading & Examining the Data …
mnist = dataset_mnist()
par(mfrow = c(6, 8), pty = "s", mar = c(0.5, 0.5, 0, 0))
for(p in 1:48) mnist$train$x[p,,] %>% as.raster(max=255) %>% plot
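To cross-check the digits just plotted against their labels, the corresponding y values can be printed in the same 6-by-8 layout (a quick base-R sketch; mfrow fills the plot grid row by row, hence byrow = TRUE):
matrix(mnist$train$y[1:48], nrow = 6, ncol = 8, byrow = TRUE) # labels in plot order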
Reshaping the Data …
train_images = array_reshape(mnist$train$x, c(60000, 28 * 28))
train_images = train_images / 255 # normalization
test_images = array_reshape(mnist$test$x, c(10000, 28 * 28))
test_images = test_images / 255 # normalization
train_labels = to_categorical(mnist$train$y)
test_labels = to_categorical(mnist$test$y)
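to_categorical() one-hot encodes each integer label into a 10-column indicator row, with column j + 1 flagging digit j. A quick check, using only the objects created above:
mnist$train$y[1:3] # integer labels: 5 0 4
train_labels[1:3, ] # the corresponding one-hot rows (digits 0-9)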
mlp = keras_model_sequential() %>%
layer_dense(units = 512, # number of neurons (units) in the layer
activation = "relu", # activation function
input_shape = c(784) # dimensions of input tensor
) %>%
layer_dense(units = 10, # one output neuron per class
activation = "softmax" # activate the largest one
)
summary(mlp) # summary of the network spec
_____________________________________________________________________________________________________________________
Layer (type) Output Shape Param #
=====================================================================================================================
dense_1 (Dense) (None, 512) 401920
_____________________________________________________________________________________________________________________
dense_2 (Dense) (None, 10) 5130
=====================================================================================================================
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_____________________________________________________________________________________________________________________
No. of Coefficients (Param #) in Each Layer
dense_1: (28 * 28 + 1) * 512 = 401,920
dense_2: (512 + 1) * 10 = 5,130
mlp %>% compile( # specify
optimizer = "rmsprop", # optimizer
loss = "categorical_crossentropy", # loss function
metrics = c("accuracy") # accuracy metrice
)
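The Param # counts above follow the dense-layer formula (no. of inputs + 1 bias) * units, easy to verify directly in R:
(28 * 28 + 1) * 512 # dense_1: 401920
(512 + 1) * 10 # dense_2: 5130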
fit1 = mlp %>% fit(
train_images, # train data
train_labels, # label of train data
epochs=10, # no. epochs
batch_size=128, # no. of inputs per mini-batch
verbose=2
)
(TensorFlow device-initialization messages trimmed; training ran on a single Tesla K80 GPU with ~11 GB of memory.)
Epoch 1/10
- 3s - loss: 0.2601 - acc: 0.9245
Epoch 2/10
- 2s - loss: 0.1057 - acc: 0.9687
Epoch 3/10
- 2s - loss: 0.0690 - acc: 0.9798
Epoch 4/10
- 2s - loss: 0.0509 - acc: 0.9842
Epoch 5/10
- 2s - loss: 0.0381 - acc: 0.9885
Epoch 6/10
- 2s - loss: 0.0291 - acc: 0.9914
Epoch 7/10
- 2s - loss: 0.0220 - acc: 0.9937
Epoch 8/10
- 2s - loss: 0.0173 - acc: 0.9949
Epoch 9/10
- 2s - loss: 0.0136 - acc: 0.9960
Epoch 10/10
- 2s - loss: 0.0101 - acc: 0.9971
plot(fit1)
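fit() can also hold out part of the training data to watch generalization during training; a minimal sketch (not run here) using keras' validation_split argument:
fit1v = mlp %>% fit(
  train_images, train_labels,
  epochs = 10, batch_size = 128,
  validation_split = 0.2, # hold out 20% of the training set for validation
  verbose = 2
)
plot(fit1v) # shows training and validation curves together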
# Evaluation
mlp %>% evaluate(test_images, test_labels, verbose=2)
$loss
[1] 0.07375
$acc
[1] 0.9811
# Prediction - Classes
mlp %>% predict_classes(test_images[1:10,])
[1] 7 2 1 0 4 1 4 9 5 9
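predict_classes() is just the argmax of the class probabilities; the same result can be computed by hand (which.max returns a 1-based index, hence the - 1):
probs = mlp %>% predict(test_images[1:10,])
apply(probs, 1, which.max) - 1 # 7 2 1 0 4 1 4 9 5 9, as above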
# Prediction Probability
mlp %>% predict_proba(test_images[1:10,]) %>% round(4)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0.0000 0 0.0000 0.0000 0.0000 0.0000 1.0000 0 0.0000
[2,] 0 0.0000 1 0.0000 0.0000 0.0000 0.0000 0.0000 0 0.0000
[3,] 0 0.9999 0 0.0000 0.0000 0.0000 0.0000 0.0000 0 0.0000
[4,] 1 0.0000 0 0.0000 0.0000 0.0000 0.0000 0.0000 0 0.0000
[5,] 0 0.0000 0 0.0000 0.9999 0.0000 0.0000 0.0000 0 0.0001
[6,] 0 0.9997 0 0.0000 0.0000 0.0000 0.0000 0.0003 0 0.0000
[7,] 0 0.0000 0 0.0000 1.0000 0.0000 0.0000 0.0000 0 0.0000
[8,] 0 0.0000 0 0.0008 0.0000 0.0000 0.0000 0.0000 0 0.9992
[9,] 0 0.0000 0 0.0000 0.0000 0.9874 0.0126 0.0000 0 0.0000
[10,] 0 0.0000 0 0.0000 0.0006 0.0000 0.0000 0.0000 0 0.9993
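To see where the remaining ~1.9% of test errors fall, a confusion matrix of true versus predicted digits helps; a short sketch using base R's table():
pred = mlp %>% predict_classes(test_images)
table(truth = mnist$test$y, predicted = pred) # off-diagonal cells are misclassifications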
# mnist <- dataset_mnist()
# c(c(train_images, train_labels), c(test_images, test_labels)) %<-% mnist
# train_images <- array_reshape(train_images, c(60000, 28, 28, 1))
# train_images <- train_images / 255
# test_images <- array_reshape(test_images, c(10000, 28, 28, 1))
# test_images <- test_images / 255
# train_labels <- to_categorical(train_labels)
# test_labels <- to_categorical(test_labels)
train_images = array_reshape(mnist$train$x, c(60000, 28, 28, 1))
train_images = train_images / 255 # normalization
test_images = array_reshape(mnist$test$x, c(10000, 28, 28, 1))
test_images = test_images / 255 # normalization
train_labels = to_categorical(mnist$train$y)
test_labels = to_categorical(mnist$test$y)
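Unlike the MLP's flat 784-vector input, the convolutional layers expect a 4-D tensor of shape (samples, height, width, channels); a quick shape check:
dim(train_images) # 60000 28 28 1
dim(test_images) # 10000 28 28 1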
cnn <- keras_model_sequential() %>%
layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
input_shape = c(28, 28, 1)) %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
layer_flatten() %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 10, activation = "softmax")
summary(cnn)
_____________________________________________________________________________________________________________________
Layer (type) Output Shape Param #
=====================================================================================================================
conv2d_1 (Conv2D) (None, 26, 26, 32) 320
_____________________________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D) (None, 13, 13, 32) 0
_____________________________________________________________________________________________________________________
conv2d_2 (Conv2D) (None, 11, 11, 64) 18496
_____________________________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D) (None, 5, 5, 64) 0
_____________________________________________________________________________________________________________________
conv2d_3 (Conv2D) (None, 3, 3, 64) 36928
_____________________________________________________________________________________________________________________
flatten_1 (Flatten) (None, 576) 0
_____________________________________________________________________________________________________________________
dense_3 (Dense) (None, 64) 36928
_____________________________________________________________________________________________________________________
dense_4 (Dense) (None, 10) 650
=====================================================================================================================
Total params: 93,322
Trainable params: 93,322
Non-trainable params: 0
_____________________________________________________________________________________________________________________
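The Output Shape column is easy to reproduce by hand: each 3 x 3 "valid" convolution shrinks the spatial dimensions by 2, and each 2 x 2 max-pool halves them (rounding down):
28 - 3 + 1 # conv2d_1 -> 26
26 %/% 2 # max_pooling2d_1 -> 13
13 - 3 + 1 # conv2d_2 -> 11
11 %/% 2 # max_pooling2d_2 -> 5
5 - 3 + 1 # conv2d_3 -> 3
3 * 3 * 64 # flatten_1 -> 576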
No. of Coefficients (Param #) in Each Layer
conv2d_1: (3 * 3 * 1 + 1) * 32 = 320
conv2d_2: (3 * 3 * 32 + 1) * 64 = 18,496
conv2d_3: (3 * 3 * 64 + 1) * 64 = 36,928
dense_3: (576 + 1) * 64 = 36,928
dense_4: (64 + 1) * 10 = 650
cnn %>% compile(
optimizer = "rmsprop",
loss = "categorical_crossentropy",
metrics = c("accuracy"))
fit2 = cnn %>% fit(
train_images, train_labels,
epochs = 5, # 5 epochs
batch_size=64, # 64 samples per mini-batch
verbose = 2
)
Epoch 1/5
- 8s - loss: 0.1718 - acc: 0.9467
Epoch 2/5
- 8s - loss: 0.0480 - acc: 0.9855
Epoch 3/5
- 8s - loss: 0.0336 - acc: 0.9895
Epoch 4/5
- 8s - loss: 0.0246 - acc: 0.9926
Epoch 5/5
- 8s - loss: 0.0200 - acc: 0.9937
plot(fit2)
cnn %>% evaluate(test_images, test_labels, verbose=2)
$loss
[1] 0.0297
$acc
[1] 0.9915
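At 99.15% test accuracy, the CNN beats the MLP while using roughly a quarter as many parameters (93,322 vs. 407,050). To reuse a trained network later, it can be written to disk; a sketch using keras' save_model_hdf5() (the file name here is illustrative):
save_model_hdf5(cnn, "cnn_mnist.h5") # saves architecture, weights, and optimizer state
# cnn = load_model_hdf5("cnn_mnist.h5") # reload in a fresh session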