rm(list=ls(all=T))
options(digits=4, scipen=40)
library(dplyr)
library(keras) # model library, similar in spirit to xgboost / randomForest
Reading & Examining data …
mnist = dataset_mnist()
par(mfrow = c(6, 8), pty = "s", mar = c(0.5, 0.5, 0, 0))
for(p in 1:48) mnist$train$x[p,,] %>% as.raster(max=255) %>% plot
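A quick sketch (assuming the standard shapes returned by dataset_mnist()) to confirm what was just plotted:
dim(mnist$train$x) # 60000 28 28 : 60,000 training images, each 28 x 28 grayscale pixels
dim(mnist$test$x)  # 10000 28 28 : 10,000 test images
table(mnist$train$y) # roughly 6,000 examples of each digit 0-9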
Reshape the data …
train_images = array_reshape(mnist$train$x, c(60000, 28 * 28))
train_images = train_images / 255 # normalization: scaling the grayscale values (0-255) into [0, 1] makes the network train faster
test_images = array_reshape(mnist$test$x, c(10000, 28 * 28))
test_images = test_images / 255 # normalization
train_labels = to_categorical(mnist$train$y) # one-hot encode the labels for a 10-class (digits 0-9) classifier
test_labels = to_categorical(mnist$test$y)
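A minimal check (sketch) of what to_categorical() produces: a one-hot matrix with one column per digit.
dim(train_labels)  # 60000 10
mnist$train$y[1]   # the first training label, e.g. 5
train_labels[1, ]  # the same label one-hot encoded: a 1 in the matching column, 0 elsewhere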
mlp = keras_model_sequential() %>%
layer_dense(units = 512, # number of neurons (perceptrons) in this layer
activation = "relu", # activation function
input_shape = c(784) # dimensions of input tensor
) %>%
layer_dense(units = 10, # one output neuron per class: the last layer has as many neurons as there are classes
activation = "softmax" # softmax turns the outputs into class probabilities; the largest one is the predicted class
)
summary(mlp) # summary of the network spec
_______________________________________________________________________________________
Layer (type) Output Shape Param #
=======================================================================================
dense_13 (Dense) (None, 512) 401920
_______________________________________________________________________________________
dense_14 (Dense) (None, 10) 5130
=======================================================================================
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_______________________________________________________________________________________
No. Coefficients (Param #) in Each Layer
dense_1: (28 * 28 + 1) * 512 = 401,920
dense_2: (512 + 1) * 10 = 5,130
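These counts can be reproduced directly in R: each dense layer has (inputs + 1 bias) coefficients per neuron.
(28 * 28 + 1) * 512 # 401920 coefficients in the first dense layer
(512 + 1) * 10      # 5130 coefficients in the output layer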
mlp %>% compile( # specify (copied as-is; this is the standard default setup)
optimizer = "rmsprop", # optimizer
loss = "categorical_crossentropy", # loss function
metrics = c("accuracy") # accuracy metric
)
fit1 = mlp %>% fit(
train_images, # train data
train_labels, # label of train data
epochs=10, # no. of epochs (how many passes over the training data)
batch_size=128, # no. of inputs per mini-batch; a larger batch_size runs faster but needs more GPU memory, so it depends on the hardware
verbose=2 # print one progress line per epoch
)
Epoch 1/10
- 2s - loss: 0.2554 - acc: 0.9260
Epoch 2/10
- 2s - loss: 0.1022 - acc: 0.9695
Epoch 3/10
- 2s - loss: 0.0680 - acc: 0.9800
Epoch 4/10
- 2s - loss: 0.0488 - acc: 0.9852
Epoch 5/10
- 2s - loss: 0.0374 - acc: 0.9887
Epoch 6/10
- 2s - loss: 0.0284 - acc: 0.9911
Epoch 7/10
- 2s - loss: 0.0218 - acc: 0.9935
Epoch 8/10
- 2s - loss: 0.0172 - acc: 0.9948
Epoch 9/10
- 2s - loss: 0.0128 - acc: 0.9964
Epoch 10/10
- 2s - loss: 0.0102 - acc: 0.9970
plot(fit1) # with each additional epoch, accuracy rises a little and loss drops a little
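An optional variant (sketch only, not run above): fit() also accepts a validation_split argument, which holds out part of the training data so the per-epoch curves show validation performance as well.
# fit1v = mlp %>% fit(
#   train_images, train_labels,
#   epochs = 10, batch_size = 128,
#   validation_split = 0.2, # hold out 20% of the training data for validation
#   verbose = 2
# )
# plot(fit1v) # plots training and validation loss/accuracy together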
# Evaluation
mlp %>% evaluate(test_images, test_labels, verbose=2) # the counterpart of predict() for scoring (the syntax differs here); verbose can be omitted
$loss
[1] 0.07241
$acc
[1] 0.982
# 0.98 is still not good enough: the network is not deep enough (a neural network should get much closer to 100%)
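One hedged illustration of "going deeper" (sketch only, not run here): stack an extra hidden layer before recompiling and refitting.
# mlp2 = keras_model_sequential() %>%
#   layer_dense(units = 512, activation = "relu", input_shape = c(784)) %>%
#   layer_dense(units = 256, activation = "relu") %>% # one extra hidden layer = a slightly deeper network
#   layer_dense(units = 10, activation = "softmax")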
# Prediction - Classes
mlp %>% predict_classes(test_images[1:10,])
[1] 7 2 1 0 4 1 4 9 5 9
# Prediction Probability
mlp %>% predict_proba(test_images[1:10,]) %>% round(4)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 0.0000 0 0.0000 0.0000 0.0000 0.0000 1.0000 0 0.0000
[2,] 0 0.0000 1 0.0000 0.0000 0.0000 0.0000 0.0000 0 0.0000
[3,] 0 0.9995 0 0.0000 0.0000 0.0000 0.0000 0.0004 0 0.0000
[4,] 1 0.0000 0 0.0000 0.0000 0.0000 0.0000 0.0000 0 0.0000
[5,] 0 0.0000 0 0.0000 0.9991 0.0000 0.0000 0.0001 0 0.0008
[6,] 0 0.9979 0 0.0000 0.0000 0.0000 0.0000 0.0021 0 0.0000
[7,] 0 0.0000 0 0.0000 1.0000 0.0000 0.0000 0.0000 0 0.0000
[8,] 0 0.0000 0 0.0004 0.0000 0.0000 0.0000 0.0000 0 0.9996
[9,] 0 0.0000 0 0.0000 0.0000 0.9995 0.0005 0.0000 0 0.0000
[10,] 0 0.0000 0 0.0000 0.0000 0.0000 0.0000 0.0087 0 0.9913
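A short sketch to compare predictions against the true test labels (assumes mlp and mnist are still in the workspace):
pred = mlp %>% predict_classes(test_images) # predicted digit for every test image
table(truth = mnist$test$y, predicted = pred) # confusion table: rows = true digits, columns = predicted digits
mean(pred == mnist$test$y) # should match the accuracy reported by evaluate() above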
# mnist <- dataset_mnist()
# c(c(train_images, train_labels), c(test_images, test_labels)) %<-% mnist
# train_images <- array_reshape(train_images, c(60000, 28, 28, 1))
# train_images <- train_images / 255
# test_images <- array_reshape(test_images, c(10000, 28, 28, 1))
# test_images <- test_images / 255
# train_labels <- to_categorical(train_labels)
# test_labels <- to_categorical(test_labels)
train_images = array_reshape(mnist$train$x, c(60000, 28, 28, 1))
train_images = train_images / 255 # normalization
test_images = array_reshape(mnist$test$x, c(10000, 28, 28, 1))
test_images = test_images / 255 # normalization
train_labels = to_categorical(mnist$train$y)
test_labels = to_categorical(mnist$test$y)
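Note the extra trailing dimension: a conv net expects a 4-D tensor of (samples, height, width, channels). A one-line check:
dim(train_images) # 60000 28 28 1 : single-channel (grayscale) images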
cnn <- keras_model_sequential() %>%
layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu",
input_shape = c(28, 28, 1)) %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") %>%
layer_flatten() %>%
layer_dense(units = 64, activation = "relu") %>%
layer_dense(units = 10, activation = "softmax")
summary(cnn)
_______________________________________________________________________________________
Layer (type) Output Shape Param #
=======================================================================================
conv2d_10 (Conv2D) (None, 26, 26, 32) 320
_______________________________________________________________________________________
max_pooling2d_7 (MaxPooling2D) (None, 13, 13, 32) 0
_______________________________________________________________________________________
conv2d_11 (Conv2D) (None, 11, 11, 64) 18496
_______________________________________________________________________________________
max_pooling2d_8 (MaxPooling2D) (None, 5, 5, 64) 0
_______________________________________________________________________________________
conv2d_12 (Conv2D) (None, 3, 3, 64) 36928
_______________________________________________________________________________________
flatten_4 (Flatten) (None, 576) 0
_______________________________________________________________________________________
dense_15 (Dense) (None, 64) 36928
_______________________________________________________________________________________
dense_16 (Dense) (None, 10) 650
=======================================================================================
Total params: 93,322
Trainable params: 93,322
Non-trainable params: 0
_______________________________________________________________________________________
No. Coefficients (Param #) in Each Layer
conv2d_1: (3 * 3 * 1 + 1) * 32 = 320
conv2d_2: (3 * 3 * 32 + 1) * 64 = 18,496
conv2d_3: (3 * 3 * 64 + 1) * 64 = 36,928
dense_3: (576 + 1) * 64 = 36,928
dense_4: (64 + 1) * 10 = 650
cnn %>% compile(
optimizer = "rmsprop",
loss = "categorical_crossentropy",
metrics = c("accuracy"))
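As with the MLP, the convolutional parameter counts above follow (kernel height * kernel width * input channels + 1 bias) * filters; a quick check:
(3 * 3 * 1 + 1) * 32   # 320   : conv2d_1
(3 * 3 * 32 + 1) * 64  # 18496 : conv2d_2
(3 * 3 * 64 + 1) * 64  # 36928 : conv2d_3
(576 + 1) * 64         # 36928 : dense layer after flatten (3 * 3 * 64 = 576 inputs)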
# this step is where the actual training happens
fit2 = cnn %>% fit(
train_images, train_labels,
epochs = 5, # 5 epochs (fewer epochs here just to keep the demo short)
batch_size=64, # 64 samples per mini-batch
verbose = 2
)
Epoch 1/5
- 8s - loss: 0.1672 - acc: 0.9474
Epoch 2/5
- 7s - loss: 0.0461 - acc: 0.9858
Epoch 3/5
- 7s - loss: 0.0327 - acc: 0.9903
Epoch 4/5
- 7s - loss: 0.0252 - acc: 0.9923
Epoch 5/5
- 7s - loss: 0.0196 - acc: 0.9943
plot(fit2)
cnn %>% evaluate(test_images, test_labels, verbose=2) # test accuracy improves from about 0.98 to about 0.99
$loss
[1] 0.02187
$acc
[1] 0.9929
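A closing sketch (assumes both models are still in memory): score them on the same test set side by side. The MLP needs the flattened 784-column version of the test images, which is rebuilt here.
flat_test = array_reshape(mnist$test$x, c(10000, 28 * 28)) / 255 # flat, normalized input for the MLP
rbind(mlp = unlist(mlp %>% evaluate(flat_test, test_labels, verbose = 0)),
      cnn = unlist(cnn %>% evaluate(test_images, test_labels, verbose = 0)))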