knitr::opts_chunk$set(echo = TRUE)
options(scipen = 1)
rm(list = ls())
library(data.table)
library(dplyr)
library(keras)
library(tidyr)
library(ggplot2)
# To keep the model reproducible (to get identical results, everything must be re-run from the beginning)
use_session_with_seed(seed = 42)
# Use the tensorflow conda environment
keras::use_condaenv("tensorflow")
In this documentation we apply the neural network training process to the Fashion MNIST data using the keras library. The data consists of 10 different types of garment. The goal of the training is to measure how well a neural network trained with keras can predict the garment types. Three neural network models will be compared:
1. A simple machine learning neural network (no hidden layer)
2. Deep learning using dense hidden layers
3. Deep learning using convolutional neural network layers
Below is the process of loading the Fashion MNIST data:
fmnist_train <- fread("data_input/train.csv")
fmnist_test <- fread("data_input/test.csv")
dim(fmnist_train)
## [1] 60000   785
dim(fmnist_test)
## [1] 10000   785
head(fmnist_train[,1])
## label
## 1: 2
## 2: 9
## 3: 6
## 4: 0
## 5: 3
## 6: 4
The Fashion MNIST data consists of 28x28-pixel images. Above we can see that columns 2 through 785 contain the values of the 784 pixels, while column 1 is the label.
head(fmnist_train[,c(1:3,784,785)])
## label pixel1 pixel2 pixel783 pixel784
## 1: 2 0 0 0 0
## 2: 9 0 0 0 0
## 3: 6 0 0 0 0
## 4: 0 0 0 0 0
## 5: 3 0 0 0 0
## 6: 4 0 0 0 0
range(fmnist_train[,1])
## [1] 0 9
Those labels take values 0-9, where each value stands for:
class_names = c('T-shirt/top',
'Trouser',
'Pullover',
'Dress',
'Coat',
'Sandal',
'Shirt',
'Sneaker',
'Bag',
'Ankle boot')
class_names
## [1] "T-shirt/top" "Trouser" "Pullover" "Dress" "Coat"
## [6] "Sandal" "Shirt" "Sneaker" "Bag" "Ankle boot"
The test data, meanwhile, contains 10,000 fashion images that we will predict later.
We can preview one of these observations as follows:
rotate <- function(x) t(apply(x, 2, rev))
pict_no <- 5
image_1 <- as.data.frame(rotate(array(as.vector(t(fmnist_train[pict_no,-1])),dim = c(28,28,1))))
colnames(image_1) <- seq_len(ncol(image_1))
image_1$y <- seq_len(nrow(image_1))
image_1 <- gather(image_1, "x", "value", -y)
image_1$x <- as.integer(image_1$x)
ggplot(image_1, aes(x = x, y = y, fill = value)) +
geom_tile() +
scale_fill_gradient(low = "white", high = "black", na.value = NA) +
scale_y_reverse() +
theme_minimal() +
theme(panel.grid = element_blank()) +
theme(aspect.ratio = 1) +
xlab("") +
ylab("")Beberapa preprocessing data yang akan kita lakukan diantaranya adalah menscale nilai dalam matrix menjadi range 0-1 dengan membagi nilai di tiap pixel dengan 255
x_train <- data.matrix(fmnist_train[,-1])/255
x_test <- data.matrix(fmnist_test[,-1])/255
dim(x_train)
## [1] 60000   784
dim(x_test)
## [1] 10000   784
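As a quick sanity check (not part of the original output), the scaled pixel values should now lie inside [0, 1]:
# After dividing by 255, every pixel value should fall in [0, 1]
range(x_train)
range(x_test)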
train_labels <- array(as.vector(t(fmnist_train[,1])))
test_labels <- array(as.vector(t(fmnist_test[,1])))
class(test_labels)
## [1] "array"
dim(test_labels)
## [1] 10000
We can then display the first 25 observations of the train and test data again:
par(mfcol=c(5,5))
par(mar=c(0, 0, 1.5, 0), xaxs='i', yaxs='i')
for (i in 1:25) {
img <- matrix(x_train[i,], nrow=28, byrow=T)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
#img <- train_images[i, , ]
#img <- t(apply(img, 2, rev))
image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
main = paste(class_names[train_labels[i] + 1]))
}
par(mfcol=c(5,5))
par(mar=c(0, 0, 1.5, 0), xaxs='i', yaxs='i')
for (i in 1:25) {
img <- matrix(x_test[i,], nrow=28, byrow=T)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
#img <- train_images[i, , ]
#img <- t(apply(img, 2, rev))
image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
main = paste(class_names[test_labels[i] + 1]))
}
Next, we need to encode the target labels as one-hot vectors.
y_train <- to_categorical(fmnist_train[,1])
y_test <- to_categorical(fmnist_test[,1])
dim(y_train)
## [1] 60000    10
dim(y_test)
## [1] 10000    10
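To make the encoding concrete, here is a minimal base-R sketch of what to_categorical produces: each label l becomes a length-10 vector with a single 1 at position l + 1.
# Hand-rolled one-hot encoding of the first six labels: row i has a 1 in
# the column corresponding to its label + 1 and 0 everywhere else
diag(10)[fmnist_train$label[1:6] + 1, ]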
In this model comparison we focus solely on differences in the number and type of layers; the other model parameters will largely be left untouched. The models use a cross-entropy loss function, since this is a classification problem, and the optimizer is likewise fixed: all models use Adam.
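For intuition, categorical cross-entropy for a one-hot target y and predicted class probabilities p is -sum(y * log(p)), i.e. the negative log-probability the model assigns to the true class. A minimal sketch with made-up numbers (purely illustrative, not model output):
# One observation whose true class is 3 (one-hot position 4 of 10)
y <- diag(10)[4, ]
p <- c(0.02, 0.03, 0.10, 0.70, 0.05, 0.02, 0.03, 0.02, 0.02, 0.01)
-sum(y * log(p)) # = -log(0.70), roughly 0.357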
Defining the layers:
model_test <- keras_model_sequential()
model_test %>%
layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>%
# layer_dropout(rate = 0.2) %>%
# layer_dense(units = 128, activation = 'relu') %>%
# layer_dropout(rate = 0) %>%
layer_dense(units = 10, activation = 'softmax')
summary(model_test)
## ___________________________________________________________________________
## Layer (type) Output Shape Param #
## ===========================================================================
## dense_1 (Dense) (None, 256) 200960
## ___________________________________________________________________________
## dense_2 (Dense) (None, 10) 2570
## ===========================================================================
## Total params: 203,530
## Trainable params: 203,530
## Non-trainable params: 0
## ___________________________________________________________________________
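The parameter counts in the summary can be verified by hand: a dense layer with n_in inputs and n_out units has n_in * n_out weights plus n_out biases.
# dense_1: 784 inputs -> 256 units
784 * 256 + 256
## [1] 200960
# dense_2: 256 inputs -> 10 units
256 * 10 + 10
## [1] 2570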
Setting the parameters that define the model's error:
model_test %>% compile(
loss = 'categorical_crossentropy',
optimizer = optimizer_adam(),
metrics = c('accuracy')
)
The training process, capturing the history during training:
history <- model_test %>% fit(
x_train, y_train,
epochs = 12,
batch_size = 128,
validation_split = 0.2
)
scores_img_train <- model_test %>% evaluate(
x_train, y_train, verbose = 0
)
# Output metrics
cat('Train loss:', scores_img_train[[1]], '\n')
## Train loss: 0.2376318
cat('Train accuracy:', scores_img_train[[2]], '\n')
## Train accuracy: 0.91425
scores_img_test <- model_test %>% evaluate(
x_test, y_test, verbose = 0
)
# Output metrics
cat('Test loss:', scores_img_test[[1]], '\n')
## Test loss: 0.2947126
cat('Test accuracy:', scores_img_test[[2]], '\n')
## Test accuracy: 0.8917
From the results above we can see that even a neural network without any hidden layer already does quite well at predicting the classes.
Below is a plot of the training process at each epoch:
plot(1:12,history$metrics$acc,type="l",col="blue",ylim=c(0.8,1))
lines(history$metrics$val_acc, col="green")
legend("topright", c("train","val"), col=c("blue", "green"), lty=c(1,1))plot(1:12,history$metrics$loss,type="l",col="blue",ylim=c(0.2,0.7))
lines(history$metrics$val_loss, col="green")
legend("topright", c("train","val"), col=c("blue", "green"), lty=c(1,1))Lalu kita bisa mencari prediksi hasil dari model tersebut, dibawah akan ditampilkan 25 prediksi pertama
We can then obtain the model's predictions on the test data; the first 25 predictions are shown below.
class_pred_img <- model_test %>% predict_classes(x_test)
class_pred_img[1:25]
## [1] 0 1 2 2 4 6 8 3 5 0 3 4 4 6 8 5 6 3 6 4 4 4 2 1 5
test_labels[1:25]
## [1] 0 1 2 2 3 2 8 6 5 0 3 4 4 6 8 5 6 3 6 4 4 4 2 1 5
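Note that predict_classes() has been removed in newer releases of the keras R package (for TensorFlow >= 2.6); if your installed version no longer provides it (an assumption about your setup), an equivalent is to take the arg-max of the predicted probabilities:
# Equivalent of predict_classes(): pick the most probable class per row;
# subtract 1 because the labels run 0-9 while max.col() is 1-indexed
probs <- predict(model_test, x_test)
class_pred_alt <- max.col(probs) - 1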
And they can be shown as a preview of the individual images:
par(mfcol=c(5,5))
par(mar=c(0, 0, 1.5, 0), xaxs='i', yaxs='i')
for (i in 1:25) {
img <- matrix(x_test[i,], nrow=28, byrow=T)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
predicted_label <- class_pred_img[i]
true_label <- test_labels[i]
if (predicted_label == true_label) {
color <- '#008800'
} else {
color <- '#bb0000'
}
image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
main = paste0(class_names[predicted_label + 1], " (",
class_names[true_label + 1], ")"),
col.main = color)
}
Titles written in red indicate a mismatch between the prediction and the actual label.
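Rather than eyeballing 25 images, we can quantify the mismatches over the entire test set; a minimal sketch built from the predictions already computed:
# Overall test accuracy and a 10x10 confusion matrix of the predictions
mean(class_pred_img == test_labels)
table(predicted = class_pred_img, actual = test_labels)
Next, we build the second model: a deep neural network with dense hidden layers and dropout.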
model_test2 <- keras_model_sequential()
model_test2 %>%
layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>%
layer_dropout(rate = 0.2) %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dropout(rate = 0) %>% # rate 0: this dropout layer is effectively a no-op
layer_dense(units = 10, activation = 'softmax')
summary(model_test2)
## ___________________________________________________________________________
## Layer (type) Output Shape Param #
## ===========================================================================
## dense_3 (Dense) (None, 256) 200960
## ___________________________________________________________________________
## dropout_1 (Dropout) (None, 256) 0
## ___________________________________________________________________________
## dense_4 (Dense) (None, 128) 32896
## ___________________________________________________________________________
## dropout_2 (Dropout) (None, 128) 0
## ___________________________________________________________________________
## dense_5 (Dense) (None, 10) 1290
## ===========================================================================
## Total params: 235,146
## Trainable params: 235,146
## Non-trainable params: 0
## ___________________________________________________________________________
model_test2 %>% compile(
loss = 'categorical_crossentropy',
optimizer = optimizer_adam(),
metrics = c('accuracy')
)
history2 <- model_test2 %>% fit(
x_train, y_train,
epochs = 12,
batch_size = 128,
validation_split = 0.2
)
scores_img_train2 <- model_test2 %>% evaluate(
x_train, y_train, verbose = 0
)
# Output metrics
cat('Train loss:', scores_img_train2[[1]], '\n')
## Train loss: 0.2494444
cat('Train accuracy:', scores_img_train2[[2]], '\n')
## Train accuracy: 0.90705
scores_img_test2 <- model_test2 %>% evaluate(
x_test, y_test, verbose = 0
)
# Output metrics
cat('Test loss:', scores_img_test2[[1]], '\n')
## Test loss: 0.313064
cat('Test accuracy:', scores_img_test2[[2]], '\n')
## Test accuracy: 0.8861
In this case adding more layers does not seem to help much; test accuracy even drops slightly compared to the previous model.
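A quick side-by-side view of the two models so far, assembled from the score objects computed above (a small sketch):
# Compare test loss and accuracy of the two dense architectures
data.frame(
  model = c("no hidden layer", "dense hidden layers"),
  test_loss = c(scores_img_test[[1]], scores_img_test2[[1]]),
  test_acc = c(scores_img_test[[2]], scores_img_test2[[2]])
)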
plot(1:12,history2$metrics$acc,type="l",col="blue",ylim=c(0.8,1))
lines(history2$metrics$val_acc, col="green")
legend("topright", c("train","val"), col=c("blue", "green"), lty=c(1,1))plot(1:12,history2$metrics$loss,type="l",col="blue",ylim=c(0.2,0.7))
lines(history2$metrics$val_loss, col="green")
legend("topright", c("train","val"), col=c("blue", "green"), lty=c(1,1))class_pred_img2 <- model_test2 %>% predict_classes(x_test)
class_pred_img2[1:25]
## [1] 6 1 2 2 4 6 8 2 5 0 3 4 6 6 8 5 6 3 6 4 4 4 2 1 5
test_labels[1:25]
## [1] 0 1 2 2 3 2 8 6 5 0 3 4 4 6 8 5 6 3 6 4 4 4 2 1 5
par(mfcol=c(5,5))
par(mar=c(0, 0, 1.5, 0), xaxs='i', yaxs='i')
for (i in 1:25) {
img <- matrix(x_test[i,], nrow=28, byrow=T)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
predicted_label <- class_pred_img2[i]
true_label <- test_labels[i]
if (predicted_label == true_label) {
color <- '#008800'
} else {
color <- '#bb0000'
}
image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
main = paste0(class_names[predicted_label + 1], " (",
class_names[true_label + 1], ")"),
col.main = color)
}
The plot above shows the test predictions getting worse. The many red titles mark cases where the model predicted incorrectly; among the first 25 test images there are already 5 misclassifications.
To build a convolutional neural network, the input must be fed in as square matrices rather than flat vectors, so some additional preprocessing of the input data is required: each 784-element row is reshaped into a 28x28x1 array. After preprocessing, the data is displayed to make sure it has not been scrambled.
x_traincnn <- array_reshape(x_train, c(nrow(x_train), 28, 28, 1))
x_testcnn <- array_reshape(x_test, c(nrow(x_test), 28, 28, 1))
for (i in 1:nrow(x_traincnn)) {
x_traincnn[i,,,] <- rotate(x_traincnn[i,,,])
}
for (i in 1:nrow(x_testcnn)) {
x_testcnn[i,,,] <- rotate(x_testcnn[i,,,])
}
dim(x_traincnn)
## [1] 60000    28    28     1
dim(x_testcnn)
## [1] 10000    28    28     1
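Besides eyeballing the images below, the reshape can be checked programmatically: with array_reshape's row-major filling plus the rotate step above, the first CNN image should equal the rotated 28x28 matrix of the first flat row. A small sketch using the rotate helper defined earlier (should return TRUE):
# TRUE if the reshape + rotation kept the first image intact
all.equal(x_traincnn[1, , , 1],
          rotate(matrix(x_train[1, ], nrow = 28, byrow = TRUE)))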
image(x_traincnn[1,,,])
image(x_testcnn[1,,,])
model_test3 <- keras_model_sequential() %>%
layer_conv_2d(filters = 32, kernel_size = c(3,3), activation = 'relu',
input_shape = c(28,28,1)) %>%
layer_conv_2d(filters = 64, kernel_size = c(3,3), activation = 'relu') %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_dropout(rate = 0.25) %>%
layer_flatten() %>%
layer_dense(units = 128, activation = 'relu') %>%
layer_dropout(rate = 0.5) %>%
layer_dense(units = 10, activation = 'softmax')
summary(model_test3)
## ___________________________________________________________________________
## Layer (type) Output Shape Param #
## ===========================================================================
## conv2d_1 (Conv2D) (None, 26, 26, 32) 320
## ___________________________________________________________________________
## conv2d_2 (Conv2D) (None, 24, 24, 64) 18496
## ___________________________________________________________________________
## max_pooling2d_1 (MaxPooling2D) (None, 12, 12, 64) 0
## ___________________________________________________________________________
## dropout_3 (Dropout) (None, 12, 12, 64) 0
## ___________________________________________________________________________
## flatten_1 (Flatten) (None, 9216) 0
## ___________________________________________________________________________
## dense_6 (Dense) (None, 128) 1179776
## ___________________________________________________________________________
## dropout_4 (Dropout) (None, 128) 0
## ___________________________________________________________________________
## dense_7 (Dense) (None, 10) 1290
## ===========================================================================
## Total params: 1,199,882
## Trainable params: 1,199,882
## Non-trainable params: 0
## ___________________________________________________________________________
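The shapes and parameter counts above can again be verified by hand: a 3x3 convolution without padding shrinks each spatial dimension by 2 (28 -> 26 -> 24), 2x2 max pooling halves it (24 -> 12), and a conv layer with k x k kernels, c_in input channels and c_out filters has k * k * c_in * c_out + c_out parameters:
# conv2d_1: 3x3 kernel, 1 input channel, 32 filters
3 * 3 * 1 * 32 + 32
## [1] 320
# conv2d_2: 3x3 kernel, 32 input channels, 64 filters
3 * 3 * 32 * 64 + 64
## [1] 18496
# dense_6: flattened 12 * 12 * 64 = 9216 inputs -> 128 units
9216 * 128 + 128
## [1] 1179776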
model_test3 %>% compile(
loss = 'categorical_crossentropy',
optimizer = optimizer_adam(),
metrics = c('accuracy')
)
# The model is not re-run at knit time
history3 <- model_test3 %>% fit(
x_traincnn, y_train,
epochs = 12,
batch_size = 128,
validation_split = 0.2
)
# Save the model and the training history, since training takes quite a while
save_model_hdf5(model_test3, "data_input/cnn_fmnist_lbb.h5")
saveRDS(history3,"data_input/cnn_history.rds")
# Load the saved CNN model and its history
model_test3l <- load_model_hdf5("data_input/cnn_fmnist_lbb.h5")
history3l <- readRDS("data_input/cnn_history.rds")
scores_img_train3 <- model_test3l %>% evaluate(
x_traincnn, y_train, verbose = 0
)
# Output metrics
cat('Train loss:', scores_img_train3[[1]], '\n')
## Train loss: 0.1625951
cat('Train accuracy:', scores_img_train3[[2]], '\n')
## Train accuracy: 0.94015
scores_img_test3 <- model_test3l %>% evaluate(
x_testcnn, y_test, verbose = 0
)
# Output metrics
cat('Test loss:', scores_img_test3[[1]], '\n')
## Test loss: 0.2299302
cat('Test accuracy:', scores_img_test3[[2]], '\n')
## Test accuracy: 0.9157
plot(1:12,history3l$metrics$acc,type="l",col="blue",ylim=c(0.8,1))
lines(history3l$metrics$val_acc, col="green")
legend("topright", c("train","val"), col=c("blue", "green"), lty=c(1,1))plot(1:12,history3l$metrics$loss,type="l",col="blue",ylim=c(0.2,0.7))
lines(history3l$metrics$val_loss, col="green")
legend("topright", c("train","val"), col=c("blue", "green"), lty=c(1,1))class_pred_img3 <- model_test3l %>% predict_classes(x_testcnn)
class_pred_img3[1:25]
## [1] 0 1 2 2 3 6 8 6 5 0 3 2 4 6 8 5 6 3 6 4 4 4 2 1 5
test_labels[1:25]
## [1] 0 1 2 2 3 2 8 6 5 0 3 4 4 6 8 5 6 3 6 4 4 4 2 1 5
par(mfcol=c(5,5))
par(mar=c(0, 0, 1.5, 0), xaxs='i', yaxs='i')
for (i in 1:25) {
img <- matrix(x_test[i,], nrow=28, byrow=T)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
img <- apply(img, 1, rev)
predicted_label <- class_pred_img3[i]
true_label <- test_labels[i]
if (predicted_label == true_label) {
color <- '#008800'
} else {
color <- '#bb0000'
}
image(1:28, 1:28, img, col = gray((0:255)/255), xaxt = 'n', yaxt = 'n',
main = paste0(class_names[predicted_label + 1], " (",
class_names[true_label + 1], ")"),
col.main = color)
}
Accuracy improves on both the training and the test data, rising to 0.94 and 0.92 respectively. The prediction plot above confirms the change: the model makes fewer mistakes, with only 2 errors among the first 25 test images.
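To close, the three architectures can be compared side by side from the score objects computed above (a small sketch):
# Test-set performance of all three models
data.frame(
  model = c("no hidden layer", "dense hidden layers", "convolutional"),
  test_loss = c(scores_img_test[[1]], scores_img_test2[[1]], scores_img_test3[[1]]),
  test_acc = c(scores_img_test[[2]], scores_img_test2[[2]], scores_img_test3[[2]])
)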