1 Libraries and Setup

# Seed R's random number generator so R-side randomness is repeatable.
set.seed(100)
library(keras)
# Select the conda environment named "r-tensorflow" before the backend is used.
use_condaenv("r-tensorflow")
# Seed the keras session with the same value as set.seed() above.
use_session_with_seed(100)
library(dplyr)
library(caret)
library(plotly)
# Smoke test: building an empty sequential model checks that keras is usable.
model <- keras_model_sequential()

2 Import Data

# Read the training and test sets from their CSV files.
train <- read.csv(file = "fashionmnist/train.csv")
test <- read.csv(file = "fashionmnist/test.csv")

3 Inspect Data

# Number of columns (variables) in the training data.
ncol(train)
## [1] 785
# Names of columns 1-5 and 782-785.
names(train)[c(1:5, 782:785)]
## [1] "label"    "pixel1"   "pixel2"   "pixel3"   "pixel4"   "pixel781"
## [7] "pixel782" "pixel783" "pixel784"
# Dimensions of the data (number of rows and columns).
dim(train)
## [1] 60000   785
# Range of values across the whole training data frame.
range(train)
## [1]   0 255
# Proportion of each level of the target.
prop.table(table(train[["label"]]))
## 
##   0   1   2   3   4   5   6   7   8   9 
## 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1

4 Cross Validation

Data yang disediakan sudah terbagi menjadi training set dan testing set, sehingga pembagian data (cross validation) tidak perlu dilakukan lagi.

5 Data Preprocess

5.1 Prepare Train

# Predictors: every column except the label, converted to a numeric matrix.
train_x <- data.matrix(select(train, -label))

# Divide by 255 (the maximum value seen in the data) to rescale the pixels;
# the reshape keeps the matrix's own dimensions.
train_x_keras <- array_reshape(train_x, dim = dim(train_x)) / 255

# Target: the label column as a one-column matrix.
train_y <- data.matrix(select(train, label))

# One-hot encode the target into 10 class columns.
train_y_keras <- to_categorical(train_y, num_classes = 10)

5.2 Prepare Test

# Predictors of the test set: every column except the label, as a numeric matrix.
test_x <- test %>%
  select(-label) %>%
  data.matrix()

# Same rescaling as the training predictors (divide by 255).
test_x_keras <- array_reshape(test_x, dim = dim(test_x)) / 255

# Target of the test set: the label column as a one-column matrix.
test_y <- test %>%
  select(label) %>%
  data.matrix()

# One-hot encode the TEST labels. The original encoded `train_y` here, which
# produced a target matrix whose rows did not correspond to `test_x_keras`.
test_y_keras <- to_categorical(test_y, num_classes = 10)

6 Building Architecture

# set-up reproducible weight initializer
initializer <- initializer_random_uniform(minval = -0.5, maxval = 0.5, seed = 100)

# menunjukkan bahwa objek model berupa sequence (membuat tempat arsitektur)
model <- keras_model_sequential()

# membuat arsitektur
model %>% 
  layer_dense(input_shape = 784, units = 256, activation = "relu", kernel_initializer = initializer, bias_initializer = initializer) %>%
  layer_dense(units = 128, activation = "relu", kernel_initializer = initializer, bias_initializer = initializer) %>% 
  layer_dense(units = 68, activation = "relu", kernel_initializer = initializer, bias_initializer = initializer) %>% 
  layer_dense(units = 10, activation = "softmax", kernel_initializer = initializer, bias_initializer = initializer) %>% 
  compile(loss = "categorical_crossentropy",
          optimizer = optimizer_adam(),
          metrics = "accuracy")

summary(model)
## ___________________________________________________________________________
## Layer (type)                     Output Shape                  Param #     
## ===========================================================================
## dense (Dense)                    (None, 256)                   200960      
## ___________________________________________________________________________
## dense_1 (Dense)                  (None, 128)                   32896       
## ___________________________________________________________________________
## dense_2 (Dense)                  (None, 68)                    8772        
## ___________________________________________________________________________
## dense_3 (Dense)                  (None, 10)                    690         
## ===========================================================================
## Total params: 243,318
## Trainable params: 243,318
## Non-trainable params: 0
## ___________________________________________________________________________

7 Modeling on Train Data

# Train on the prepared training matrices for 20 epochs with batches of 128.
# `epochs` is spelled out in full: the original `epoch = 20` only worked
# through partial argument matching, which stops applying when `...` precedes
# `epochs` in the method signature. Shuffling is turned off with the literal
# FALSE rather than the reassignable shorthand `F`.
history <- model %>%
  fit(
    train_x_keras, train_y_keras,
    epochs = 20,
    batch_size = 128,
    shuffle = FALSE
  )

# Interactive version of the training-history plot.
ggplotly(plot(history))

8 Predicting on Test Data

# Predicted class for every row of the prepared test predictors.
prediction <- predict_classes(model, test_x_keras)

# Show the first five predictions.
prediction[seq_len(5)]
## [1] 0 1 2 2 3

9 Evaluating

# Compare predicted classes against the true test labels.
confusionMatrix(
  data = as.factor(prediction),
  reference = as.factor(test_y)
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1   2   3   4   5   6   7   8   9
##          0 834   2  12  19   1   0 139   0   7   0
##          1   0 979   1  19   3   1   2   0   1   0
##          2  13   3 711   5  39   1  54   0   6   0
##          3  50  12  16 916  35   1  40   0   5   0
##          4   6   0 160  24 866   0  84   0   5   0
##          5   1   1   0   0   0 901   0   6   1   3
##          6  83   2  92  15  54   0 667   1   9   0
##          7   0   0   3   0   0  58   0 939   3  30
##          8  12   1   5   1   2   8  14   1 960   1
##          9   1   0   0   1   0  30   0  53   3 966
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8739          
##                  95% CI : (0.8672, 0.8803)
##     No Information Rate : 0.1             
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.8599          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
## Sensitivity            0.8340   0.9790   0.7110   0.9160   0.8660   0.9010
## Specificity            0.9800   0.9970   0.9866   0.9823   0.9690   0.9987
## Pos Pred Value         0.8225   0.9732   0.8546   0.8521   0.7563   0.9869
## Neg Pred Value         0.9815   0.9977   0.9685   0.9906   0.9849   0.9891
## Prevalence             0.1000   0.1000   0.1000   0.1000   0.1000   0.1000
## Detection Rate         0.0834   0.0979   0.0711   0.0916   0.0866   0.0901
## Detection Prevalence   0.1014   0.1006   0.0832   0.1075   0.1145   0.0913
## Balanced Accuracy      0.9070   0.9880   0.8488   0.9492   0.9175   0.9498
##                      Class: 6 Class: 7 Class: 8 Class: 9
## Sensitivity            0.6670   0.9390   0.9600   0.9660
## Specificity            0.9716   0.9896   0.9950   0.9902
## Pos Pred Value         0.7226   0.9090   0.9552   0.9165
## Neg Pred Value         0.9633   0.9932   0.9956   0.9962
## Prevalence             0.1000   0.1000   0.1000   0.1000
## Detection Rate         0.0667   0.0939   0.0960   0.0966
## Detection Prevalence   0.0923   0.1033   0.1005   0.1054
## Balanced Accuracy      0.8193   0.9643   0.9775   0.9781

10 Summary

Setelah melakukan beberapa percobaan dengan variasi jumlah layer (1 sampai dengan 4 layer), jumlah units (mulai dari 512 ke bawah), dan optimizer (sgd, rmsprop, adam, adamax, adadelta), nilai accuracy tertinggi diperoleh dengan 3 hidden layer, yaitu 256 units pada layer pertama, 128 pada layer kedua, dan 68 pada layer ketiga, serta optimizer adam.