INTRO

Data terdiri dari gambar MRI. Data memiliki empat kelas diantaranya Mild Demented, Moderate Demented, Non Demented dan Very Mild Demented. LBB ini untuk membuat model yang akurat memprediksi tahap Alzheimers.

DATA

folder_list <- list.files("data_lbb/train/")
folder_path <- paste0("data_lbb/train/", folder_list, "/")
folder_path
## [1] "data_lbb/train/MildDemented/"     "data_lbb/train/ModerateDemented/"
## [3] "data_lbb/train/NonDemented/"      "data_lbb/train/VeryMildDemented/"
# Get file name
file_name <- map(folder_path, 
                 function(x) paste0(x, list.files(x))
                 ) %>% 
  unlist()

# first 6 file name
head(file_name)
## [1] "data_lbb/train/MildDemented/mildDem0.jpg"  
## [2] "data_lbb/train/MildDemented/mildDem1.jpg"  
## [3] "data_lbb/train/MildDemented/mildDem10.jpg" 
## [4] "data_lbb/train/MildDemented/mildDem100.jpg"
## [5] "data_lbb/train/MildDemented/mildDem101.jpg"
## [6] "data_lbb/train/MildDemented/mildDem102.jpg"
tail(file_name)
## [1] "data_lbb/train/VeryMildDemented/verymildDem994.jpg"
## [2] "data_lbb/train/VeryMildDemented/verymildDem995.jpg"
## [3] "data_lbb/train/VeryMildDemented/verymildDem996.jpg"
## [4] "data_lbb/train/VeryMildDemented/verymildDem997.jpg"
## [5] "data_lbb/train/VeryMildDemented/verymildDem998.jpg"
## [6] "data_lbb/train/VeryMildDemented/verymildDem999.jpg"

EXPLORATORY DATA ANALYSIS

length(file_name)
## [1] 5121
# Randomly select image
set.seed(99)
sample_image <- sample(file_name, 6)

# Load image into R
img <- map(sample_image, load.image)

# Plot image
par(mfrow = c(2, 3)) # Create 2 x 3 image grid
map(img, plot)

## [[1]]
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1 
## 
## [[2]]
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1 
## 
## [[3]]
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1 
## 
## [[4]]
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1 
## 
## [[5]]
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1 
## 
## [[6]]
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1
# Full Image Description
img <- load.image(file_name[1])
img
## Image. Width: 176 pix Height: 208 pix Depth: 1 Colour channels: 1
dim(img)
## [1] 176 208   1   1
# Function for acquiring width and height of an image
get_dim <- function(x){
  img <- load.image(x) 
  
  df_img <- data.frame(height = height(img),
                       width = width(img),
                       filename = x
                       )
  
  return(df_img)
}

get_dim(file_name[1])
##   height width                                 filename
## 1    208   176 data_lbb/train/MildDemented/mildDem0.jpg
# Randomly get 1000 sample images
set.seed(123)
sample_file <- sample(file_name, 1000)

# Run the get_dim() function for each image
file_dim <- map_df(sample_file, get_dim)

head(file_dim, 10)
##    height width                                            filename
## 1     208   176           data_lbb/train/NonDemented/nonDem2521.jpg
## 2     208   176            data_lbb/train/NonDemented/nonDem261.jpg
## 3     208   176           data_lbb/train/NonDemented/nonDem2309.jpg
## 4     208   176          data_lbb/train/MildDemented/mildDem571.jpg
## 5     208   176  data_lbb/train/VeryMildDemented/verymildDem250.jpg
## 6     208   176             data_lbb/train/NonDemented/nonDem69.jpg
## 7     208   176           data_lbb/train/NonDemented/nonDem1963.jpg
## 8     208   176           data_lbb/train/NonDemented/nonDem1332.jpg
## 9     208   176 data_lbb/train/VeryMildDemented/verymildDem1034.jpg
## 10    208   176 data_lbb/train/VeryMildDemented/verymildDem1101.jpg
summary(file_dim)
##      height        width       filename        
##  Min.   :208   Min.   :176   Length:1000       
##  1st Qu.:208   1st Qu.:176   Class :character  
##  Median :208   Median :176   Mode  :character  
##  Mean   :208   Mean   :176                     
##  3rd Qu.:208   3rd Qu.:176                     
##  Max.   :208   Max.   :176

DATA PREPROCESSING

# Desired height and width of images
target_size <- c(176, 208)

# Batch size for training the model
batch_size <- 8
train_data_gen <- image_data_generator(rescale = 1/255, # Scaling pixel value
                                       horizontal_flip = T, # Flip image horizontally
                                       vertical_flip = T, # Flip image vertically 
                                       rotation_range = 45, # Rotate image from 0 to 45 degrees
                                       zoom_range = 0.25, # Zoom in or zoom out range
                                       validation_split = 0.2 # 20% data as validation data
                                       )
## Loaded Tensorflow version 2.10.0
# Training Dataset
train_image_array_gen <- flow_images_from_directory(directory = "data_lbb/train/", # Folder of the data
                                                    target_size = target_size, # target of the image dimension (64 x 64)  
                                                    color_mode = "rgb", # use RGB color
                                                    batch_size = batch_size , 
                                                    seed = 123,  # set random seed
                                                    subset = "training", # declare that this is for training data
                                                    generator = train_data_gen
                                                    )

# Validation Dataset
val_image_array_gen <- flow_images_from_directory(directory = "data_lbb/train/",
                                                  target_size = target_size, 
                                                  color_mode = "rgb", 
                                                  batch_size = batch_size ,
                                                  seed = 123,
                                                  subset = "validation", # declare that this is the validation data
                                                  generator = train_data_gen
                                                  )
# Number of training samples
train_samples <- train_image_array_gen$n

# Number of validation samples
valid_samples <- val_image_array_gen$n

# Number of target classes/categories
output_n <- n_distinct(train_image_array_gen$classes)

# Get the class proportion
table("\nFrequency" = factor(train_image_array_gen$classes)
      ) %>% 
  prop.table()
## 
## Frequency
##         0         1         2         3 
## 0.1400683 0.0102489 0.4997560 0.3499268

VGG-19

MODEL ARCHITECTURE

library(tensorflow)
## Warning: package 'tensorflow' was built under R version 4.2.2
## 
## Attaching package: 'tensorflow'
## The following object is masked from 'package:caret':
## 
##     train
vgg <- tf$keras$applications$VGG19(input_shape = as.integer(c(176,208,3)), weights = "imagenet", include_top = FALSE)

for (layer in vgg$layers) {
  layer$trainable <- FALSE
}

x <- tf$keras$layers$Flatten()(vgg$output)
prediction <- tf$keras$layers$Dense(units = 4, activation = "softmax")(x)

modelvgg <- tf$keras$Model(inputs = vgg$input, outputs = prediction)
modelvgg$summary()

MODEL FITTING

modelvgg %>% compile(optimizer = "adam",
                     loss = tf$losses$CategoricalCrossentropy(),
                     metrics = list(tf$keras$metrics$AUC(name = "auc")))
callback <- tf$keras$callbacks$EarlyStopping(monitor = "val_loss",
                                           patience = 8,
                                           restore_best_weights = TRUE)
modelvgg %>% fit(train_image_array_gen, epochs=10 , validation_data=val_image_array_gen, callbacks=callback) 
modelvgg %>% evaluate(val_image_array_gen)
##     loss      auc 
## 1.312332 0.776450

MODEL EVALUATION

val_data <- data.frame(file_name = paste0("data_lbb/train/", val_image_array_gen$filenames)) %>% 
  mutate(class = str_extract(file_name, "MildDemented|ModerateDemented|NonDemented|VeryMildDemented"))

head(val_data, 10)
##                                      file_name        class
## 1    data_lbb/train/MildDemented\\mildDem0.jpg MildDemented
## 2    data_lbb/train/MildDemented\\mildDem1.jpg MildDemented
## 3   data_lbb/train/MildDemented\\mildDem10.jpg MildDemented
## 4  data_lbb/train/MildDemented\\mildDem100.jpg MildDemented
## 5  data_lbb/train/MildDemented\\mildDem101.jpg MildDemented
## 6  data_lbb/train/MildDemented\\mildDem102.jpg MildDemented
## 7  data_lbb/train/MildDemented\\mildDem103.jpg MildDemented
## 8  data_lbb/train/MildDemented\\mildDem104.jpg MildDemented
## 9  data_lbb/train/MildDemented\\mildDem105.jpg MildDemented
## 10 data_lbb/train/MildDemented\\mildDem106.jpg MildDemented
tail(val_data, 10)
##                                                 file_name            class
## 1014 data_lbb/train/VeryMildDemented\\verymildDem1310.jpg VeryMildDemented
## 1015 data_lbb/train/VeryMildDemented\\verymildDem1311.jpg VeryMildDemented
## 1016 data_lbb/train/VeryMildDemented\\verymildDem1312.jpg VeryMildDemented
## 1017 data_lbb/train/VeryMildDemented\\verymildDem1313.jpg VeryMildDemented
## 1018 data_lbb/train/VeryMildDemented\\verymildDem1314.jpg VeryMildDemented
## 1019 data_lbb/train/VeryMildDemented\\verymildDem1315.jpg VeryMildDemented
## 1020 data_lbb/train/VeryMildDemented\\verymildDem1316.jpg VeryMildDemented
## 1021 data_lbb/train/VeryMildDemented\\verymildDem1317.jpg VeryMildDemented
## 1022 data_lbb/train/VeryMildDemented\\verymildDem1318.jpg VeryMildDemented
## 1023 data_lbb/train/VeryMildDemented\\verymildDem1319.jpg VeryMildDemented
# Function to convert image to array
image_prep <- function(x) {
  arrays <- lapply(x, function(path) {
    img <- image_load(path, target_size = target_size, 
                      grayscale = F # Set FALSE if image is RGB
                      )
    
    x <- image_to_array(img)
    x <- array_reshape(x, c(1, dim(x)))
    x <- x/255 # rescale image pixel
  })
  do.call(abind::abind, c(arrays, list(along = 1)))
}
test_x <- image_prep(val_data$file_name)

# Check dimension of testing data set
dim(test_x)
## [1] 1023  176  208    3
pred_test_vgg <-  predict(modelvgg,test_x) %>% 
  k_argmax() %>% # untuk mengambil nilai probability paling besar
  as.array() %>% 
  as.factor() 

head(pred_test_vgg, 10)
##  [1] 0 0 0 3 3 3 2 3 3 3
## Levels: 0 2 3
tail(pred_test_vgg, 10)
##  [1] 2 3 2 3 3 2 2 2 2 2
## Levels: 0 2 3
decode <- function(x){
  case_when(x == 0 ~ "MildDemented",
            x == 1 ~ "ModerateDemented",
            x == 2 ~ "NonDemented",
            x == 3 ~ "VeryMildDemented"
            )
}

pred_test_vgg <- sapply(pred_test_vgg, decode) 
confusionMatrix(as.factor(pred_test_vgg), 
                as.factor(val_data$class)
                )
## Warning in levels(reference) != levels(data): longer object length is not a
## multiple of shorter object length
## Warning in confusionMatrix.default(as.factor(pred_test_vgg),
## as.factor(val_data$class)): Levels are not in the same order for reference and
## data. Refactoring data to match.
## Confusion Matrix and Statistics
## 
##                   Reference
## Prediction         MildDemented ModerateDemented NonDemented VeryMildDemented
##   MildDemented               23                0          18               28
##   ModerateDemented            0                0           0                0
##   NonDemented                32                4         370              209
##   VeryMildDemented           88                6         124              121
## 
## Overall Statistics
##                                           
##                Accuracy : 0.5024          
##                  95% CI : (0.4713, 0.5335)
##     No Information Rate : 0.5005          
##     P-Value [Acc > NIR] : 0.4626          
##                                           
##                   Kappa : 0.1328          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: MildDemented Class: ModerateDemented
## Sensitivity                      0.16084                0.000000
## Specificity                      0.94773                1.000000
## Pos Pred Value                   0.33333                     NaN
## Neg Pred Value                   0.87421                0.990225
## Prevalence                       0.13978                0.009775
## Detection Rate                   0.02248                0.000000
## Detection Prevalence             0.06745                0.000000
## Balanced Accuracy                0.55428                0.500000
##                      Class: NonDemented Class: VeryMildDemented
## Sensitivity                      0.7227                  0.3380
## Specificity                      0.5205                  0.6722
## Pos Pred Value                   0.6016                  0.3569
## Neg Pred Value                   0.6520                  0.6535
## Prevalence                       0.5005                  0.3500
## Detection Rate                   0.3617                  0.1183
## Detection Prevalence             0.6012                  0.3314
## Balanced Accuracy                0.6216                  0.5051

Conclusion

Model VGG-19 yang telah dibuat tidak cukup akurat untuk memprediksi tingkat-tingkat Alzheimer secara akurat.