setwd("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection")
library(magick)
## Linking to ImageMagick 6.9.9.14
## Enabled features: cairo, freetype, fftw, ghostscript, lcms, pango, rsvg, webp
## Disabled features: fontconfig, x11
library(stringi)
library(keras)
Create folders for image deep learning training classification.
# Create the train / validation / test folder tree, each with one sub-folder
# per class ("normal", "lung_op").
# recursive = TRUE also creates a missing parent folder, and
# showWarnings = FALSE silences the "already exists" warning that the
# individual dir.create() calls raised on every re-run of the script.
for (split_name in c("train", "validation", "test")) {
  for (class_name in c("normal", "lung_op")) {
    dir.create(
      file.path("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection", split_name, class_name),
      recursive = TRUE,
      showWarnings = FALSE
    )
  }
}
Loading the CSV files used for both classification & later cropping.
# Read both CSV files without their header row (skip = 1) and attach our own
# column names instead.
RSNA_train_detailed <- setNames(
  read.csv(file = "stage_1_detailed_class_info.csv", header = FALSE, sep = ",", skip = 1),
  c("imgID", "class")
)
RSNA_train_labeled <- setNames(
  read.csv(file = "stage_1_train_labels.csv", header = FALSE, sep = ",", skip = 1),
  c("patientId", "x", "y", "width", "height", "target")
)
# Keep only the label rows without any missing value
# (presumably the rows that carry a bounding box -- verify against the CSV).
RSNA_train_lung_op <- na.omit(RSNA_train_labeled)
Figuring out how to crop the not-normal & normal cases.
# Fixed crop window derived from the complete label rows:
# the offset is (mean - 1 sd) of x / y, the size is (mean + 1 sd) of
# width / height, each rounded down to a whole number.
AvgMinXCrop <- with(RSNA_train_lung_op, floor(mean(x) - sd(x)))
AvgMinYCrop <- with(RSNA_train_lung_op, floor(mean(y) - sd(y)))
WidthCrop <- with(RSNA_train_lung_op, floor(mean(width) + sd(width)))
HeightCrop <- with(RSNA_train_lung_op, floor(mean(height) + sd(height)))
Calculating the crop location & cropping size.
# Give every normal row (target == 0) the fixed average crop window; rows with
# any other target keep the x / y / width / height read from the labels file.
#
# The previous loop assigned `RSNA_train_labeled$x <- AvgMinXCrop` (no row
# index) inside a per-row condition, which replaced the whole column -- and
# therefore also the boxes of the non-normal rows -- as soon as one normal row
# was met. The assignment is now restricted to the matching rows.
# which() drops NA targets, so such rows are left untouched instead of
# aborting the script.
normal_rows <- which(RSNA_train_labeled$target == 0)
RSNA_train_labeled$x[normal_rows] <- AvgMinXCrop
RSNA_train_labeled$y[normal_rows] <- AvgMinYCrop
RSNA_train_labeled$width[normal_rows] <- WidthCrop
RSNA_train_labeled$height[normal_rows] <- HeightCrop
Converting the train set into JPEG files & cropping according to the averages.
# Convert every labelled DICOM image into a JPEG inside train/<class>/.
# Per row: read the .dcm file, crop it to the row's width x height + x + y
# window, scale it to fit 96x96, build a red-tinted and a green-tinted copy,
# scale both to a height of 200 and append them into a single image.
# seq_len(nrow()) keeps the loop from running when the table is empty
# (1:length() would iterate over c(1, 0)).
for (i in seq_len(nrow(RSNA_train_labeled))) {
  # Pick the destination class folder first so that rows with an unexpected
  # target are skipped instead of silently reusing the previous row's path.
  if (RSNA_train_labeled$target[i] == 1) {
    class_dir <- "lung_op"
  } else if (RSNA_train_labeled$target[i] == 0) {
    class_dir <- "normal"
  } else {
    warning("Skipping row ", i, ": unexpected target value", call. = FALSE)
    next
  }

  OrigConvFile <- paste0("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/stage_1_train_images/", RSNA_train_labeled$patientId[i], ".dcm")
  img <- image_read(OrigConvFile)

  # ImageMagick geometry string: <width>x<height>+<x offset>+<y offset>
  crop <- paste0(RSNA_train_labeled$width[i], "x", RSNA_train_labeled$height[i], "+", RSNA_train_labeled$x[i], "+", RSNA_train_labeled$y[i])
  img <- image_crop(img, crop)
  img <- image_scale(img, "96x96")

  img1 <- image_colorize(img, 20, "red")
  img2 <- image_colorize(img, 20, "green")
  img3 <- c(img1, img2)
  img <- image_append(image_scale(img3, "x200"))

  NewConvFile <- paste0("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/train/", class_dir, "/", RSNA_train_labeled$patientId[i], ".jpeg")
  image_write(image_convert(img, "jpeg"), path = NewConvFile, format = "jpeg")
}
Handling cross validation for deep learning.
Sampling train datasets 5% per each cross validation.
# Hold out the first 1000 normal rows and the first 500 lung-opacity rows
# (in file order) as the validation set.
vlidation_normal <- head(subset(RSNA_train_labeled, target == 0), 1000)
vlidation_lung_op <- head(subset(RSNA_train_labeled, target == 1), 500)
validation <- rbind(vlidation_normal, vlidation_lung_op)
# Drop every row of a held-out patient from the training table.
RSNA_train_labeled <- RSNA_train_labeled[is.na(match(RSNA_train_labeled$patientId, validation$patientId)), ]
# Handling images from train -> validation folders.
# Each (patientId, target) pair is moved once: patientId values may repeat
# across label rows (presumably one row per bounding box -- confirm), and a
# second rename of an already-moved file fails.
validation_files <- unique(validation[, c("patientId", "target")])
if (nrow(validation_files) > 0) {
  validation_class <- ifelse(validation_files$target == 1, "lung_op", "normal")
  validation_names <- paste0(validation_files$patientId, ".jpeg")
  FromWhere <- paste0("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/train/", validation_class, "/", validation_names)
  ToWhere <- paste0("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/validation/", validation_class, "/", validation_names)
  # file.rename() is vectorised and returns one TRUE / FALSE per file.
  moved <- file.rename(from = FromWhere, to = ToWhere)
  if (!all(moved)) {
    warning(sum(!moved), " validation image(s) could not be moved", call. = FALSE)
  }
}
Cropping the images to be predicted
# Crop up to the first 1000 test images to the fixed average window and save
# them as JPEG: files 1-500 go to test/normal, files 501-1000 to test/lung_op.
# NOTE(review): the folder is chosen by file position only, not by any label.
testList <- dir("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/stage_1_test_images")
# The crop geometry is the same for every test image, so build it once.
crop <- paste0(WidthCrop, "x", HeightCrop, "+", AvgMinXCrop, "+", AvgMinYCrop)
# Bounded by the number of files actually present: the hard-coded 1:500 and
# 501:1000 ranges indexed past the end of testList (NA file names) whenever
# fewer than 1000 test images existed.
for (i in seq_len(min(1000, length(testList)))) {
  class_dir <- if (i <= 500) "normal" else "lung_op"
  OrigConvFile <- paste0("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/stage_1_test_images/", testList[i])
  img <- image_read(OrigConvFile)
  img <- image_crop(img, crop)
  NewConvFile <- paste0("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/test/", class_dir, "/", testList[i], ".jpeg")
  image_write(image_convert(img, "jpeg"), path = NewConvFile, format = "jpeg")
}
Deep Learning
# Class folder names, in the order passed to the image generators
Pneumonia_list <- c("normal", "lung_op")
# One output unit per class
output_n <- 2

# Size every image is resized to by the generators
# (the original note says the images are 54 x 96 px -- TODO confirm)
img_width <- 54
img_height <- 96
target_size <- c(img_width, img_height)
# Colour images: red, green and blue channels
channels <- 3

# Root folders of the training and validation images
train_image_files_path <- "G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/train"
valid_image_files_path <- "G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/validation"
# Training generator: pixel values are only rescaled by 1/255.
# The augmentation options are kept below for reference but are disabled.
train_data_gen <- image_data_generator(
  rescale = 1/255
  # rotation_range = 40,
  # width_shift_range = 0.2,
  # height_shift_range = 0.2,
  # shear_range = 0.2,
  # zoom_range = 0.2,
  # horizontal_flip = TRUE,
  # fill_mode = "nearest"
)

# Validation generator: never augmented, but rescaled the same way.
valid_data_gen <- image_data_generator(rescale = 1/255)

# Stream the training images from their class sub-folders
train_image_array_gen <- flow_images_from_directory(
  directory = train_image_files_path,
  generator = train_data_gen,
  target_size = target_size,
  class_mode = "categorical",
  classes = Pneumonia_list,
  seed = 42
)

# Stream the validation images from their class sub-folders
valid_image_array_gen <- flow_images_from_directory(
  directory = valid_image_files_path,
  generator = valid_data_gen,
  target_size = target_size,
  class_mode = "categorical",
  classes = Pneumonia_list,
  seed = 42
)
# Print how many training images the generator found for each class index.
# The `##` lines are console output recorded when the document was rendered.
cat("Number of images per class:")
## Number of images per class:
## Number of images per class:
table(factor(train_image_array_gen$classes))
##
## 0 1
## 19025 5353
# Print which class folder was mapped to which numeric index.
cat("\nClass label vs index mapping:\n")
##
## Class label vs index mapping:
## Class label vs index mapping:
train_image_array_gen$class_indices
## $normal
## [1] 0
##
## $lung_op
## [1] 1
# Keep the label -> index mapping; it is used later to name the columns of the
# prediction table.
Pneumonia_classes_indices <- train_image_array_gen$class_indices
# number of training samples
train_samples <- train_image_array_gen$n
# number of validation samples
valid_samples <- valid_image_array_gen$n
# define batch size and number of epochs
# NOTE(review): batch_size is not passed to flow_images_from_directory() above,
# so the generators presumably use their own default batch size -- confirm.
batch_size <- 50
epochs <- 32
# Initialise a sequential model: two 3x3 convolution layers (32 filters each),
# max pooling, then a 100-unit dense layer and a softmax output with one unit
# per class.
model <- keras_model_sequential()

# Add layers.
# `filters` is spelled out in full: the previous `filter =` only worked through
# R's partial argument matching.
model %>%
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), padding = "same",
                input_shape = c(img_width, img_height, channels)) %>%
  layer_activation("relu") %>%

  # Second hidden layer
  layer_conv_2d(filters = 32, kernel_size = c(3, 3), padding = "same") %>%
  layer_activation_leaky_relu(0.5) %>%
  layer_batch_normalization() %>%

  # Use max pooling
  layer_max_pooling_2d(pool_size = c(2, 2)) %>%
  layer_dropout(0.25) %>%

  # Flatten max filtered output into feature vector
  # and feed into dense layer
  layer_flatten() %>%
  layer_dense(100) %>%
  layer_activation("relu") %>%
  layer_dropout(0.5) %>%

  # Outputs from dense layer are projected onto output layer
  layer_dense(output_n) %>%
  layer_activation("softmax")

# Compile model: categorical cross-entropy loss, RMSprop optimiser,
# accuracy reported as metric.
model %>% compile(
  loss = "categorical_crossentropy",
  optimizer = optimizer_rmsprop(lr = 0.0001, decay = 1e-6),
  metrics = "accuracy"
)
# Fit DL model
# The number of steps is derived from the batch size the generators actually
# use. flow_images_from_directory() is called without a batch_size argument, so
# the script-level `batch_size` does not describe how many images one step
# consumes; dividing by it made every epoch cover only part of the data.
hist <- model %>% fit_generator(
  # training data
  train_image_array_gen,

  # epochs
  steps_per_epoch = as.integer(train_samples / train_image_array_gen$batch_size),
  epochs = epochs,

  # validation data
  validation_data = valid_image_array_gen,
  validation_steps = as.integer(valid_samples / valid_image_array_gen$batch_size),

  # print progress
  verbose = 2,
  callbacks = list(
    # save best model after every epoch
    callback_model_checkpoint("G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/Pneumonia_checkpoints.h5", save_best_only = TRUE),
    # only needed for visualising with TensorBoard
    callback_tensorboard(log_dir = "G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection")
  )
)
Load DL model
# Load the model written by the checkpoint callback.
model2 <- load_model_hdf5(filepath = "G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/Pneumonia_checkpoints.h5")

# Build the prediction folders & run the image predictions.
# The test images are processed in non-overlapping batches of up to 16 files.
# Each batch is converted to JPEG under test/normal and test/lung_op, predicted,
# and removed again. The folder a file is staged in is only needed because the
# generator expects class sub-folders; it does not influence the prediction.
#
# Fixes compared with the previous loop:
#  * batches no longer overlap (stride 8 / width 16 processed most files twice)
#    and no longer index past the end of testList on the last iteration;
#  * the generator does not shuffle and its batch size equals the number of
#    staged files, so prediction row k belongs to test_generator$filenames[k];
#  * patientId holds the bare id instead of "<folder>/<id>.dcm.jpeg";
#  * batch results are collected in a list and bound once.
test_source_path <- "G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/stage_1_test_images"
test_image_files_path <- "G:/DataScienceProject/Kaggle_RSNA_Pneumonia_Detection/test"
testList <- dir(test_source_path)

# The crop geometry and the rescaling generator are identical for every batch.
crop <- paste0(WidthCrop, "x", HeightCrop, "+", AvgMinXCrop, "+", AvgMinYCrop)
test_datagen <- image_data_generator(rescale = 1/255)

# Class names ordered by their training index; used as prediction column names.
Pneumonia_classes_indices_df <- data.frame(indices = unlist(Pneumonia_classes_indices)) #categories
Pneumonia_classes_indices_df <- Pneumonia_classes_indices_df[order(Pneumonia_classes_indices_df$indices), , drop = FALSE]

# Remove every staged file below test/normal and test/lung_op.
clear_staged_test_images <- function() {
  for (staged_dir in c("normal", "lung_op")) {
    files.to.delete <- dir(paste0(test_image_files_path, "/", staged_dir), pattern = ".[0-9]", recursive = TRUE, full.names = TRUE)
    if (length(files.to.delete) > 0) {
      file.remove(files.to.delete)
    }
  }
  invisible(NULL)
}

test_batches <- split(testList, ceiling(seq_along(testList) / 16))
batch_predictions <- vector("list", length(test_batches))

for (w in seq_along(test_batches)) {
  batch_files <- test_batches[[w]]

  # Start from empty folders so leftovers from an earlier step or an aborted
  # run cannot end up in this batch.
  clear_staged_test_images()

  for (i in seq_along(batch_files)) {
    OrigConvFile <- paste0(test_source_path, "/", batch_files[i])
    img <- image_read(OrigConvFile)
    # Same preprocessing as for the training images, with the fixed window.
    img <- image_crop(img, crop)
    img <- image_scale(img, "96x96")
    img1 <- image_colorize(img, 20, "red")
    img2 <- image_colorize(img, 20, "green")
    img3 <- c(img1, img2)
    img <- image_append(image_scale(img3, "x200"))
    # First 8 files of a batch go to test/normal, the rest to test/lung_op.
    class_dir <- if (i <= 8) "normal" else "lung_op"
    NewConvFile <- paste0(test_image_files_path, "/", class_dir, "/", batch_files[i], ".jpeg")
    image_write(image_convert(img, "jpeg"), path = NewConvFile, format = "jpeg")
  }

  test_generator <- flow_images_from_directory(
    test_image_files_path,
    test_datagen,
    target_size = c(54, 96),
    class_mode = 'categorical',
    batch_size = length(batch_files),
    shuffle = FALSE)

  predictions <- as.data.frame(predict_generator(model2, test_generator, steps = 1)) #matrix of elements
  colnames(predictions) <- rownames(Pneumonia_classes_indices_df)
  predictions <- round(predictions, digits = 2)

  # Strip the class folder (either path separator) and the file extensions.
  staged_names <- sub("^.*[/\\\\]", "", as.character(test_generator$filenames))
  predictions$patientId <- sub("(\\.dcm)?\\.jpeg$", "", staged_names)
  predictions$PredictionString <- 0

  batch_predictions[[w]] <- predictions

  ## Delete the staged files of this batch
  clear_staged_test_images()
}

## Merge into single prediction dataframe
TotalPredictions <- if (length(batch_predictions) > 0) {
  do.call(rbind, batch_predictions)
} else {
  data.frame()
}
Predict the confidence & the expected pneumonia location
# Build the PredictionString for every predicted image:
# "<lung_op score> <x> <y> <width> <height>".
# Scores above the threshold get the fixed average crop window, all others get
# the "0 0 768 768" box.
# The lung_op scores are rounded softmax outputs in [0, 1], so the threshold is
# 0.85; the previous comparison against 85 could never be true, and a score
# exactly equal to the threshold matched neither branch.
# NOTE(review): confirm that a box should be emitted for low-score images at
# all -- the expected submission format is not visible here.
lung_op_score <- round(TotalPredictions$lung_op, digits = 2)
TotalPredictions$PredictionString <- ifelse(
  lung_op_score > 0.85,
  paste0(lung_op_score, " ", AvgMinXCrop, " ", AvgMinYCrop, " ", WidthCrop, " ", HeightCrop),
  paste0(lung_op_score, " 0 0 768 768")
)
# Tidy the prediction table: keep only the id and the prediction string,
# then write it to CSV without row names.
TotalPredictions <- TotalPredictions[c("patientId", "PredictionString")]
write.csv(TotalPredictions, file = "TotalPredictions.csv", row.names = FALSE)