# Parallel Computing

# Nguyen Chi Dung

#==============================
#     Parallel Computing
#==============================

# Attach every package up front. The relative order is the same as before,
# so function masking between the packages is unchanged.
library(magrittr)
library(tidyverse)
library(MASS)
library(caret)

# NOTE: the former `rm(list = ls())` call was dropped on purpose. It only
# removes objects from the global environment (attached packages, options
# and registered backends stay as they are), so it never produced a clean
# session, and it silently wiped the workspace of anyone sourcing this
# script. Restart R when a clean slate is needed.

# Boston housing data shipped with MASS. The response `medv` is divided by
# 50 and every model below is fitted on that rescaled value.
# NOTE(review): presumably this maps medv into (0, 1] to suit nnet's default
# output unit -- confirm.
data("Boston")
Boston <- Boston %>% mutate(medv = medv / 50)

# Resampling scheme shared by all train() calls below: 5-fold
# cross-validation repeated 5 times. The seed is set once here; the later
# train() calls in this script do not re-seed.
set.seed(1)
control <- trainControl(method = "repeatedcv",
                        number = 5,
                        repeats = 5)

#----------------------
#      Default
#----------------------

# Baseline fit: up to this point the script has not registered any parallel
# backend. The elapsed time printed by system.time() is the reference for
# the sections that follow.
system.time({
  ann2 <- train(
    medv ~ .,
    data = Boston,
    trControl = control,
    method = "nnet",
    metric = "RMSE",
    tuneLength = 10,
    trace = FALSE
  )
})
##    user  system elapsed 
##  278.30    0.60  296.49
#---------------------------------
#   Using the doParallel package
#---------------------------------

# References: https://github.com/tobigithub/R-parallel/wiki/R-parallel-Setups
# References: http://rpubs.com/chidungkt/315749
#             https://cran.r-project.org/web/packages/doParallel/vignettes/gettingstartedParallel.pdf

library(doParallel)
n_cores <- detectCores()
n_cores
## [1] 4

# Leave one core free for the rest of the system, but never ask for fewer
# than one worker: detectCores() can return NA when the count is unknown,
# and `n_cores - 1` is 0 on a single-core machine.
n_workers <- max(1, n_cores - 1, na.rm = TRUE)
registerDoParallel(cores = n_workers)

# Same model and resampling settings as the baseline; only the registered
# backend differs.
system.time(
  ann3 <- train(medv ~ ., 
                data = Boston, 
                method = "nnet", 
                metric = "RMSE",
                tuneLength = 10, 
                trace = FALSE, 
                trControl = control)
  
)
##    user  system elapsed 
##    3.62    0.15  140.97
#----------------------------
#  Using the parallel package
#----------------------------
# References: https://github.com/tobigithub/R-parallel/wiki/R-parallel-Setups

library(parallel)

# Number of logical cores. `nCores` is kept for compatibility but is not
# used anywhere else in this section.
nCores <- detectCores(logical = TRUE)
nThreads <- detectCores(logical = TRUE)
if (is.na(nThreads)) {
  # detectCores() returns NA when the count cannot be determined;
  # makeCluster() needs a concrete number of workers.
  nThreads <- 1L
}

# Creating a cluster is not enough: it has to be registered as the foreach
# backend, otherwise train() never uses it and keeps running on whatever
# backend was registered earlier in the session.
cl <- makeCluster(nThreads)
doParallel::registerDoParallel(cl)

# `finally` guarantees the worker processes are shut down even if train()
# fails, and resets foreach to sequential so no later call is dispatched
# to the stopped cluster.
tryCatch(
  system.time(
    ann1 <- train(medv ~ ., 
                  data = Boston, 
                  method = "nnet", 
                  metric = "RMSE",
                  tuneLength = 10, 
                  trace = FALSE, 
                  trControl = control)
  ),
  finally = {
    stopCluster(cl)
    foreach::registerDoSEQ()
  }
)
# Output recorded by the original run, made before `cl` was registered as
# the foreach backend; re-run to refresh the figures.
##    user  system elapsed 
##    2.91    0.13  130.86

#------------------------
#  Using the doMC package
#------------------------

# References: https://github.com/tobigithub/R-parallel/wiki/R-parallel-package-overview
# install.packages("doMC", repos = "http://R-Forge.R-project.org") 

library(doMC)

# Register the doMC backend with a fixed request of four cores.
registerDoMC(cores = 4)

# Same model and resampling settings as the earlier sections.
system.time({
  ann4 <- train(
    medv ~ .,
    data = Boston,
    trControl = control,
    method = "nnet",
    metric = "RMSE",
    tuneLength = 10,
    trace = FALSE
  )
})
# NOTE(review): the recorded elapsed time below is close to the sequential
# baseline, which suggests this backend did not actually run in parallel on
# the machine used -- confirm the platform supports doMC.
##    user  system elapsed 
##  261.41    0.25  266.58