(2023S) 1152-4001 Statistical consulting

Prof. Daewon Yang (Chungnam National University)

*email address:

Faster computations in R

Section 1. Tips for faster R computation

1.1 Bottleneck

The system.time() function

###########################################
# Method 1 - system.time()
###########################################

### library
library(ggplot2movies)
library(profvis)

### system.time
system.time({ data(movies, package = "ggplot2movies")  })
   user  system elapsed 
      0       0       0 
system.time({ plot(movies$year, movies$rating)  })
   user  system elapsed 
   0.00    0.06    0.42 
system.time({ model <- loess(rating ~ year, data = movies)  })
   user  system elapsed 
   5.06    0.02   10.35 
system.time({ j <- order(movies$year) })
   user  system elapsed 
      0       0       0 
system.time({ lines(movies$year[j], model$fitted[j]) })

   user  system elapsed 
   0.00    0.00    0.01 

The tictoc package

###########################################
# Method 2 - tictoc package
###########################################

### library
library(ggplot2)
library(tictoc)


### system.time
tic("Total")
# step 1
tic("Step 1")
data(diamonds, package = "ggplot2")
toc()
Step 1: 0 sec elapsed
# step 2
tic("Step 2")
plot(price ~ carat, data = diamonds)
toc()
Step 2: 0.43 sec elapsed
# step 3
tic("Step 3")
m <- lm(price ~ carat, data = diamonds)
toc()
Step 3: 0.01 sec elapsed
# step 4
tic("Step 4")
abline(m, col = "red")
toc()
Step 4: 0 sec elapsed
toc()
Total: 0.46 sec elapsed

The Rprof() function

###########################################
# Method 3 - Rprof()
###########################################

### library
library(ggplot2)

### Rprof()
Rprof("result.out", interval=0.005) # Turn on the profiler
data(diamonds, package = "ggplot2")
plot(price ~ carat, data = diamonds)
m <- lm(price ~ carat, data = diamonds)
abline(m, col = "red")
Rprof(NULL)        # Turn off the profiler
summaryRprof("result.out")   # result
$by.self
           self.time self.pct total.time total.pct
"plot.xy"      0.110    78.57      0.110     78.57
"deparse"      0.025    17.86      0.025     17.86
"sys.call"     0.005     3.57      0.005      3.57

$by.total
                          total.time total.pct self.time self.pct
"block_exec"                   0.140    100.00     0.000     0.00
"call_block"                   0.140    100.00     0.000     0.00
"eng_r"                        0.140    100.00     0.000     0.00
"eval"                         0.140    100.00     0.000     0.00
"eval_with_user_handlers"      0.140    100.00     0.000     0.00
"evaluate"                     0.140    100.00     0.000     0.00
"evaluate::evaluate"           0.140    100.00     0.000     0.00
"evaluate_call"                0.140    100.00     0.000     0.00
"handle"                       0.140    100.00     0.000     0.00
"in_dir"                       0.140    100.00     0.000     0.00
"in_input_dir"                 0.140    100.00     0.000     0.00
"knitr::knit"                  0.140    100.00     0.000     0.00
"process_file"                 0.140    100.00     0.000     0.00
"process_group"                0.140    100.00     0.000     0.00
"process_group.block"          0.140    100.00     0.000     0.00
"rmarkdown::render"            0.140    100.00     0.000     0.00
"timing_fn"                    0.140    100.00     0.000     0.00
"withCallingHandlers"          0.140    100.00     0.000     0.00
"withVisible"                  0.140    100.00     0.000     0.00
"do.call"                      0.135     96.43     0.000     0.00
"plot"                         0.135     96.43     0.000     0.00
"plot.default"                 0.135     96.43     0.000     0.00
"plot.formula"                 0.135     96.43     0.000     0.00
"plot.xy"                      0.110     78.57     0.110    78.57
"deparse"                      0.025     17.86     0.025    17.86
"deparse1"                     0.025     17.86     0.000     0.00
"paste"                        0.025     17.86     0.000     0.00
"sys.call"                     0.005      3.57     0.005     3.57
"%in%"                         0.005      3.57     0.000     0.00
".External2"                   0.005      3.57     0.000     0.00
"["                            0.005      3.57     0.000     0.00
"[.data.frame"                 0.005      3.57     0.000     0.00
"[["                           0.005      3.57     0.000     0.00
"[[.data.frame"                0.005      3.57     0.000     0.00
"lm"                           0.005      3.57     0.000     0.00
"model.frame.default"          0.005      3.57     0.000     0.00
"na.omit"                      0.005      3.57     0.000     0.00
"na.omit.data.frame"           0.005      3.57     0.000     0.00
"stats::model.frame"           0.005      3.57     0.000     0.00

$sample.interval
[1] 0.005

$sampling.time
[1] 0.14

The profvis package

See https://support.posit.co/hc/en-us/articles/218221837-Profiling-R-code-with-the-RStudio-IDE for details.
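A minimal sketch of how profvis could be used on the same code as the Rprof() example above (it opens an interactive flame graph in RStudio):

### library
library(ggplot2)
library(profvis)

### profvis() - wrap the code to be profiled
profvis({
  data(diamonds, package = "ggplot2")
  plot(price ~ carat, data = diamonds)
  m <- lm(price ~ carat, data = diamonds)
  abline(m, col = "red")
})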

1.2 Avoid for loops as much as possible

Vectorize

### library
library(tictoc)

### data
x <- 1:100000000

### vectorize
tic("vectorize")
x2 <- x+1
toc()
vectorize: 0.25 sec elapsed
### for loop
tic("for loop")
x2 <- x
for(i in seq(x)){ 
  x2[i] <- x[i] + 1 
}
toc()
for loop: 2.58 sec elapsed

apply-family functions

### library
library(tictoc)
library(dplyr)

### data
y <- matrix(1:1000000, ncol=10000)

### apply (matrix)
tic("apply")
y2 <- apply(y,2, function(x){ mean(sqrt(x)) })
toc()
apply: 0.05 sec elapsed
### sapply (vector or list / return vector or matrix)
tic("sapply")
y2 <- sapply(1:ncol(y), function(i){ mean(sqrt(y[,i])) }, simplify = T)
toc()
sapply: 0.03 sec elapsed
tic("sapply")
y2 <- sapply(as.data.frame(y), function(x){ mean(sqrt(x)) })
toc()
sapply: 0.05 sec elapsed
### lapply (vector or list / return list)
tic("lapply")
y2 <- lapply(1:ncol(y), function(i){ mean(sqrt(y[,i])) }) %>% unlist()
toc()
lapply: 0.08 sec elapsed
### for loop
tic("for loop")
y2 <- NULL
for(i in 1:ncol(y)){
  y2 <- c(y2, mean(sqrt(y[,i])))
}
toc()
for loop: 0.17 sec elapsed
### data 2
z <- 1:1000000
f <- rep(1:10000,each=100) 
tmp.dat <- data.frame(z=z,f=f)

### tapply (by group)
tic("tapply")
y2 <- tapply(z,f,function(x){ mean(sqrt(x)) })
toc()
tapply: 0.06 sec elapsed
# ### dplyr
# tic("dplyr")
# y2 <- tmp.dat %>% group_by(f) %>% summarise( x = mean(sqrt(z))) %>% pull(x)
# toc()

### for loop
tic("for loop")
y2 <- NULL
for(i in unique(f)){
  y2 <- c(y2, mean(sqrt(z[f==i])))
}
toc()
for loop: 12.92 sec elapsed

Avoid modifying (growing) objects inside a for loop as much as possible

### library
library(tictoc)

tic("Time 1")
x <- data.frame(matrix(runif(100*1e4), ncol = 100))
y <- data.frame(matrix(runif(100*1e4), ncol = 100))
x <- rbind(x,y)
toc()
Time 1: 0.03 sec elapsed
# for loop with data.frame
tic("Time 2 : for loop with data.frame")
x <- data.frame(matrix(runif(100*1e4), ncol = 100))
for(i in seq_len(100)){
  x <- rbind(x, data.frame(matrix(runif(1*1e4), ncol = 100)))
}
toc()
Time 2 : for loop with data.frame: 1.05 sec elapsed
# for loop with matrix
tic("Time 3 : for loop with matrix")
x <- matrix(runif(100*1e4), ncol = 100)
system.time(
  for(i in seq_len(100)){
    x <- rbind(x, matrix(runif(1*1e4), ncol = 100))
  }
)
   user  system elapsed 
   0.11    0.06    0.45 
toc()
Time 3 : for loop with matrix: 0.53 sec elapsed
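When the final size is known in advance, a better pattern is to allocate the full object once and fill it by row index; a minimal sketch matching the dimensions used above (timings omitted):

# preallocation (sketch)
tic("Time 4 : preallocated matrix")
x <- matrix(NA_real_, nrow = 20000, ncol = 100)   # final size: 10000 + 100*100 rows
x[1:10000, ] <- runif(100*1e4)                    # fill the initial block
for(i in seq_len(100)){
  rows <- 10000 + (i - 1)*100 + seq_len(100)      # rows filled at iteration i
  x[rows, ] <- runif(1*1e4)                       # fill in place; x is never re-grown
}
toc()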

Readability vs Speed
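The fastest code is not always the clearest. A small illustration (a sketch; ifelse() reads naturally, while indexing a lookup vector is usually faster but more cryptic):

### library
library(tictoc)

### data
x <- rnorm(1e7)

### readable - ifelse()
tic("ifelse")
lab1 <- ifelse(x > 0, "pos", "neg")
toc()

### faster but less readable - lookup vector
tic("lookup")
lab2 <- c("neg", "pos")[(x > 0) + 1]
toc()

identical(lab1, lab2)   # both give the same result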

1.3 Use better R packages

### library
library(fastglm)
library(tictoc)

### data generation
set.seed(1)
n <- 100000
p <- 100
x <- matrix(rnorm(n*p),n,p)
y <- sample(0:1, n, replace=T)

### glm
tic("glm : ")
glm.fit0 <- glm.fit(x, y, family=binomial())
toc()
glm : : 1.46 sec elapsed
### fastglm
tic("fastglm : ")
glm.fit1 <- fastglm(x, y, family=binomial())
toc()
fastglm : : 1.18 sec elapsed
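When swapping in a faster package, it is worth checking that the two fits agree; a quick sketch (assuming both objects store their estimates in a coefficients component, so coef() applies):

### compare the two coefficient vectors (should be near 0)
max(abs(coef(glm.fit0) - coef(glm.fit1)))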

1.5 Reading & writing large CSV files

### library
library(data.table)
library(readr)

### data generation
set.seed(1)
n <- 100000
p <- 100

DAT <- data.frame(matrix(rnorm(n*p),n,p))

### write
# base
tic("(write) base : ")
write.csv(DAT, "base.csv")
toc()
(write) base : : 12.1 sec elapsed
# readr
tic("(write) readr : ")
write_csv(DAT, "readr.csv")
toc()
(write) readr : : 0.2 sec elapsed
# data.table
tic("(write) data.table : ")
fwrite(DAT, "datatable.csv")
toc()
(write) data.table : : 0.06 sec elapsed
### read
# base
tic("(read) base : ")
csv1 <- read.csv("base.csv")
toc()
(read) base : : 9.89 sec elapsed
# readr
tic("(read) readr : ")
csv2 <- read_csv("readr.csv")
toc()
(read) readr : : 0.47 sec elapsed
# data.table
tic("(read) data.table : ")
csv3 <- fread("datatable.csv")
toc()
(read) data.table : : 0.16 sec elapsed
### library
library(dplyr)

### class
csv1 %>% class()
[1] "data.frame"
csv2 %>% class()
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
csv3 %>% class()
[1] "data.table" "data.frame"
### data generation
set.seed(1)
n <- 10000000
x <- rnorm(n)
f <- sample(1:1000, n, replace=T)
DAT1 <- data.frame(x,f)
DAT2 <- tibble(x,f)
DAT3 <- data.table(x,f)

### base
tic("base : ")
result1 <- tapply(DAT1$x,DAT1$f,mean)
toc()
base : : 1.27 sec elapsed
### dplyr
tic("dplyr : ")
result2 <- DAT2 %>% group_by(f) %>% summarise(m=mean(x))
toc()
dplyr : : 1.03 sec elapsed
### data.table
tic("data.table : ")
result3 <- DAT3[,mean(x),by='f']
toc()
data.table : : 0.06 sec elapsed

Section 2. Parallel computing

2.1 The parallel package

How to use the parApply(), parSapply(), and parLapply() functions

##############################################
# library
##############################################
library(parallel)


##############################################
# data
##############################################
n <- 100
p <- 10000
Xmat <- matrix(rnorm(n*p), n, p)
##############################################
# parApply
##############################################

### (Step 1) check the # of cores
numCores <- min( detectCores()-1, 12)
### (Step 2) set up a cluster 
myCluster <- makeCluster(numCores)
### (Step 3) parApply
result <- parApply(myCluster, Xmat, 2, mean)
### (Step 4) stop the cluster 
stopCluster(myCluster)
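On Unix-alike systems (Linux/macOS), the parallel package also offers the fork-based mclapply(), which skips the cluster set-up and export steps; a minimal sketch (forking is not available on Windows):

### mclapply (fork-based alternative to parLapply)
result2 <- parallel::mclapply(1:ncol(Xmat), function(i){ mean(Xmat[,i]) },
                              mc.cores = numCores)
result2 <- unlist(result2)   # mclapply returns a list, like lapply()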
##############################################
# function
##############################################
a <- 1
b <- 2
custoFUN0 <- function(x){ mean(x) + a*b }

##############################################
# parApply
##############################################

### (Step 1) check the # of cores
numCores <- min( detectCores()-1, 12)

### (Step 2) set up a cluster 
myCluster <- makeCluster(numCores)

### (Step 3-0) export to clusters
clusterExport(myCluster, varlist=c("a","b"))

### (Step 3) parApply
result <- parApply(myCluster, Xmat, 2, custoFUN0)

### (Step 4) stop the cluster 
stopCluster(myCluster)
##############################################
# parSapply
##############################################

### (Step 1) check the # of cores
numCores <- min( detectCores()-1, 12)

### (Step 2) set up a cluster 
myCluster <- makeCluster(numCores)

### (Step 3-0) export to clusters
clusterExport(myCluster, varlist=c("a","b","custoFUN0","Xmat"))

### (Step 3) parSapply
result <- parSapply(myCluster, 1:ncol(Xmat), function(i){ custoFUN0(Xmat[,i]) })

### (Step 4) stop the cluster 
stopCluster(myCluster)


##############################################
# parLapply
##############################################

### (Step 1) check the # of cores
numCores <- min( detectCores()-1, 12)

### (Step 2) set up a cluster 
myCluster <- makeCluster(numCores)

### (Step 3-0) export to clusters
clusterExport(myCluster, varlist=c("a","b","custoFUN0","Xmat"))

### (Step 3) parLapply
result <- parLapply(myCluster, 1:ncol(Xmat), function(i){ custoFUN0(Xmat[,i]) })

### (Step 4) stop the cluster 
stopCluster(myCluster)

apply() vs parApply()

##############################################
# library
##############################################
library(parallel)
library(tictoc)


##############################################
# data
##############################################
n <- 100
p <- 10000
Xmat <- matrix(rnorm(n*p), n, p)


##############################################
# custom function
##############################################

### function 1
custoFUN1 <- function(x){ 
  
  # temporary setting
  tmp.n <- 1000
  tmp.p <- 5
  
  # temporary data
  tmp.y <- rnorm(tmp.n)
  tmp.x <- matrix(rnorm(tmp.n*tmp.p), tmp.n, tmp.p)
  
  # lm
  tmp.lm <- lm(tmp.y ~ tmp.x)
  tmp.result <- mean((fitted.values(tmp.lm) - tmp.y)^2)
  
  return(tmp.result)
}

### function 2
n0 <- 1000
p0 <- 5
custoFUN2 <- function(x){ 
  
  # temporary setting
  tmp.n <- n0
  tmp.p <- p0
  
  # temporary data
  tmp.y <- rnorm(tmp.n)
  tmp.x <- matrix(rnorm(tmp.n*tmp.p), tmp.n, tmp.p)
  
  # lm
  tmp.lm <- lm(tmp.y ~ tmp.x)
  tmp.result <- mean((fitted.values(tmp.lm) - tmp.y)^2)
  
  return(tmp.result)
}

##############################################
# apply vs parApply - case 1
##############################################

### apply 
tic("apply : ")
result0 <- apply(Xmat, 2, mean)
toc()
apply : : 0.05 sec elapsed
### parApply
tic("parApply : ")
# check the # of cores
tic("step 1 - ")
numCores <- min( detectCores()-1, 12)
toc()
step 1 - : 0 sec elapsed
# set up a cluster 
tic("step 2 - ")
myCluster <- makeCluster(numCores)
toc()
step 2 - : 0.22 sec elapsed
# parApply
tic("step 3 - ")
result <- parApply(myCluster, Xmat, 2, mean)
toc()
step 3 - : 0.04 sec elapsed
# stop the cluster 
tic("step 4 - ")
stopCluster(myCluster)
toc()
step 4 - : 0 sec elapsed
toc()
parApply : : 0.26 sec elapsed
##############################################
# apply vs parApply - case 2
##############################################

### apply 
tic("apply : ")
result0 <- apply(Xmat, 2, custoFUN1)
toc()
apply : : 6.05 sec elapsed
### parApply
tic("parApply : ")
# check the # of cores
tic("step 1 - ")
numCores <- min( detectCores()-1, 12)
toc()
step 1 - : 0 sec elapsed
# set up a cluster 
tic("step 2 - ")
myCluster <- makeCluster(numCores)
toc()
step 2 - : 0.2 sec elapsed
# parApply
tic("step 3 - ")
result <- parApply(myCluster, Xmat, 2, custoFUN1)
toc()
step 3 - : 0.75 sec elapsed
# stop the cluster 
tic("step 4 - ")
stopCluster(myCluster)
toc()
step 4 - : 0 sec elapsed
toc()
parApply : : 0.95 sec elapsed

sapply() vs parSapply()

##############################################
# sapply vs parSapply - case 2
##############################################

### sapply 
tic("sapply : ")
result0 <- sapply(1:ncol(Xmat), function(i){custoFUN2(Xmat[,i])})
toc()
sapply : : 5.45 sec elapsed
### parSapply
tic("parSapply : ")
# check the # of cores
tic("step 1 - ")
numCores <- min( detectCores()-1, 12)
toc()
step 1 - : 0 sec elapsed
# set up a cluster 
tic("step 2 - ")
myCluster <- makeCluster(numCores)
toc()
step 2 - : 0.21 sec elapsed
# export to clusters
tic("step 3-0 - ")
clusterExport(myCluster, varlist=c("n0","p0","custoFUN2"))
toc()
step 3-0 - : 0.01 sec elapsed
# parSapply
tic("step 3 - ")
result <- parSapply(myCluster, 1:ncol(Xmat), function(i){custoFUN2(Xmat[,i])})
toc()
step 3 - : 0.74 sec elapsed
# stop the cluster 
tic("step 4 - ")
stopCluster(myCluster)
toc()
step 4 - : 0 sec elapsed
toc()
parSapply : : 0.96 sec elapsed

lapply() vs parLapply()

##############################################
# lapply vs parLapply - case 2
##############################################

### lapply 
tic("lapply : ")
result0 <- lapply(1:ncol(Xmat), function(i){custoFUN2(Xmat[,i])})
toc()
lapply : : 5.65 sec elapsed
### parLapply
tic("parLapply : ")
# check the # of cores
tic("step 1 - ")
numCores <- min( detectCores()-1, 12)
toc()
step 1 - : 0 sec elapsed
# set up a cluster 
tic("step 2 - ")
myCluster <- makeCluster(numCores)
toc()
step 2 - : 0.22 sec elapsed
# export to clusters
tic("step 3-0 - ")
clusterExport(myCluster, varlist=c("n0","p0","custoFUN2"))
toc()
step 3-0 - : 0 sec elapsed
# parLapply
tic("step 3 - ")
result <- parLapply(myCluster, 1:ncol(Xmat), function(i){custoFUN2(Xmat[,i])})
toc()
step 3 - : 0.67 sec elapsed
# stop the cluster 
tic("step 4 - ")
stopCluster(myCluster)
toc()
step 4 - : 0 sec elapsed
toc()
parLapply : : 0.89 sec elapsed

2.2 The foreach package

######################################################
# library
######################################################
library(foreach)
library(doParallel)

######################################################
# foreach package
######################################################

### for loop
for(i in 1:3){ sqrt(i) }

### Without parallelization --> %do%
output <- foreach(i = 1:3) %do% {
  sqrt(i)
}
output
[[1]]
[1] 1

[[2]]
[1] 1.414214

[[3]]
[1] 1.732051
### With parallelization --> %dopar%
output <- foreach(i = 1:3) %dopar% {
  sqrt(i)
}
### foreach - other options : .combine
# .combine='c'
output <- foreach(i = 1:3, .combine='c') %do% {
  sqrt(i)
}
output
[1] 1.000000 1.414214 1.732051
# .combine='cbind' 
output <- foreach(x = 1:3, .combine = 'cbind') %do% {
  c(sqrt(x), log(x), x^2)
} 
output
     result.1  result.2 result.3
[1,]        1 1.4142136 1.732051
[2,]        0 0.6931472 1.098612
[3,]        1 4.0000000 9.000000
# .combine='rbind' 
output <- foreach(x = 1:3, .combine = 'rbind') %do% {
  c(sqrt(x), log(x), x^2)
} 
output
             [,1]      [,2] [,3]
result.1 1.000000 0.0000000    1
result.2 1.414214 0.6931472    4
result.3 1.732051 1.0986123    9
# .combine='+'
output <- foreach(i = 1:3, .combine='+') %do% {
  sqrt(i)
}
output
[1] 4.146264
# .combine='*'
output <- foreach(i = 1:3, .combine='*') %do% {
  sqrt(i)
}
output
[1] 2.44949

2.3 The foreach package + the doParallel package

######################################################
# foreach & doParallel packages
######################################################

### step 1
# check # of cores
detectCores()
[1] 32
# create the cluster
myCluster <- makeCluster(12)
### step 2
# register the cluster with the foreach package
registerDoParallel(myCluster)
### step 3
# parallel computing using foreach with %dopar% 
output <- foreach(i = 1:3) %dopar% {
  sqrt(i)
}
### Step 4
# stop the cluster 
stopCluster(myCluster)

2.4 R code examples

######################################################
# R code example 
######################################################
# foreach & doParallel packages
######################################################

tic("foreach & doParallel : ")
### step 1
# check # of cores
detectCores()
[1] 32
# create the cluster
myCluster <- makeCluster(12)

### step 2
# register the cluster with the foreach package
registerDoParallel(myCluster)

### step 3
# parallel computing using foreach with %dopar% 
output <- foreach(i = 1:200) %dopar% {
  
  n <- 100000
  p <- 20
  y <- sample(0:1, size=n, replace=T)
  x <- matrix(rnorm(n*p), nrow=n)
  glm.fit <- glm(y~x, family="binomial")
  glm.fit
}

### Step 4
# stop the cluster 
stopCluster(myCluster)
toc()
foreach & doParallel : : 25.41 sec elapsed
######################################################
# for loop
######################################################

tic("for loop : ")
output2 <- list()
for(i in 1:200){
  
  n <- 100000
  p <- 20
  y <- sample(0:1, size=n, replace=T)
  x <- matrix(rnorm(n*p), nrow=n)
  glm.fit <- glm(y~x, family="binomial")
  output2[[i]] <- glm.fit
}
toc()
for loop : : 82.66 sec elapsed
######################################################
# R code example 
######################################################
# foreach & doParallel packages
######################################################

### step 1
# check # of cores
detectCores()
[1] 32
# create the cluster
myCluster <- makeCluster(3)

### step 2
# register the cluster with the foreach package
registerDoParallel(myCluster)

### step 3
# parallel computing using foreach with %dopar% 
output <- foreach(i = 1:10, .packages = c("glmnet")) %dopar% {
  
  n <- 10000
  p <- 10
  y <- rnorm(n)
  x <- matrix(rnorm(n*p), nrow=n)
  
  train <- sample(1:nrow(x), nrow(x)/2)
  test <- (-train)
  y.test <- y[test]
  
  ridge.mod <- glmnet(x[train,], y[train], alpha = 0, lambda = 1, thresh = 1e-12)  # alpha = 0 for ridge regression
  ridge.pred <- predict(ridge.mod, newx=x[test, ])
  mean((ridge.pred-y.test)^2)
}

### Step 4
# stop the cluster 
stopCluster(myCluster)
tmp.ParallelComputing___FUN <- function(x,y, niter, core.num=8){
  
  # create the cluster
  myCluster <- makeCluster(core.num)
  
  ### step 2
  # register the cluster with the foreach package
  registerDoParallel(myCluster)
  
  ### step 3
  # parallel computing using foreach with %dopar% 
  output <- foreach(i = 1:niter, .combine="c", .export=c("tmp.intercept1","tmp.intercept2")) %dopar% {
    
    tmp.result <- x + i*y + tmp.intercept1 * tmp.intercept2
  }
  
  ### Step 4
  # stop the cluster 
  stopCluster(myCluster)
  
  return(output)
}

tmp.intercept1 <- -1
tmp.intercept2 <- -3

tmp.result <- tmp.ParallelComputing___FUN(x=3, y=2, niter=30, core.num=3)
tmp.result
 [1]  8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50
[23] 52 54 56 58 60 62 64 66

Section 3. Rcpp package