lapply() VS parallel::mclapply()
library(parallel)
# Compare sequential lapply() with parallel::mclapply() on 4 cores.
# Each element n of 1000:9999 is passed to rnorm(), so every call draws n
# normal variates (9000 calls in total).
# benchmark() is presumably rbenchmark::benchmark (rbenchmark is listed as
# attached in the sessionInfo() output); no replications argument is given,
# and the result table below reports 100 replications per expression.
benchmark(
lapply(1000:9999, rnorm),
mclapply(1000:9999, rnorm, mc.cores = 4)
)
test replications elapsed relative
1 lapply(1000:9999, rnorm) 100 464.604 1.607
2 mclapply(1000:9999, rnorm, mc.cores = 4) 100 289.162 1.000
반면에 input이 상당히 적은 경우에는 mclapply()의 성능이 오히려 좋지 않다.
# Same lapply() vs mclapply() comparison on a much smaller workload:
# only 90 calls, each drawing between 10 and 99 normal variates.
# In the recorded result below, mclapply() is roughly 17x slower than
# lapply() -- presumably the cost of dispatching to 4 workers outweighs the
# work itself at this size (TODO confirm by profiling).
benchmark(
lapply(10:99, rnorm),
mclapply(10:99, rnorm, mc.cores = 4)
)
test replications elapsed relative
1 lapply(10:99, rnorm) 100 0.118 1.000
2 mclapply(10:99, rnorm, mc.cores = 4) 100 1.994 16.898
# Row-filter benchmark: data.frame vs keyed data.table vs dplyr::filter().
# x holds 2.6e7 uniform draws; y is each of the 26 LETTERS repeated 10000
# times (260,000 values), which data.frame() recycles 100 times to match x,
# giving 26,000,000 rows.
DF <- data.frame(x = runif(2.6e+07), y = rep(LETTERS, each = 10000))
# Same data as a data.table, keyed on y so that DT[J("C"), ] can look rows
# up through the key instead of scanning the column.
DT <- as.data.table(DF)
setkey(DT, y)
# Four ways to select the rows where y is "C", in order:
#   1. base logical indexing on the data.frame
#   2. keyed lookup on the data.table
#   3. dplyr::filter() on the data.frame
#   4. dplyr::filter() on the data.table
# The assignment to x is part of each timed expression.
benchmark(
x <- DF[DF$y == "C", ],
x <- DT[J("C"), ],
x <- DF %>% dplyr::filter(y == "C"),
x <- DT %>% dplyr::filter(y == "C")
)
test replications elapsed relative
2 x <- DT[J("C"), ] 100 2.500 1.000
4 x <- DT %>% dplyr::filter(y == "C") 100 23.083 9.233
3 x <- DF %>% dplyr::filter(y == "C") 100 23.248 9.299
1 x <- DF[DF$y == "C", ] 100 38.298 15.319
# Print the R version, platform, and attached/loaded package versions of the
# session that produced the timings, so the results can be put in context.
sessionInfo()
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.12.3 (Sierra)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats grDevices utils datasets graphics methods
## [8] base
##
## other attached packages:
## [1] rbenchmark_1.0.0 data.table_1.10.4
## [3] ggplot2_2.2.1 dplyr_0.5.0
## [5] knitr_1.15.1 useful.lovetoken_0.1.0.0090
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.9 magrittr_1.5 munsell_0.4.3 colorspace_1.3-2
## [5] R6_2.2.0 stringr_1.2.0 plyr_1.8.4 tools_3.3.1
## [9] grid_3.3.1 gtable_0.2.0 pacman_0.4.1 DBI_0.5-1
## [13] htmltools_0.3.5 yaml_2.1.14 lazyeval_0.2.0 assertthat_0.1
## [17] digest_0.6.12 rprojroot_1.2 tibble_1.2 evaluate_0.10
## [21] rmarkdown_1.3 stringi_1.1.2 scales_0.4.1 backports_1.0.5