library(rbenchmark)
library(dplyr)
Here is a benchmark comparison between `base::split` and `dplyr::group_split`.
# Make a long table by stacking 100 copies of mtcars.
# rep(list(mtcars), 100) builds the list of copies in one step, instead of
# growing an empty list element by element inside a for loop.
dt <- bind_rows(rep(list(mtcars), 100))
# Time four ways of breaking `dt` up by `cyl`, running each expression
# 1000 times, and render the resulting timing table with knitr::kable().
knitr::kable(
  benchmark(
    # dplyr: group_split() on the cyl column
    group_split = {
      group_split(dt, cyl)
    },
    # dplyr: group_nest() on the cyl column
    group_nest = {
      group_nest(dt, cyl)
    },
    # base R: split() called directly
    split = {
      split(dt, dt$cyl)
    },
    # base R: split() reached through the magrittr pipe
    split2 = {
      dt %>% split(.$cyl)
    },
    replications = 1000
  )
)
| | test | replications | elapsed | relative | user.self | sys.self | user.child | sys.child |
|---|---|---|---|---|---|---|---|---|
| 2 | group_nest | 1000 | 2.593 | 2.614 | 2.507 | 0.041 | 0 | 0 |
| 1 | group_split | 1000 | 0.992 | 1.000 | 0.874 | 0.091 | 0 | 0 |
| 3 | split | 1000 | 2.995 | 3.019 | 2.844 | 0.081 | 0 | 0 |
| 4 | split2 | 1000 | 3.312 | 3.339 | 3.105 | 0.084 | 0 | 0 |