Setup: load microbenchmark and define a simpler print method for its results:
library(microbenchmark)
# Simpler microbenchmark output
#
# Replacement print method for `microbenchmark` objects: prints one row per
# benchmarked expression, showing only its median time and the expression text.
#
# @param x A `microbenchmark` result object.
# @param ... Accepted for compatibility with the `print()` generic; unused.
# @return `x`, invisibly, following the convention for print methods.
print.microbenchmark <- function(x, ...) {
  # Always summarise in microseconds, whatever unit the benchmark was run with.
  res <- summary(x, unit = "us")
  print(data.frame(median = res$median, name = res$expr))
  # Return the benchmark object itself (not the summary table) so that
  # `y <- print(x)` and piping behave like other print methods.
  invisible(x)
}
There’s a significant amount of overhead just from calling the R function get(). This is true even when you skip the pos argument and provide envir. For example, calling get() takes much more time than calling .Internal(get()) directly, even though that internal call is what get() itself ends up doing.
If you already know that the object exists in an environment, it’s faster to use e$x, and slightly faster still to use e[["x"]]:
# Create a fresh environment holding a single binding, `a`, to look up.
e <- new.env()
e$a <- 1
# Accessing objects in environments: time six ways of reading `a` from `e`.
microbenchmark(
# get() with the environment passed as the second positional argument.
get("a", e, inherits = FALSE),
# get() with the environment passed by name as `envir`.
get("a", envir = e, inherits = FALSE),
# Direct call to the internal function, bypassing the R-level get() wrapper.
# NOTE(review): .Internal() signatures are not a public API; this four-argument
# form matches the R 3.1.2 session recorded below -- confirm on other versions.
.Internal(get("a", e, "any", FALSE)),
# Environment subsetting operators.
e$a,
e[["a"]],
# `[[` looked up through .Primitive() and then called like a regular function.
.Primitive("[[")(e, "a"),
# Report timings in microseconds.
unit = "us"
)
#> median name
#> 1 1.0300 get("a", e, inherits = FALSE)
#> 2 0.9425 get("a", envir = e, inherits = FALSE)
#> 3 0.3080 .Internal(get("a", e, "any", FALSE))
#> 4 0.2305 e$a
#> 5 0.1740 e[["a"]]
#> 6 0.2905 .Primitive("[[")(e, "a")
A similar thing happens with exists(): the R function wrapper adds significant overhead on top of .Internal(exists()). It’s also faster to use $ and [[, then test for NULL, but of course this won’t distinguish between objects that don’t exist, and those that do exist but have a NULL value:
# Test for existence of `a` (which exists), and `c` (which doesn't).
# The same six approaches are timed once for each name.
microbenchmark(
# --- `a`: bound in `e` ---
# exists() with the environment passed positionally, then by name as `envir`.
exists('a', e, inherits = FALSE),
exists('a', envir = e, inherits = FALSE),
# Direct call to the internal function, bypassing the R-level exists() wrapper.
# NOTE(review): .Internal() signatures are not a public API; confirm this form
# against the R version in use.
.Internal(exists('a', e, 'any', FALSE)),
# List every name in `e` (including dot-prefixed ones) and search that vector.
'a' %in% ls(e, all.names = TRUE),
# Look the value up and test for NULL; this cannot tell a missing binding
# apart from one whose value is NULL.
is.null(e[['a']]),
is.null(e$a),
# --- `c`: not bound in `e` ---
exists('c', e, inherits = FALSE),
exists('c', envir = e, inherits = FALSE),
.Internal(exists('c', e, 'any', FALSE)),
'c' %in% ls(e, all.names = TRUE),
is.null(e[['c']]),
is.null(e$c),
# Report timings in microseconds.
unit = "us"
)
#> median name
#> 1 1.2015 exists("a", e, inherits = FALSE)
#> 2 1.0545 exists("a", envir = e, inherits = FALSE)
#> 3 0.3615 .Internal(exists("a", e, "any", FALSE))
#> 4 7.6345 "a" %in% ls(e, all.names = TRUE)
#> 5 0.3055 is.null(e[["a"]])
#> 6 0.3270 is.null(e$a)
#> 7 1.1890 exists("c", e, inherits = FALSE)
#> 8 1.0370 exists("c", envir = e, inherits = FALSE)
#> 9 0.3465 .Internal(exists("c", e, "any", FALSE))
#> 10 7.5475 "c" %in% ls(e, all.names = TRUE)
#> 11 0.2675 is.null(e[["c"]])
#> 12 0.3010 is.null(e$c)
# Record the R version, platform and package versions behind the timings above.
sessionInfo()
#> R version 3.1.2 (2014-10-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
#> [4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] microbenchmark_1.3-0
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.4 evaluate_0.5.5 formatR_1.0 htmltools_0.2.6 knitr_1.6
#> [6] rmarkdown_0.3.13 stringr_0.6.2 tools_3.1.2 yaml_2.1.13