# for, lapply, vapply timings

# ========================
library(microbenchmark)
library(compiler)
library(plyr)
# Function applied to every element in each benchmark.
square <- function(x) x^2

# Candidates under test. Each one squares the integers 1..n.
# seq_len(n) is used rather than 1:n so that n = 0 performs zero iterations
# (1:0 would count down over c(1, 0) and do two).

# Plain for loop; each square is computed and discarded.
f_for <- function(n) for (x in seq_len(n)) x^2
# lapply; returns a list holding the n squares.
f_lapply <- function(n) lapply(seq_len(n), square)
# vapply; returns a numeric vector holding the n squares.
f_vapply <- function(n) vapply(seq_len(n), square, numeric(1))

# Byte-compiled counterparts of the functions above. The compiled apply
# variants also call the compiled squaring function.
c_square <- cmpfun(square)
c_for <- cmpfun(f_for)
c_lapply <- cmpfun(function(n) lapply(seq_len(n), c_square))
c_vapply <- cmpfun(function(n) vapply(seq_len(n), c_square, numeric(1)))

# Run benchmarks for n=10^pow10 cycles
#
# pow10: base-10 exponent of the problem size; every candidate function is
#        called with n = 10^pow10.
# Returns the microbenchmark timings with an extra `size` column set to n.
run_benchmarks <- function(pow10) {
    n <- 10^pow10
    # Run a maximum of 100 iterations, and drop off until there's 1 iteration
    # for 1e7. The result is clamped at 1 so that sizes above 1e7, where the
    # rounded ratio would be 0, are still timed once.
    times <- max(1, round(min(1e+07/n, 100)))
    res <- microbenchmark(f_for(n), f_lapply(n), f_vapply(n), c_for(n), c_lapply(n), 
        c_vapply(n), times = times)

    # Record the problem size alongside each timing
    res$size <- n
    res
}

# Run the benchmarks for sizes 10^1 through 10^7 and stack the timings into
# a single data frame
res <- ldply(1:7, run_benchmarks)


# Summarize each expression/size combination: number of timings, mean time,
# standard deviation, and standard error of the mean (times are in ns)
res_summary <- ddply(
    res, c("expr", "size"), summarise,
    N = length(time),
    m_time = mean(time),
    sd = sd(time),
    se = sd/sqrt(N)
)

# Mean time divided by the number of iterations performed
res_summary$iter_time <- with(res_summary, m_time/size)

# Label each row by whether the expression is a byte-compiled variant
# (name starts with "c_"); FALSE/TRUE index the first/second label
res_summary$compiled <- c("non-compiled", "compiled")[grepl("^c_", res_summary$expr) + 1L]

# Loop construct: the expression text without its leading "f_" / "c_" prefix,
# e.g. "for(n)"
res_summary$type <- sub("^._", "", as.character(res_summary$expr))

# Print the summary ordered by size, then compiled status, then construct
arrange(res_summary, size, compiled, type)
##           expr  size   N    m_time        sd        se iter_time
## 1     c_for(n) 1e+01 100 2.331e+03 6.056e+02 6.056e+01     233.1
## 2  c_lapply(n) 1e+01 100 1.012e+04 2.071e+03 2.071e+02    1012.1
## 3  c_vapply(n) 1e+01 100 1.184e+04 4.055e+03 4.055e+02    1183.9
## 4     f_for(n) 1e+01 100 3.411e+03 5.213e+02 5.213e+01     341.1
## 5  f_lapply(n) 1e+01 100 1.165e+04 3.163e+03 3.163e+02    1164.8
## 6  f_vapply(n) 1e+01 100 1.306e+04 2.648e+03 2.648e+02    1306.0
## 7     c_for(n) 1e+02 100 1.990e+04 6.525e+04 6.525e+03     199.0
## 8  c_lapply(n) 1e+02 100 7.398e+04 1.308e+04 1.308e+03     739.8
## 9  c_vapply(n) 1e+02 100 7.766e+04 1.436e+04 1.436e+03     776.6
## 10    f_for(n) 1e+02 100 3.209e+04 6.639e+04 6.639e+03     320.9
## 11 f_lapply(n) 1e+02 100 8.805e+04 1.084e+04 1.084e+03     880.5
## 12 f_vapply(n) 1e+02 100 9.150e+04 1.314e+04 1.314e+03     915.0
## 13    c_for(n) 1e+03 100 1.101e+05 1.598e+04 1.598e+03     110.1
## 14 c_lapply(n) 1e+03 100 6.370e+05 1.282e+05 1.282e+04     637.0
## 15 c_vapply(n) 1e+03 100 7.022e+05 3.589e+05 3.589e+04     702.2
## 16    f_for(n) 1e+03 100 2.291e+05 7.336e+04 7.336e+03     229.1
## 17 f_lapply(n) 1e+03 100 8.094e+05 2.086e+05 2.086e+04     809.4
## 18 f_vapply(n) 1e+03 100 8.147e+05 1.717e+05 1.717e+04     814.7
## 19    c_for(n) 1e+04 100 1.344e+06 4.767e+05 4.767e+04     134.4
## 20 c_lapply(n) 1e+04 100 7.907e+06 3.565e+06 3.565e+05     790.7
## 21 c_vapply(n) 1e+04 100 7.722e+06 2.811e+06 2.811e+05     772.2
## 22    f_for(n) 1e+04 100 2.452e+06 4.524e+05 4.524e+04     245.2
## 23 f_lapply(n) 1e+04 100 8.692e+06 1.407e+06 1.407e+05     869.2
## 24 f_vapply(n) 1e+04 100 8.749e+06 9.409e+05 9.409e+04     874.9
## 25    c_for(n) 1e+05 100 1.485e+07 7.084e+06 7.084e+05     148.5
## 26 c_lapply(n) 1e+05 100 9.667e+07 2.086e+07 2.086e+06     966.7
## 27 c_vapply(n) 1e+05 100 7.923e+07 1.625e+07 1.625e+06     792.3
## 28    f_for(n) 1e+05 100 2.661e+07 7.814e+06 7.814e+05     266.1
## 29 f_lapply(n) 1e+05 100 1.062e+08 2.215e+07 2.215e+06    1061.8
## 30 f_vapply(n) 1e+05 100 9.298e+07 1.741e+07 1.741e+06     929.8
## 31    c_for(n) 1e+06  10 1.663e+08 6.281e+07 1.986e+07     166.3
## 32 c_lapply(n) 1e+06  10 1.777e+09 2.162e+08 6.836e+07    1776.9
## 33 c_vapply(n) 1e+06  10 7.241e+08 7.160e+07 2.264e+07     724.1
## 34    f_for(n) 1e+06  10 2.420e+08 4.454e+07 1.409e+07     242.0
## 35 f_lapply(n) 1e+06  10 1.467e+09 7.177e+07 2.270e+07    1466.9
## 36 f_vapply(n) 1e+06  10 8.320e+08 7.565e+07 2.392e+07     832.0
## 37    c_for(n) 1e+07   1 1.950e+09        NA        NA     195.0
## 38 c_lapply(n) 1e+07   1 3.798e+10        NA        NA    3798.4
## 39 c_vapply(n) 1e+07   1 6.800e+09        NA        NA     680.0
## 40    f_for(n) 1e+07   1 2.266e+09        NA        NA     226.6
## 41 f_lapply(n) 1e+07   1 2.152e+10        NA        NA    2152.3
## 42 f_vapply(n) 1e+07   1 8.894e+09        NA        NA     889.4
##        compiled      type
## 1      compiled    for(n)
## 2      compiled lapply(n)
## 3      compiled vapply(n)
## 4  non-compiled    for(n)
## 5  non-compiled lapply(n)
## 6  non-compiled vapply(n)
## 7      compiled    for(n)
## 8      compiled lapply(n)
## 9      compiled vapply(n)
## 10 non-compiled    for(n)
## 11 non-compiled lapply(n)
## 12 non-compiled vapply(n)
## 13     compiled    for(n)
## 14     compiled lapply(n)
## 15     compiled vapply(n)
## 16 non-compiled    for(n)
## 17 non-compiled lapply(n)
## 18 non-compiled vapply(n)
## 19     compiled    for(n)
## 20     compiled lapply(n)
## 21     compiled vapply(n)
## 22 non-compiled    for(n)
## 23 non-compiled lapply(n)
## 24 non-compiled vapply(n)
## 25     compiled    for(n)
## 26     compiled lapply(n)
## 27     compiled vapply(n)
## 28 non-compiled    for(n)
## 29 non-compiled lapply(n)
## 30 non-compiled vapply(n)
## 31     compiled    for(n)
## 32     compiled lapply(n)
## 33     compiled vapply(n)
## 34 non-compiled    for(n)
## 35 non-compiled lapply(n)
## 36 non-compiled vapply(n)
## 37     compiled    for(n)
## 38     compiled lapply(n)
## 39     compiled vapply(n)
## 40 non-compiled    for(n)
## 41 non-compiled lapply(n)
## 42 non-compiled vapply(n)

# Plot
library(ggplot2)
library(scales)
# Log10 axis scales. Breaks are chosen in log10 space and mapped back with
# 10^x; tick labels are formatted as powers of ten.
xlog <- scale_x_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x))
)
ylog <- scale_y_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x))
)

# Base plot: mean total time against problem size. Colour distinguishes the
# loop construct, line type distinguishes compiled from non-compiled.
p <- ggplot(
    res_summary,
    aes(x = size, y = m_time, linetype = compiled, colour = type)
) +
    geom_line() +
    geom_point()

# Show with linear scale: large values are more expensive per cycle
p + labs(x = "size of data", y = "total time (ns)")

# [Figure: total time (ns) against size of data, linear axes]


# Same plot with both axes on a log10 scale
p +
    xlog +
    ylog +
    labs(x = "size of data (log)", y = "total time (ns) (log)")

# [Figure: total time (ns) against size of data, log-log axes]


# Time per iteration on y (linear), size on x (log10)
ggplot(
    res_summary,
    aes(x = size, y = iter_time, linetype = compiled, colour = type)
) +
    geom_line() +
    geom_point() +
    xlog +
    labs(x = "size of data (log)", y = "Time per iteration (ns)")

# [Figure: time per iteration (ns) against size of data, log x axis]