library(plyr)
library(ggplot2)

# Remove every object that ls() reports in the current environment so that
# leftovers from earlier work cannot leak into the measurements below.
# NOTE(review): when this file is run inside an interactive session this also
# wipes the user's workspace; attached packages are not affected.
rm(list = ls())

The call that is measured will process data row-wise and combine using rbind.fill. The data frame contains a single factor with one row per level. Garbage collection and one warm-up run are performed beforehand.

# Time a row-wise a_ply() pass over a one-column data frame.
#
# N: number of rows; column `a` is a factor built from 1:N, so every row
#    has its own level.
# Returns the system.time() result of the second pass. The first pass is a
# warm-up whose timing is thrown away; gc() runs once before both passes.
# Note: defining `t` here masks base::t() for code evaluated in this
# environment.
t <- function(N) {
    gc()
    frame <- data.frame(a = factor(1:N))

    # Warm-up pass: same call as the measured one, result discarded.
    invisible(system.time(a_ply(frame, 1, identity)))

    # Measured pass.
    measured <- system.time(a_ply(frame, 1, identity))
    measured
}

The measurement is performed for the following numbers of levels:

# Problem sizes to benchmark: 400, 800, ..., 8000.
N <- seq(from = 400, to = 8000, by = 400)
print(N)
##  [1]  400  800 1200 1600 2000 2400 2800 3200 3600 4000 4400 4800 5200 5600
## [15] 6000 6400 6800 7200 7600 8000

Analysis of the results is below:

# Run the benchmark once for every entry of N. adply() passes each one-row
# piece of the input data frame to the callback, which forwards the size
# stored in column N to t().
timing <- adply(
    data.frame(N = N),
    .margins = 1,
    .fun = function(piece) t(piece$N)
)

timing
##       N user.self sys.self elapsed user.child sys.child
## 1   400     0.072    0.004   0.080          0         0
## 2   800     0.184    0.000   0.186          0         0
## 3  1200     0.284    0.000   0.283          0         0
## 4  1600     0.668    0.008   0.677          0         0
## 5  2000     0.708    0.004   0.715          0         0
## 6  2400     0.908    0.008   0.918          0         0
## 7  2800     1.020    0.020   1.043          0         0
## 8  3200     1.152    0.028   1.184          0         0
## 9  3600     1.264    0.032   1.300          0         0
## 10 4000     1.436    0.020   1.461          0         0
## 11 4400     1.564    0.040   1.609          0         0
## 12 4800     1.712    0.064   1.781          0         0
## 13 5200     2.044    0.044   2.095          0         0
## 14 5600     2.024    0.108   2.134          0         0
## 15 6000     2.232    0.084   2.319          0         0
## 16 6400     2.372    0.136   2.512          0         0
## 17 6800     2.588    0.136   2.732          0         0
## 18 7200     2.933    0.156   3.093          0         0
## 19 7600     3.032    0.120   3.157          0         0
## 20 8000     3.149    0.120   3.277          0         0

# Express the size in thousands so the fitted coefficients are easy to read,
# then fit user time as a quadratic in kN; the I(kN^2) coefficient measures
# how far the run time departs from a straight line.
timing[["kN"]] <- timing[["N"]] / 1000
lm(user.self ~ kN + I(kN^2), data = timing)
## 
## Call:
## lm(formula = user.self ~ kN + I(kN^2), data = timing)
## 
## Coefficients:
## (Intercept)           kN      I(kN^2)  
##     -0.0370       0.3415       0.0074

# Scatter plot of user time against problem size (in thousands).
ggplot(timing, aes(x = kN, y = user.self)) +
    geom_point()

plot of chunk unnamed-chunk-4

In a linear process, the points should lie on a horizontal line in this plot:

# Scatter plot of throughput (thousands of rows per second of user time)
# against problem size.
ggplot(timing, aes(x = kN, y = kN/user.self)) +
    geom_point()

plot of chunk unnamed-chunk-5


# Print the R version, platform, locale and package versions in use, so the
# timings above can be related to a concrete software setup.
sessionInfo()
## R version 3.0.3 (2014-03-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_0.9.3.1.99 plyr_1.8.1         knitr_1.5.5       
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-1   dichromat_2.0-0    digest_0.6.3      
##  [4] evaluate_0.5.1     formatR_0.9        grid_3.0.3        
##  [7] gtable_0.1.2       labeling_0.1       MASS_7.3-29       
## [10] munsell_0.4        proto_0.3-10       RColorBrewer_1.0-5
## [13] Rcpp_0.11.0        reshape2_1.2.2     scales_0.2.3      
## [16] stringr_0.6.2      tools_3.0.3