library(plyr)
library(ggplot2)

# Start from an empty workspace: remove every object that ls() reports in the
# current environment before any benchmark code runs.
rm(list = ls())

The measured call processes the data row-wise and combines the results using rbind.fill. The data frame contains a single factor with one row per level. Garbage collection and one warm-up run are performed beforehand.

# Time a row-wise a_ply() pass over a data frame holding one factor column.
#
# N: upper bound of 1:N; the data frame gets one row (and one factor level)
#    per element of that sequence.
#
# Returns the system.time() result of the second a_ply() pass. The first pass
# is run in exactly the same way, but its timing is discarded (warm-up).
t <- function(N) {
    # Collect garbage first so the measured passes start from a clean heap.
    gc()

    level_frame <- data.frame(a = factor(1:N))

    # Warm-up pass: its timing is thrown away.
    system.time(a_ply(.data = level_frame, .margins = 1, .fun = force))

    # Measured pass: being the last expression, this timing is returned.
    measured <- system.time(
        a_ply(.data = level_frame, .margins = 1, .fun = force)
    )
    measured
}

The measurement is performed for the following numbers of levels:

# Level counts to benchmark: 400, 800, ..., 8000.
N <- seq(from = 400, to = 8000, by = 400)
N
##  [1]  400  800 1200 1600 2000 2400 2800 3200 3600 4000 4400 4800 5200 5600
## [15] 6000 6400 6800 7200 7600 8000

Analysis of the results is below:

# Run the benchmark once per level count. adply() is called with margin 1 on
# the one-column data frame of level counts; the callback extracts the value
# from the piece it receives and forwards it to t().
timing <- adply(
    data.frame(N = N),
    1,
    function(piece) {
        level_count <- piece[1, ]
        t(level_count)
    }
)

# Show the collected timings.
timing
##       N user.self sys.self elapsed user.child sys.child
## 1   400     0.072    0.008   0.081          0         0
## 2   800     0.160    0.000   0.159          0         0
## 3  1200     0.344    0.004   0.351          0         0
## 4  1600     0.684    0.000   0.686          0         0
## 5  2000     0.668    0.012   0.681          0         0
## 6  2400     0.960    0.020   0.983          0         0
## 7  2800     0.968    0.020   0.993          0         0
## 8  3200     1.188    0.016   1.208          0         0
## 9  3600     1.856    0.024   1.883          0         0
## 10 4000     1.364    0.048   1.414          0         0
## 11 4400     1.508    0.020   1.533          0         0
## 12 4800     1.661    0.040   1.706          0         0
## 13 5200     1.944    0.064   2.016          0         0
## 14 5600     2.036    0.084   2.124          0         0
## 15 6000     2.345    0.084   2.435          0         0
## 16 6400     2.376    0.096   2.479          0         0
## 17 6800     2.640    0.100   2.741          0         0
## 18 7200     2.896    0.132   3.037          0         0
## 19 7600     3.488    0.132   3.627          0         0
## 20 8000     3.032    0.136   3.175          0         0

# Express the level count in thousands, then fit user time as a quadratic
# polynomial in that rescaled count.
timing[["kN"]] <- timing[["N"]] / 1000
lm(formula = user.self ~ kN + I(kN^2), data = timing)
## 
## Call:
## lm(formula = user.self ~ kN + I(kN^2), data = timing)
## 
## Coefficients:
## (Intercept)           kN      I(kN^2)  
##    -0.02812      0.35082      0.00715

# Scatter plot of user time against the level count (in thousands).
ggplot(timing, aes(x = kN, y = user.self)) +
    geom_point()

plot of chunk unnamed-chunk-4

In a linear process, the points should lie on a horizontal line in this plot:

# Scatter plot of the ratio kN / user.self against kN.
ggplot(timing, aes(x = kN, y = kN/user.self)) +
    geom_point()

plot of chunk unnamed-chunk-5


# Record the R version, platform, locale and package versions used for this run.
utils::sessionInfo()
## R version 3.0.3 (2014-03-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_0.9.3.1.99 plyr_1.8.1         knitr_1.5.5       
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-1   dichromat_2.0-0    digest_0.6.3      
##  [4] evaluate_0.5.1     formatR_0.9        grid_3.0.3        
##  [7] gtable_0.1.2       labeling_0.1       MASS_7.3-29       
## [10] munsell_0.4        proto_0.3-10       RColorBrewer_1.0-5
## [13] Rcpp_0.11.0        reshape2_1.2.2     scales_0.2.3      
## [16] stringr_0.6.2      tools_3.0.3