library(plyr)
library(ggplot2)

# Remove all (non-hidden) objects from the current environment before the
# benchmark starts. Attached packages and options are not affected by this.
# NOTE(review): when this is run in an interactive session it wipes the
# user's workspace; running the document in a fresh R process is the safer
# way to get a clean state -- confirm whether this line is still needed.
rm(list = ls())

The measured call processes the data row-wise and combines the results using rbind.fill. The data frame contains a single factor column with one row per level. Garbage collection and one warm-up run are performed before the measurement.

# Benchmark one row-wise adply() pass over a data frame that holds a single
# factor column with N levels, one row per level.
#
# N: number of factor levels (and therefore rows) in the test data frame.
#
# Returns the system.time() result of the second adply() run; the first run
# is a warm-up whose timing is discarded.
#
# NOTE: defining this at top level masks base::t() (matrix transpose) for
# code that looks it up from the global environment.
t <- function(N) {
    gc()
    frame <- data.frame(a = factor(1:N))

    # Warm-up run: same call as the measured one, result thrown away.
    system.time(adply(frame, 1))

    # Measured run: its timing is the return value.
    measured <- system.time(adply(frame, 1))
    measured
}

The measurement is performed for the following numbers of levels:

# Numbers of factor levels (= rows) to benchmark: 400 to 8000 in steps of 400.
N <- seq(400, 8000, by = 400)
print(N)
##  [1]  400  800 1200 1600 2000 2400 2800 3200 3600 4000 4400 4800 5200 5600
## [15] 6000 6400 6800 7200 7600 8000

The timings are collected and analyzed below:

# Run the benchmark once for every entry of N and gather the timings into a
# single data frame with one row per N. adply() hands the callback one row of
# the input data frame at a time; its only column, N, holds the number of
# levels to benchmark.
timing <- adply(data.frame(N = N), 1, function(piece) {
    t(piece$N)
})

# Show the collected timings.
timing
##       N user.self sys.self elapsed user.child sys.child
## 1   400     0.264    0.000   0.263          0         0
## 2   800     1.576    0.008   1.589          0         0
## 3  1200     3.056    0.008   3.073          0         0
## 4  1600     6.168    0.044   6.231          0         0
## 5  2000    11.697    0.008  11.744          0         0
## 6  2400    21.529    0.104  21.692          0         0
## 7  2800    22.217    0.052  22.332          0         0
## 8  3200    34.050    0.100  34.248          0         0
## 9  3600    36.746    0.056  36.915          0         0
## 10 4000    50.627    0.044  50.854          0         0
## 11 4400    67.208    0.300  67.721          0         0
## 12 4800    72.496    0.140  72.871          0         0
## 13 5200    68.925    0.280  69.384          0         0
## 14 5600   113.175    0.152 113.752          0         0
## 15 6000    85.709    0.632  86.748          0         0
## 16 6400   122.148    0.364 122.918          0         0
## 17 6800   160.610    0.272 161.380          0         0
## 18 7200   139.176    1.064 140.746          0         0
## 19 7600   162.114    0.692 163.488          0         0
## 20 8000   189.640    0.632 190.835          0         0

# Express the number of rows in thousands (presumably to keep the fitted
# coefficients on a readable scale), then fit a quadratic model of the user
# CPU time against it.
timing[["kN"]] <- timing[["N"]] / 1000
lm(user.self ~ kN + I(kN^2), data = timing)
## 
## Call:
## lm(formula = user.self ~ kN + I(kN^2), data = timing)
## 
## Coefficients:
## (Intercept)           kN      I(kN^2)  
##       -3.04         2.84         2.59

# Scatter plot of user CPU time against the number of rows (in thousands).
ggplot(timing, aes(x = kN, y = user.self)) +
    geom_point()

plot of chunk unnamed-chunk-4

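If the run time were linear in the number of rows, the throughput kN/user.self (thousands of rows processed per second of user time) would be constant, so the points would lie on a horizontal line in this plot: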

# Scatter plot of throughput (thousands of rows per second of user CPU time)
# against the number of rows (in thousands).
ggplot(timing, aes(x = kN, y = kN / user.self)) +
    geom_point()

plot of chunk unnamed-chunk-5


# Print the R version, platform, locale and package versions of this session,
# so the timings above can be related to the software that produced them.
sessionInfo()
## R version 3.0.3 (2014-03-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_0.9.3.1.99 plyr_1.8.1         knitr_1.5.5       
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-1   dichromat_2.0-0    digest_0.6.3      
##  [4] evaluate_0.5.1     formatR_0.9        grid_3.0.3        
##  [7] gtable_0.1.2       labeling_0.1       MASS_7.3-29       
## [10] munsell_0.4        proto_0.3-10       RColorBrewer_1.0-5
## [13] Rcpp_0.11.0        reshape2_1.2.2     scales_0.2.3      
## [16] stringr_0.6.2      tools_3.0.3