Setup
Ricardo's data generation as a function
# Generate a reproducible wide data frame for the reshaping benchmarks.
#
# Arguments:
#   nrow  Number of rows (one per generated Name). Must be a single number >= 1.
#   ncol  Number of monthly date columns. Must be a single number >= 1.
#
# Returns a data.frame with `nrow` rows and `ncol + 3` columns: Name, Code,
# CURRENCY, followed by one numeric column per month starting at 1980-01-01
# (column names are the dates as character strings).
#
# Note: the function calls set.seed(1), so it resets the global RNG state and
# always returns the same data for the same arguments.
makeSomeData <- function(nrow = 100, ncol = 100) {
  stopifnot(
    is.numeric(nrow), length(nrow) == 1L, !is.na(nrow), nrow >= 1,
    is.numeric(ncol), length(ncol) == 1L, !is.na(ncol), ncol >= 1
  )
  set.seed(1)

  # Pool of four-letter strings (26 * 6 * 20 * 5 combinations); the first
  # factor varies fastest, exactly as in the rows of expand.grid().
  letterGrid <- expand.grid(LETTERS, LETTERS[10:15], LETTERS[1:20], LETTERS[1:5],
    stringsAsFactors = FALSE)
  LLLL <- do.call(paste0, unname(as.list(letterGrid)))

  size <- nrow
  # Previously this was set from `nrow`, which silently ignored `ncol`.
  dateSamples <- ncol
  startDate <- as.Date("1980-01-01")

  # Each Name is two pool entries glued together: entry i with entry size + i.
  Name <- paste0(LLLL[seq_len(size)], LLLL[size + seq_len(size)])
  Code <- sample(1000:max(10000 - 1, size + 1000), length(Name))
  CURRENCY <- sample(c("USD", "EUR", "YEN"), length(Name), TRUE)
  Dates <- seq(startDate, length.out = dateSamples, by = "month")
  Values <- sample(c(1:100, 1:500), size = size * dateSamples, TRUE) / 100

  # check.names = FALSE keeps the date strings as column names verbatim.
  data.frame(Name, Code, CURRENCY,
    matrix(Values, ncol = length(Dates),
      dimnames = list(NULL, as.character(Dates))),
    check.names = FALSE)
}
The benchmarking commands
library(data.table)
library(rbenchmark)
# Quoted data.table reshaping expression, evaluated later with eval(DT).
# It reads `data` (for the column names) and `dtt` (the data.table copy) from
# the environment it is evaluated in, and assigns `nvc` and `dateCols` there.
DT <- quote({
  nvc <- c("Name", "Code", "CURRENCY")
  dateCols <- setdiff(names(data), nvc)
  rbindlist(lapply(dateCols, function(dateCol) {
    # Stamp the current date label onto dtt by reference (the Date column is
    # added on the first pass and overwritten on every later one).
    dtt[, Date := dateCol]
    picked <- c(nvc, "Date", dateCol)
    # Column subsetting yields a new table, so the rename below does not
    # touch dtt itself.
    oneDate <- dtt[, picked, with = FALSE]
    setnames(oneDate, picked, c(nvc, "Date", "value"))
  }))
})
# Quoted base-R reshaping expression, evaluated later with eval(Manual).
# Keeps the first three columns of `data` (recycled once per remaining
# column), labels each stacked block with the name of the column it came
# from, and stacks the remaining columns into a single `value` vector.
Manual <- quote(data.frame(
  data[seq_len(3)],
  Date = rep(names(data)[-seq_len(3)], each = nrow(data)),
  value = as.vector(as.matrix(data[-seq_len(3)]))
))
Benchmarking
1000 x 8003
# Build the test data; `data` is read by both quoted expressions.
data <- makeSomeData(1000, 8000)
# data.table copy that the DT expression subsets and modifies by reference.
dtt <- data.table(data)
# Time a single evaluation of each quoted expression; rows are sorted by the
# `relative` column. Warnings raised during the run are suppressed.
suppressWarnings(benchmark(DT = eval(DT), Manual = eval(Manual), replications = 1,
columns = c("relative", "test", "elapsed", "user.self", "sys.self", "replications"),
order = "relative"))
# Recorded output (Manual ranks first at this size):
## relative test elapsed user.self sys.self replications
## 2 1.00 Manual 0.248 0.236 0.008 1
## 1 6.54 DT 1.622 1.604 0.004 1
# Drop the inputs before the next run; `dateCols` and `nvc` are presumably
# the variables left behind by evaluating DT -- confirm.
rm(data, dateCols, nvc, dtt)
1500 x 8003
# Build the test data; `data` is read by both quoted expressions.
data <- makeSomeData(1500, 8000)
# data.table copy that the DT expression subsets and modifies by reference.
dtt <- data.table(data)
# Time a single evaluation of each quoted expression; rows are sorted by the
# `relative` column. Warnings raised during the run are suppressed.
suppressWarnings(benchmark(DT = eval(DT), Manual = eval(Manual), replications = 1,
columns = c("relative", "test", "elapsed", "user.self", "sys.self", "replications"),
order = "relative"))
# Recorded output (Manual ranks first at this size):
## relative test elapsed user.self sys.self replications
## 2 1.000 Manual 0.558 0.412 0.076 1
## 1 5.188 DT 2.895 2.620 0.024 1
# Drop the inputs before the next run; `dateCols` and `nvc` are presumably
# the variables left behind by evaluating DT -- confirm.
rm(data, dateCols, nvc, dtt)
2000 x 8003
# Build the test data; `data` is read by both quoted expressions.
data <- makeSomeData(2000, 8000)
# data.table copy that the DT expression subsets and modifies by reference.
dtt <- data.table(data)
# Time a single evaluation of each quoted expression; rows are sorted by the
# `relative` column. Warnings raised during the run are suppressed.
suppressWarnings(benchmark(DT = eval(DT), Manual = eval(Manual), replications = 1,
columns = c("relative", "test", "elapsed", "user.self", "sys.self", "replications"),
order = "relative"))
# Recorded output (Manual ranks first at this size):
## relative test elapsed user.self sys.self replications
## 2 1.000 Manual 1.025 0.696 0.096 1
## 1 3.636 DT 3.727 3.588 0.016 1
# Drop the inputs before the next run; `dateCols` and `nvc` are presumably
# the variables left behind by evaluating DT -- confirm.
rm(data, dateCols, nvc, dtt)
2500 x 8003
# Build the test data; `data` is read by both quoted expressions.
data <- makeSomeData(2500, 8000)
# data.table copy that the DT expression subsets and modifies by reference.
dtt <- data.table(data)
# Time a single evaluation of each quoted expression; rows are sorted by the
# `relative` column. Warnings raised during the run are suppressed.
suppressWarnings(benchmark(DT = eval(DT), Manual = eval(Manual), replications = 1,
columns = c("relative", "test", "elapsed", "user.self", "sys.self", "replications"),
order = "relative"))
# Recorded output (Manual still ranks first, but its elapsed time is now well
# above its user + system CPU time):
## relative test elapsed user.self sys.self replications
## 2 1.000 Manual 3.084 0.972 0.388 1
## 1 1.614 DT 4.977 4.729 0.060 1
# Drop the inputs before the next run; `dateCols` and `nvc` are presumably
# the variables left behind by evaluating DT -- confirm.
rm(data, dateCols, nvc, dtt)
2750 x 8003
# Build the test data; `data` is read by both quoted expressions.
data <- makeSomeData(2750, 8000)
# data.table copy that the DT expression subsets and modifies by reference.
dtt <- data.table(data)
# Time a single evaluation of each quoted expression; rows are sorted by the
# `relative` column. Warnings raised during the run are suppressed.
suppressWarnings(benchmark(DT = eval(DT), Manual = eval(Manual), replications = 1,
columns = c("relative", "test", "elapsed", "user.self", "sys.self", "replications"),
order = "relative"))
# Recorded output (DT ranks first on elapsed time from this size on):
## relative test elapsed user.self sys.self replications
## 1 1.00 DT 5.381 5.277 0.056 1
## 2 1.38 Manual 7.428 1.268 0.412 1
# Drop the inputs before the next run; `dateCols` and `nvc` are presumably
# the variables left behind by evaluating DT -- confirm.
rm(data, dateCols, nvc, dtt)
3000 x 8003
# Build the test data; `data` is read by both quoted expressions.
data <- makeSomeData(3000, 8000)
# data.table copy that the DT expression subsets and modifies by reference.
dtt <- data.table(data)
# Time a single evaluation of each quoted expression; rows are sorted by the
# `relative` column. Warnings raised during the run are suppressed.
suppressWarnings(benchmark(DT = eval(DT), Manual = eval(Manual), replications = 1,
columns = c("relative", "test", "elapsed", "user.self", "sys.self", "replications"),
order = "relative"))
# Recorded output (DT ranks first; Manual's elapsed time far exceeds its
# user + system CPU time):
## relative test elapsed user.self sys.self replications
## 1 1.000 DT 5.806 5.693 0.072 1
## 2 4.184 Manual 24.291 1.592 0.700 1
# Drop the inputs; `dateCols` and `nvc` are presumably the variables left
# behind by evaluating DT -- confirm.
rm(data, dateCols, nvc, dtt)