library(plyr)
library(data.table)
# Benchmark four ways of recombining a list of data frames (as produced by
# split()) into one table, for increasing input sizes:
#   classic    - grow a data.frame with rbind() inside a loop
#   docall     - a single do.call("rbind", ...) call
#   rbindfill  - plyr's rbind.fill()
#   data.table - preallocate a data.table and fill it by reference
# The result, time.df, holds one row per input size: the total number of rows
# (n) and the elapsed wall-clock time of each method, always in seconds.
sizes <- c(20000, 50000, 1e+05, 2e+05, 3e+05, 4e+05, 5e+05, 6e+05)

# One single-row data frame per size; bound together once after the loop
# instead of growing time.df on every iteration.
timings <- vector("list", length(sizes))

for (k in seq_along(sizes)) {
  i <- sizes[[k]]
  # cat(i, "\n")

  # Test data: i * 6 rows, a six-level key column and three numeric columns.
  n.rows <- i * 6
  df <- data.frame(
    a = rep(c("A", "B", "C", "D", "E", "F"), i),
    b = sample(rnorm(n.rows), n.rows),
    c = sample(rnorm(n.rows), n.rows),
    d = sample(rnorm(n.rows), n.rows)
  )
  split.df <- split(df, df$a)

  # Method 1: repeated rbind() in a loop. Growing the result like this is
  # intentional here -- it is the baseline being measured.
  t1 <- Sys.time()
  df1 <- data.frame()
  for (subdf in split.df) {
    df1 <- rbind(df1, subdf)
  }
  t2 <- Sys.time()

  # Method 2: a single rbind() over the whole list.
  t3 <- Sys.time()
  df2 <- do.call("rbind", split.df)
  t4 <- Sys.time()

  # Method 3: plyr::rbind.fill().
  t5 <- Sys.time()
  df3 <- rbind.fill(split.df)
  t6 <- Sys.time()

  # Method 4: preallocate a data.table of the final size, then copy each
  # piece into its row range by reference.
  t7 <- Sys.time()
  # vapply() pins the result to an integer vector regardless of input shape.
  n <- sum(vapply(split.df, nrow, integer(1)))
  dt <- data.table(
    a = vector("character", n),
    b = vector("numeric", n),
    c = vector("numeric", n),
    d = vector("numeric", n)
  )
  start.index <- 1
  for (subdf in split.df) {
    end.index <- start.index + nrow(subdf) - 1
    for (name in colnames(subdf)) {
      # set() assigns by reference like `:=`, takes the column name as a
      # plain string, and is the form data.table documents for use in loops.
      set(dt, i = start.index:end.index, j = name, value = subdf[[name]])
    }
    start.index <- end.index + 1
  }
  t8 <- Sys.time()

  # Force seconds: with the default units = "auto", a run longer than a
  # minute would be reported in minutes and the rows would no longer be
  # comparable once stacked into one column.
  timings[[k]] <- data.frame(
    n = n.rows,
    classic = difftime(t2, t1, units = "secs"),
    docall = difftime(t4, t3, units = "secs"),
    rbindfill = difftime(t6, t5, units = "secs"),
    data.table = difftime(t8, t7, units = "secs")
  )
}

time.df <- do.call(rbind, timings)
time.df
## n classic docall rbindfill data.table
## 1 120000 0.5179 secs 0.2214 secs 0.1533 secs 0.1003 secs
## 2 300000 0.7703 secs 0.5925 secs 0.3121 secs 0.1683 secs
## 3 600000 1.1631 secs 1.2249 secs 0.5113 secs 0.3003 secs
## 4 1200000 2.3787 secs 3.0730 secs 1.0282 secs 0.5440 secs
## 5 1800000 3.2554 secs 3.5348 secs 1.2719 secs 0.7111 secs
## 6 2400000 4.3607 secs 5.0810 secs 1.4441 secs 0.9351 secs
## 7 3000000 4.7178 secs 5.9982 secs 2.5647 secs 0.9330 secs
## 8 3600000 5.4804 secs 7.3200 secs 2.2450 secs 1.3701 secs