The idea of this paper is to compare the performance of conventional approaches to handling rows and columns (data frames) with that of faster ones.
# Start from a clean slate: remove every object in the current environment,
# then run the garbage collector, whose memory report is printed.
# NOTE(review): rm(list = ls()) also wipes the caller's objects if this script
# is sourced into an existing session — confirm that is acceptable.
rm(list=ls())
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842438 45.0 2613377 139.6 9493837 507.1
Vcells 5475159 41.8 18260854 139.4 255390158 1948.5
# Row count that drives every benchmark below (number of loop iterations and
# length of the preallocated columns).
df_size<-10000
# Benchmark: grow a data frame one row at a time with rbind(), appending a
# plain numeric vector to an initially empty (zero-row) data frame.
# NOTE(review): per ?rbind the data frame method drops zero-row arguments, so
# the x/y/z column names are probably lost on the first append — confirm if
# the result is ever inspected.
df <- data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  df <- rbind(df, c(1043, 1004545, 78788))
}
# Report the user/system/elapsed time spent building the data frame.
print(proc.time() - start)
user system elapsed
9.64 0.01 10.96
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842453 45.0 2613377 139.6 9493837 507.1
Vcells 5475184 41.8 18260854 139.4 255390158 1948.5
# Benchmark: grow a data frame one row at a time by invoking rbind() through
# do.call(), appending a plain numeric vector to an initially empty data frame.
# NOTE(review): per ?rbind the data frame method drops zero-row arguments, so
# the x/y/z column names are probably lost on the first append — confirm if
# the result is ever inspected.
df <- data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  df <- do.call("rbind", list(df, c(1043, 1004545, 78788)))
}
# Report the user/system/elapsed time spent building the data frame.
print(proc.time() - start)
user system elapsed
9.74 0.01 10.93
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842453 45.0 2613377 139.6 9493837 507.1
Vcells 5475184 41.8 18260854 139.4 255390158 1948.5
# Benchmark: rbind() a numeric vector onto a data frame that already holds
# df_size zero-filled rows.
# NOTE(review): rbind() appends rather than fills, so the result ends up with
# 2 * df_size rows and the preallocated rows stay zero — confirm this is the
# comparison that was intended.
df <- data.frame(
  x = numeric(df_size),
  y = numeric(df_size),
  z = numeric(df_size)
)
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  df <- rbind(df, c(1043, 1004545, 78788))
}
# Report the user/system/elapsed time spent in the loop.
print(proc.time() - start)
user system elapsed
29.04 0.01 30.73
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842453 45.0 2613377 139.6 9493837 507.1
Vcells 5475184 41.8 18260854 139.4 255390158 1948.5
# Benchmark: grow a data frame one row at a time with rbind(), appending a
# freshly constructed one-row data frame on every iteration.
df <- data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  # 78788 is written without a leading zero: R has no octal literals, so
  # 078788 denotes the same number but reads misleadingly.
  df <- rbind(df, data.frame(x = 1043, y = 1004545, z = 78788))
}
# Report the user/system/elapsed time spent building the data frame.
print(proc.time() - start)
user system elapsed
17.40 0.02 19.57
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842453 45.0 2613377 139.6 9493837 507.1
Vcells 5475184 41.8 18260854 139.4 255390158 1948.5
# Benchmark: preallocate a data frame with df_size zero-filled rows, then
# overwrite it cell by cell using [row, column] assignment.
df <- data.frame(
  x = numeric(df_size),
  y = numeric(df_size),
  z = numeric(df_size)
)
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  df[i, 1] <- 1043
  df[i, 2] <- 1004545
  df[i, 3] <- 78788
}
# Report the user/system/elapsed time spent filling the data frame.
print(proc.time() - start)
user system elapsed
3.48 0.02 6.21
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842453 45.0 2613377 139.6 9493837 507.1
Vcells 5475184 41.8 18260854 139.4 255390158 1948.5
# Benchmark: start from an empty (zero-row) data frame and fill it cell by
# cell using [row, column] assignment, so each new row index extends the frame.
df <- data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  df[i, 1] <- 1043
  df[i, 2] <- 1004545
  df[i, 3] <- 78788
}
# Report the user/system/elapsed time spent filling the data frame.
print(proc.time() - start)
user system elapsed
7.97 0.02 13.56
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842453 45.0 2613377 139.6 9493837 507.1
Vcells 5475184 41.8 18260854 139.4 255390158 1948.5
The example is based upon here
library(data.table)
# Benchmark: grow the table one row at a time with data.table's rbindlist(),
# then convert the result back to a plain data frame once at the end. The
# conversion is part of the timed region.
df <- data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
# The row to append is built once, outside the loop. 78788 is written without
# a leading zero: R has no octal literals, so 078788 is the same number.
df_row <- data.frame(x = 1043, y = 1004545, z = 78788)
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  df <- rbindlist(list(df, df_row))
}
df <- as.data.frame(df)
# Report the user/system/elapsed time spent building the data frame.
print(proc.time() - start)
user system elapsed
2.94 0.01 4.37
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 843006 45.1 2613377 139.6 9493837 507.1
Vcells 5699949 43.5 18260854 139.4 255390158 1948.5
The example is based upon here
# Benchmark: collect all df_size rows in a list first, then bind them with a
# single rbindlist() call instead of growing the result inside a loop.
# 78788 is written without a leading zero: R has no octal literals, so 078788
# is the same number.
df_row <- data.frame(x = 1043, y = 1004545, z = 78788)
start <- proc.time()
# seq_len() gives an empty list when df_size is 0, whereas 1:df_size would
# produce two rows. A single as.data.frame() conversion is enough to turn the
# data.table into a plain data frame.
df <- as.data.frame(
  rbindlist(lapply(seq_len(df_size), function(i) df_row))
)
# Report the user/system/elapsed time spent building the data frame.
print(proc.time() - start)
user system elapsed
0.02 0.02 0.16
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842476 45.0 2613377 139.6 9493837 507.1
Vcells 5479363 41.9 18260854 139.4 255390158 1948.5
library(plyr)
# Benchmark: grow a data frame one row at a time with plyr's rbind.fill(),
# appending a freshly constructed one-row data frame on every iteration.
df <- data.frame(x = numeric(0), y = numeric(0), z = numeric(0))
start <- proc.time()
# seq_len() gives an empty sequence when df_size is 0, whereas 1:df_size
# would iterate over c(1, 0).
for (i in seq_len(df_size)) {
  # 78788 is written without a leading zero: R has no octal literals, so
  # 078788 denotes the same number but reads misleadingly.
  df <- rbind.fill(df, data.frame(x = 1043, y = 1004545, z = 78788))
}
# Report the user/system/elapsed time spent building the data frame.
print(proc.time() - start)
user system elapsed
20.61 0.06 26.91
# Discard the benchmark result, then run the garbage collector; its memory
# report is printed.
rm(df)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 842463 45.0 2613377 139.6 9493837 507.1
Vcells 5475196 41.8 18260854 139.4 255390158 1948.5