library(microbenchmark)
library(Matrix)
library(data.table)
library(RSQLite)
library(rhdf5)
library(lz4)
library(ggplot2)
# Benchmark directory: the "benchmark" folder under the shared location.
path <- file.path("/loc/no-backup/mike/shared", "benchmark")

# Project helper scripts, resolved relative to the current working directory.
# NOTE(review): presumably these provide the I/O routines and the sparse-matrix
# simulator used by the benchmark -- confirm against the scripts themselves.
source("IO.R")
source("simulate_sparse_mat.R")

# Benchmark different formats

# Sparsity (zero-value rate)

# Distinct sparsity values for every (nCells, beta_value) group; the unnamed
# j-expression is reported in the auto-named column V1.
dt[, unique(sparsity), by = c("nCells", "beta_value")]
##     nCells             beta_value        V1
##  1:   1000 alpha = 0.1 beta = 0.8 0.9225260
##  2:   4000 alpha = 0.1 beta = 0.8 0.9243426
##  3:   7000 alpha = 0.1 beta = 0.8 0.9227970
##  4:  20000 alpha = 0.1 beta = 0.8 0.9229823
##  5:  50000 alpha = 0.1 beta = 0.8 0.9224632
##  6:   1000 alpha = 0.2 beta = 0.9 0.8730451
##  7:   4000 alpha = 0.2 beta = 0.9 0.8713663
##  8:   7000 alpha = 0.2 beta = 0.9 0.8717319
##  9:  20000 alpha = 0.2 beta = 0.9 0.8719348
## 10:  50000 alpha = 0.2 beta = 0.9 0.8712112
## 11:   1000 alpha = 0.3 beta = 0.7 0.7897353
## 12:   4000 alpha = 0.3 beta = 0.7 0.7927173
## 13:   7000 alpha = 0.3 beta = 0.7 0.7883399
## 14:  20000 alpha = 0.3 beta = 0.7 0.7883345
## 15:  50000 alpha = 0.3 beta = 0.7 0.7901626
## 16:   1000 alpha = 0.4 beta = 0.6 0.7195631
## 17:   4000 alpha = 0.4 beta = 0.6 0.7213056
## 18:   7000 alpha = 0.4 beta = 0.6 0.7199958
## 19:  20000 alpha = 0.4 beta = 0.6 0.7184199
## 20:  50000 alpha = 0.4 beta = 0.6 0.7190410

# Plot the results

# Timing plot: time against nCells, one coloured line (with point markers) per
# format, panelled by slicing (rows) and beta_value (columns). Panel scales are
# free and the y axis is log10-transformed.
ggplot(dt, aes(x = nCells, y = time, color = format)) +
  geom_line() +
  geom_point() +
  facet_grid(slicing ~ beta_value, scales = "free") +
  ylab("time (ms)") +
  scale_x_continuous(breaks = nCellsVec) +
  ggtitle("time") +
  scale_y_log10()
# [figure] plot of chunk unnamed-chunk-4

# [figure] plot of chunk unnamed-chunk-4

# Size plot: size against nCells, one coloured line (with point markers) per
# format, one panel per beta_value. Rows whose format is "H5(chunked.read)" are
# dropped before plotting.
ggplot(
  dt[format != "H5(chunked.read)"],
  aes(x = nCells, y = size, color = format)
) +
  geom_line() +
  geom_point() +
  facet_wrap(~beta_value) +
  ylab("size (GB)") +
  scale_x_continuous(breaks = nCellsVec) +
  ggtitle("space")
# [figure] plot of chunk unnamed-chunk-4

# [figure] plot of chunk unnamed-chunk-4