suppressPackageStartupMessages(
  { library(flowCore)
    library(flowWorkspace)
    library(microbenchmark)
  })
## Warning: replacing previous import 'ncdfFlow::filter' by 'dplyr::filter' when
## loading 'flowWorkspace'

Simulate a big GatingSet (gs), first with an in-memory index (idx)

# Load the GvHD dataset and keep its first two elements as the input.
data("GvHD")
fs <- GvHD[1:2]
# Turn the on-disk idx option off before building the tree, so `gs` is
# generated while use_on_disk_idx(FALSE) is in effect.
use_on_disk_idx(FALSE)
## [1] FALSE
# NOTE(review): gen_big_tree() is not defined in this view; presumably it
# builds a large gating tree on top of `fs` -- confirm against its source.
gs <- gen_big_tree(fs)
# The idx URI recorded for the first sample is the empty string here.
gh_idx_get_uri(gs[[1]])
## [1] ""

The same simulation with an on-disk index (idx)

# Turn the on-disk idx option on before building the second tree, so `gs2`
# is generated while use_on_disk_idx(TRUE) is in effect.
use_on_disk_idx(TRUE)
## [1] TRUE
# NOTE(review): gen_big_tree() is not defined in this view; presumably it
# builds a large gating tree on top of `fs` -- confirm against its source.
gs2 <- gen_big_tree(fs)
# Number of population paths in the generated tree.
length(gs_get_pop_paths(gs2))
## [1] 2381
# With the option on, the first sample reports a non-empty idx URI.
gh_idx_get_uri(gs2[[1]])
## [1] "/tmp/71cb469b-3617-4435-8959-ccc367825540.idx"

Gating (i.e. `recompute`) time is comparable, due to the cached idx

# Time a single recompute() pass on each of the two gating sets.
microbenchmark(
  recompute(gs),
  recompute(gs2),
  times = 1
)
## Unit: milliseconds
##            expr      min       lq     mean   median       uq      max neval
##   recompute(gs) 384.2313 384.2313 384.2313 384.2313 384.2313 384.2313     1
##  recompute(gs2) 378.6411 378.6411 378.6411 378.6411 378.6411 378.6411     1

The saved pb files are smaller with the on-disk idx

# Save each gating set to its own fresh temporary path and time the save.
tmp_mem <- tempfile()
tmp_ondisk <- tempfile()
system.time(save_gs(gs, tmp_mem))
##    user  system elapsed 
##   0.105   0.000   0.123
system.time(save_gs(gs2, tmp_ondisk))
##    user  system elapsed 
##   0.094   0.005   0.109
# Report the on-disk size of every saved entry plus a grand total.
# Options go before the operands so the command does not rely on GNU-style
# argument permutation; the path is shell-quoted while the trailing glob is
# left outside the quotes so the shell still expands it.
system(paste0("du -ch ", shQuote(tmp_mem), "/*"), intern = TRUE)
## [1] "4.0K\t/tmp/Rtmptnzlo8/file45f261ae2521/71591ede-5867-4e4d-87b6-18dbd1280ece.gs"
## [2] "140K\t/tmp/Rtmptnzlo8/file45f261ae2521/s5a01.h5"                               
## [3] "1.3M\t/tmp/Rtmptnzlo8/file45f261ae2521/s5a01.pb"                               
## [4] "140K\t/tmp/Rtmptnzlo8/file45f261ae2521/s5a02.h5"                               
## [5] "1.3M\t/tmp/Rtmptnzlo8/file45f261ae2521/s5a02.pb"                               
## [6] "2.8M\ttotal"
system(paste0("du -ch ", shQuote(tmp_ondisk), "/*"), intern = TRUE)
##  [1] "4.0K\t/tmp/Rtmptnzlo8/file45f24974ada/28c30897-b7c0-45b8-9efa-225bdafda752.gs"
##  [2] "140K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a01.h5"                               
##  [3] "28K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a01.idx/__meta"                        
##  [4] "36K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a01.idx"                               
##  [5] "220K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a01.pb"                               
##  [6] "140K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a02.h5"                               
##  [7] "24K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a02.idx/__meta"                        
##  [8] "32K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a02.idx"                               
##  [9] "220K\t/tmp/Rtmptnzlo8/file45f24974ada/s5a02.pb"                               
## [10] "792K\ttotal"

Loading the gs is faster with the on-disk idx

# Time reloading each saved gating set (two runs each); the assignments
# also rebind `gs` and `gs2` to the reloaded objects.
microbenchmark(
  gs <- load_gs(tmp_mem),
  gs2 <- load_gs(tmp_ondisk),
  times = 2
)
## Unit: milliseconds
##                        expr      min       lq     mean   median       uq
##      gs <- load_gs(tmp_mem) 64.89984 64.89984 66.68586 66.68586 68.47187
##  gs2 <- load_gs(tmp_ondisk) 28.68195 28.68195 29.63268 29.63268 30.58341
##       max neval
##  68.47187     2
##  30.58341     2

Moderately slower initial idx reading

# Visit every population path of a gating hierarchy and fetch its indices.
#
# `gh` is passed unchanged to gh_get_pop_paths() and gh_pop_get_indices().
# The fetched indices are discarded: the function exists only to trigger the
# reads so that they can be timed. Returns NULL invisibly.
traverse_idx <- function(gh) {
  for (pop_path in gh_get_pop_paths(gh)) {
    gh_pop_get_indices(gh, pop_path)
  }
  invisible(NULL)
}
# Take the first sample of each reloaded gating set and time one full
# traversal of its population indices.
gh <- gs[[1]]
gh2 <- gs2[[1]]
microbenchmark(
  traverse_idx(gh),
  traverse_idx(gh2),
  times = 1
)
## Unit: milliseconds
##               expr      min       lq     mean   median       uq      max neval
##   traverse_idx(gh) 423.1465 423.1465 423.1465 423.1465 423.1465 423.1465     1
##  traverse_idx(gh2) 475.1344 475.1344 475.1344 475.1344 475.1344 475.1344     1

Subsequent reads are comparable (due to the cache)

# Repeat the same traversal timing on the same two handles.
microbenchmark(
  traverse_idx(gh),
  traverse_idx(gh2),
  times = 1
)
## Unit: milliseconds
##               expr      min       lq     mean   median       uq      max neval
##   traverse_idx(gh) 420.4788 420.4788 420.4788 420.4788 420.4788 420.4788     1
##  traverse_idx(gh2) 416.5524 416.5524 416.5524 416.5524 416.5524 416.5524     1