suppressPackageStartupMessages(
{ library(flowCore)
library(flowWorkspace)
library(CytoML)
library(microbenchmark)
library(ggcyto)
})
## Warning: replacing previous import 'ncdfFlow::filter' by 'dplyr::filter' when
## loading 'flowWorkspace'
on-disk idx is turned off by default
# Called with no arguments to report the current setting; the echoed output
# below shows FALSE.
use_on_disk_idx()
## [1] FALSE
parse flowjo workspace
# Locate the example data directory shipped with flowWorkspaceData, find the
# workspace file whose name matches "manual.xml", and open it.
dataDir <- system.file("extdata", package = "flowWorkspaceData")
# `full.names` is spelled out instead of relying on partial matching of `full`.
wsfile <- list.files(dataDir, pattern = "manual.xml", full.names = TRUE)
ws <- open_flowjo_xml(wsfile)
parse it with in-mem idx
# set_log_level("Gate")
# Time the workspace parse; the resulting object is kept in `gs`.
system.time(
  gs <- flowjo_to_gatingset(ws, path = dataDir, name = 4, subset = 1)
)
## user system elapsed
## 0.308 0.032 0.339
# Take the first element of `gs` and query its idx URI; the echoed output
# below shows an empty string for this in-memory parse.
gh <- gs[[1]]
gh_idx_get_uri(gh)
## [1] ""
parse it with on-disk idx
save gs
during saving, the in-mem idx cache is flushed to on-disk idx
# Two fresh temporary paths: one target for each saved copy.
tmp_mem <- tempfile()
tmp_ondisk <- tempfile()
# Time saving the in-memory-idx object.
system.time(save_gs(gs, tmp_mem))
## user system elapsed
## 0.013 0.004 0.028
# NOTE(review): `gs2` is not assigned anywhere above this line in the visible
# source (its first assignment is the later `gs2 <- load_gs(tmp_ondisk)`), so
# as shown this call would fail with "object 'gs2' not found". Presumably the
# step that re-parses the workspace with on-disk idx enabled is missing here
# -- TODO restore it.
system.time(save_gs(gs2, tmp_ondisk))
## user system elapsed
## 0.023 0.000 0.027
# Print the disk usage of the files inside each saved directory, with a grand
# total. Options come before the operands because `du` builds whose option
# parser does not permute arguments (e.g. BSD/macOS) treat a trailing "-ch" as
# a file name. The directory is shell-quoted so unusual temp paths survive; the
# "/*" stays outside the quotes so the shell still expands the glob.
system(paste0("du -ch ", shQuote(tmp_mem), "/*"))
system(paste0("du -ch ", shQuote(tmp_ondisk), "/*"))
load gs
the archive saved with on-disk idx has a smaller pb file, so it loads faster
# Benchmark reloading each saved copy (two evaluations per expression); the
# assignments leave the reloaded objects in `gs` and `gs2`.
microbenchmark(
  gs <- load_gs(tmp_mem),
  gs2 <- load_gs(tmp_ondisk),
  times = 2
)
## Unit: milliseconds
## expr min lq mean median uq
## gs <- load_gs(tmp_mem) 6.416442 6.416442 9.259159 9.259159 12.101877
## gs2 <- load_gs(tmp_ondisk) 2.569334 2.569334 2.576967 2.576967 2.584599
## max neval
## 12.101877 2
## 2.584599 2
# First element of each reloaded object; these are the inputs to the
# index-retrieval benchmark further down.
gh <- gs[[1]]
gh2 <- gs2[[1]]
retrieving idx
# Fetch the indices of every population path in `gh`, discarding the results.
# Used only to exercise index retrieval; the value is the loop's NULL.
traverse_idx <- function(gh) {
  for (pop_path in gh_get_pop_paths(gh)) {
    gh_pop_get_indices(gh, pop_path)
  }
}
on-disk idx is only loaded on-demand, thus initial read is a little slower
# Single timed traversal of each handle, so the measurement reflects the
# first read of the indices.
microbenchmark(
  traverse_idx(gh),
  traverse_idx(gh2),
  times = 1
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## traverse_idx(gh) 8.94083 8.94083 8.94083 8.94083 8.94083 8.94083 1
## traverse_idx(gh2) 21.52728 21.52728 21.52728 21.52728 21.52728 21.52728 1