Create different backends (locally)
# Attach the packages used throughout this walkthrough; the benchmarks below
# call microbenchmark() directly, so it must be on the search path.
library(flowWorkspace)
library(microbenchmark)

# Read the example FCS file into an in-memory cytoframe.
# NOTE(review): the path points at a local checkout of flowWorkspaceData;
# adjust it to wherever the file lives on your machine.
cf.mem <- load_cytoframe_from_fcs("~/rglab/workspace/flowWorkspaceData/inst/extdata/CytoTrol_CytoTrol_1.fcs")
dim(cf.mem)
## events parameters
## 119531 12

# Write the same data to a temporary HDF5 file and reopen it from disk, so the
# local benchmarks have an h5-backed cytoframe (cf.h5) to compare against.
h5_file <- tempfile(fileext = ".h5")
cf_write_h5(cf.mem, h5_file)
cf.h5 <- load_cytoframe(h5_file)

# Same for the tile backend: write to a temporary location, then reopen it as
# cf.tile for the local benchmarks.
tile_dir <- tempfile(fileext = ".tile")
cf_write_tile(cf.mem, tile_dir)
cf.tile <- load_cytoframe(tile_dir)
Compare size
# Human-readable size of the HDF5 file. The byte count is tagged with the
# "object_size" class so the public format() generic renders it, instead of
# reaching into the unexported utils:::format.object_size method.
format(structure(file.size(h5_file), class = "object_size"), units = "auto")
## [1] "5.5 Mb"
# Total size of the tile output as reported by `du -sh`. The path is
# shell-quoted so unusual characters in the temp path cannot break the
# command; `du` prints "<size>\t<path>", so keep only the first field.
du_out <- system(paste("du -sh", shQuote(tile_dir)), intern = TRUE)
strsplit(du_out, "\t")[[1]][1]
## [1] "5.6M"
Benchmark the random slicing
# Fix the RNG seed so the sampled row indices are reproducible.
set.seed(1)
# Column (channel) indices used by every slicing benchmark below.
cidx <- c(1, 5, 8)
# Draw 1000 random row indices. The row count is taken from cf.mem, which is
# already defined at this point; every backend is written from cf.mem, so the
# row count is the same.
ridx <- sample(nrow(cf.mem), 1e3)
subset by channels
# Time extraction of the selected channels from each local backend. The
# assignments inside the call keep the extracted matrices (a, b, c) around
# for the equality check that follows.
microbenchmark(
  a <- exprs(cf.mem[, cidx]),
  b <- exprs(cf.h5[, cidx]),
  c <- exprs(cf.tile[, cidx]),
  times = 5
)
## Unit: milliseconds
## expr min lq mean median uq
## a <- exprs(cf.mem[, cidx]) 4.013820 4.109259 4.570120 4.228480 4.705207
## b <- exprs(cf.h5[, cidx]) 4.976243 5.425548 6.077632 5.925931 6.399990
## c <- exprs(cf.tile[, cidx]) 7.032783 8.472395 11.789017 9.055889 10.559067
## max neval
## 5.793835 5
## 7.660447 5
## 23.824953 5
# all.equal() compares exactly two objects; a third positional argument is
# taken as `tolerance`, so all.equal(a, b, c) never checks `c`. Compare both
# on-disk results against the in-memory one instead. Attributes are ignored,
# mirroring the remote comparison further down.
isTRUE(all.equal(a, b, check.attributes = FALSE)) &&
  isTRUE(all.equal(a, c, check.attributes = FALSE))
## [1] TRUE
subset by cols & rows
# Time extraction of the sampled rows and selected channels from each local
# backend, keeping the results (a, b, c) for the equality check that follows.
microbenchmark(
  a <- exprs(cf.mem[ridx, cidx]),
  b <- exprs(cf.h5[ridx, cidx]),
  c <- exprs(cf.tile[ridx, cidx]),
  times = 5
)
## Unit: milliseconds
## expr min lq mean median
## a <- exprs(cf.mem[ridx, cidx]) 2.490350 2.775796 3.160115 2.894229
## b <- exprs(cf.h5[ridx, cidx]) 4.100077 4.548739 5.087715 5.106019
## c <- exprs(cf.tile[ridx, cidx]) 12.394133 13.538592 14.282130 13.745210
## uq max neval
## 2.918879 4.721321 5
## 5.335178 6.348561 5
## 14.811257 16.921458 5
# all.equal() compares exactly two objects; a third positional argument is
# taken as `tolerance`, so all.equal(a, b, c) never checks `c`. Compare both
# on-disk results against the in-memory one instead. Attributes are ignored,
# mirroring the remote comparison further down.
isTRUE(all.equal(a, b, check.attributes = FALSE)) &&
  isTRUE(all.equal(a, c, check.attributes = FALSE))
## [1] TRUE
Compare remote storage
# Name of the S3 bucket used to build the s3:// URIs for both remote backends.
bucket <- "mike-h5"
Manually upload the h5 file to S3 using the aws.s3 package, since H5 doesn't support S3 writes yet
library(aws.s3)
# NOTE(review): check_credential(NULL) presumably resolves the AWS credentials
# from the environment; confirm against its definition.
cred <- check_credential(NULL)
# Target object URI: the bucket plus the temp file's base name.
uri.h5 <- paste0("s3://", bucket, "/", basename(h5_file))
# Upload the local HDF5 file, passing region and keys from `cred`.
put_object(
  h5_file,
  uri.h5,
  region = cred$AWS_REGION,
  key = cred$AWS_ACCESS_KEY_ID,
  secret = cred$AWS_SECRET_ACCESS_KEY
)
## [1] TRUE
tiledb can directly write to s3
# Target URI for the tile copy: the bucket plus the temp location's base name.
uri.tile <- paste0(
  "s3://",
  bucket,
  "/",
  basename(tile_dir)
)
# Write the in-memory cytoframe straight to the s3:// URI.
cf_write_tile(cf.mem, uri.tile)
load cf remotely
# Time opening each remote copy. The assignments rebind cf.h5 and cf.tile to
# the S3-backed cytoframes used by the remote benchmarks below.
microbenchmark(
  cf.h5 <- load_cytoframe(uri.h5),
  cf.tile <- load_cytoframe(uri.tile),
  times = 3
)
## Unit: seconds
## expr min lq mean median
## cf.h5 <- load_cytoframe(uri.h5) 1.208530 1.273865 1.324207 1.339200
## cf.tile <- load_cytoframe(uri.tile) 2.076306 2.095645 2.120654 2.114984
## uq max neval
## 1.382045 1.424890 3
## 2.142828 2.170672 3
# Show the URI each cytoframe reports; per the recorded output, the h5 one is
# an https:// URL and the tile one keeps its s3:// form.
cf_get_uri(cf.h5)
## [1] "https://mike-h5.s3.amazonaws.com/file16fa1b544846.h5"
cf_get_uri(cf.tile)
## [1] "s3://mike-h5/file16fa35e10fc7.tile"
subset by channels
# Time extraction of the selected channels from each remote backend, keeping
# the results (b, c) for the equality check that follows.
microbenchmark(
  b <- exprs(cf.h5[, cidx]),
  c <- exprs(cf.tile[, cidx]),
  times = 3
)
## Unit: milliseconds
## expr min lq mean median uq
## b <- exprs(cf.h5[, cidx]) 1216.7192 1238.4951 1280.3548 1260.2710 1312.173
## c <- exprs(cf.tile[, cidx]) 439.6784 449.5028 942.0454 459.3271 1193.229
## max neval
## 1364.074 3
## 1927.131 3
# Compare the values only, ignoring attributes. Spell out FALSE: `F` is an
# ordinary variable that can be reassigned.
all.equal(b, c, check.attributes = FALSE)
## [1] TRUE
subset by cols & rows
# Time extraction of the sampled rows and selected channels from each remote
# backend, keeping the results (b, c) for the equality check that follows.
microbenchmark(
  b <- exprs(cf.h5[ridx, cidx]),
  c <- exprs(cf.tile[ridx, cidx]),
  times = 3
)
## Unit: milliseconds
## expr min lq mean median
## b <- exprs(cf.h5[ridx, cidx]) 1231.2594 1231.6869 1237.1049 1232.1143
## c <- exprs(cf.tile[ridx, cidx]) 480.5955 495.3953 552.6094 510.1951
## uq max neval
## 1240.0277 1247.9411 3
## 588.6164 667.0377 3
# Compare the values only, ignoring attributes. Spell out FALSE: `F` is an
# ordinary variable that can be reassigned.
all.equal(b, c, check.attributes = FALSE)
## [1] TRUE
clean up
# Remove the remote copies; per the recorded output, each call deletes the
# object behind the cytoframe's URI.
cf_cleanup(cf.h5)
## https://mike-h5.s3.amazonaws.com/file16fa1b544846.h5 is deleted!
cf_cleanup(cf.tile)
## s3://mike-h5/file16fa35e10fc7.tile is deleted!