suppressPackageStartupMessages(devtools::load_all())
## Loading flowWorkspace
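This demo runs from the package source tree via devtools; in a regular session the released packages would be attached instead (get_bucket_df used below comes from aws.s3):

library(flowWorkspace)
library(aws.s3)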

Load the GatingSet from a local archive
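dataDir is not defined in this session; it is assumed here to point at the extdata folder of the flowWorkspaceData package, as in the flowWorkspace vignettes:

dataDir <- system.file("extdata", package = "flowWorkspaceData")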

gs <- load_gs(list.files(dataDir, pattern = "gs_manual",full = TRUE))

Timing h5 I/O

cs <- gs_pop_get_data(gs)
cf <- get_cytoframe_from_cs(cs, 1)
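As a quick sanity check (not part of the original run), the samples in the cytoset can be listed:

sampleNames(cs)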

The h5 path shows that the file is local

cf_get_h5_file_path(cf)
## [1] "/media/wjiang2/real_home/wjiang2/mylib/R-devel-build/library/flowWorkspaceData/extdata/gs_manual/CytoTrol_CytoTrol_1.fcs.h5"

All reads are fast since the file is local

system.time(colnames(cf))
##    user  system elapsed 
##   0.007   0.000   0.007
system.time(exprs(cf[, 1:2]))
##    user  system elapsed 
##   0.012   0.000   0.012
system.time(exprs(cf))
##    user  system elapsed 
##   0.010   0.008   0.018

Save the GatingSet from local to remote with save_gs

Check that the bucket is empty before saving

url <- "s3://mike-h5/test"
cred <- check_credential(NULL)
reg <- cred$AWS_REGION
get_bucket_df(url, region = reg)[, c(1,2,4,8)]
## [1] Key          LastModified Size         Bucket      
## <0 rows> (or 0-length row.names)
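check_credential(NULL) above is assumed to collect the AWS settings from the environment; in a fresh session the same information would be supplied through the standard AWS variables (a hypothetical setup sketch, values elided):

# Sys.setenv(
#   AWS_ACCESS_KEY_ID     = "...",
#   AWS_SECRET_ACCESS_KEY = "...",
#   AWS_REGION            = "..."
# )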
save_gs(gs, url)
## Uploading gs ...
## Uploading 63c60e66-779b-4712-ad6e-b9349b965401.gs
## Uploading CytoTrol_CytoTrol_1.fcs.pb
## Uploading CytoTrol_CytoTrol_1.fcs.h5
## Done
## To reload it, use 'load_gs' function
get_bucket_df(url, region = reg)[, c(1,2,4,8)]
##                                            Key             LastModified    Size
## 1 test/63c60e66-779b-4712-ad6e-b9349b965401.gs 2020-03-30T18:29:00.000Z      90
## 2              test/CytoTrol_CytoTrol_1.fcs.h5 2020-03-30T18:29:03.000Z 5778880
## 3              test/CytoTrol_CytoTrol_1.fcs.pb 2020-03-30T18:29:01.000Z  247118
##    Bucket
## 1 mike-h5
## 2 mike-h5
## 3 mike-h5

Load the GatingSet from remote without downloading the h5 file

gs <- load_gs(url)
## downloading test/63c60e66-779b-4712-ad6e-b9349b965401.gs ...
## downloading test/CytoTrol_CytoTrol_1.fcs.pb ...

Timing h5 I/O

cs <- gs_pop_get_data(gs)
cf <- get_cytoframe_from_cs(cs, 1)

The h5 path shows that the file is now remote

cf_get_h5_file_path(cf)
## [1] "https://mike-h5.s3.amazonaws.com/test/CytoTrol_CytoTrol_1.fcs.h5"

Reading the column names is fast since the metadata is already in memory

system.time(colnames(cf))
##    user  system elapsed 
##   0.003   0.000   0.002

Partial I/O from the remote h5 takes longer than from the local file

system.time(exprs(cf[, 1:2]))
##    user  system elapsed 
##   0.081   0.024   1.084

Loading the full matrix from remote is slower still

system.time(exprs(cf))
##    user  system elapsed 
##   0.269   0.123   2.838
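Every exprs() call re-reads from S3, so if the matrix will be accessed repeatedly it pays to pull it into memory once and reuse the local copy (a usage note, not a flowWorkspace-specific feature):

m <- exprs(cf)  # single remote read
dim(m)          # all further access is in-memory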

Delete the archive; delete_gs handles both remote and local paths

delete_gs(url)
## s3://mike-h5/test is deleted
get_bucket_df(url, region = reg)[, c(1,2,4,8)]
## [1] Key          LastModified Size         Bucket      
## <0 rows> (or 0-length row.names)