suppressPackageStartupMessages(devtools::load_all())
## Loading flowWorkspace
Load the GatingSet (gs) from a local archive
# Find the "gs_manual" archive under dataDir (full paths) and load it into gs.
# `full.names` is spelled out instead of relying on partial matching of `full`.
# NOTE(review): dataDir is not defined in the visible source — confirm it is
# set before this line runs.
gs <- load_gs(list.files(dataDir, pattern = "gs_manual", full.names = TRUE))
Time the h5 (HDF5) I/O on the local archive
# cs: the data object retrieved from gs; cf: the first cytoframe taken from cs.
cs <- gs_pop_get_data(gs)
cf <- get_cytoframe_from_cs(cs, 1)
The h5 path shows that the file is local
# Print the path of the h5 file associated with cf.
cf_get_h5_file_path(cf)
## [1] "/media/wjiang2/real_home/wjiang2/mylib/R-devel-build/library/flowWorkspaceData/extdata/gs_manual/CytoTrol_CytoTrol_1.fcs.h5"
Every operation is fast because the file is local
# system.time() reports the user, system and elapsed seconds spent evaluating
# each expression.
# 1) Column names only.
system.time(colnames(cf))
## user system elapsed
## 0.007 0.000 0.007
# 2) exprs() on the first two columns of cf.
system.time(exprs(cf[, 1:2]))
## user system elapsed
## 0.012 0.000 0.012
# 3) exprs() on the whole of cf.
system.time(exprs(cf))
## user system elapsed
## 0.010 0.008 0.018
save_gs from local to remote
Check bucket before saving
# Remote destination for the archive.
url <- "s3://mike-h5/test"
# check_credential() is a package helper; only the AWS_REGION element of its
# result is used here, for the bucket listings.
cred <- check_credential(NULL)
reg <- cred$AWS_REGION
# List the bucket before the upload. Columns are selected by name rather than
# by position (previously c(1,2,4,8)) so the call does not depend on the column
# order returned by get_bucket_df().
get_bucket_df(url, region = reg)[, c("Key", "LastModified", "Size", "Bucket")]
## [1] Key LastModified Size Bucket
## <0 rows> (or 0-length row.names)
# Save gs to the remote location; the log below lists the uploaded files.
save_gs(gs, url)
## Uploading gs ...
## Uploading 63c60e66-779b-4712-ad6e-b9349b965401.gs
## Uploading CytoTrol_CytoTrol_1.fcs.pb
## Uploading CytoTrol_CytoTrol_1.fcs.h5
## Done
## To reload it, use 'load_gs' function
# List the bucket again to confirm the three files are present.
get_bucket_df(url, region = reg)[, c("Key", "LastModified", "Size", "Bucket")]
## Key LastModified Size
## 1 test/63c60e66-779b-4712-ad6e-b9349b965401.gs 2020-03-30T18:29:00.000Z 90
## 2 test/CytoTrol_CytoTrol_1.fcs.h5 2020-03-30T18:29:03.000Z 5778880
## 3 test/CytoTrol_CytoTrol_1.fcs.pb 2020-03-30T18:29:01.000Z 247118
## Bucket
## 1 mike-h5
## 2 mike-h5
## 3 mike-h5
Load the gs from the remote archive without downloading the h5 file
# Reload gs directly from the remote URL; the log below reports only the .gs
# and .pb files as downloaded.
gs <- load_gs(url)
## downloading test/63c60e66-779b-4712-ad6e-b9349b965401.gs ...
## downloading test/CytoTrol_CytoTrol_1.fcs.pb ...
Time the h5 (HDF5) I/O again, this time against the remote file
# Rebuild cs and cf from the reloaded gs: cs is the data object retrieved from
# gs, cf the first cytoframe taken from cs.
cs <- gs_pop_get_data(gs)
cf <- get_cytoframe_from_cs(cs, 1)
h5 path shows it is remote
# Print the path of the h5 file associated with cf (an https URL this time).
cf_get_h5_file_path(cf)
## [1] "https://mike-h5.s3.amazonaws.com/test/CytoTrol_CytoTrol_1.fcs.h5"
Partial I/O from the remote h5 file (takes longer than from the local file)
# Time exprs() on the first two columns of cf; system.time() reports user,
# system and elapsed seconds.
system.time(exprs(cf[, 1:2]))
## user system elapsed
## 0.081 0.024 1.084
load more data from remote (even slower)
# Time exprs() on the whole of cf; system.time() reports user, system and
# elapsed seconds.
system.time(exprs(cf))
## user system elapsed
## 0.269 0.123 2.838
delete the remote/local archive
# Delete the archive stored at url.
delete_gs(url)
## s3://mike-h5/test is deleted
# List the bucket once more to confirm it is empty. Columns are selected by
# name rather than by position (previously c(1,2,4,8)) so the call does not
# depend on the column order returned by get_bucket_df().
get_bucket_df(url, region = reg)[, c("Key", "LastModified", "Size", "Bucket")]
## [1] Key LastModified Size Bucket
## <0 rows> (or 0-length row.names)