import method
ZarrExperiment 0.0.11
The Open Storage Network (OSN) has allowed the Bioconductor project to expand
its cloud data offerings. We demonstrate the use of the `ZarrRemote`
class to access Tissue MicroArray (TMA) data, one 'core' at a time.
library(ZarrExperiment)
https://mghp.osn.xsede.org/bir190004-bucket01/index.html#TMA11/zarr/
There are about 123 cores within the OSN storage location above. Cores are
labeled numerically; for example, `1.zarr` corresponds to core number one.
showClass("ZarrRemote")
## Class "ZarrRemote" [package "ZarrExperiment"]
##
## Slots:
##
## Name: endpoint bucket resource
## Class: character character character_OR_connection
##
## Extends:
## Class "ZarrArchive", directly
## Class "BiocFile", by class "ZarrArchive", distance 2
The `endpoint` input corresponds to the OSN website, and the `bucket` input
corresponds to the bucket name and the subfolders in which the `.zarr`
archives are located.
zr <- ZarrRemote(
endpoint = "https://mghp.osn.xsede.org/",
bucket = "bir190004-bucket01/TMA11/zarr/"
)
zr
## class: ZarrRemote
## endpoint: https://mghp.osn.xsede.org/
## bucket: bir190004-bucket01/TMA11/zarr/
## ├── 1.zarr
## ├── 10.zarr
## ├── ...
## ├── 98.zarr
## └── 99.zarr
A URL may also be used as the `resource` input. Both the `endpoint` and the
`bucket` location will be deduced from it when possible.
zr <- ZarrRemote(
resource = "https://mghp.osn.xsede.org/bir190004-bucket01/TMA11/zarr/"
)
zr
## class: ZarrRemote
## endpoint: https://mghp.osn.xsede.org/
## bucket: bir190004-bucket01/TMA11/zarr/
## ├── 1.zarr
## ├── 10.zarr
## ├── ...
## ├── 98.zarr
## └── 99.zarr
Currently, data can be retrieved from one `.zarr` directory at a time via the
`import` method on the `ZarrRemote` class.
import method
The `import` method makes internal use of the `s3fs` Python module:
getMethod(import, "ZarrRemote")
## Method Definition:
##
## function (con, format, text, ...)
## {
## dots <- list(...)
## file <- dots[["filename"]]
## if (is.null(file) || !isScalarCharacter(file))
## stop("Provide a 'filename' input found in the bucket")
## fs <- .s3fs()$S3FileSystem(anon = TRUE, key = "dummy", secret = "dummy",
## client_kwargs = reticulate::dict(endpoint_url = con@endpoint))
## files <- fs$ls(con@bucket)
## if (!file %in% basename(files))
## stop("'filename': ", file, " not found")
## mapper <- fs$get_mapper(paste0(con@bucket, file))
## .zarr()$load(mapper)
## }
## <bytecode: 0x559bae3b3f98>
## <environment: namespace:ZarrExperiment>
##
## Signatures:
## con format text
## target "ZarrRemote" "ANY" "ANY"
## defined "ZarrRemote" "ANY" "ANY"
This operation takes a few minutes to complete and produces an array of approximately 2 GB.
c99 <- import(zr, filename = "99.zarr")
par(mfrow = c(2, 2))
for (channel in c(4, 5, 10, 17))
image(
c99[channel, , ],
main = paste0("Channel No. ", as.character(channel)),
useRaster = TRUE
)
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ZarrExperiment_0.0.11 BiocStyle_2.25.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.9 highr_0.9
## [3] XVector_0.37.0 bslib_0.4.0
## [5] compiler_4.2.1 BiocManager_1.30.18
## [7] jquerylib_0.1.4 GenomeInfoDb_1.33.5
## [9] zlibbioc_1.43.0 MatrixGenerics_1.9.1
## [11] bitops_1.0-7 tools_4.2.1
## [13] SingleCellExperiment_1.19.0 digest_0.6.29
## [15] lattice_0.20-45 jsonlite_1.8.0
## [17] evaluate_0.16 png_0.1-7
## [19] rlang_1.0.4 Matrix_1.4-1
## [21] DelayedArray_0.23.1 cli_3.3.0
## [23] rstudioapi_0.13 yaml_2.3.5
## [25] xfun_0.32 fastmap_1.1.0
## [27] GenomeInfoDbData_1.2.8 stringr_1.4.0
## [29] knitr_1.39 sass_0.4.2
## [31] S4Vectors_0.35.1 BiocBaseUtils_0.99.9
## [33] IRanges_2.31.0 triebeard_0.3.0
## [35] grid_4.2.1 stats4_4.2.1
## [37] reticulate_1.25 Biobase_2.57.1
## [39] R6_2.5.1 rmarkdown_2.14
## [41] bookdown_0.28 magrittr_2.0.3
## [43] urltools_1.7.3 GenomicRanges_1.49.0
## [45] htmltools_0.5.3 matrixStats_0.62.0
## [47] BiocGenerics_0.43.1 SummarizedExperiment_1.27.1
## [49] stringi_1.7.8 RCurl_1.98-1.8
## [51] cachem_1.0.6 BiocIO_1.7.1