library(rhdf5)
library(TENxGenomics)
library(microbenchmark)
orig <- "/loc/no-backup/mike/1M_neurons_neuron20k.h5"
tenx <- TENxGenomics(orig)
tenx
## class: TENxGenomics
## h5path: /loc/no-backup/mike/1M_neurons_neuron20k.h5
## dim(): 27998 x 20000
data <- as.matrix(tenx)
r <- nrow(data)
c <- ncol(data)
compressed <- "/loc/no-backup/mike/compressed.h5"
h5createFile(compressed)
h5createGroup(compressed, "mm10")
uncompressed <- "/loc/no-backup/mike/uncompressed.h5"
h5createFile(uncompressed)
h5createGroup(uncompressed, "mm10")
##write the data
h5createDataset(compressed, "/mm10/data", c(r,c), storage.mode = "integer", chunk=c(r,1), level=7)
h5write(data, compressed, "/mm10/data")
h5createDataset(uncompressed, "/mm10/data", c(r,c), storage.mode = "integer", chunk=c(r,1), level=0)
h5write(data, uncompressed, "/mm10/data")
barcodes <- h5read(orig, "/mm10/barcodes")
h5write(barcodes, compressed, "/mm10/barcodes", level = 1)
h5write(barcodes, uncompressed, "/mm10/barcodes", level = 1)
genes <- h5read(orig, "/mm10/genes")
h5write(genes, compressed, "/mm10/genes", level = 1)
h5write(genes, uncompressed, "/mm10/genes", level = 1)
gene_names <- h5read(orig, "/mm10/gene_names")
h5write(barcodes, compressed, "/mm10/gene_names", level = 1)
h5write(barcodes, uncompressed, "/mm10/gene_names", level = 1)
file.size(orig)/2^20
## [1] 58.83689
file.size(compressed)/2^20
## [1] 72.57142
file.size(uncompressed)/2^20
## [1] 2136.436
microbenchmark(sub <- as.matrix(tenx[, 1:100])
,sub1 <- h5read(compressed, "/mm10/data", index=list(NULL,1:100))
,sub2 <- h5read(uncompressed, "/mm10/data", index=list(NULL,1:100))
, times = 10)
## Unit: milliseconds
## expr
## sub <- as.matrix(tenx[, 1:100])
## sub1 <- h5read(compressed, "/mm10/data", index = list(NULL, 1:100))
## sub2 <- h5read(uncompressed, "/mm10/data", index = list(NULL, 1:100))
## min lq mean median uq max neval
## 168.37174 174.80739 189.26857 186.95237 195.01344 233.33392 10
## 48.52326 49.07636 56.72171 56.61076 59.46967 73.10114 10
## 33.59638 40.24370 48.15406 45.05596 49.69418 76.76322 10
all.equal(sub, sub1, check.attributes = FALSE)
## [1] TRUE
all.equal(sub, sub2, check.attributes = FALSE)
## [1] TRUE
set.seed(4)
rindx <- sample(r, 10)
cindx <- sample(c, 100)
microbenchmark(randomM <- as.matrix(tenx[rindx, cindx])
,randomM1 <- h5read(compressed, "/mm10/data", index=list(rindx, cindx))
,randomM2 <- h5read(uncompressed, "/mm10/data", index=list(rindx, cindx))
, times = 10)
## Unit: milliseconds
## expr
## randomM <- as.matrix(tenx[rindx, cindx])
## randomM1 <- h5read(compressed, "/mm10/data", index = list(rindx, cindx))
## randomM2 <- h5read(uncompressed, "/mm10/data", index = list(rindx, cindx))
## min lq mean median uq max neval
## 194.10781 194.47665 201.73349 197.33004 200.44720 225.11127 10
## 30.63320 31.10219 32.53921 31.49208 33.47150 38.84446 10
## 10.25918 10.75929 16.74423 11.24669 11.94802 66.75384 10
all.equal(randomM, randomM1, check.attributes = FALSE)
## [1] TRUE
all.equal(randomM, randomM2, check.attributes = FALSE)
## [1] TRUE
H5close()