library(HDF5Array)#must load it first to avoid namespace conflicting
library(DelayedArray)
library(mbenchmark)
path <- "/loc/no-backup/mike/shared/1M_neurons"
#h5
library(rhdf5)
h5.file <- file.path(path, "gz_chunk_by_gene_sub.h5")
hm = HDF5Array(h5.file, "data")
dims <- dim(hm)
dims
## [1] 10000 27998
nblocks <- 100

# h5.uncomp <- file.path(path, "uncompress_by_gene_sub.h5")
# hm.uncomp = HDF5Array(h5.uncomp, "data")
# h5.1k_by_1k <- file.path(path, "gz_1k_1k_sub.h5")
# hm.1k_by_1k = HDF5Array(h5.uncomp, "data")

#bigmemory
library(bigmemory)

bm.file <- file.path(path, "bm")
bm.desc <- file.path(path, "bm.desc")
# bm <- BigMatrix(nrow = dims[1], ncol = dims[2], backingfile = file.path(path, "bm"))
#
# i <- 1
# while(i < dims[2])
# {
#   j <- i + nblocks - 1
#   if(j > dims[2])
#     j <- dims[2]
#   bm[, i:j] <- as.matrix(hm[, i:j])
#   i <- j + 1
#   message(i)
# }
# all.equal(as.matrix(hm[,1:3]), as.matrix(bm[,1:3]))
# saveRDS(bm, file = file.path(path, "bmext"))
# bm <- readRDS(bm.file)
bm <- attach.big.matrix(bm.desc)
bmseed <- BMArraySeed(bm)
bm <- DelayedArray(bmseed)



library(ff)
ff.file <- file.path(path, "fm")
fm <- ff(vmode="double", dim=dims, filename = ff.file)
# i <- 1
# while(i < dims[2])
# {
#   j <- i + nblocks - 1
#   if(j > dims[2])
#     j <- dims[2]
#   fm[, i:j] <- as.matrix(hm[, i:j])
#   i <- j + 1
#   message(i)
# }
# all.equal(as.matrix(hm[,1:3]), as.matrix(fm[,1:3]))
fm <- DelayedArray(fm)

library(matter)
mm.file <- file.path(path, "mm")
mm <- matter_mat(nrow = dims[1], ncol = dims[2], paths = mm.file)
# i <- 1
# while(i < dims[2])
# {
#   j <- i + nblocks - 1
#   if(j > dims[2])
#     j <- dims[2]
#   mm[, i:j] <- as.matrix(hm[, i:j])
#   i <- j + 1
#   message(i)
# }
# all.equal(as.matrix(hm[,1:3]), as.matrix(mm[,1:3]))

mm <- DelayedArray(mm)


## Compare disk usage
mat.list <- list(bigmemory = bm, ff = fm, h5 = hm, matter = mm)

utils:::format.object_size(file.size(bm.file), units = "Mb")
## [1] "2136.1 Mb"
utils:::format.object_size(file.size(h5.file), units = "Mb")
## [1] "25.1 Mb"
# utils:::format.object_size(file.size(h5.uncomp), units = "Mb")
utils:::format.object_size(file.size(ff.file), units = "Mb")
## [1] "2136.1 Mb"
utils:::format.object_size(file.size(mm.file), units = "Mb")
## [1] "2136.1 Mb"
# ridx <- sample(1e4, 1e2)
# microbenchmark::microbenchmark(as.matrix(bm[ridx, ridx])
#                                ,as.matrix(hm.1k_by_1k[ridx, ridx])
#                                ,as.matrix(hm[ridx, ridx])
#                                # ,as.matrix(fm[ridx, ridx])
#                                # ,as.matrix(mm[ridx, ridx])
#                                , times = 3
#                                )
cache.file <- file.path(path, "mbenchres.csv")
suppressWarnings(res <- mbenchmark(mat.list, type = "subsetting"
                  , times = 5
                  , ubound = 0.1
                  , nsubset = 5
                  , shape = c(0.01, 0.5, 1, 2, 100)
                  # , shape = c(1)
                  , trace_mem = TRUE
                  , cache.file = cache.file
                  , verbose = F))


# res <- data.table::fread(file = cache.file)
# class(res) <- c("mbenchmark_subsetting", class(res))
autoplot(res) + scale_y_log10()

plot_mem(res, units = "Kb") + scale_y_log10()