Sourcing HDF5 data in R

Chetan Kumar

Installing the rhdf5 package and using HDF5 files from R

The following R code shows how to install rhdf5 (if needed), load it, and create an HDF5 file:

# One-time setup: rhdf5 is installed through BiocManager.
# Uncomment the three lines below the first time this script is run.
# install.packages("BiocManager")
# BiocManager::install()
# BiocManager::install("rhdf5")

# Once installed, rhdf5 is attached like any other package.
library(rhdf5)

# Try to create the HDF5 file and keep the result in `file`.
# The recorded run below shows FALSE because the file was already on disk.
file <- h5createFile(file = "example.h5")
## file 'C:\Users\Chetan\Desktop\R Programming\Getting and Cleaning Data\Week 2\example.h5' already exists.
file
## [1] FALSE

Create Groups

 # Create group
# Create the top-level groups "foo" and "baa". The later sections write to
# "foo/A" and "foo/goobaa/B", so the names here must match those paths.
# (The recorded messages come from a re-run against a file in which the
# groups already existed.)
created1 <- h5createGroup("example.h5", "foo")
## Can not create group. Object with name 'foo' already exists.
created1 <- h5createGroup("example.h5", "baa")
## Can not create group. Object with name 'baa' already exists.
# Create a subgroup inside "foo"; the parent group is created above.
created1 <- h5createGroup("example.h5", "foo/goobaa")
## Can not create group. Object with name 'foo/goobaa' already exists.
# List all groups and sub-groups currently stored in the file
h5ls("example.h5")
##         group    name       otype   dclass       dim
## 0           /     baa   H5I_GROUP                   
## 1           /    baao   H5I_GROUP                   
## 2           /      df H5I_DATASET COMPOUND         5
## 3           /     foo   H5I_GROUP                   
## 4        /foo       A H5I_DATASET  INTEGER     5 x 2
## 5        /foo  goobaa   H5I_GROUP                   
## 6 /foo/goobaa       B H5I_DATASET    FLOAT 5 x 2 x 2
## 7        /foo goobaao   H5I_GROUP                   
## 8           /    fooo   H5I_GROUP

Write to a group

# Store a 5 x 2 integer matrix as dataset "A" inside group "foo"
A <- matrix(seq_len(10), nrow = 5, ncol = 2)
h5write(obj = A, file = "example.h5", name = "foo/A")

# Store a 5 x 2 x 2 numeric array as dataset "B" inside "foo/goobaa".
# NOTE(review): the "scale" attribute is set on the R object only; h5write()
# appears to need write.attributes = TRUE to persist it - confirm intent.
B <- array(seq(0.1, 2.0, by = 0.1), dim = c(5, 2, 2))
attr(B, "scale") <- "liter"
h5write(obj = B, file = "example.h5", name = "foo/goobaa/B")

# Inspect the file contents after writing
h5ls("example.h5")
##         group    name       otype   dclass       dim
## 0           /     baa   H5I_GROUP                   
## 1           /    baao   H5I_GROUP                   
## 2           /      df H5I_DATASET COMPOUND         5
## 3           /     foo   H5I_GROUP                   
## 4        /foo       A H5I_DATASET  INTEGER     5 x 2
## 5        /foo  goobaa   H5I_GROUP                   
## 6 /foo/goobaa       B H5I_DATASET    FLOAT 5 x 2 x 2
## 7        /foo goobaao   H5I_GROUP                   
## 8           /    fooo   H5I_GROUP

Write a data set

# Build a small data frame with an integer, a numeric and a character column
df <- data.frame(1L:5L, seq(0, 1, length.out = 5),
                 c("ab", "cde", "fghi", "a", "s"),
                 stringsAsFactors = FALSE)

# Write the data frame to the top-level group, but only when "/df" is not
# already in the file. With the write commented out, a freshly created file
# never received "df" and the later h5read("example.h5", "df") failed.
# NOTE(review): the guard assumes rhdf5 refuses to overwrite an existing
# data-frame (compound) dataset - confirm against the rhdf5 documentation.
h5_contents <- h5ls("example.h5")
if (!any(h5_contents$group == "/" & h5_contents$name == "df")) {
  h5write(df, "example.h5", "df")
}

# List the file contents to see the dimensions of the dataset
h5ls("example.h5")
##         group    name       otype   dclass       dim
## 0           /     baa   H5I_GROUP                   
## 1           /    baao   H5I_GROUP                   
## 2           /      df H5I_DATASET COMPOUND         5
## 3           /     foo   H5I_GROUP                   
## 4        /foo       A H5I_DATASET  INTEGER     5 x 2
## 5        /foo  goobaa   H5I_GROUP                   
## 6 /foo/goobaa       B H5I_DATASET    FLOAT 5 x 2 x 2
## 7        /foo goobaao   H5I_GROUP                   
## 8           /    fooo   H5I_GROUP

Reading from HDF5

# Read each stored object back into R by its path inside the file
readA <- h5read(file = "example.h5", name = "foo/A")
readB <- h5read(file = "example.h5", name = "foo/goobaa/B")
readdf <- h5read(file = "example.h5", name = "df")

# Show the matrix that was read from "foo/A"
readA
##      [,1] [,2]
## [1,]    1    6
## [2,]    2    7
## [3,]    3    8
## [4,]    4    9
## [5,]    5   10

Writing and reading chunks

# Overwrite part of an existing dataset: rows 1 to 3 of column 1 of "foo/A"
# are replaced with 12, 13 and 14; the remaining cells are left unchanged.
h5write(
  c(12, 13, 14),
  file = "example.h5",
  name = "foo/A",
  index = list(1:3, 1)
)

# Read the dataset back to confirm the partial update
h5read(file = "example.h5", name = "foo/A")
##      [,1] [,2]
## [1,]   12    6
## [2,]   13    7
## [3,]   14    8
## [4,]    4    9
## [5,]    5   10