Contents

1 Setup

# Account, billing project, and workspace identifiers that are passed to
# every monitorSubmission() / listOutput() / getOutput() call below.
accountEmail <- "shbrief@gmail.com"
billingProjectName <- "waldronlab-terra-rstudio"
workspaceName <- "mtx_workflow_biobakery_ver3"

2 Most recent outputs

# Retrieve every submission of the workspace, not only the latest one.
jobs <- monitorSubmission(accountEmail, billingProjectName, workspaceName,
                          mostRecentOnly = FALSE)

# Column indices of the submissions whose status is "Done".
jobs_done <- which(jobs["status", ] == "Done")

# Fail early with a clear message instead of indexing `jobs` with NA below.
if (length(jobs_done) == 0L) {
  stop("No submission with status \"Done\" found in workspace '",
       workspaceName, "'.", call. = FALSE)
}

# Use the first "Done" column as the most recent successful run.
# NOTE(review): assumes monitorSubmission() orders submissions newest-first
# -- confirm.
submission_id <- jobs[, jobs_done[1]]$submissionId
submission_id
## [1] "0ade77db-665d-4259-b6ca-2572ad0e3a52"

2.1 Example list of sample-specific outputs

# List the output files of the selected submission whose names match the
# keyword for sample HSM7J4NY.
listOutput(accountEmail, billingProjectName, workspaceName, submission_id,
           keyword = "HSM7J4NY.*.tsv")
## [1] "HSM7J4NY_genefamilies.tsv"        "HSM7J4NY_pathabundance.tsv"      
## [3] "HSM7J4NY_pathcoverage.tsv"        "HSM7J4NY_ecs.tsv"                
## [5] "HSM7J4NY_kos.tsv"                 "HSM7J4NY_ecs_relab.tsv"          
## [7] "HSM7J4NY_genefamilies_relab.tsv"  "HSM7J4NY_pathabundance_relab.tsv"
## [9] "HSM7J4NY.tsv"

# Local download directory. It must be defined before its first use here;
# the same path is assigned again in the visualization section.
dest_dir <- "~/data2/bioBakeryR/inst/extdata/outputs"

# Download the files listed above. No unzip() step follows: the files matched
# by this keyword are plain .tsv tables, and the visualization archive is
# downloaded and extracted in the visualization section instead.
getOutput(accountEmail, billingProjectName, workspaceName, submission_id,
          keyword = "HSM7J4NY.*.tsv", dest_dir = dest_dir)

2.1.1 HSM7J4NY_genefamilies.tsv

##                                                 X..Gene.Family
## 1                                                     UNMAPPED
## 2                                              UniRef90_B7B7I3
## 3 UniRef90_B7B7I3|g__Parabacteroides.s__Parabacteroides_merdae
##   HSM7J4NY_Abundance.RPKs
## 1              10963114.0
## 2                583244.9
## 3                583116.8

2.1.2 HSM7J4NY_pathabundance.tsv

##                                                      X..Pathway
## 1                                                      UNMAPPED
## 2                                                  UNINTEGRATED
## 3 UNINTEGRATED|g__Parabacteroides.s__Parabacteroides_distasonis
##   HSM7J4NY_Abundance
## 1          2875610.0
## 2           675985.8
## 3           150306.1

2.1.3 HSM7J4NY_pathcoverage.tsv

##                                          X..Pathway HSM7J4NY_Coverage
## 1                                          UNMAPPED                 1
## 2                                      UNINTEGRATED                 1
## 3 UNINTEGRATED|g__Bacteroides.s__Bacteroides_caccae                 1

2.1.4 HSM7J4NY_ecs.tsv

##                                   X..Gene.Family HSM7J4NY_Abundance.RPKs
## 1                                       UNMAPPED             10963114.00
## 2                                      UNGROUPED              2552297.40
## 3 UNGROUPED|g__Alistipes.s__Alistipes_finegoldii                41352.53

2.1.5 HSM7J4NY_kos.tsv

##                                   X..Gene.Family HSM7J4NY_Abundance.RPKs
## 1                                       UNMAPPED             10963114.00
## 2                                      UNGROUPED              2544777.64
## 3 UNGROUPED|g__Alistipes.s__Alistipes_finegoldii                41201.35

2.1.6 HSM7J4NY_ecs_relab.tsv

##                                 X..Gene.Family HSM7J4NY_Abundance.RPKs
## 1                                      1.1.1.1             0.000435738
## 2 1.1.1.1|g__Alistipes.s__Alistipes_finegoldii             0.000435738
## 3                                    1.1.1.100             0.002691470

2.1.7 HSM7J4NY_genefamilies_relab.tsv

##                                 X..Gene.Family HSM7J4NY_Abundance.RPKs
## 1                                      1.1.1.1             0.000435738
## 2 1.1.1.1|g__Alistipes.s__Alistipes_finegoldii             0.000435738
## 3                                    1.1.1.100             0.002691470

2.1.8 HSM7J4NY_pathabundance_relab.tsv

##                                 X..Gene.Family HSM7J4NY_Abundance.RPKs
## 1                                      1.1.1.1             0.000435738
## 2 1.1.1.1|g__Alistipes.s__Alistipes_finegoldii             0.000435738
## 3                                    1.1.1.100             0.002691470

2.1.9 HSM7J4NY.tsv

##                   X.clade_name NCBI_tax_id relative_abundance
## 1                  k__Bacteria           2           73.37069
## 2                   k__Archaea        2157           26.62931
## 3 k__Bacteria|p__Bacteroidetes       2|976           72.77130
##   additional_species
## 1                   
## 2                   
## 3

2.2 Download recent outputs for visualization

# Download the outputs of the selected submission that match "visualization"
# into a local directory, then extract the archive in place.
dest_dir <- "~/data2/bioBakeryR/inst/extdata/outputs"
getOutput(
  accountEmail, billingProjectName, workspaceName, submission_id,
  keyword = "visualization",
  dest_dir = dest_dir
)
unzip(
  zipfile = file.path(dest_dir, "ibdmdb_test_visualizations.zip"),
  exdir = dest_dir
)

# NOTE(review): `data_dir` is not defined in the visible code; presumably it
# points at the folder of tables extracted from the archive -- confirm where
# it is set.
list.files(path = data_dir)
##  [1] "humann_feature_counts.tsv"              
##  [2] "humann_read_and_species_count_table.tsv"
##  [3] "kneaddata_read_count_table.tsv"         
##  [4] "metaphlan_taxonomic_profiles.tsv"       
##  [5] "microbial_counts_table.tsv"             
##  [6] "pathabundance_relab.tsv"                
##  [7] "qc_counts_orphans_table.tsv"            
##  [8] "qc_counts_pairs_table.tsv"              
##  [9] "taxa_counts_table.tsv"                  
## [10] "top_average_pathways_names.tsv"

2.2.1 humann_feature_counts.tsv

##   X..samples humann_ecs_relab_counts humann_genefamilies_relab_counts
## 1   CSM9X23N                    1049                            88227
## 2   HSM6XRQY                    1150                            64841
## 3   HSM7J4NY                     416                             9002
##   humann_pathabundance_relab_counts
## 1                               209
## 2                               262
## 3                                42

2.2.2 humann_read_and_species_count_table.tsv

##   X..samples total.reads total.nucleotide.aligned total.translated.aligned
## 1   HSM6XRQY    17166083                 13505861                 14076343
## 2   HSMA33OT    11189134                  3394815                  4348515
## 3   MSM6J2QD     3858456                   545341                   883737
##   total.species
## 1            27
## 2            26
## 3            10

2.2.3 kneaddata_read_count_table.tsv

##     Sample raw.pair1 raw.pair2 trimmed.pair1 trimmed.pair2
## 1 CSM9X23N  10529590  10529590      10529590      10529590
## 2 HSM6XRQY   8655985   8655985       8655985       8655985
## 3 HSM7J4NY   7241429   7241429       7241425       7241425
##   decontaminated.SILVA_128_LSUParc_SSUParc_ribosomal_RNA.pair1
## 1                                                     10482541
## 2                                                      8622357
## 3                                                      7192958
##   decontaminated.SILVA_128_LSUParc_SSUParc_ribosomal_RNA.pair2
## 1                                                     10482541
## 2                                                      8622357
## 3                                                      7192958
##   decontaminated.hg37dec_v0.1.pair1 decontaminated.hg37dec_v0.1.pair2
## 1                          10429946                          10429946
## 2                           8573730                           8573730
## 3                           5764053                           5764053
##   decontaminated.human_hg38_refMrna.pair1
## 1                                10482541
## 2                                 8622359
## 3                                 7192959
##   decontaminated.human_hg38_refMrna.pair2
## 1                                10482541
## 2                                 8622359
## 3                                 7192959
##   decontaminated.SILVA_128_LSUParc_SSUParc_ribosomal_RNA.orphan1
## 1                                                          16559
## 2                                                           9642
## 3                                                          22026
##   decontaminated.SILVA_128_LSUParc_SSUParc_ribosomal_RNA.orphan2
## 1                                                          16158
## 2                                                           9009
## 3                                                          21765
##   decontaminated.hg37dec_v0.1.orphan1 decontaminated.hg37dec_v0.1.orphan2
## 1                               19029                               18559
## 2                               11519                               11078
## 3                              775171                               46603
##   decontaminated.human_hg38_refMrna.orphan1
## 1                                     16560
## 2                                      9642
## 3                                     22027
##   decontaminated.human_hg38_refMrna.orphan2 final.pair1 final.pair2
## 1                                     16158    10429946    10429946
## 2                                      9009     8573730     8573730
## 3                                     21764     5764053     5764053
##   final.orphan1 final.orphan2
## 1         16546         16129
## 2          9630          8993
## 3         21992         21736

2.2.4 metaphlan_taxonomic_profiles.tsv

##                 X..taxonomy  CSM9X23N  HSM6XRQY HSM7J4NY  HSMA33KE  HSMA33OT
## 1                k__Archaea   0.00000   0.00000 26.62931   0.00000   0.00000
## 2               k__Bacteria 100.00000 100.00000 73.37069 100.00000 100.00000
## 3 k__Bacteria|p__Firmicutes   6.76299   2.84778  0.00000  27.44129   6.94853
##    MSM6J2QD
## 1   0.00000
## 2 100.00000
## 3   1.22901

2.2.5 microbial_counts_table.tsv

##   X..Sample rRNA...Trim rRNA...Raw hg37dec_v0.1...Trim hg37dec_v0.1...Raw
## 1  CSM9X23N     0.99566    0.99720             0.99065            0.99219
## 2  HSM6XRQY     0.99623    0.99730             0.99059            0.99166
## 3  HSM7J4NY     1.04517    1.04833             0.79829            0.80070
##   mRNA...Trim mRNA...Raw
## 1     0.99554    0.99709
## 2     0.99612    0.99719
## 3     0.99333    0.99633

2.2.6 pathabundance_relab.tsv

##                                                                                    X..Pathway
## 1                                        1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis
## 2   1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii
## 3 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus
##   CSM9X23N_Abundance HSM6XRQY_Abundance HSM7J4NY_Abundance HSMA33KE_Abundance
## 1          0.0120141          0.0128154           0.023005        1.03715e-02
## 2          0.0000000          0.0000000           0.000000        9.96737e-05
## 3          0.0000000          0.0000000           0.000000        1.65680e-04
##   HSMA33OT_Abundance MSM6J2QD_Abundance
## 1         0.00895635          0.0155297
## 2         0.00000000          0.0000000
## 3         0.00000000          0.0000000

2.2.7 qc_counts_orphans_table.tsv

##   X..Sample rRNA.orphan1 rRNA.orphan2 hg37dec_v0.1.orphan1 hg37dec_v0.1.orphan2
## 1  CSM9X23N        16559        16158                19029                18559
## 2  HSM6XRQY         9642         9009                11519                11078
## 3  HSM7J4NY        22026        21765               775171                46603
##   mRNA.orphan1 mRNA.orphan2
## 1        16560        16158
## 2         9642         9009
## 3        22027        21764

2.2.8 qc_counts_pairs_table.tsv

##   X..Sample      Raw     Trim     rRNA hg37dec_v0.1     mRNA
## 1  CSM9X23N 10529590 10529590 10482541     10429946 10482541
## 2  HSM6XRQY  8655985  8655985  8622357      8573730  8622359
## 3  HSM7J4NY  7241429  7241425  7192958      5764053  7192959

2.2.9 taxa_counts_table.tsv

##   X..Sample Species Species.filtered Genera Genera.filtered
## 1  CSM9X23N      34               33     19              19
## 2  HSM6XRQY      33               30     22              21
## 3  HSM7J4NY      11               11      6               6

2.2.10 top_average_pathways_names.tsv

##                                                 X..Pathway Average.abundance
## 1                  PWY-1042: glycolysis IV (plant cytosol)            0.0399
## 2         ANAGLYCOLYSIS-PWY: glycolysis III (from glucose)            0.0232
## 3 PWY-7219: adenosine ribonucleotides de novo biosynthesis            0.0231
##   Variance
## 1 0.001020
## 2 0.000225
## 3 0.000180

3 Make SummarizedExperiment

3.1 Taxonomy profile

## Read the tab-separated taxonomic profile table (one column per sample).
## read.delim() is read.csv() with sep = "\t" and header = TRUE.
taxo_profile <- read.delim(file.path(data_dir,
                                     "metaphlan_taxonomic_profiles.tsv"))

## Taxonomy as rownames
taxo_profile <- tibble::column_to_rownames(taxo_profile, var = "X..taxonomy")

## Metadata
meta <- read.table("~/data2/bioBakeryR/inst/extdata/ibdmdb_demo_metadata.txt",
                   sep = "\t", header = TRUE)

## Align metadata rows to the assay columns by sample ID, so that colData can
## never be silently attached to the wrong samples when the two files are
## ordered differently.
## NOTE(review): assumes meta$Sample holds the same IDs as the column names of
## the profile table -- confirm against the metadata file.
sample_ids <- colnames(taxo_profile)
meta_idx <- match(sample_ids, meta$Sample)
if (anyNA(meta_idx)) {
  stop("Samples without metadata: ",
       paste(sample_ids[is.na(meta_idx)], collapse = ", "), call. = FALSE)
}
colData <- DataFrame(meta)[meta_idx, , drop = FALSE]
## With row names set, the constructor can check them against the assay's
## column names.
rownames(colData) <- sample_ids

se <- SummarizedExperiment(assays = list(taxo_profile = taxo_profile),
                           colData = colData)
se
## class: SummarizedExperiment 
## dim: 202 6 
## metadata(0):
## assays(1): taxo_profile
## rownames(202): k__Archaea k__Bacteria ...
##   k__Archaea|p__Euryarchaeota|c__Thermoplasmata|o__Methanomassiliicoccales|f__Methanomassiliicoccaceae|g__Methanomassiliicoccus|s__Candidatus_Methanomassiliicoccus_intestinalis
##   k__Bacteria|p__Proteobacteria|c__Proteobacteria_unclassified|o__Proteobacteria_unclassified|f__Proteobacteria_unclassified|g__Proteobacteria_unclassified|s__Proteobacteria_bacterium_CAG_139
## rowData names(0):
## colnames(6): CSM9X23N HSM6XRQY ... HSMA33OT MSM6J2QD
## colData names(4): Sample Sequencing.Type Site Age

3.2 Pathway abundance

## Read the tab-separated pathway relative-abundance table.
## read.delim() is read.csv() with sep = "\t" and header = TRUE.
path_abun <- read.delim(file.path(data_dir, "pathabundance_relab.tsv"))

## Pathway as rownames
path_abun <- tibble::column_to_rownames(path_abun, var = "X..Pathway")

## Strip only the trailing "_Abundance" suffix so the column names become the
## bare sample IDs. The pattern is anchored so that an occurrence elsewhere in
## a name is left untouched.
colnames(path_abun) <- sub("_Abundance$", "", colnames(path_abun))

head(path_abun)
##                                                                                                 CSM9X23N
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis                                         0.012014100
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii    0.000000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus  0.000000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_caccae    0.000383122
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_dorei     0.000282248
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_eggerthii 0.001134250
##                                                                                               HSM6XRQY
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis                                         0.0128154
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii    0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus  0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_caccae    0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_dorei     0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_eggerthii 0.0000000
##                                                                                              HSM7J4NY
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis                                         0.023005
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii    0.000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus  0.000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_caccae    0.000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_dorei     0.000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_eggerthii 0.000000
##                                                                                                 HSMA33KE
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis                                         1.03715e-02
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii    9.96737e-05
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus  1.65680e-04
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_caccae    2.36374e-04
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_dorei     0.00000e+00
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_eggerthii 0.00000e+00
##                                                                                                 HSMA33OT
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis                                         8.95635e-03
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii    0.00000e+00
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus  0.00000e+00
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_caccae    5.14446e-05
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_dorei     0.00000e+00
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_eggerthii 0.00000e+00
##                                                                                               MSM6J2QD
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis                                         0.0155297
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Alistipes.s__Alistipes_finegoldii    0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Anaerostipes.s__Anaerostipes_hadrus  0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_caccae    0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_dorei     0.0000000
## 1CMET2-PWY: N10-formyl-tetrahydrofolate biosynthesis|g__Bacteroides.s__Bacteroides_eggerthii 0.0000000