1 Create datasets
2 Merging into one big ExpressionSet
3 Convert to phyloseq object
4 Bray-Curtis distance ordination plot
- 4.1 Using everything
- 4.2 With species + strain only
5 Ugly summary statistics
6 Session Info

1 Create datasets

Create an ExperimentHub containing metadata for all curatedMetagenomicData records:

suppressPackageStartupMessages(library(ExperimentHub))
eh = ExperimentHub()

## snapshotDate(): 2016-10-26

myquery = query(eh, "curatedMetagenomicData")

Subset this to bug abundance from stool datasets. Note that this could also be done using myquery$tags (once the tags are updated). See the available metadata using View(mcols(myquery)).

myquery.stool <- myquery[grepl("stool", myquery$title) & grepl("bugs", myquery$title), ]
myquery.stool

## ExperimentHub with 11 records
## # snapshotDate(): 2016-10-26 
## # $dataprovider: Institute of Microbiology and Infection, University of...
## # $species: Homo Sapiens
## # $rdataclass: ExpressionSet
## # additional mcols(): taxonomyid, genome, description,
## #   coordinate_1_based, maintainer, rdatadateadded, preparerclass,
## #   tags, sourceurl, sourcetype 
## # retrieve records with, e.g., 'object[["EH236"]]' 
## 
##           title                                        
##   EH236 | HMP_2012.metaphlan_bugs_list.stool           
##   EH277 | KarlssonFH_2013.metaphlan_bugs_list.stool    
##   EH283 | LeChatelierE_2013.metaphlan_bugs_list.stool  
##   EH289 | LomanNJ_2013_Hi.metaphlan_bugs_list.stool    
##   EH295 | LomanNJ_2013_Mi.metaphlan_bugs_list.stool    
##   ...     ...                                          
##   EH307 | Obregon-TitoAJ_2015.metaphlan_bugs_list.stool
##   EH319 | QinJ_2012.metaphlan_bugs_list.stool          
##   EH325 | QinN_2014.metaphlan_bugs_list.stool          
##   EH331 | RampelliS_2015.metaphlan_bugs_list.stool     
##   EH361 | ZellerG_2014.metaphlan_bugs_list.stool

Create a list of ExpressionSet objects:

eset.list <- lapply(names(myquery.stool), function(x) myquery.stool[[x]])

Give them simplified titles:

names(eset.list) <- myquery.stool$title
names(eset.list) <- gsub("\\..+", "", myquery.stool$title)

And add the titles to the colnames:

for (i in 1:length(eset.list)){
    colnames(eset.list[[i]]) <- paste(names(eset.list)[[i]],
                                      colnames(eset.list[[i]]), 
                                      sep=".")
    pData(eset.list[[i]]) <- pData(eset.list[[i]])[, !sapply(pData(eset.list[[i]]), function(x) all(is.na(x)))]
    eset.list[[i]]$subjectID <- as.character(eset.list[[i]]$subjectID)
}

2 Merging into one big `ExpressionSet`

joinWithRnames <- function(obj, FUN = I) {
    mylist <- lapply(obj, function(x) {
        df <- data.frame(FUN(x))
        df$rnames28591436107 <- rownames(df)
        return(df)
    })
    bigdf <- Reduce(dplyr::full_join, mylist)
    rownames(bigdf) <- make.names(bigdf$rnames28591436107)
    bigdf <- bigdf[, !grepl("^rnames28591436107$", colnames(bigdf))]
    return(bigdf)
}

pdat <- joinWithRnames(eset.list, FUN=pData)
pdat$study <- sub("\\..+", "", rownames(pdat))
ab <- joinWithRnames(eset.list, FUN=exprs)
ab[is.na(ab)] <- 0
eset <- ExpressionSet(assayData = as.matrix(ab), 
                      phenoData = AnnotatedDataFrame(pdat))
eset

## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 3463 features, 1885 samples 
##   element names: exprs 
## protocolData: none
## phenoData
##   sampleNames: HMP_2012.SRS011061 HMP_2012.SRS011084 ...
##     ZellerG_2014.CCIS98832363ST.4.0 (1885 total)
##   varLabels: subjectID visit_number ... study (105 total)
##   varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## Annotation:

3 Convert to `phyloseq` object

suppressPackageStartupMessages(library(phyloseq))
source("https://raw.githubusercontent.com/waldronlab/presentations/master/Waldron_2016-06-07_EPIC/metaphlanToPhyloseq.R")
pseq <- metaphlanToPhyloseq(tax=exprs(eset), metadat=pData(eset), split=".")

4 Bray-Curtis distance ordination plot

4.1 Using everything

ord = ordinate(pseq, method="PCoA", distance="bray")

plot_ordination(pseq, ord, color="disease") + 
  ggplot2::ggtitle("Bray-Curtis Principal Coordinates Analysis")

plot_ordination(pseq, ord, color="country") + 
  ggplot2::ggtitle("Bray-Curtis Principal Coordinates Analysis")

plot_ordination(pseq, ord, color="study") + 
  ggplot2::ggtitle("Bray-Curtis Principal Coordinates Analysis")

4.2 With species + strain only

pseq.spstrain = subset_taxa(pseq, !is.na(Species)) #species + strain only
ord2 = ordinate(pseq.spstrain, method="PCoA", distance="bray")

plot_ordination(pseq.spstrain, ord, color="study") + 
  ggplot2::ggtitle("Bray-Curtis Principal Coordinates Analysis")

5 Ugly summary statistics

for (i in 1:length(pdat)){
    cat(names(pdat)[[i]])
    cat("\n")
    if(is(pdat[[i]], "numeric")){
        print(summary(pdat[[i]]))
    }else{
        if(length(unique(pdat[[i]])) <= 30)
          print(table(pdat[[i]]))
    }
    cat("\n")
    }

## subjectID
## 
## visit_number
## 
##  1  2  3 
## 86 57  4 
## 
## bodysite
## 
## stool 
##  1885 
## 
## snprnt
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## 700013715 700024542 700038354 700062686 700103016 700119496      1738 
## 
## gender
## 
## female   male 
##    640    735 
## 
## wmsphase
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##       1       1       1       1       1       1    1738 
## 
## disease
## 
##                     cancer                  cirrhosis 
##                         53                        123 
##          ibd_crohn_disease     ibd_ulcerative_colitis 
##                         21                        127 
## impaired_glucose_tolerance              large_adenoma 
##                         49                         15 
##                    leaness                          n 
##                         96                        840 
##                 n_relative                      obese 
##                         47                          5 
##                    obesity                 overweight 
##                        169                         10 
##              small_adenoma             stec2-positive 
##                         27                         53 
##                        t2d                underweight 
##                        223                          1 
## 
## country
## 
##      china    denmark    estonia    finland     france    germany 
##        600        471          1          1        157         57 
##    hungary    iceland      italy     norway       peru   slovakia 
##          1          1         11          1         36          1 
##      spain     sweden   tanzania        usa yugoslavia 
##        219        130         27        169          2 
## 
## age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   35.00   49.00   47.79   62.00   89.00     367 
## 
## number_reads
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##    164938  33202947  49691540  53972539  66357610 238641707 
## 
## classification
## 
## igt ngt t2d 
##  49  43  53 
## 
## gad.antibodies
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.2000  0.4000  0.9621  0.8000 56.6000    1740 
## 
## bmi
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   14.05   21.62   24.45   25.57   29.00   46.60     319 
## 
## whr
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.7300  0.8200  0.8600  0.8597  0.9000  1.0500    1740 
## 
## wc
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    66.0    82.0    90.0    89.4    95.0   117.0    1740 
## 
## cholesterol
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.420   4.730   5.410   5.451   6.100   8.710    1740 
## 
## triglycerides
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.420   0.840   1.040   1.301   1.500   6.350    1740 
## 
## hdl
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.300   1.030   1.245   1.383   1.583   6.790    1409 
## 
## ldl
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.140   2.640   3.210   3.229   3.830   7.120    1405 
## 
## creatinine
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   44.00   60.00   68.00   69.79   74.00  182.00    1740 
## 
## y.gt
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1400  0.2600  0.3600  0.5374  0.5700  4.6000    1740 
## 
## fasting_glucose
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.900   5.300   5.800   6.383   6.825  16.500    1741 
## 
## fasting_insulin
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     2.8     6.2     8.8    11.8    13.0    66.0    1740 
## 
## hba1c
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   26.00   36.00   38.00   40.64   43.00   77.00    1740 
## 
## adiponectin
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.680   9.595  11.900  13.361  15.725  34.800    1741 
## 
## leptin
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    2.17   12.57   21.65   24.94   33.50  105.00    1741 
## 
## glp.1
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.300   0.900   1.774   1.700  17.800    1755 
## 
## fgf.19
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   16.40   71.64  107.83  138.50  160.03  782.18    1755 
## 
## hscrp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.130   0.940   1.530   4.117   3.480  99.220    1740 
## 
## c.peptide
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.1200  0.6300  0.8100  0.9062  1.0700  2.8000    1740 
## 
## tnfa
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.750   1.560   2.030   2.162   2.530   7.100    1740 
## 
## il.1
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  0.0000  0.9818  0.0000 16.8500    1755 
## 
## cd163
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   109.8   610.9   764.9   786.4   902.2  2276.5    1755 
## 
## statins
## 
##  n  y 
## 93 52 
## 
## insulin
## 
##   n   y 
## 139   6 
## 
## oral_anti.diabetic_medication
## 
##   met sulph 
##    20     2 
## 
## years_in_sweden
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    15.0    34.5    38.0    38.2    40.0    60.0    1870 
## 
## ethnicity
## 
## asian white 
##   600  1011 
## 
## paired_end_insert_size
## 
## read_length
## 
##    44 44/75    75    90 
##    14     1   162   115 
## 
## matched_reads
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
##  14251095  30895897  39132640  42448127  53507661 122587818      1593 
## 
## uniquely_matching_reads
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
##  13112580  26606088  33623355  36814287  46519632 108713556      1593 
## 
## uniquely_matched_reads
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
##  7044202 15308787 20383454 21808160 27785946 60517895     1593 
## 
## gene_number
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   91032  560710  686014  670528  784221 1005488    1593 
## 
## gene_number_for_11M_uniquely_matched_reads
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   59147  493405  599938  578512  679028  878816    1593 
## 
## hitchip_probe_number
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     524    1166    1264    1255    1372    1650    1593 
## 
## gene_count_class
## 
## hgc lgc 
## 224  68 
## 
## hitchip_probe_class
## 
## hpc lpc 
## 227  65 
## 
## first
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1122    2723    2934    3122    3878    5066    1833 
## 
## repeat.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0000  0.0000  0.6346  0.0000  5.0000    1833 
## 
## stooltexture
## 
## bloody smooth watery 
##      8     17     11 
## 
## daysafteronset
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    3.00    5.50    6.71    8.00   26.00    1847 
## 
## hus
## 
##  n  y 
## 26 12 
## 
## stec_count
## 
##     high      low moderate 
##       17       16        5 
## 
## shigatoxin2elisa
## 
## negative positive 
##       12       26 
## 
## readsmillions
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    7.50   10.97   14.35   22.98   19.57  110.00    1833 
## 
## nonhuman
## 
##   >99     0  1.86    15    19    21    29    31    32    33 33.04    35 
##    24     1     1     2     2     1     1     1     1     1     1     1 
##    36    40    46     6 67.51    69    78     9    90    99 
##     1     1     1     1     1     1     1     1     1     6 
## 
## stec_coverage
## 
##  <1   1  10  11  16  17  19   2  22  29   3  39   4   5   6 619   7   8 
##  11   2   1   2   1   1   1   4   1   1   3   1   3   1   1   1   1   1 
##   9 
##   1 
## 
## stxab_detected
## 
##  n  y 
## 19 29 
## 
## stx_ratio
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.400   1.000   1.000   2.793   2.500  13.000    1870 
## 
## typingdata
## 
##  n  y 
## 14 24 
## 
## c_difficile_frequency
## 
##   0.0006   0.0009   0.0014   0.0036   0.0037    0.004   0.0041   0.0069 
##        1        1        2        1        1        1        1        1 
## negative positive 
##       40        3 
## 
## sampling_day
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0   407.5   505.0   482.9   587.0   954.0    1666 
## 
## dfmp
## 
## dfmp 
##   19 
## 
## mgs_richness
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    33.0   124.0   156.0   154.6   192.0   307.0    1492 
## 
## mgs_profile_matched_sample_pairs
## 
## population
## 
##   matses   norman tunapuco 
##       24       22       12 
## 
## bmi_class
## 
##     healthy       obese  overweight underweight 
##          35           5          10           1 
## 
## X16s_rrna
## 
##  y 
## 58 
## 
## shotgun_metagenome
## 
##  y 
## 58 
## 
## stage
## 
##  discovery    stage_I   stage_II validation 
##        181        145        199         56 
## 
## height
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     147     158     165     164     170     186    1541 
## 
## weight
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   36.00   55.00   62.00   62.92   70.00  121.50    1541 
## 
## diabetic
## 
##   n   y 
## 174 170 
## 
## fbg
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.130   5.050   6.215   7.159   8.785  33.000    1541 
## 
## sbp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    84.0   112.5   124.5   125.1   135.0   210.0    1559 
## 
## dbp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   45.00   70.00   78.00   76.53   80.00  121.00    1559 
## 
## fins
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.600   7.827  12.395  13.047  16.883  34.600    1689 
## 
## fcp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.040   1.100   1.420   1.684   2.360   4.540    1840 
## 
## hbalc
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   4.200   5.400   5.850   6.814   7.500  13.800    1555 
## 
## tg
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.420   1.060   1.480   1.707   2.080   8.710    1549 
## 
## tcho
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.030   4.497   5.040   5.073   5.725   9.110    1549 
## 
## cirrhotic
## 
##   n   y 
## 114 123 
## 
## hbv_related
## 
##   n   y 
## 138  99 
## 
## alcohol_related
## 
##   n   y 
## 203  34 
## 
## other_causes_related
## 
##                                   autoimmune_related 
##                                                    1 
##                                         drug_related 
##                                                    1 
##                            hepatitis_c_virus_related 
##                                                    1 
##                            hepatitis_d_virus_related 
##                                                    5 
##                            hepatitis_e_virus_related 
##                                                    8 
##                hepatolenticular_degeneration_related 
##                                                    2 
##                                                    n 
##                                                  214 
##       primary_biliary_cirrhosis_&_autoimmune_related 
##                                                    1 
##                                  schistosoma_related 
##                                                    2 
## schistosoma?hepatitis_e_virus_and_autoimmune_related 
##                                                    1 
##                schistosoma?hepatitis_e_virus_related 
##                                                    1 
## 
## inr
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.920   1.145   1.280   1.351   1.465   2.350    1762 
## 
## crea
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   30.00   53.00   63.00   65.01   76.00  163.00    1648 
## 
## alb
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   15.20   33.50   43.50   40.62   48.70   57.60    1648 
## 
## tb
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    5.00   12.00   16.00   41.12   30.00  597.00    1648 
## 
## pt
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   11.00   13.45   14.70   15.63   17.05   27.00    1762 
## 
## ascites
## 
## absent   mild  sever 
##     57     37     29 
## 
## he
## 
## grade_1 grade_4    none 
##       2       1     120 
## 
## ctp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   5.000   6.000   7.000   7.691   9.000  14.000    1762 
## 
## meld
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   -6.10    4.15    7.40    8.30   11.95   23.40    1762 
## 
## antivirus
## 
##                    adefovir                   entecavir 
##                           2                          11 
##                  lamivudine         lamivudine_adefovir 
##                           9                          27 
## lamivudine_foscarnet_sodium                        none 
##                           1                          71 
##                 telbivudine 
##                           2 
## 
## beta.blocker
## 
##        none propranolol 
##         111          12 
## 
## camp
## 
## bologna dedauko sengele 
##      11      20       7 
## 
## tnm_stage
## 
## t1n0m0 t2n0m0 t2n1m0 t2n1m1 t3n0m0 t3n0m1 t3n1m0 t3n1m1 t3nxm1 t4n0m0 
##      9      6      1      1      6      1      7      9      1      1 
## t4n0m1 t4n1m0 t4n1m1 
##      1      2      8 
## 
## ajcc_stage
## 
##   i  ii iii  iv 
##  15   7  10  21 
## 
## localization
## 
##     lc  lc/rc     rc rectum  sigma 
##     35      6     29     16      9 
## 
## fobt
## 
## negative positive 
##      121       34 
## 
## wif.1_gene_methylation_test
## 
## negative positive 
##      125       27 
## 
## group
## 
## control     crc 
##      88      53 
## 
## study
## 
##          HMP_2012   KarlssonFH_2013 LeChatelierE_2013   LomanNJ_2013_Hi 
##               147               145               292                44 
##   LomanNJ_2013_Mi    NielsenHB_2014           Obregon         QinJ_2012 
##                 9               396                58               363 
##         QinN_2014    RampelliS_2015      ZellerG_2014 
##               237                38               156

6 Session Info

sessionInfo()

## R Under development (unstable) (2016-10-26 r71594)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X Yosemite 10.10.5
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] curatedMetagenomicData_1.0.0 phyloseq_1.19.0             
##  [3] magrittr_1.5                 ExperimentHubData_1.1.0     
##  [5] AnnotationHubData_1.5.0      futile.logger_1.4.3         
##  [7] GenomicRanges_1.27.2         GenomeInfoDb_1.11.0         
##  [9] IRanges_2.9.0                S4Vectors_0.13.1            
## [11] Biobase_2.35.0               ExperimentHub_1.1.0         
## [13] AnnotationHub_2.7.0          BiocGenerics_0.21.0         
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-128                  bitops_1.0-6                 
##  [3] httr_1.2.1                    tools_3.4.0                  
##  [5] R6_2.2.0                      vegan_2.4-1                  
##  [7] mgcv_1.8-15                   DBI_0.5-1                    
##  [9] colorspace_1.2-7              permute_0.9-4                
## [11] ade4_1.7-4                    curl_2.2                     
## [13] chron_2.3-47                  graph_1.53.0                 
## [15] BiocCheck_1.11.0              formatR_1.4                  
## [17] xml2_1.0.0                    labeling_0.3                 
## [19] rtracklayer_1.35.0            scales_0.4.0                 
## [21] RBGL_1.51.0                   stringr_1.1.0                
## [23] digest_0.6.10                 Rsamtools_1.27.2             
## [25] rmarkdown_1.1                 GEOquery_2.41.0              
## [27] AnnotationForge_1.17.1        XVector_0.15.0               
## [29] rBiopaxParser_2.15.0          htmltools_0.3.5              
## [31] RSQLite_1.0.0                 BiocInstaller_1.25.2         
## [33] shiny_0.14.1                  jsonlite_1.1                 
## [35] BiocParallel_1.9.0            RCurl_1.95-4.8               
## [37] biomformat_1.3.0              Matrix_1.2-7.1               
## [39] Rcpp_0.12.7                   munsell_0.4.3                
## [41] ape_3.5                       stringi_1.1.2                
## [43] yaml_2.1.13                   MASS_7.3-45                  
## [45] SummarizedExperiment_1.5.1    zlibbioc_1.21.0              
## [47] rhdf5_2.19.0                  biocViews_1.43.1             
## [49] plyr_1.8.4                    grid_3.4.0                   
## [51] lattice_0.20-34               Biostrings_2.43.0            
## [53] splines_3.4.0                 multtest_2.31.0              
## [55] GenomicFeatures_1.27.0        knitr_1.14                   
## [57] igraph_1.0.1                  optparse_1.3.2               
## [59] RUnit_0.4.31                  reshape2_1.4.2               
## [61] codetools_0.2-15              biomaRt_2.31.1               
## [63] futile.options_1.0.0          XML_3.98-1.4                 
## [65] evaluate_0.10                 lambda.r_1.1.9               
## [67] data.table_1.9.6              httpuv_1.3.3                 
## [69] foreach_1.4.3                 gtable_0.2.0                 
## [71] getopt_1.20.0                 assertthat_0.1               
## [73] ggplot2_2.1.0                 mime_0.5                     
## [75] xtable_1.8-2                  survival_2.39-5              
## [77] tibble_1.2                    OrganismDbi_1.17.1           
## [79] iterators_1.0.8               GenomicAlignments_1.11.0     
## [81] AnnotationDbi_1.37.0          cluster_2.0.5                
## [83] interactiveDisplayBase_1.13.0

Clustering of All Gut Samples using Taxonomic Abundance

Levi Waldron

October 29, 2016

1 Create datasets

2 Merging into one big `ExpressionSet`

3 Convert to `phyloseq` object

4 Bray-Curtis distance ordination plot

4.1 Using everything

4.2 With species + strain only

5 Ugly summary statistics

6 Session Info

Clustering of All Gut Samples using Taxonomic Abundance

Levi Waldron

October 29, 2016

1 Create datasets

2 Merging into one big ExpressionSet

3 Convert to phyloseq object

4 Bray-Curtis distance ordination plot

4.1 Using everything

4.2 With species + strain only

5 Ugly summary statistics

6 Session Info

2 Merging into one big `ExpressionSet`

3 Convert to `phyloseq` object