R amplicon analysis part 2

phyloseq

Install and load packages

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(phyloseq)
library(ape)
library(genefilter)

## 
## Attaching package: 'genefilter'
## 
## The following object is masked from 'package:readr':
## 
##     spec

library(ggplot2)

Import data to R

# Import Count table. Skip first row of tsv file, which is just some text
asv_counts <- read_tsv(file="~/export/table.tsv")

## Rows: 193 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (1): OTU ID
## dbl (11): RACEKX3, SANPEDRO, STOD4, BALK10C, ITA01A, IZR, IZRMEDIA, KIEL9, N...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# And specify that the first column of data are rownames
asv_counts <- column_to_rownames(asv_counts, var = colnames(asv_counts)[1])
# Import taxonomy of ASVs
taxonomy <- read_tsv(file="~/export/taxonomy.tsv")

## Rows: 193 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): Feature ID, Taxon
## dbl (1): Confidence
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Import tree file 
#tree = read_tree("~/export/tree.nwk")
# Import fasta
asv_fasta <- read.FASTA(file = "~/export/dna-sequences.fasta")
#Import metadata file
sample_info_tab <- read_tsv("~/export/metadata_more_info.tsv")

## Rows: 11 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): sample-ID, type, clade
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Also delete the row with the QIIME2 category codes
#sample_info_tab <- sample_info_tab[-1,]

head(taxonomy)

## # A tibble: 6 × 3
##   `Feature ID`                     Taxon                                 Confi…¹
##   <chr>                            <chr>                                   <dbl>
## 1 000c465694b030b575b0399f2c5fdd29 d__Archaea; p__Halobacterota; c__Met…   0.999
## 2 00af279a7dcd6d0cab8f388df82f2075 d__Archaea; p__Asgardarchaeota; c__O…   0.879
## 3 015c2c21f0b0e140796b2d0a2fa72080 d__Archaea; p__Halobacterota; c__Met…   0.944
## 4 020bf741caada7c0a497b09e997463e2 d__Archaea; p__Halobacterota; c__Met…   0.757
## 5 02c6990d44eaa5a670e6d9b15d02501d d__Archaea; p__Nanoarchaeota; c__Nan…   0.954
## 6 02d8c10ed712b0f49cc734646de936f9 d__Archaea; p__Crenarchaeota; c__Nit…   0.995
## # … with abbreviated variable name ¹Confidence

Taxonomy table, split the taxonomy in columns

taxonomy <-  taxonomy %>%
  separate(Taxon, into=c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus","Species"), sep=";") %>%
select (-Confidence) %>% #delete the column Confidence
 column_to_rownames(var = 'Feature ID') # specify that the "Feature ID" column of data are rownames

## Warning: Expected 7 pieces. Missing pieces filled with `NA` in 124 rows [1, 4,
## 6, 7, 10, 11, 12, 13, 16, 17, 20, 21, 22, 25, 27, 28, 30, 32, 33, 36, ...].

head(taxonomy)

##                                     Kingdom              Phylum
## 000c465694b030b575b0399f2c5fdd29 d__Archaea    p__Halobacterota
## 00af279a7dcd6d0cab8f388df82f2075 d__Archaea  p__Asgardarchaeota
## 015c2c21f0b0e140796b2d0a2fa72080 d__Archaea    p__Halobacterota
## 020bf741caada7c0a497b09e997463e2 d__Archaea    p__Halobacterota
## 02c6990d44eaa5a670e6d9b15d02501d d__Archaea    p__Nanoarchaeota
## 02d8c10ed712b0f49cc734646de936f9 d__Archaea    p__Crenarchaeota
##                                                Class                  Order
## 000c465694b030b575b0399f2c5fdd29  c__Methanomicrobia  o__Methanomicrobiales
## 00af279a7dcd6d0cab8f388df82f2075     c__Odinarchaeia        o__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080  c__Methanomicrobia  o__Methanomicrobiales
## 020bf741caada7c0a497b09e997463e2  c__Methanomicrobia  o__Methanomicrobiales
## 02c6990d44eaa5a670e6d9b15d02501d     c__Nanoarchaeia     o__Woesearchaeales
## 02d8c10ed712b0f49cc734646de936f9  c__Nitrososphaeria   o__Nitrososphaerales
##                                                   Family
## 000c465694b030b575b0399f2c5fdd29  f__Methanospirillaceae
## 00af279a7dcd6d0cab8f388df82f2075         f__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080    f__Methanoregulaceae
## 020bf741caada7c0a497b09e997463e2    f__Methanoregulaceae
## 02c6990d44eaa5a670e6d9b15d02501d         f__CG1-02-57-44
## 02d8c10ed712b0f49cc734646de936f9   f__Nitrososphaeraceae
##                                                         Genus
## 000c465694b030b575b0399f2c5fdd29          g__Methanospirillum
## 00af279a7dcd6d0cab8f388df82f2075              g__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080             g__Methanoregula
## 020bf741caada7c0a497b09e997463e2              g__Methanolinea
## 02c6990d44eaa5a670e6d9b15d02501d              g__CG1-02-57-44
## 02d8c10ed712b0f49cc734646de936f9  g__Candidatus_Nitrocosmicus
##                                                       Species
## 000c465694b030b575b0399f2c5fdd29                         <NA>
## 00af279a7dcd6d0cab8f388df82f2075         s__uncultured_marine
## 015c2c21f0b0e140796b2d0a2fa72080   s__Methanoregula_formicica
## 020bf741caada7c0a497b09e997463e2                         <NA>
## 02c6990d44eaa5a670e6d9b15d02501d  s__uncultured_euryarchaeote
## 02d8c10ed712b0f49cc734646de936f9                         <NA>

Import data as phyloseq object

making phyloseq objects out of each of our input data tables (in the last tutorial, I imported the tree using phyloseq so it is already a phyloseq object)

ASV =   otu_table(data.frame(asv_counts), taxa_are_rows = TRUE)
TAX =   tax_table(as.matrix(taxonomy))
META    =   sample_data(data.frame(sample_info_tab, row.names = sample_info_tab$`sample-ID`))

The taxonomy in the taxonomy table is retained in one column and the different levels are separated by underscore, eg:

head(taxonomy)

##                                     Kingdom              Phylum
## 000c465694b030b575b0399f2c5fdd29 d__Archaea    p__Halobacterota
## 00af279a7dcd6d0cab8f388df82f2075 d__Archaea  p__Asgardarchaeota
## 015c2c21f0b0e140796b2d0a2fa72080 d__Archaea    p__Halobacterota
## 020bf741caada7c0a497b09e997463e2 d__Archaea    p__Halobacterota
## 02c6990d44eaa5a670e6d9b15d02501d d__Archaea    p__Nanoarchaeota
## 02d8c10ed712b0f49cc734646de936f9 d__Archaea    p__Crenarchaeota
##                                                Class                  Order
## 000c465694b030b575b0399f2c5fdd29  c__Methanomicrobia  o__Methanomicrobiales
## 00af279a7dcd6d0cab8f388df82f2075     c__Odinarchaeia        o__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080  c__Methanomicrobia  o__Methanomicrobiales
## 020bf741caada7c0a497b09e997463e2  c__Methanomicrobia  o__Methanomicrobiales
## 02c6990d44eaa5a670e6d9b15d02501d     c__Nanoarchaeia     o__Woesearchaeales
## 02d8c10ed712b0f49cc734646de936f9  c__Nitrososphaeria   o__Nitrososphaerales
##                                                   Family
## 000c465694b030b575b0399f2c5fdd29  f__Methanospirillaceae
## 00af279a7dcd6d0cab8f388df82f2075         f__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080    f__Methanoregulaceae
## 020bf741caada7c0a497b09e997463e2    f__Methanoregulaceae
## 02c6990d44eaa5a670e6d9b15d02501d         f__CG1-02-57-44
## 02d8c10ed712b0f49cc734646de936f9   f__Nitrososphaeraceae
##                                                         Genus
## 000c465694b030b575b0399f2c5fdd29          g__Methanospirillum
## 00af279a7dcd6d0cab8f388df82f2075              g__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080             g__Methanoregula
## 020bf741caada7c0a497b09e997463e2              g__Methanolinea
## 02c6990d44eaa5a670e6d9b15d02501d              g__CG1-02-57-44
## 02d8c10ed712b0f49cc734646de936f9  g__Candidatus_Nitrocosmicus
##                                                       Species
## 000c465694b030b575b0399f2c5fdd29                         <NA>
## 00af279a7dcd6d0cab8f388df82f2075         s__uncultured_marine
## 015c2c21f0b0e140796b2d0a2fa72080   s__Methanoregula_formicica
## 020bf741caada7c0a497b09e997463e2                         <NA>
## 02c6990d44eaa5a670e6d9b15d02501d  s__uncultured_euryarchaeote
## 02d8c10ed712b0f49cc734646de936f9                         <NA>

First check that the inputs are in compatible formats by checking for ASV names with the phyloseq function, taxa_names

head(taxa_names(TAX))

## [1] "000c465694b030b575b0399f2c5fdd29" "00af279a7dcd6d0cab8f388df82f2075"
## [3] "015c2c21f0b0e140796b2d0a2fa72080" "020bf741caada7c0a497b09e997463e2"
## [5] "02c6990d44eaa5a670e6d9b15d02501d" "02d8c10ed712b0f49cc734646de936f9"

head(taxa_names(ASV))

## [1] "000c465694b030b575b0399f2c5fdd29" "00af279a7dcd6d0cab8f388df82f2075"
## [3] "015c2c21f0b0e140796b2d0a2fa72080" "020bf741caada7c0a497b09e997463e2"
## [5] "02c6990d44eaa5a670e6d9b15d02501d" "02d8c10ed712b0f49cc734646de936f9"

And check sample names were also detected

head(sample_names(ASV))

## [1] "RACEKX3"  "SANPEDRO" "STOD4"    "BALK10C"  "ITA01A"   "IZR"

head(sample_names(META))

## [1] "RACEKX3"  "STOD4"    "SANPEDRO" "KIEL9"    "IZR"      "BALK10C"

Make one phyloseq object, which contains all 4 objects:

ps <- phyloseq(ASV, TAX,    META)

Check some features of the phyloseq object

rank_names(ps)

## [1] "Kingdom" "Phylum"  "Class"   "Order"   "Family"  "Genus"   "Species"

unique(tax_table(ps)[, "Kingdom"])

## Taxonomy Table:     [3 taxa by 1 taxonomic ranks]:
##                                  Kingdom       
## 000c465694b030b575b0399f2c5fdd29 "d__Archaea"  
## 278a7abeb2fa129cd7e5a4dd13bde4cc "d__Eukaryota"
## 304d7ca27eeb601ca2061dbc9e221080 "Unassigned"

table(tax_table(ps)[, "Kingdom"], exclude = NULL)

## 
##   d__Archaea d__Eukaryota   Unassigned 
##          191            1            1

remove Eukaryota and Unassigned (and Bacteria I only need Archaea)

ps <- subset_taxa(ps, !is.na(Kingdom) & !Kingdom %in% c("Unassigned", "d__Eukaryota"))
table(tax_table(ps)[, "Kingdom"], exclude = NULL)

## 
## d__Archaea 
##        191

now I have only archaeal sequences in my data

Filtering out NA taxa

check for NA or low abundant Phyla

table(tax_table(ps)[, "Phylum"], exclude = NULL)

## 
##     p__Altiarchaeota   p__Asgardarchaeota     p__Crenarchaeota 
##                    3                    8                   28 
##     p__Euryarchaeota     p__Halobacterota     p__Micrarchaeota 
##                    8                   75                    4 
##     p__Nanoarchaeota  p__Thermoplasmatota                 <NA> 
##                   43                   13                    9

Filter out NA Phyla

ps <- subset_taxa(ps, !is.na(Phylum) & !Phylum %in% c(""))
table(tax_table(ps)[, "Phylum"], exclude = NULL)

## 
##     p__Altiarchaeota   p__Asgardarchaeota     p__Crenarchaeota 
##                    3                    8                   28 
##     p__Euryarchaeota     p__Halobacterota     p__Micrarchaeota 
##                    8                   75                    4 
##     p__Nanoarchaeota  p__Thermoplasmatota 
##                   43                   13

check for NA or low abundant Classes

table(tax_table(ps)[, "Class"], exclude = NULL)

## 
##     c__Altiarchaeia    c__Bathyarchaeia     c__Halobacteria     c__Lokiarchaeia 
##                   3                  16                   1                   4 
##  c__Methanobacteria    c__Methanocellia  c__Methanomicrobia  c__Methanosarcinia 
##                   7                   2                  45                  27 
##     c__Micrarchaeia     c__Nanoarchaeia  c__Nitrososphaeria     c__Odinarchaeia 
##                   4                  43                  12                   1 
##      c__Thermococci   c__Thermoplasmata                <NA> 
##                   1                  13                   3

Remove NA Classes

ps <- subset_taxa(ps, !is.na(Class) & !Class %in% c(""))
table(tax_table(ps)[, "Class"], exclude = NULL)

## 
##     c__Altiarchaeia    c__Bathyarchaeia     c__Halobacteria     c__Lokiarchaeia 
##                   3                  16                   1                   4 
##  c__Methanobacteria    c__Methanocellia  c__Methanomicrobia  c__Methanosarcinia 
##                   7                   2                  45                  27 
##     c__Micrarchaeia     c__Nanoarchaeia  c__Nitrososphaeria     c__Odinarchaeia 
##                   4                  43                  12                   1 
##      c__Thermococci   c__Thermoplasmata 
##                   1                  13

Check for NA in Order

table(tax_table(ps)[, "Order"], exclude = NULL)

## 
##           o__Altiarchaeales            o__Bathyarchaeia 
##                           3                          16 
##           o__Halobacterales             o__Lokiarchaeia 
##                           1                           4 
##       o__Methanobacteriales          o__Methanocellales 
##                           7                           2 
##     o__Methanofastidiosales  o__Methanomassiliicoccales 
##                           1                          13 
##       o__Methanomicrobiales       o__Methanosarciniales 
##                          45                          27 
##           o__Micrarchaeales         o__Nitrosopumilales 
##                           4                           8 
##        o__Nitrososphaerales             o__Odinarchaeia 
##                           4                           1 
##          o__Woesearchaeales 
##                          43

Check NA Family

table(tax_table(ps)[, "Family"], exclude = NULL)

## 
##           f__Altiarchaeaceae             f__Bathyarchaeia 
##                            3                           16 
##              f__CG1-02-32-21              f__CG1-02-57-44 
##                            1                            3 
##                    f__GW2011            f__Haloferacaceae 
##                            2                            1 
##              f__Lokiarchaeia       f__Methanobacteriaceae 
##                            4                            7 
##          f__Methanocellaceae     f__Methanofastidiosaceae 
##                            2                            1 
##  f__Methanomassiliicoccaceae   f__Methanomethylophilaceae 
##                           11                            2 
##        f__Methanomicrobiales         f__Methanoregulaceae 
##                            1                           33 
##          f__Methanosaetaceae        f__Methanosarcinaceae 
##                           10                           17 
##       f__Methanospirillaceae            f__Micrarchaeales 
##                            9                            3 
##         f__Nitrosopumilaceae        f__Nitrososphaeraceae 
##                            8                            4 
##              f__Odinarchaeia           f__Woesearchaeales 
##                            1                           35 
##                         <NA> 
##                            5

Filter out NA Family

ps <- subset_taxa(ps, !is.na(Family) & !Family %in% c(""))
table(tax_table(ps)[, "Family"], exclude = NULL)

## 
##           f__Altiarchaeaceae             f__Bathyarchaeia 
##                            3                           16 
##              f__CG1-02-32-21              f__CG1-02-57-44 
##                            1                            3 
##                    f__GW2011            f__Haloferacaceae 
##                            2                            1 
##              f__Lokiarchaeia       f__Methanobacteriaceae 
##                            4                            7 
##          f__Methanocellaceae     f__Methanofastidiosaceae 
##                            2                            1 
##  f__Methanomassiliicoccaceae   f__Methanomethylophilaceae 
##                           11                            2 
##        f__Methanomicrobiales         f__Methanoregulaceae 
##                            1                           33 
##          f__Methanosaetaceae        f__Methanosarcinaceae 
##                           10                           17 
##       f__Methanospirillaceae            f__Micrarchaeales 
##                            9                            3 
##         f__Nitrosopumilaceae        f__Nitrososphaeraceae 
##                            8                            4 
##              f__Odinarchaeia           f__Woesearchaeales 
##                            1                           35

Check NA Genus

table(tax_table(ps)[, "Genus"], exclude = NULL)

## 
##                           g__AR15                  g__Bathyarchaeia 
##                                 2                                16 
##        g__Candidatus_Altiarchaeum  g__Candidatus_Methanofastidiosum 
##                                 3                                 1 
##       g__Candidatus_Nitrocosmicus      g__Candidatus_Nitrosopumilus 
##                                 2                                 4 
##       g__Candidatus_Nitrosotenuis                   g__CG1-02-32-21 
##                                 1                                 1 
##                   g__CG1-02-57-44                     g__Halogranum 
##                                 3                                 1 
##                   g__Lokiarchaeia               g__Methanobacterium 
##                                 4                                 7 
##               g__Methanococcoides                   g__Methanolinea 
##                                 1                                 2 
##                   g__Methanolobus          g__Methanomassiliicoccus 
##                                 5                                 2 
##           g__Methanomethylovorans                  g__Methanoregula 
##                                 6                                31 
##                   g__Methanosaeta                 g__Methanosarcina 
##                                10                                 5 
##               g__Methanospirillum                 g__Micrarchaeales 
##                                 9                                 3 
##              g__Nitrosopumilaceae             g__Nitrososphaeraceae 
##                                 2                                 2 
##                   g__Odinarchaeia                 g__Rice_Cluster_I 
##                                 1                                 1 
##                     g__uncultured                g__Woesearchaeales 
##                                10                                35 
##                              <NA> 
##                                 4

Filter out NA Genus

ps <- subset_taxa(ps, !is.na(Genus) & !Genus %in% c(""))
table(tax_table(ps)[, "Genus"], exclude = NULL)

## 
##                           g__AR15                  g__Bathyarchaeia 
##                                 2                                16 
##        g__Candidatus_Altiarchaeum  g__Candidatus_Methanofastidiosum 
##                                 3                                 1 
##       g__Candidatus_Nitrocosmicus      g__Candidatus_Nitrosopumilus 
##                                 2                                 4 
##       g__Candidatus_Nitrosotenuis                   g__CG1-02-32-21 
##                                 1                                 1 
##                   g__CG1-02-57-44                     g__Halogranum 
##                                 3                                 1 
##                   g__Lokiarchaeia               g__Methanobacterium 
##                                 4                                 7 
##               g__Methanococcoides                   g__Methanolinea 
##                                 1                                 2 
##                   g__Methanolobus          g__Methanomassiliicoccus 
##                                 5                                 2 
##           g__Methanomethylovorans                  g__Methanoregula 
##                                 6                                31 
##                   g__Methanosaeta                 g__Methanosarcina 
##                                10                                 5 
##               g__Methanospirillum                 g__Micrarchaeales 
##                                 9                                 3 
##              g__Nitrosopumilaceae             g__Nitrososphaeraceae 
##                                 2                                 2 
##                   g__Odinarchaeia                 g__Rice_Cluster_I 
##                                 1                                 1 
##                     g__uncultured                g__Woesearchaeales 
##                                10                                35

or you can rename not filter out NA taxonomic classification

If there are other NA’s in you taxonomy table then do this

tax_table(ps)[is.na(tax_table(ps)[,5])] <- "f__"
tax_table(ps)[is.na(tax_table(ps)[,6])] <- "g__"

# Then you can convert those unclassified such as p__ without phylum name to this or skip this part
tax_table(ps)[tax_table(ps)[,"Family"]== "f__", "Family"] <- "Unknown_Family"
tax_table(ps)[tax_table(ps)[,"Genus"]== "g__", "Genus" ] <- "Unknown_Genus"

Check the filtered tax table

View(ps@tax_table)

Make relative abundances

phyloseq_relative <- transform_sample_counts(ps, function(x) x / sum(x))

Filter based on abundance

flista<-filterfun(kOverA(1,0.15))
a <- filter_taxa(phyloseq_relative, flista)
filtered_phyloseq <- prune_taxa(a,ps)
View(filtered_phyloseq@tax_table)

Filter based on taxonomy

only include Methanobacteriales, Methanomicrobiales etc. to leave only symbionts

filtered_phyloseq_methanogens <- subset_taxa(filtered_phyloseq, Order==“Methanomicrobiales” | Order==“Methanobacteriales” | Order==“Methanosarciniales”) View(filtered_phyloseq_methanogens@tax_table)


rename and clean the taxonomy table after filtering


```r
#clean up taxa table
tax_table(ps)[,colnames(tax_table(ps))] <- gsub(tax_table(ps)[,colnames(tax_table(ps))],pattern="g__",replacement="")
tax_table(ps)[,colnames(tax_table(ps))] <- gsub(tax_table(ps)[,colnames(tax_table(ps))],pattern="f__",replacement="")
tax_table(ps)[,colnames(tax_table(ps))] <- gsub(tax_table(ps)[,colnames(tax_table(ps))],pattern="o__",replacement="")
tax_table(ps)[,colnames(tax_table(ps))] <- gsub(tax_table(ps)[,colnames(tax_table(ps))],pattern="c__",replacement="")
tax_table(ps)[,colnames(tax_table(ps))] <- gsub(tax_table(ps)[,colnames(tax_table(ps))],pattern="p__",replacement="")
tax_table(ps)[,colnames(tax_table(ps))] <- gsub(tax_table(ps)[,colnames(tax_table(ps))],pattern="d__",replacement="")

check the taxonomy table

View(ps@tax_table)

after all the filtering, rename the ASVs

rownames(filtered_phyloseq@otu_table) <- 
  case_when(rownames(filtered_phyloseq@otu_table)=="015c2c21f0b0e140796b2d0a2fa72080"  ~  "Methanoregula formicica",
        rownames(filtered_phyloseq@otu_table)=="0eb6233014f786a4a526856e4a18324a"  ~  "Methanobacterium ASV 1",
            rownames(filtered_phyloseq@otu_table)=="1d74bcf4e7d652412f786ee2d65a4a0f"  ~  "Methanosaeta ASV 1",
        rownames(filtered_phyloseq@otu_table)=="2235b1f103e24aa46eebb31db8e2428d"  ~  "Methanoregula ASV 1",
        rownames(filtered_phyloseq@otu_table)=="61cbae61dd0773faa39783c90c1f3fe6"  ~  "Methanobacteriales",
            rownames(filtered_phyloseq@otu_table)=="655dba046ff74bc0d4200bc5730eb407"  ~  "Methanoregula ASV 2",
            rownames(filtered_phyloseq@otu_table)=="a90aa8573c921a842c3022f35c9f794e"  ~  "Methanoregula ASV 3",
        rownames(filtered_phyloseq@otu_table)=="aa0c6b48066ab0ba40dddf93359c2b18"  ~  "Methanobacterium ASV 2",
        rownames(filtered_phyloseq@otu_table)=="acdd6e53ca593f84c5ba3ebf3221acd7"  ~  "Methanoregula ASV 4",
            rownames(filtered_phyloseq@otu_table)=="b83a52fc891dbd06476c103fcb667b6b"  ~  "Methanobacterium ASV 3",
            rownames(filtered_phyloseq@otu_table)=="c6beaabd9c67491b6914cdd516379e95"  ~  "Methanoregula ASV 5",
            rownames(filtered_phyloseq@otu_table)=="e6d1121872022926ff0d1ac375ebb6f5"  ~  "Methanolobus bombayensis",
            rownames(filtered_phyloseq@otu_table)=="e7146984aeb97f06a4d5089412447d36"  ~  "Methanomethylovorans ASV 1",
        rownames(filtered_phyloseq@otu_table)=="ead6478a925b701c77a9cace2f4924ba"  ~  "Methanomethylovorans ASV 2",

            TRUE ~ rownames(filtered_phyloseq@otu_table))

# now figure out how to get a column in the taxonomy file called "symbiont" which only has those IDs in it (but does not have 
# anything for the features which are not labelled). probably have to do this manually 
# import back
count_table<-read_tsv(file="~/export/table_only_symbionts.tsv")

## Rows: 14 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (1): OTU_ID
## dbl (11): RACEKX3, SANPEDRO, STOD4, BALK10C, ITA01A, IZR, IZRMEDIA, KIEL9, N...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

count_table<-column_to_rownames(count_table, var=colnames(count_table)[1])
View(count_table)
taxonomy <- read_tsv(file="~/export/taxonomy_symbiont_ASV2.tsv")

## Rows: 193 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): Feature ID, Taxon, Type
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

View(taxonomy)
asv_fasta <- read.FASTA(file="~/export/dna-sequences.fasta")
sample_info_tab<-read_tsv(file="~/export/metadata_more_info.tsv")

## Rows: 11 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): sample-ID, type, clade
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

taxonomy <-  taxonomy %>%
  separate(Taxon, into=c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus","Species"), sep=";") %>%
 column_to_rownames(var = 'Feature ID') # specify that the "Feature ID" column of data are rownames

## Warning: Expected 7 pieces. Missing pieces filled with `NA` in 124 rows [1, 4,
## 6, 7, 10, 11, 12, 13, 16, 17, 20, 21, 22, 25, 27, 28, 30, 32, 33, 36, ...].

head(taxonomy)

##                                     Kingdom              Phylum
## 000c465694b030b575b0399f2c5fdd29 d__Archaea    p__Halobacterota
## 00af279a7dcd6d0cab8f388df82f2075 d__Archaea  p__Asgardarchaeota
## 015c2c21f0b0e140796b2d0a2fa72080 d__Archaea    p__Halobacterota
## 020bf741caada7c0a497b09e997463e2 d__Archaea    p__Halobacterota
## 02c6990d44eaa5a670e6d9b15d02501d d__Archaea    p__Nanoarchaeota
## 02d8c10ed712b0f49cc734646de936f9 d__Archaea    p__Crenarchaeota
##                                                Class                  Order
## 000c465694b030b575b0399f2c5fdd29  c__Methanomicrobia  o__Methanomicrobiales
## 00af279a7dcd6d0cab8f388df82f2075     c__Odinarchaeia        o__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080  c__Methanomicrobia  o__Methanomicrobiales
## 020bf741caada7c0a497b09e997463e2  c__Methanomicrobia  o__Methanomicrobiales
## 02c6990d44eaa5a670e6d9b15d02501d     c__Nanoarchaeia     o__Woesearchaeales
## 02d8c10ed712b0f49cc734646de936f9  c__Nitrososphaeria   o__Nitrososphaerales
##                                                   Family
## 000c465694b030b575b0399f2c5fdd29  f__Methanospirillaceae
## 00af279a7dcd6d0cab8f388df82f2075         f__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080    f__Methanoregulaceae
## 020bf741caada7c0a497b09e997463e2    f__Methanoregulaceae
## 02c6990d44eaa5a670e6d9b15d02501d         f__CG1-02-57-44
## 02d8c10ed712b0f49cc734646de936f9   f__Nitrososphaeraceae
##                                                         Genus
## 000c465694b030b575b0399f2c5fdd29          g__Methanospirillum
## 00af279a7dcd6d0cab8f388df82f2075              g__Odinarchaeia
## 015c2c21f0b0e140796b2d0a2fa72080             g__Methanoregula
## 020bf741caada7c0a497b09e997463e2              g__Methanolinea
## 02c6990d44eaa5a670e6d9b15d02501d              g__CG1-02-57-44
## 02d8c10ed712b0f49cc734646de936f9  g__Candidatus_Nitrocosmicus
##                                                       Species     Type
## 000c465694b030b575b0399f2c5fdd29                         <NA>     <NA>
## 00af279a7dcd6d0cab8f388df82f2075         s__uncultured_marine     <NA>
## 015c2c21f0b0e140796b2d0a2fa72080   s__Methanoregula_formicica symbiont
## 020bf741caada7c0a497b09e997463e2                         <NA>     <NA>
## 02c6990d44eaa5a670e6d9b15d02501d  s__uncultured_euryarchaeote     <NA>
## 02d8c10ed712b0f49cc734646de936f9                         <NA>     <NA>

ASV = otu_table(data.frame(count_table), taxa_are_rows=TRUE)
TAX = tax_table(as.matrix(taxonomy))
META= sample_data(data.frame(sample_info_tab, row.names = sample_info_tab$'sample-ID'))
# make phyloseq object
ps2 <- phyloseq(ASV, TAX, META)
ps2_rel <- transform_sample_counts(ps, function(x) x / sum(x))

#clean up taxa table
tax_table(ps2)[,colnames(tax_table(ps2))] <- gsub(tax_table(ps2)[,colnames(tax_table(ps2))],pattern="g__",replacement="")
tax_table(ps2)[,colnames(tax_table(ps2))] <- gsub(tax_table(ps2)[,colnames(tax_table(ps2))],pattern="f__",replacement="")
tax_table(ps2)[,colnames(tax_table(ps2))] <- gsub(tax_table(ps2)[,colnames(tax_table(ps2))],pattern="o__",replacement="")
tax_table(ps2)[,colnames(tax_table(ps2))] <- gsub(tax_table(ps2)[,colnames(tax_table(ps2))],pattern="c__",replacement="")
tax_table(ps2)[,colnames(tax_table(ps2))] <- gsub(tax_table(ps2)[,colnames(tax_table(ps2))],pattern="p__",replacement="")
tax_table(ps2)[,colnames(tax_table(ps2))] <- gsub(tax_table(ps2)[,colnames(tax_table(ps2))],pattern="d__",replacement="")

# transform to relative abundance
ra_ps2 <- transform_sample_counts(ps2, function(x) x / sum(x))

plot abundances and relative abundances of all samples

# barplot
plot_bar(ps2,fill="Genus")

plot_bar(ra_ps2,fill="Genus", facet_grid=~type) + scale_fill_brewer(palette = "Paired")

plot_bar(ra_ps2,fill="Genus")+facet_grid(rows="type",scales="free_y", space="free")+coord_flip()+scale_fill_brewer(palette="Paired")

plot relative ASV abundances, group by clades

plot_bar(ra_ps2, fill = "Genus")+
  facet_grid(rows="clade",scales="free_y",space="free")+coord_flip()+theme(strip.text.y=element_text(angle=360))+
  ylab("Relative abundance") + theme(axis.title = element_text(size=10, face="bold")) +
  xlab("Clade") +
  theme(axis.text.x = element_blank(), axis.text.y = element_text(size=5)) + 
  theme(legend.text = element_text(size = 7)) + theme(legend.title = element_text(size = 10, face="bold")) + 
  theme(strip.text.x = element_text(size = 5, face="bold")) + 
  theme(legend.key = element_rect(size = 0.4)) + theme(legend.position="bottom")+scale_fill_brewer(palette="Paired")

## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.

plot relative abundances, group by media/symbiont sample

plot_bar(ra_ps2, fill = "Genus")+
  facet_grid(rows="type",scales="free_y",space="free")+coord_flip()+
  ylab("Relative abundance") + theme(axis.title = element_text(size=10, face="bold")) +
  xlab("Strain") +
  theme(axis.text.x = element_blank(), axis.text.y = element_text(size=7)) + 
  theme(legend.text = element_text(size = 7)) + theme(legend.title = element_text(size = 10, face="bold")) + 
  theme(strip.text.x = element_text(size=7, face="bold")) + 
  theme(legend.key = element_rect(size=5)) + theme(legend.position="bottom")+scale_fill_brewer(palette="Paired")

plot relative abundances, grouped by clade, vertical, different colors

plot_bar(ra_ps2, fill = "Genus")+facet_grid(cols=vars(clade),scales="free_x",space="free")+
  ylab("Relative abundance") + theme(axis.title = element_text(size=12, face="bold")) + xlab("Clade") + theme(axis.text.x = element_text(size=5), axis.text.y = element_text(size=7)) +
  theme(legend.text = element_text(size = 7)) + theme(legend.title = element_text(size = 13, face="bold")) + 
  theme(strip.text.x = element_text(size = 10, face="italic")) + 
  theme(legend.key = element_rect(size = 0.4)) + theme(legend.position="bottom") +
  scale_fill_brewer(palette = "Set2")

let’s check overall how the orders are distributed among samples using phyloseq

# First aglomerate the ASVs at the order level using the phyloseq function, tax_glom
phylumGlommed = tax_glom(ps2, "Order")
# and plot
plot_bar(phylumGlommed, x = "Sample", fill = "Order") +
  scale_fill_brewer(palette = "Set2")

Finally we are ready to try some ordinations

What is an ordination? Ordination is a collective term for multivariate techniques which summarize a multidimensional dataset in such a way that when it is projected onto a low dimensional space, any intrinsic pattern the data may possess becomes apparent upon visual inspection (Pielou, 1984).

The first one to try here will be a principal coordinate analysis (PCoA), PCoA tries to represent the distance between objects by projecting their dissimilarity into a 2- or 3-D (Euclidean) space. We will using the “bray” method (Bray-Curtis) to calculate the dissimilarity matrix. You can build the dissimilarity method directly into the PCoA calculation, but first let’s do it on it’s own to check what the matrix looks like:

ps_dist <- distance(ps2, method = "bray") 
ps_dist

##            RACEKX3  SANPEDRO     STOD4   BALK10C    ITA01A       IZR  IZRMEDIA
## SANPEDRO 0.9958338                                                            
## STOD4    0.7963143 1.0000000                                                  
## BALK10C  1.0000000 1.0000000 1.0000000                                        
## ITA01A   1.0000000 1.0000000 1.0000000 1.0000000                              
## IZR      1.0000000 1.0000000 1.0000000 0.9895343 1.0000000                    
## IZRMEDIA 1.0000000 1.0000000 1.0000000 0.9964066 1.0000000 0.8543534          
## KIEL9    1.0000000 1.0000000 1.0000000 0.9879284 1.0000000 0.4496730 0.9958080
## NA1      1.0000000 1.0000000 1.0000000 0.9608021 1.0000000 0.9965628 0.9882167
## NA1MEDIA 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 0.9361360 0.8220641
## SLOV3A   1.0000000 1.0000000 1.0000000 0.9637462 1.0000000 0.9022039 0.9953330
##              KIEL9       NA1  NA1MEDIA
## SANPEDRO                              
## STOD4                                 
## BALK10C                               
## ITA01A                                
## IZR                                   
## IZRMEDIA                              
## KIEL9                                 
## NA1      1.0000000                    
## NA1MEDIA 0.9408855 0.9893430          
## SLOV3A   0.8741132 0.9977072 0.9765020

Next plot the PCoA

out.pcoa <- ordinate(ps2, method = "PCoA", distance = "bray")
pcoa_plot = plot_ordination(ps2, out.pcoa, color="clade", shape = "type") +
  geom_point(size = 3) 
pcoa_plot

Next, I will demonstrate a weighted UniFrac PCoA. When calculating the distance matrix, this takes into account how many related taxa are shared among each pair of samples using the phylogenetic tree (which we embedded in the phyloseq object). It then performs the PCoA:

out.pcoa <- ordinate(ps, method = “PCoA”, distance = “wunifrac”) wuf_pcoa_plot = plot_ordination(ps, out.pcoa, color =“sample-id”, shape = “type”) + geom_point(size = 3) wuf_pcoa_plot