library(bugSetEnrichment)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

1 Read in signatures

All signatures

full.dat <- importBugSigDB()
## Using cached version from 2021-03-26 15:06:02
## Using cached version from 2021-03-26 15:11:16
## Using cached version from 2021-03-26 15:11:26

2 Afroza Parvin

Extract all signatures by one curator (choosing only those of decreased abundance in group 1 for example). See argument tax.level in ?extractBugSigs to restrict e.g. to genus; default is all taxonomic levels.

my.dat <- full.dat[full.dat$Curator == "Mst Afroza Parvin", ]  # data subset
my.sigs <- extractBugSigs(my.dat, direction = "decreased") # signatures
my.siglengths <- sapply(my.sigs, length) #signature lengths
sort(my.siglengths, decreasing = TRUE)
##  PMID:31976177_2  PMID:29186529_1  PMID:30267022_1  PMID:30366118_1 
##               45               38               21               19 
##  PMID:31226994_2  PMID:31226994_1  PMID:25444008_2  PMID:25444008_3 
##               11               10                9                9 
##  PMID:25444008_4  PMID:29186529_3  PMID:30534614_1  PMID:25444008_1 
##                9                6                6                5 
##  PMID:31006170_1  PMID:31006170_4  PMID:29935270_1  PMID:25444008_9 
##                5                5                5                4 
##  PMID:31006170_2  PMID:30534614_2  PMID:25444008_7  PMID:31006170_3 
##                4                4                3                3 
##  PMID:29554907_1  PMID:29554907_2  PMID:30568265_1  PMID:32143621_2 
##                3                3                3                3 
##  PMID:32429742_1  PMID:25761741_1  PMID:25761741_2  PMID:26971496_1 
##                3                3                3                2 
##  PMID:28789705_1  PMID:28789705_2  PMID:25444008_6 PMID:25444008_10 
##                2                2                2                2 
##  PMID:24819377_1  PMID:32380969_1  PMID:32380969_2  PMID:31976177_1 
##                2                2                2                2 
##  PMID:30568265_2  PMID:30568265_3  PMID:31934770_1  PMID:22546742_1 
##                2                2                2                2 
##  PMID:26745497_1  PMID:29186529_2  PMID:26971496_2  PMID:28789705_3 
##                2                1                1                1 
##  PMID:25444008_5  PMID:25444008_8 PMID:25444008_11  PMID:32012716_1 
##                1                1                1                1 
##  PMID:31337807_1  PMID:31337807_2  PMID:31337807_3  PMID:31337807_4 
##                1                1                1                1 
##  PMID:30042470_1  PMID:28808302_1  PMID:28808302_2  PMID:26551842_1 
##                1                1                1                1 
##  PMID:26551842_2  PMID:26551842_3  PMID:32143621_1  PMID:23631345_1 
##                1                1                1                1 
##  PMID:23459324_1  PMID:23459324_2  PMID:29935270_2 
##                1                1                1

Relevant background data (may want to choose more carefully, this just chooses all signatures of the same body site(s))

relevant.dat <- full.dat[full.dat$`Body site` %in% my.dat$`Body site`, ]
relevant.sigs <- extractBugSigs(my.dat)
## Zero length signatures dropped:PMID:26745497_1

Perform the simulation. This is how frequent the most commonly observed taxon would be expected to be seen by chance when randomly drawing taxa from the “relevant background” (alpha = 0.05).

getCriticalN(relevant.sigs, my.siglengths)
## 95% 
##  11

And compare to the most frequently observed taxa:

frequencySigs(my.sigs)
##        g__Bacteroides f__Bifidobacteriaceae    g__Bifidobacterium 
##                    12                    10                     9 
##     f__Bacteroidaceae f__Porphyromonadaceae     p__Actinobacteria 
##                     7                     6                     5 
##     f__Clostridiaceae        g__Clostridium            g__Blautia 
##                     5                     5                     5 
##       g__Oscillospira 
##                     5

3 Cynthia Anderson

Extract all signatures by one curator (choosing only those of decreased abundance in group 1 for example). See argument tax.level in ?extractBugSigs to restrict e.g. to genus; default is all taxonomic levels.

curator <- "Cynthia Anderson"
bodysites <- c("cervicovaginal secretion", "endocervix", "posterior fornix of vagina", "uterine cervix", "vagina")
my.dat <- full.dat[full.dat$Curator == curator & full.dat$`Body site` %in% bodysites, ]  # data subset
my.sigs <- extractBugSigs(my.dat, direction = "decreased") # signatures
## Zero length signatures dropped:PMID:28860468_2 PMID:23717441_1
my.siglengths <- sapply(my.sigs, length) #signature lengths
sort(my.siglengths, decreasing = TRUE)
##  PMID:29765068_4  PMID:29765068_7  PMID:29765068_3  PMID:33241010_1 
##               23               20               19               19 
##  PMID:29765068_2  PMID:31555603_1  PMID:30986570_2  PMID:29765068_1 
##               16               15               14               12 
##  PMID:30463989_1  PMID:29563617_1  PMID:30986570_1  PMID:32842982_1 
##               11               10                9                9 
##  PMID:32842982_2  PMID:32842982_3  PMID:32842982_4  PMID:29765068_5 
##                9                9                9                8 
##  PMID:29765068_8  PMID:26935422_3  PMID:30782659_3  PMID:26062721_1 
##                8                7                6                6 
##  PMID:29765068_9  PMID:31370796_2  PMID:30782659_2  PMID:32332850_1 
##                6                6                5                5 
##  PMID:29313867_1  PMID:29313867_2  PMID:28860468_1 PMID:29765068_10 
##                5                5                4                4 
##  PMID:31749298_1  PMID:29955075_1  PMID:33241010_2  PMID:29765068_6 
##                4                4                4                3 
##  PMID:31370796_1  PMID:30782659_1  PMID:26574055_1  PMID:32214382_1 
##                3                2                2                2 
##  PMID:32214382_2  PMID:30463989_3  PMID:30463989_4  PMID:26935422_1 
##                2                2                2                2 
##  PMID:30405584_1  PMID:30841606_1  PMID:30841606_2  PMID:30463989_2 
##                2                2                2                1 
##  PMID:30463989_5  PMID:23717441_2  PMID:26935422_2  PMID:30405584_2 
##                1                1                1                1 
##  PMID:30405584_3  PMID:30405584_4 
##                1                1

Relevant background data (may want to choose more carefully, this just chooses all signatures of the same body site(s))

relevant.dat <- full.dat[full.dat$`Body site` %in% my.dat$`Body site`, ]
relevant.sigs <- extractBugSigs(my.dat)
## Zero length signatures dropped:PMID:28860468_4 PMID:23717441_2

Perform the simulation. This is how frequent the most commonly observed taxon would be expected to be seen by chance when randomly drawing taxa from the “relevant background” (alpha = 0.05).

getCriticalN(relevant.sigs, my.siglengths)
## 95% 
##  12

And compare to the most frequently observed taxa:

frequencySigs(my.sigs)
##            g__Lactobacillus                  c__Bacilli 
##                          16                          10 
##          o__Lactobacillales         f__Lactobacillaceae 
##                          10                           9 
##               p__Firmicutes s__Peptoniphilus grossensis 
##                           8                           7 
##          g__Bifidobacterium    s__Lactobacillus gasseri 
##                           6                           6 
##         o__Coriobacteriales        f__Coriobacteriaceae 
##                           5                           5