Contents

1 Intro

Develop and evaluate the prediction models for recent antibiotics usage
Researchers often exclude participants with recent antibiotic use, but this may introduce selection bias by excluding the least healthy individuals. We hypothesize that antibiotic usage can be detected from the current microbiome status using the prediction models trained on microbiome datasets with recent antibiotics exposure. We will construct the prediction models and test how well they can infer probable antibiotic exposure from the gut microbiome. Using prediction models from this aim, researchers can stratify analyses or include participants who may have otherwise been excluded from the study.

1.1 Load packages

SIAMCAT is used for this analysis.

suppressPackageStartupMessages({
    library(curatedMetagenomicData)
    library(dplyr)
    library(mia)
    library(SIAMCAT)
    library(stringr)
    library(ggplot2)
    library(microbiomeABXR21) # temporary package for EDA
})

1.2 Script

This script requires studyName and abx inputs.

siamcat_cmd <- system.file("scripts/siamcat_cmd.R", 
                           package = "microbiomeABXR21")

2 Modeling

studyNames <- c("VincentC_2016", "RaymondF_2016")

## Metadata
meta <- curatedMetagenomicData::sampleMetadata
submeta <- meta %>% 
    filter(study_name == studyNames[1] | study_name == studyNames[2])

Prepare allData object

## Download data
allData <- returnSamples(submeta, "relative_abundance")
colData(allData)$study_name %>% table() # summarize the number of samples
## .
## RaymondF_2016 VincentC_2016 
##            72           229
assay(allData) <- assay(allData)/100 # percentage to decimals

## Clean ABX info
colData(allData)$abx <- colData(allData)$antibiotics_family
colData(allData)$abx[which(is.na(colData(allData)$abx))] <- "control"
abx_ls <- c("carbapenems", "cephalosporins", "fluoroquinolones",
            "glycopeptides", "macrolides", "nitroimidazoles",
            "penicillin", "sulfonamides")
for (abx_name in abx_ls) {
    ind <- which(colData(allData)$antibiotics_family %>% str_detect(., abx_name))
    colData(allData)$abx[ind] <- abx_name
}
## Check the ABX classes used
for (studyName in studyNames) {
    sub_ind <- colData(allData)$study_name == studyName
    res <- table(colData(allData)$abx[sub_ind])
    print(paste0("ABX classes in ", studyName, ":"))
    print(res)
}
## [1] "ABX classes in VincentC_2016:"
## 
##      carbapenems   cephalosporins          control fluoroquinolones 
##                5                2              196               10 
##       macrolides       penicillin 
##                2               14 
## [1] "ABX classes in RaymondF_2016:"
## 
## cephalosporins        control 
##             36             36

2.1 Raymond dataset

This script requires allData,studyName and abx inputs.

siamcat_cmd <- system.file("scripts/siamcat_cmd.R", 
                           package = "microbiomeABXR21")
## Required inputs
studyName <- "RaymondF_2016"
abx <- "cephalosporins"
source(siamcat_cmd)

2.2 Vincent dataset

## Required inputs
studyName <- "VincentC_2016"
abx <- "penicillin"
source(siamcat_cmd)

## Error from `check.confounders` step:      
# Error in ctrfn(levels(x), contrasts = contrasts) :
# orthogonal polynomials cannot be represented accurately enough for 97 degrees of freedom

3 Genus-Level Modeling

## Genus-level data
allData <- agglomerateByRank(allData, rank = "genus")

3.1 Raymond dataset

studyName <- "RaymondF_2016"
abx <- "cephalosporins"
source(siamcat_cmd)

3.2 Vincent dataset

studyName <- "VincentC_2016"
abx <- "penicillin"
source(siamcat_cmd)