Develop and evaluate the prediction models for recent antibiotics usage
Researchers often exclude participants with recent antibiotic use, but this may introduce selection bias by excluding the least healthy individuals. We hypothesize that antibiotic usage can be detected from the current microbiome status using the prediction models trained on microbiome datasets with recent antibiotics exposure. We will construct the prediction models and test how well they can infer probable antibiotic exposure from the gut microbiome. Using prediction models from this aim, researchers can stratify analyses or include participants who may have otherwise been excluded from the study.
SIAMCAT is used for this analysis.
suppressPackageStartupMessages({
library(curatedMetagenomicData)
library(dplyr)
library(mia)
library(SIAMCAT)
library(stringr)
library(ggplot2)
library(microbiomeABXR21) # temporary package for EDA
})
This script requires studyName and abx inputs.
siamcat_cmd <- system.file("scripts/siamcat_cmd.R",
package = "microbiomeABXR21")
studyNames <- c("VincentC_2016", "RaymondF_2016")
## Metadata
meta <- curatedMetagenomicData::sampleMetadata
submeta <- meta %>%
filter(study_name == studyNames[1] | study_name == studyNames[2])
Prepare allData object
## Download data
allData <- returnSamples(submeta, "relative_abundance")
colData(allData)$study_name %>% table() # summarize the number of samples
## .
## RaymondF_2016 VincentC_2016
## 72 229
assay(allData) <- assay(allData)/100 # percentage to decimals
## Clean ABX info
colData(allData)$abx <- colData(allData)$antibiotics_family
colData(allData)$abx[which(is.na(colData(allData)$abx))] <- "control"
abx_ls <- c("carbapenems", "cephalosporins", "fluoroquinolones",
"glycopeptides", "macrolides", "nitroimidazoles",
"penicillin", "sulfonamides")
for (abx_name in abx_ls) {
ind <- which(colData(allData)$antibiotics_family %>% str_detect(., abx_name))
colData(allData)$abx[ind] <- abx_name
}
## Check the ABX classes used
for (studyName in studyNames) {
sub_ind <- colData(allData)$study_name == studyName
res <- table(colData(allData)$abx[sub_ind])
print(paste0("ABX classes in ", studyName, ":"))
print(res)
}
## [1] "ABX classes in VincentC_2016:"
##
## carbapenems cephalosporins control fluoroquinolones
## 5 2 196 10
## macrolides penicillin
## 2 14
## [1] "ABX classes in RaymondF_2016:"
##
## cephalosporins control
## 36 36
This script requires allData,studyName and abx inputs.
siamcat_cmd <- system.file("scripts/siamcat_cmd.R",
package = "microbiomeABXR21")
## Required inputs
studyName <- "RaymondF_2016"
abx <- "cephalosporins"
source(siamcat_cmd)
## Required inputs
studyName <- "VincentC_2016"
abx <- "penicillin"
source(siamcat_cmd)
## Error from `check.confounders` step:
# Error in ctrfn(levels(x), contrasts = contrasts) :
# orthogonal polynomials cannot be represented accurately enough for 97 degrees of freedom
## Genus-level data
allData <- agglomerateByRank(allData, rank = "genus")
studyName <- "RaymondF_2016"
abx <- "cephalosporins"
source(siamcat_cmd)
studyName <- "VincentC_2016"
abx <- "penicillin"
source(siamcat_cmd)