knitr
Optionsknitr::opts_chunk$set(cache = TRUE)
knitr::opts_chunk$set(fig.align = "center")
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(results = "hold")
R
Librarieslibrary(curatedMetagenomicData)
library(magrittr)
library(DT)
library(dplyr)
library(ggplot2)
library(tibble)
DT
Optionsoptions(DT.options = list(dom = 'Bfrtip', buttons = c('copy', 'csv', 'excel')))
combined_metadata[1:1000, 1:4] %>%
datatable(rownames = FALSE, extensions = 'Buttons')
combined_metadata %>%
colnames()
## [1] "dataset_name"
## [2] "sampleID"
## [3] "subjectID"
## [4] "body_site"
## [5] "antibiotics_current_use"
## [6] "study_condition"
## [7] "disease"
## [8] "age"
## [9] "infant_age"
## [10] "age_category"
## [11] "gender"
## [12] "country"
## [13] "non_westernized"
## [14] "sequencing_platform"
## [15] "DNA_extraction_kit"
## [16] "PMID"
## [17] "number_reads"
## [18] "number_bases"
## [19] "minimum_read_length"
## [20] "median_read_length"
## [21] "pregnant"
## [22] "lactating"
## [23] "NCBI_accession"
## [24] "BMI"
## [25] "antibiotics_family"
## [26] "momeducat"
## [27] "alcohol"
## [28] "flg-genotype"
## [29] "disease_subtype"
## [30] "hdl"
## [31] "triglycerides"
## [32] "hba1c"
## [33] "ldl"
## [34] "tnm"
## [35] "body_subsite"
## [36] "visit_number"
## [37] "days_from_first_collection"
## [38] "c-peptide"
## [39] "family"
## [40] "cholesterol"
## [41] "glucose"
## [42] "mumps"
## [43] "adiponectin"
## [44] "insulin(cat)"
## [45] "fgf-19"
## [46] "hscrp"
## [47] "leptin"
## [48] "glutamate_decarboxylase_2_antibody"
## [49] "creatinine"
## [50] "il-1"
## [51] "cd163"
## [52] "glp-1"
## [53] "hitchip_probe_class"
## [54] "hitchip_probe_number"
## [55] "protein_intake"
## [56] "days_after_onset"
## [57] "stec_count"
## [58] "shigatoxin_2_elisa"
## [59] "stool_texture"
## [60] "ferm_milk_prod_consumer"
## [61] "mgs_richness"
## [62] "location"
## [63] "dyastolic_p"
## [64] "systolic_p"
## [65] "prothrombin_time"
## [66] "creatine"
## [67] "inr"
## [68] "ctp"
## [69] "albumine"
## [70] "bilubirin"
## [71] "smoker"
## [72] "ever_smoker"
## [73] "birth_control_pil"
## [74] "hla_drb12"
## [75] "hla_dqa12"
## [76] "hla_dqa11"
## [77] "hla_drb11"
## [78] "start_solidfood"
## [79] "ajcc"
## [80] "fobt"
select
Functionnot_all_na <- function(x) {
all(!is.na(x))
}
bmi_metadata <-
combined_metadata %>%
filter(!is.na(BMI)) %>%
filter(body_site == "stool") %>%
select_if(not_all_na)
bmi_metadata %>%
select(dataset_name, sampleID, BMI) %>%
datatable(rownames = FALSE)
bmi_metadata %>%
ggplot(aes(BMI)) +
geom_density() +
theme_minimal()
bmi_datasets <-
unique(bmi_metadata$dataset_name) %>%
paste0(".metaphlan_bugs_list.stool") %>%
curatedMetagenomicData(dryrun = FALSE)
bmi_datasets[1]
bmi_datasets[[1]]
## List of length 1
## names(1): FengQ_2015.metaphlan_bugs_list.stool
## ExpressionSet (storageMode: lockedEnvironment)
## assayData: 1547 features, 154 samples
## element names: exprs
## protocolData: none
## phenoData
## sampleNames: SID31766 SID31537 ... SID531663 (154 total)
## varLabels: subjectID body_site ... NCBI_accession (23 total)
## varMetadata: labelDescription
## featureData: none
## experimentData: use 'experimentData(object)'
## pubMedIds: 25758642
## Annotation:
beaumont_taxa <- c("k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales",
"k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Lachnospiraceae",
"k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae",
"k__Bacteria|p__Firmicutes|c__Clostridia|o__Clostridiales|f__Ruminococcaceae|g__Ruminococcus")
data.frame
from ExpressionSet
Objectseset_df <- function(eset_obj, eset_name) {
eset_name <- strsplit(eset_name, "\\.")[[1]][1]
exprs_df <-
eset_obj[featureNames(eset_obj) %in% beaumont_taxa, ] %>%
exprs() %>%
t() %>%
data.frame() %>%
rownames_to_column(var = "SampleID")
bmi_vec <- eset_obj$BMI
data.frame(eset_name, exprs_df, bmi_vec)
}
data.frame
dataset_names <- names(bmi_datasets)
meta_df <-
mapply(eset_df, bmi_datasets, dataset_names, SIMPLIFY = FALSE) %>%
Reduce(rbind, .) %>%
set_colnames(c("Study", "SampleID", "o__Clostridiales", "f__Lachnospiraceae", "f__Ruminococcaceae", "g__Ruminococcus", "BMI")) %>%
filter(!is.na(BMI))
meta_df %>%
datatable(rownames = FALSE)
model_df <- function(lm_obj, model_name) {
estimate <-
coef(lm_obj) %>%
data.frame() %>%
extract(2, )
lower_ci <-
confint(lm_obj) %>%
data.frame() %>%
extract(2, 1)
upper_ci <-
confint(lm_obj) %>%
data.frame() %>%
extract(2, 2)
data.frame(model_name, estimate, lower_ci, upper_ci)
}
o__Clostridiales <-
meta_df %$%
lm(BMI ~ o__Clostridiales) %>%
model_df("o__Clostridiales")
f__Lachnospiraceae <-
meta_df %$%
lm(BMI ~ f__Lachnospiraceae) %>%
model_df("f__Lachnospiraceae")
f__Ruminococcaceae <-
meta_df %$%
lm(BMI ~ f__Ruminococcaceae) %>%
model_df("f__Ruminococcaceae")
g__Ruminococcus <-
meta_df %$%
lm(BMI ~ g__Ruminococcus) %>%
model_df("g__Ruminococcus")
box_plot_order <-
c("o__Clostridiales", "f__Lachnospiraceae", "f__Ruminococcaceae",
"g__Ruminococcus") %>%
rev()
box_plot_df <-
list(o__Clostridiales, f__Lachnospiraceae, f__Ruminococcaceae,
g__Ruminococcus) %>%
Reduce(full_join, .) %>%
mutate(model_name = ordered(model_name, levels = box_plot_order))
box_plot_df %>%
ggplot(aes(model_name, estimate)) +
geom_point() +
geom_pointrange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_hline(aes(yintercept = 0), color = "gray", linetype = "dashed") +
coord_flip() +
theme_minimal() +
theme(axis.title.y = element_blank())
o__Clostridiales <-
meta_df %$%
lm(BMI ~ o__Clostridiales + Study) %>%
model_df("o__Clostridiales")
f__Lachnospiraceae <-
meta_df %$%
lm(BMI ~ f__Lachnospiraceae + Study) %>%
model_df("f__Lachnospiraceae")
f__Ruminococcaceae <-
meta_df %$%
lm(BMI ~ f__Ruminococcaceae + Study) %>%
model_df("f__Ruminococcaceae")
g__Ruminococcus <-
meta_df %$%
lm(BMI ~ g__Ruminococcus + Study) %>%
model_df("g__Ruminococcus")
box_plot_df <-
list(o__Clostridiales, f__Lachnospiraceae, f__Ruminococcaceae,
g__Ruminococcus) %>%
Reduce(full_join, .) %>%
mutate(model_name = ordered(model_name, levels = box_plot_order))
box_plot_df %>%
ggplot(aes(model_name, estimate)) +
geom_point() +
geom_pointrange(aes(ymin = lower_ci, ymax = upper_ci)) +
geom_hline(aes(yintercept = 0), color = "gray", linetype = "dashed") +
coord_flip() +
theme_minimal() +
theme(axis.title.y = element_blank())