## Load packages
suppressPackageStartupMessages({
library(dplyr)
library(stringr)
library(ggplot2)
})
## Load metadata tables
dir <- system.file("extdata", package = "microbiomeABXR21")
meta_b <- read.table(file.path(dir, "Bokulich_metadata.txt"), sep = "\t", header = TRUE)
meta_k <- read.csv(file.path(dir, "Korry_metadata.csv"))
meta_vi <- read.csv(file.path(dir, "Vincent_metadata.csv"))
meta_y <- read.csv(file.path(dir, "Yassour_metadata.csv"))
meta_z <- read.csv(file.path(dir, "Zaura_metadata.csv"))
## Raymond dataset
meta <- curatedMetagenomicData::sampleMetadata
meta_r <- meta %>% filter(study_name == "RaymondF_2016")
meta_vi_wrong <- meta %>% filter(study_name == "VincentC_2016") # Chloe identified an issue
## Source the plotting script
code_dir <- system.file("scripts", package = "microbiomeABXR21")
source(file.path(code_dir, "plotMultiABX.R"))
dim(meta_b)
## [1] 1216 65
head(meta_b, 3)
## sample_name abx1_pmp_all_bymonth abx_all_sources abx_name abx_pmp_all
## 1 10249.C001.01SS <NA> 0 <NA> <NA>
## 2 10249.C001.02SS <NA> 0 <NA> <NA>
## 3 10249.C001.03SS pre 0 <NA> <NA>
## antiexposedall baby_sex collection_timestamp country
## 1 n female 2012-03-15 United States of America
## 2 n female 2012-04-20 United States of America
## 3 n female 2012-04-26 United States of America
## course_num day_of_life delivery description diet diet_2 diet_2_month
## 1 NA 0 Vaginal Vaginal.bd.n.0 bd bd bd_0
## 2 NA 36 Vaginal Vaginal.bd.n.0 bd bd bd_1
## 3 NA 42 Vaginal Vaginal.bd.n.0 bd bd bd_1
## diet_3 dna_extracted elevation empo_1 empo_2 empo_3
## 1 eb TRUE 10 Host-associated Animal Animal distal gut
## 2 eb TRUE 10 Host-associated Animal Animal distal gut
## 3 eb TRUE 10 Host-associated Animal Animal distal gut
## env_biome env_feature env_material env_package geo_loc_name
## 1 urban biome human-associated habitat feces human-gut USA:NY:NYC
## 2 urban biome human-associated habitat feces human-gut USA:NY:NYC
## 3 urban biome human-associated habitat feces human-gut USA:NY:NYC
## host_age host_age_normalized_years host_age_units host_body_habitat
## 1 not provided not provided years UBERON:feces
## 2 not provided not provided years UBERON:feces
## 3 not provided not provided years UBERON:feces
## host_body_mass_index host_body_product host_body_site host_common_name
## 1 not provided UBERON:feces UBERON:feces human
## 2 not provided UBERON:feces UBERON:feces human
## 3 not provided UBERON:feces UBERON:feces human
## host_height host_height_units host_scientific_name host_subject_id
## 1 not provided cm Homo sapiens 1
## 2 not provided cm Homo sapiens 1
## 3 not provided cm Homo sapiens 1
## host_taxid host_weight host_weight_units latitude lifestage longitude
## 1 9606 not provided kg 40.742 infant -73.974
## 2 9606 not provided kg 40.742 infant -73.974
## 3 9606 not provided kg 40.742 infant -73.974
## mom_child mom_courses_ofdeliveryabx mom_ld_abx mom_pre_post_ld
## 1 C na FALSE na
## 2 C na FALSE na
## 3 C na FALSE na
## mom_prenatal_abx mom_prenatal_abx_class mom_prenatal_abx_trimester month
## 1 FALSE na na 24
## 2 FALSE na na 28
## 3 FALSE na na 30
## month_of_life physical_specimen_location physical_specimen_remaining
## 1 24.1 NYUMC TRUE
## 2 28.1 NYUMC TRUE
## 3 30.1 NYUMC TRUE
## qiita_study_id sample_summary sample_type sampletype scientific_name
## 1 10249 Vaginal.bd.y.1 feces Dry_Stool human gut metagenome
## 2 10249 Vaginal.bd.y.1 feces Dry_Stool human gut metagenome
## 3 10249 Vaginal.bd.y.1 feces Dry_Stool human gut metagenome
## sex studyid taxon_id
## 1 female 1 408170
## 2 female 1 408170
## 3 female 1 408170
## title
## 1 ECAM: Antibiotics, birth mode, and diet shape microbiome maturation during early life
## 2 ECAM: Antibiotics, birth mode, and diet shape microbiome maturation during early life
## 3 ECAM: Antibiotics, birth mode, and diet shape microbiome maturation during early life
mapping_b <- read.table(file.path(dir, "Bokulich_mapping_file_corrected.txt"),
sep = "\t", header = FALSE)
head(mapping_b)
## V1 V2 V3 V4
## 1 10249.C001.01SS 10249.C001.01SS.fastq <NA> Vaginal.bd.n.0
## 2 10249.C001.02SS 10249.C001.02SS.fastq <NA> Vaginal.bd.n.0
## 3 10249.C001.03SS 10249.C001.03SS.fastq <NA> Vaginal.bd.n.0
## 4 10249.C001.04SS 10249.C001.04SS.fastq Cephalexin Vaginal.bd.y.1
## 5 10249.C001.05SS 10249.C001.05SS.fastq Cephalexin Vaginal.bd.y.1
## 6 10249.C001.06SS 10249.C001.06SS.fastq <NA> Vaginal.bd.y.1
table(mapping_b$V3)
##
## Amoxicillin Amoxicillin/Ciprodex Ampicillin/Gentamicin
## 47 3 3
## Augmentin Azithromycin Bactrim
## 7 12 1
## Cefadroxil Cefdinir Cefepime
## 2 10 2
## Cephalexin Ciprofloxacin Clindamycin
## 10 3 3
## Metronidazole Moxifloxacin Polytrim
## 3 5 6
Table S3. Prenatal antimicrobial use by class and purpose
Table S4. Peri-natal antibiotic use by class and indication
Table S5. Post-natal antimicrobial use by class and age of child
supT3 <- xlsx::read.xlsx(file.path(dir, "Bokulich_Sup_Tables.xlsx"), 1)
supT4 <- xlsx::read.xlsx(file.path(dir, "Bokulich_Sup_Tables.xlsx"), 2) %>% .[1:89, 1:6] # remove NAs
supT5 <- xlsx::read.xlsx(file.path(dir, "Bokulich_Sup_Tables.xlsx"), 3)
head(supT3)
## Antibiotic Study.ID Indication Duration..Days. Class Type
## 1 Nitrofurantoin 2 UTIa 10 Nitrofuran oral
## 2 Penicillin 4 UTI 5 Beta-lactam oral
## 3 Amoxicillin 42 Bronchitis 10 Beta-lactam oral
## 4 Cephalosporin 46 Sinus infection 10 Cephalosporin oral
## 5 Azithromycin 49 Sinus infection 3 Macrolide oral
## 6 Azithromycin 55 Cold 10 Macrolide oral
head(supT4)
## Antibiotic Study_ID Subject Indication. Class Type
## 1 Erythromycin 1 Child ONP macrolide eye ointment
## 2 Clindamycin 2 Mother CNP lincosamide IV
## 3 Erythromycin 2 Child ONP macrolide eye ointment
## 4 Clindamycin 4 Mother CNP lincosamide IV
## 5 Erythromycin 4 Child ONP macrolide eye ointment
## 6 Cefazolin 5 Mother CNP beta-lactam IV
head(supT5)
## Antibiotic Study.ID Indication Exposure.Age..Month. Duration..Days.
## 1 Nitrofurantoin* 1 Mother UTI 1 7
## 2 Cephalexin* 1 Mother mastitis 2 7
## 3 Amoxicillin 1 ear infection 10 10
## 4 Valtrex* 1 Cold sore 12 1
## 5 Amoxicillin 1 ear infection 13 10
## 6 Metronidazole* 1 Mother GI infection 13 10
## Class
## 1 nitrofuran
## 2 cephalosporin
## 3 beta-lactam
## 4 anti-viral
## 5 beta-lactam
## 6 nitroimidazole
Child’s perinatal ABX exposure data (Sup.Table4) includes three ABXs. 4 children were exposed to three ABXs and 42 children were exposed to only one ABX.
supT4_c <- filter(supT4, Subject == "Child")
supT4_c %>%
group_by(Antibiotic) %>%
summarize(n = length(unique(Study_ID)))
## # A tibble: 3 × 2
## Antibiotic n
## <chr> <int>
## 1 Ampicillin 4
## 2 Erythromycin 46
## 3 Gentamicin 4
## Children exposed to only one perinatal ABX treatment
abx_summary <- supT4_c %>%
group_by(Study_ID) %>%
summarize(n = length(unique(Antibiotic)),
abx = paste(Antibiotic, collapse = " | "))
abx_summary %>%
filter(n == 1) %>%
group_by(abx) %>%
summarise(n = length(unique(Study_ID)))
## # A tibble: 1 × 2
## abx n
## <chr> <int>
## 1 Erythromycin 42
24 Children had postnatal ABX exposures.
## Subset Sup.Table5 to only child cases
mother_ind <- grep("Mother", supT5$Study.ID)
supT5_c <- supT5[-mother_ind,]
## The number of children exposed postnatal ABX
length(unique(supT5_c$Study.ID))
## [1] 24
15 children had postnatal ABX exposure to only one class of ABX. For multiple ABX exposure cases, the exposure time is spread out, so we can potentially use them as independent single exposure cases…?!
## Summary of Child's postnatal ABX exposure
supT5_c_summary <-
supT5_c %>%
group_by(Study.ID) %>%
summarize(n = length(unique(Class)),
abx = paste(Class, collapse = " | "))
supT5_c_summary
## # A tibble: 24 × 3
## Study.ID n abx
## <chr> <int> <chr>
## 1 1 4 beta-lactam | anti-viral | beta-lactam | beta-lactam | quinol…
## 2 10 4 anti-fungal | monoxycarbolic acid | beta-lactam | quinolone |…
## 3 11 2 beta-lactam | cephalosporin
## 4 12 2 macrolide | beta-lactam
## 5 16 2 NA | anti-fungal
## 6 17 1 monoxycarbolic acid
## 7 18 1 cephalosporin
## 8 20 3 NA | beta-lactam | NA
## 9 21 1 beta-lactam
## 10 24 1 cephalosporin
## # … with 14 more rows
sum(supT5_c_summary$n == 1) # 15 children had postnatal ABX exposure to only one class of ABX
## [1] 15
dim(meta_k)
## [1] 20 25
head(meta_k, 3)
## sample_id study_name subject_id body_site antibiotics_current_use
## 1 SRS4115648 KorryB_2020 DC10 cecum doxycycline
## 2 SRS4115647 KorryB_2020 DC09 cecum doxycycline
## 3 SRS4115645 KorryB_2020 DC01 cecum none
## study_condition disease age infant_age age_category gender country
## 1 doxycycline NA 6 weeks NA NA female USA
## 2 doxycycline NA 6 weeks NA NA female USA
## 3 none NA 6 weeks NA NA female USA
## non_westernized sequencing_platform DNA_extraction_kit PMID
## 1 NA IlluminaHiSeq ZymoBIOMICS DNA/RNA Miniprep kit NA
## 2 NA IlluminaHiSeq ZymoBIOMICS DNA/RNA Miniprep kit NA
## 3 NA IlluminaHiSeq ZymoBIOMICS DNA/RNA Miniprep kit NA
## number_reads number_bases minimum_read_length median_read_length
## 1 NA 8992888352 NA 302
## 2 NA 11138027874 NA 302
## 3 NA 9049567108 NA 302
## NCBI_accession pregnant lactating curator type
## 1 SRR8291342 NA NA Chloe Mirzayi METAGENOMIC
## 2 SRR8291343 NA NA Chloe Mirzayi METAGENOMIC
## 3 SRR8291345 NA NA Chloe Mirzayi METAGENOMIC
Antibiotic class abbreviation in this dataset:
| Full name | Class |
|---|---|
| Amoxicillin | Penicillins |
| Ciprofloxacin | Fluoroquinolones |
| Doxycycline | Tetracyclines |
Based on the paper, there should be 4 each group, but metadata says there are 8 control mice…?
meta_k %>%
group_by(subject_id) %>%
summarize(abx = paste(study_condition, collapse = " | ")) %>%
group_by(abx) %>%
summarize(n = length(unique(subject_id)))
## # A tibble: 4 × 2
## abx n
## <chr> <int>
## 1 amoxicillin 4
## 2 ciprofloxacin 4
## 3 doxycycline 4
## 4 none 8
dim(meta_r)
## [1] 72 136
length(unique(meta_r$subject_id)) # 24 subjects
## [1] 24
All 24 participants have control at the baseline. 6 participants didn’t treated with any antibiotic.
meta_r %>%
group_by(subject_id) %>%
summarize(trt = paste(study_condition, collapse = " | "))
## # A tibble: 24 × 2
## subject_id trt
## <chr> <chr>
## 1 P10E control | cephalosporins | cephalosporins
## 2 P11E control | cephalosporins | cephalosporins
## 3 P12E control | cephalosporins | cephalosporins
## 4 P13E control | cephalosporins | cephalosporins
## 5 P14E control | cephalosporins | cephalosporins
## 6 P15E control | cephalosporins | cephalosporins
## 7 P17E control | cephalosporins | cephalosporins
## 8 P18E control | cephalosporins | cephalosporins
## 9 P19E control | cephalosporins | cephalosporins
## 10 P1E control | cephalosporins | cephalosporins
## # … with 14 more rows
Metadata is collected from the Supplementary Table 1 of the manuscript.
va_explanation <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 1)
va_med <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 2)
va_ab <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 3)
va_feeding <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 4)
Each row represents a unique 222 participant.
dim(va_med)
## [1] 222 110
length(unique(va_med$Participant)) # The number of participants
## [1] 222
class_mapping <- list(Amoxicillin = "Penicillins",
Azithromycin = "Macrolides",
`Clavulanic acid` = "Beta-lactamase inhibitor",
Trimethoprim = "Sulfonamides",
Sulfadiazine = "Sulfonamides", # Sulfadiazine is a short-acting sulfonamide.
Cefalexin = "Cephalosporins", # typo for Cephalexin?
Phenoxymethylpenicillin = "Penicillins",
Cefuroxime = "Cephalosporins",
Clarithromycin = "Macrolides",
Gentamicin = "Aminoglycosides",
Sulfamethoxazole = "Sulfonamides",
Cefprozil = "Cephalosporins",
Benzylpenicillin = "Penicillins",
Cefazolin = "Cephalosporins",
Ceftriaxone = "Cephalosporins",
Nifuroxazide = "Nitrofurans",
Azitromycin = "Macrolides", # it looks like a typo in metadata
`Systemic antibiotic NAS` = "Systemic antibiotic NAS",
Ampicillin = "Penicillins",
Cefadroxil = "Cephalosporins",
Cefexime = "Cephalosporins", # typo for Cefixime?
Cefotaxim = "Cephalosporins", # typo for Cefotaxime?
Cefotaxime = "Cephalosporins",
Furazidin = "Hydantoins",
Isoniazid = "antituberculosis agents",
Midecamycin = "Macrolides",
Nitrofurantoin = "Nitrofurans",
Trimetoprim = "Timethoprim") # not sure
table(unlist(class_mapping))
##
## Aminoglycosides antituberculosis agents Beta-lactamase inhibitor
## 1 1 1
## Cephalosporins Hydantoins Macrolides
## 9 1 4
## Nitrofurans Penicillins Sulfonamides
## 2 4 3
## Systemic antibiotic NAS Timethoprim
## 1 1
I combined all the antibiotics exposures, considering following cases as
independent data points:
1) multiple ABX treated at the same time
2) same ABX treated at the different time points
abx_names <- grep("Name", colnames(va_med), ignore.case = TRUE, value = TRUE)
sub <- va_med[,abx_names]
sub$abx_all <- NA
sub$abx_unique <- NA
sub$abx_unique_n <- 0
sub$abx_class_all <- NA
for (i in seq_len(nrow(sub))) {
## Collect all ABX info per participant
x <- sub[i, abx_names]
sub$abx_all[i] <- paste(x[!is.na(x)], collapse = " | ")
sub$abx_all[i] <- gsub("and", "|", sub$abx_all[i])
## Unique ABXs
unique_abx <- strsplit(sub$abx_all[i], " | ", fixed = TRUE) %>%
unlist %>%
str_replace("^\\w{1}", toupper) %>%
unique
sub$abx_unique[i] <- paste(unique_abx, collapse = " | ")
sub$abx_unique_n[i] <- length(unique_abx)
# Aggregate all ABX to the class level
if (length(unique_abx) != 0) {
abx_class <- unique(unlist(class_mapping[unique_abx]))
sub$abx_class_all[i] <- paste(abx_class, collapse = " | ")
}
}
Summary of all the ABX used in this study
strsplit(sub$abx_class_all, " | ", fixed = TRUE) %>%
unlist %>%
table %>%
sort(decreasing = TRUE)
## .
## Penicillins Cephalosporins Macrolides
## 122 37 36
## Beta-lactamase inhibitor Sulfonamides Aminoglycosides
## 28 24 7
## Nitrofurans Systemic antibiotic NAS antituberculosis agents
## 4 2 1
## Hydantoins Timethoprim
## 1 1
61% of the participants (136 out of 222) received antibiotics in the first 3
years of their lives. This is when we consider the following cases as a single
independent exposure:
1) the same antibiotic were administered at multiple different time points
2) individual was treated with multiple types of antibiotics at the same time
abx_freq <- table(rowSums(!is.na(sub)))
plotMultiABX(abx_freq)
There is an error in cMD metadata for this dataset. We clean and update ABX information using the new metadata and metadata dictionary.
ABX exposure information has prior hospitalization (e.g.PRIOR_CEPH), three
different time points prior to stool sample collection, and dichotomized
version of it (e.g.CEPH_bRF).
grep("CEPH", colnames(meta_vi), ignore.case = TRUE, value = TRUE)
## [1] "PRIOR_CEPH" "CEPH" "CEPH.1To3Days_RF"
## [4] "CEPH.4To14Days_RF" "CEPH.15To30Days_RF" "CEPH_bRF"
head(meta_vi[,c(grep("CEPH", colnames(meta_vi), ignore.case = TRUE, value = TRUE))])
## PRIOR_CEPH CEPH CEPH.1To3Days_RF CEPH.4To14Days_RF CEPH.15To30Days_RF
## 1 0 1To3Days 1 0 0
## 2 0 4To14Days 0 1 0
## 3 0 4To14Days 0 1 0
## 4 0 15To30Days 0 0 1
## 5 0 1To3Days 1 0 0
## 6 0 1To3Days 1 0 0
## CEPH_bRF
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## Subset to only ABX information
abx_classes <- c("CEPH", "FLUORO", "CARBA", "PENICI", "VANCOIV", "AZITHRO",
"METRO", "COTRIM", "GENTA", "CLINDA", "DAPTO")
abx_binary <- paste0(abx_classes, "_bRF")
meta_vi_abx <- meta_vi[, c("PATIENTID", abx_binary)]
## Quick look
colSums(meta_vi_abx[,abx_binary]) # the number of patients exposed to a specific ABX
## CEPH_bRF FLUORO_bRF CARBA_bRF PENICI_bRF VANCOIV_bRF AZITHRO_bRF
## 134 35 34 10 37 16
## METRO_bRF COTRIM_bRF GENTA_bRF CLINDA_bRF DAPTO_bRF
## 19 2 3 5 5
table(rowSums(meta_vi_abx[,abx_binary])) # the number of ABXs each patient exposed to (0 to 6 ABX)
##
## 0 1 2 3 4 5 6
## 44 136 19 11 3 15 1
meta_vi_abx[which.max(rowSums(meta_vi_abx[,abx_binary])),] # patient exposed to the maximum number (=6) of ABX
## PATIENTID CEPH_bRF FLUORO_bRF CARBA_bRF PENICI_bRF VANCOIV_bRF AZITHRO_bRF
## 213 MM098 1 1 1 0 1 1
## METRO_bRF COTRIM_bRF GENTA_bRF CLINDA_bRF DAPTO_bRF
## 213 1 0 0 0 0
## Subset only to 1) NoTreatment and 2) exposure to only one class of ABX
simple_ind <- which(rowSums(meta_vi_abx[,abx_binary]) %in% c(0, 1))
x <- meta_vi_abx[simple_ind,] # This step removes 12 patients from 98 total patients
## Make `abx` column that concatenates dichotomized ABX exposure variables
for (i in seq_len(nrow(x))) {
ind <- which(x[i,] == 1)
abx <- gsub("_bRF", "", colnames(x)[ind])
x$abx[i] <- paste(abx, collapse = " | ") # this matters when I keep patients exposed to multiple ABX
if (length(ind) == 0) {x$abx[i] <- "Control"} # fill NoTreatment
}
This is the summary of the exposure groups. 12 participants exposed to more than one family of antibiotics are excluded in this summary, so there are total of 76 participant’s antibiotics exposure data is summarized.
## Summary
x %>%
group_by(abx) %>%
summarise(n = length(unique(PATIENTID)))
## # A tibble: 9 × 2
## abx n
## <chr> <int>
## 1 AZITHRO 1
## 2 CARBA 2
## 3 CEPH 60
## 4 Control 29
## 5 COTRIM 2
## 6 FLUORO 3
## 7 GENTA 1
## 8 METRO 1
## 9 PENICI 1
dim(meta_y)
## [1] 1136 33
length(unique(meta_y$subject_id)) # 39 subjects
## [1] 39
# ## The number of subjects for each ABX exposure
# meta_y %>%
# group_by(antibiotics_current_use) %>%
# summarise(n = length(unique(subject_id)))
## The number of ABX treatments for each subject
## Multiple different ABX were used
a <- meta_y %>%
filter(!is.na(antibiotics_current_use)) %>%
group_by(subject_id) %>%
summarise(abx_trt = length(unique(antibiotics_current_use)))
plotMultiABX(table(a$abx_trt))
sub_y <- meta_y %>% filter(!is.na(antibiotics_current_use))
sub_y$abx_all <- NA
sub_y$abx_unique <- NA
sub_y$abx_unique_n <- 0
sub_y$abx_class_all <- NA
for (i in seq_len(nrow(sub_y))) {
## Collect all ABX info per participant
sub_y$abx_all[i] <- gsub("and", "|", sub_y$antibiotics_current_use[i])
sub_y$abx_all[i] <- gsub(",", "|", sub_y$abx_all[i])
## Unique ABXs
unique_abx <- strsplit(sub_y$abx_all[i], " | ", fixed = TRUE) %>%
unlist %>%
str_replace("^\\w{1}", toupper) %>%
unique
sub_y$abx_unique[i] <- paste(unique_abx, collapse = " | ")
sub_y$abx_unique_n[i] <- length(unique_abx)
# Aggregate all ABX to the class level
if (length(unique_abx) != 0) {
abx_class <- unique(unlist(class_mapping[unique_abx]))
sub_y$abx_class_all[i] <- paste(abx_class, collapse = " | ")
}
}
Summary of all the ABX used in this study
strsplit(sub_y$abx_class_all, " | ", fixed = TRUE) %>%
unlist %>%
table %>%
sort(decreasing = TRUE)
## .
## Penicillins Beta-lactamase inhibitor Sulfonamides
## 79 40 27
## Macrolides Cephalosporins Systemic antibiotic NAS
## 13 8 1
yas_general <- xlsx::read.xlsx(file.path(dir, "Yassour_Table_S1.xls"), 1) %>% .[,1:6]
yas_abx <- xlsx::read.xlsx(file.path(dir, "Yassour_Table_S1.xls"), 2)
yas_feeding <- xlsx::read.xlsx(file.path(dir, "Yassour_Table_S1.xls"), 3)
dim(yas_general)
## [1] 39 6
head(yas_general, 3)
## subject Year.of.birth Country Gender Delivery.type Gestational.age
## 1 E000823 2008 Finland Male Vaginal delivery 39+4
## 2 E001958 2008 Finland Female Vaginal delivery 40+1
## 3 E003188 2008 Finland Female Vaginal delivery 38+6
length(unique(yas_general$subject)) # 39 unique subjects
## [1] 39
dim(yas_abx)
## [1] 233 6
head(yas_abx, 3)
## Subject Age..months. Antibiotic.type Antibiotic.code
## 1 E003188 6.5 Amoxicillin J01CA04
## 2 E003188 7 Amoxicillin and clavulanic acid J01CR02
## 3 E003188 13 Amoxicillin and clavulanic acid J01CR02
## Duration..days. Symptoms
## 1 7 Acute otitis media
## 2 7 Acute otitis media
## 3 7 Acute otitis media
length(unique(yas_abx$Subject)) # 21 unique ABX-exposed subjects
## [1] 21
## One subject seems missing
# total 20 subjects with ABX exposure?
length(intersect(yas_general$subject, unique(yas_abx$Subject)))
## [1] 20
# What is this subject "E028794"? It is only in 'abx' table - not in 'general' table
setdiff(yas_abx$Subject, yas_general$subject)
## [1] "E028794"
# ## The number of subjects for each ABX exposure
# yas_abx %>%
# group_by(Antibiotic.type) %>%
# summarise(n = length(unique(Subject)))
z <- yas_abx %>%
group_by(Subject) %>%
summarise(abx_trt = length(unique(Antibiotic.type)))
## All participants, except one, received more than one ABX
plotMultiABX(table(z$abx_trt))
yas_abx$abx_all <- NA
yas_abx$abx_unique <- NA
yas_abx$abx_unique_n <- 0
yas_abx$abx_class_all <- NA
for (i in seq_len(nrow(yas_abx))) {
## Collect all ABX info per participant
x <- yas_abx[i, "Antibiotic.type"]
yas_abx$abx_all[i] <- paste(x[!is.na(x)], collapse = " | ")
yas_abx$abx_all[i] <- gsub("and", "|", yas_abx$abx_all[i])
## Unique ABXs
unique_abx <- strsplit(yas_abx$abx_all[i], " | ", fixed = TRUE) %>%
unlist %>%
str_replace("^\\w{1}", toupper) %>%
unique
yas_abx$abx_unique[i] <- paste(unique_abx, collapse = " | ")
yas_abx$abx_unique_n[i] <- length(unique_abx)
# Aggregate all ABX to the class level
if (length(unique_abx) != 0) {
abx_class <- unique(unlist(class_mapping[unique_abx]))
yas_abx$abx_class_all[i] <- paste(abx_class, collapse = " | ")
}
}
Summary of all the ABX used in this study
strsplit(yas_abx$abx_class_all, " | ", fixed = TRUE) %>%
unlist %>%
table %>%
sort(decreasing = TRUE)
## .
## Penicillins Beta-lactamase inhibitor Sulfonamides
## 129 57 41
## Macrolides Cephalosporins Systemic antibiotic NAS
## 30 22 2
dim(meta_z)
## [1] 389 25
head(meta_z, 3)
## Description study_name sample_id subject_id assessment_day
## 1 HP1_faeces_amox2 ZauraE_2015 HP1_faeces_amox2 HP1 0.000
## 2 HP1_faeces_amox3 ZauraE_2015 HP1_faeces_amox3 HP1 7.000
## 3 HP1_faeces_amox4 ZauraE_2015 HP1_faeces_amox4 HP1 30.437
## antibiotics_current_use study_condition disease age infant_age age_category
## 1 amox amox NA NA NA NA
## 2 amox amox NA NA NA NA
## 3 amox amox NA NA NA NA
## country non_westernized sequencing_platform DNA_extraction_kit
## 1 United Kingdom NA LS454 NA
## 2 United Kingdom NA LS454 NA
## 3 United Kingdom NA LS454 NA
## PMID number_reads number_bases minimum_read_length median_read_length
## 1 26556275 NA 2159067 NA NA
## 2 26556275 NA 3129754 NA NA
## 3 26556275 NA 2404940 NA NA
## NCBI_accession pregnant lactating curator type
## 1 NA NA NA Chloe Mirzayi 16S
## 2 NA NA NA Chloe Mirzayi 16S
## 3 NA NA NA Chloe Mirzayi 16S
Antibiotic class abbreviation in this dataset:
| Label | Full name | Class |
|---|---|---|
| amox | Amoxicillin | Penicillins |
| cipro | Ciprofloxacin | Fluoroquinolones |
| clinda | Clindamycin | Lincomycins |
| minoc | Minocycline Hydrochloride | Tetracyclines |
meta_z %>%
group_by(antibiotics_current_use) %>% # `study_condition` is identical
summarise(n = length(unique(subject_id)))
## # A tibble: 6 × 2
## antibiotics_current_use n
## <chr> <int>
## 1 amox 14
## 2 cipro 10
## 3 clinda 9
## 4 minoc 10
## 5 minoc, minoc 1
## 6 placebo 23
meta_z$abx <- NA
class_mapping <- list(amox = "Penicillins",
cipro = "Fluoroquinolones",
clinda = "Lincomycins",
minoc = "Tetracyclines",
placebo = "Placebo")
for (class_abbr in names(class_mapping)) {
ind <- which(meta_z$antibiotics_current_use == class_abbr)
meta_z$abx[ind] <- class_mapping[[class_abbr]]
}
# One data point with `minoc, minoc` label
ind <- which(meta_z$antibiotics_current_use == "minoc, minoc")
meta_z$abx[ind] <- "Tetracyclines"
meta_z %>%
group_by(abx) %>%
summarise(n = length(unique(subject_id)))
## # A tibble: 5 × 2
## abx n
## <chr> <int>
## 1 Fluoroquinolones 10
## 2 Lincomycins 9
## 3 Penicillins 14
## 4 Placebo 23
## 5 Tetracyclines 10