Contents

1 Setup

## Load packages
suppressPackageStartupMessages({
    library(dplyr)
    library(stringr)
    library(ggplot2)
})

## Load metadata tables
dir <- system.file("extdata", package = "microbiomeABXR21")
meta_b <- read.table(file.path(dir, "Bokulich_metadata.txt"), sep = "\t", header = TRUE)
meta_k <- read.csv(file.path(dir, "Korry_metadata.csv"))
meta_vi <- read.csv(file.path(dir, "Vincent_metadata.csv"))
meta_y <- read.csv(file.path(dir, "Yassour_metadata.csv"))
meta_z <- read.csv(file.path(dir, "Zaura_metadata.csv"))

## Raymond dataset
meta <- curatedMetagenomicData::sampleMetadata
meta_r <- meta %>% filter(study_name == "RaymondF_2016")
meta_vi_wrong <- meta %>% filter(study_name == "VincentC_2016") # Chloe identified an issue 

## Source the plotting script
code_dir <- system.file("scripts", package = "microbiomeABXR21")
source(file.path(code_dir, "plotMultiABX.R"))

2 Bokulich dataset

2.1 Overview

dim(meta_b)
## [1] 1216   65
head(meta_b, 3)
##       sample_name abx1_pmp_all_bymonth abx_all_sources abx_name abx_pmp_all
## 1 10249.C001.01SS                 <NA>               0     <NA>        <NA>
## 2 10249.C001.02SS                 <NA>               0     <NA>        <NA>
## 3 10249.C001.03SS                  pre               0     <NA>        <NA>
##   antiexposedall baby_sex collection_timestamp                  country
## 1              n   female           2012-03-15 United States of America
## 2              n   female           2012-04-20 United States of America
## 3              n   female           2012-04-26 United States of America
##   course_num day_of_life delivery    description diet diet_2 diet_2_month
## 1         NA           0  Vaginal Vaginal.bd.n.0   bd     bd         bd_0
## 2         NA          36  Vaginal Vaginal.bd.n.0   bd     bd         bd_1
## 3         NA          42  Vaginal Vaginal.bd.n.0   bd     bd         bd_1
##   diet_3 dna_extracted elevation          empo_1 empo_2            empo_3
## 1     eb          TRUE        10 Host-associated Animal Animal distal gut
## 2     eb          TRUE        10 Host-associated Animal Animal distal gut
## 3     eb          TRUE        10 Host-associated Animal Animal distal gut
##     env_biome              env_feature env_material env_package geo_loc_name
## 1 urban biome human-associated habitat        feces   human-gut   USA:NY:NYC
## 2 urban biome human-associated habitat        feces   human-gut   USA:NY:NYC
## 3 urban biome human-associated habitat        feces   human-gut   USA:NY:NYC
##       host_age host_age_normalized_years host_age_units host_body_habitat
## 1 not provided              not provided          years      UBERON:feces
## 2 not provided              not provided          years      UBERON:feces
## 3 not provided              not provided          years      UBERON:feces
##   host_body_mass_index host_body_product host_body_site host_common_name
## 1         not provided      UBERON:feces   UBERON:feces            human
## 2         not provided      UBERON:feces   UBERON:feces            human
## 3         not provided      UBERON:feces   UBERON:feces            human
##    host_height host_height_units host_scientific_name host_subject_id
## 1 not provided                cm         Homo sapiens               1
## 2 not provided                cm         Homo sapiens               1
## 3 not provided                cm         Homo sapiens               1
##   host_taxid  host_weight host_weight_units latitude lifestage longitude
## 1       9606 not provided                kg   40.742    infant   -73.974
## 2       9606 not provided                kg   40.742    infant   -73.974
## 3       9606 not provided                kg   40.742    infant   -73.974
##   mom_child mom_courses_ofdeliveryabx mom_ld_abx mom_pre_post_ld
## 1         C                        na      FALSE              na
## 2         C                        na      FALSE              na
## 3         C                        na      FALSE              na
##   mom_prenatal_abx mom_prenatal_abx_class mom_prenatal_abx_trimester month
## 1            FALSE                     na                         na    24
## 2            FALSE                     na                         na    28
## 3            FALSE                     na                         na    30
##   month_of_life physical_specimen_location physical_specimen_remaining
## 1          24.1                      NYUMC                        TRUE
## 2          28.1                      NYUMC                        TRUE
## 3          30.1                      NYUMC                        TRUE
##   qiita_study_id sample_summary sample_type sampletype      scientific_name
## 1          10249 Vaginal.bd.y.1       feces  Dry_Stool human gut metagenome
## 2          10249 Vaginal.bd.y.1       feces  Dry_Stool human gut metagenome
## 3          10249 Vaginal.bd.y.1       feces  Dry_Stool human gut metagenome
##      sex studyid taxon_id
## 1 female       1   408170
## 2 female       1   408170
## 3 female       1   408170
##                                                                                   title
## 1 ECAM: Antibiotics, birth mode, and diet shape microbiome maturation during early life
## 2 ECAM: Antibiotics, birth mode, and diet shape microbiome maturation during early life
## 3 ECAM: Antibiotics, birth mode, and diet shape microbiome maturation during early life

2.2 Mapping file

mapping_b <- read.table(file.path(dir, "Bokulich_mapping_file_corrected.txt"), 
                        sep = "\t", header = FALSE)
head(mapping_b)
##                V1                    V2         V3             V4
## 1 10249.C001.01SS 10249.C001.01SS.fastq       <NA> Vaginal.bd.n.0
## 2 10249.C001.02SS 10249.C001.02SS.fastq       <NA> Vaginal.bd.n.0
## 3 10249.C001.03SS 10249.C001.03SS.fastq       <NA> Vaginal.bd.n.0
## 4 10249.C001.04SS 10249.C001.04SS.fastq Cephalexin Vaginal.bd.y.1
## 5 10249.C001.05SS 10249.C001.05SS.fastq Cephalexin Vaginal.bd.y.1
## 6 10249.C001.06SS 10249.C001.06SS.fastq       <NA> Vaginal.bd.y.1
table(mapping_b$V3)
## 
##           Amoxicillin  Amoxicillin/Ciprodex Ampicillin/Gentamicin 
##                    47                     3                     3 
##             Augmentin          Azithromycin               Bactrim 
##                     7                    12                     1 
##            Cefadroxil              Cefdinir              Cefepime 
##                     2                    10                     2 
##            Cephalexin         Ciprofloxacin           Clindamycin 
##                    10                     3                     3 
##         Metronidazole          Moxifloxacin              Polytrim 
##                     3                     5                     6

2.4 Supplementary Tables

Table S3. Prenatal antimicrobial use by class and purpose
Table S4. Peri-natal antibiotic use by class and indication
Table S5. Post-natal antimicrobial use by class and age of child

supT3 <- xlsx::read.xlsx(file.path(dir, "Bokulich_Sup_Tables.xlsx"), 1)
supT4 <- xlsx::read.xlsx(file.path(dir, "Bokulich_Sup_Tables.xlsx"), 2) %>% .[1:89, 1:6] # remove NAs
supT5 <- xlsx::read.xlsx(file.path(dir, "Bokulich_Sup_Tables.xlsx"), 3)
head(supT3)
##       Antibiotic Study.ID      Indication Duration..Days.         Class Type
## 1 Nitrofurantoin        2            UTIa              10    Nitrofuran oral
## 2     Penicillin        4             UTI               5   Beta-lactam oral
## 3    Amoxicillin       42      Bronchitis              10   Beta-lactam oral
## 4  Cephalosporin       46 Sinus infection              10 Cephalosporin oral
## 5   Azithromycin       49 Sinus infection               3     Macrolide oral
## 6   Azithromycin       55            Cold              10     Macrolide oral
head(supT4)
##     Antibiotic Study_ID Subject Indication.       Class         Type
## 1 Erythromycin        1   Child         ONP   macrolide eye ointment
## 2  Clindamycin        2  Mother         CNP lincosamide           IV
## 3 Erythromycin        2   Child         ONP   macrolide eye ointment
## 4  Clindamycin        4  Mother         CNP lincosamide           IV
## 5 Erythromycin        4   Child         ONP   macrolide eye ointment
## 6    Cefazolin        5  Mother         CNP beta-lactam           IV
head(supT5)
##        Antibiotic Study.ID    Indication Exposure.Age..Month. Duration..Days.
## 1 Nitrofurantoin* 1 Mother           UTI                    1               7
## 2     Cephalexin* 1 Mother      mastitis                    2               7
## 3     Amoxicillin        1 ear infection                   10              10
## 4        Valtrex*        1     Cold sore                   12               1
## 5     Amoxicillin        1 ear infection                   13              10
## 6  Metronidazole* 1 Mother  GI infection                   13              10
##            Class
## 1     nitrofuran
## 2  cephalosporin
## 3    beta-lactam
## 4     anti-viral
## 5    beta-lactam
## 6 nitroimidazole

2.4.1 Perinatal ABX exposure

Child’s perinatal ABX exposure data (Sup.Table4) includes three ABXs. 4 children were exposed to three ABXs and 42 children were exposed to only one ABX.

supT4_c <- filter(supT4, Subject == "Child")
supT4_c %>% 
    group_by(Antibiotic) %>%
    summarize(n = length(unique(Study_ID)))
## # A tibble: 3 × 2
##   Antibiotic       n
##   <chr>        <int>
## 1 Ampicillin       4
## 2 Erythromycin    46
## 3 Gentamicin       4

## Children exposed to only one perinatal ABX treatment 
abx_summary <- supT4_c %>%
    group_by(Study_ID) %>%
    summarize(n = length(unique(Antibiotic)),
              abx = paste(Antibiotic, collapse = " | "))
abx_summary %>% 
    filter(n == 1) %>%
    group_by(abx) %>%
    summarise(n = length(unique(Study_ID)))
## # A tibble: 1 × 2
##   abx              n
##   <chr>        <int>
## 1 Erythromycin    42

2.4.2 Postnatal ABX exposure

24 Children had postnatal ABX exposures.

## Subset Sup.Table5 to only child cases
mother_ind <- grep("Mother", supT5$Study.ID)
supT5_c <- supT5[-mother_ind,]

## The number of children exposed postnatal ABX
length(unique(supT5_c$Study.ID))
## [1] 24

15 children had postnatal ABX exposure to only one class of ABX. For multiple ABX exposure cases, the exposure time is spread out, so we can potentially use them as independent single exposure cases…?!

## Summary of Child's postnatal ABX exposure
supT5_c_summary <- 
    supT5_c %>%
    group_by(Study.ID) %>%
    summarize(n = length(unique(Class)),
              abx = paste(Class, collapse = " | "))

supT5_c_summary
## # A tibble: 24 × 3
##    Study.ID     n abx                                                           
##    <chr>    <int> <chr>                                                         
##  1 1            4 beta-lactam | anti-viral | beta-lactam | beta-lactam | quinol…
##  2 10           4 anti-fungal | monoxycarbolic acid | beta-lactam | quinolone |…
##  3 11           2 beta-lactam | cephalosporin                                   
##  4 12           2 macrolide | beta-lactam                                       
##  5 16           2 NA | anti-fungal                                              
##  6 17           1 monoxycarbolic acid                                           
##  7 18           1 cephalosporin                                                 
##  8 20           3 NA | beta-lactam | NA                                         
##  9 21           1 beta-lactam                                                   
## 10 24           1 cephalosporin                                                 
## # … with 14 more rows
sum(supT5_c_summary$n == 1) # 15 children had postnatal ABX exposure to only one class of ABX
## [1] 15

3 Korry dataset

3.1 Overview

dim(meta_k)
## [1] 20 25
head(meta_k, 3)
##    sample_id  study_name subject_id body_site antibiotics_current_use
## 1 SRS4115648 KorryB_2020       DC10     cecum             doxycycline
## 2 SRS4115647 KorryB_2020       DC09     cecum             doxycycline
## 3 SRS4115645 KorryB_2020       DC01     cecum                    none
##   study_condition disease     age infant_age age_category gender country
## 1     doxycycline      NA 6 weeks         NA           NA female     USA
## 2     doxycycline      NA 6 weeks         NA           NA female     USA
## 3            none      NA 6 weeks         NA           NA female     USA
##   non_westernized sequencing_platform               DNA_extraction_kit PMID
## 1              NA       IlluminaHiSeq ZymoBIOMICS DNA/RNA Miniprep kit   NA
## 2              NA       IlluminaHiSeq ZymoBIOMICS DNA/RNA Miniprep kit   NA
## 3              NA       IlluminaHiSeq ZymoBIOMICS DNA/RNA Miniprep kit   NA
##   number_reads number_bases minimum_read_length median_read_length
## 1           NA   8992888352                  NA                302
## 2           NA  11138027874                  NA                302
## 3           NA   9049567108                  NA                302
##   NCBI_accession pregnant lactating       curator        type
## 1     SRR8291342       NA        NA Chloe Mirzayi METAGENOMIC
## 2     SRR8291343       NA        NA Chloe Mirzayi METAGENOMIC
## 3     SRR8291345       NA        NA Chloe Mirzayi METAGENOMIC

3.2 ABX summary

Antibiotic class abbreviation in this dataset:

Full name Class
Amoxicillin Penicillins
Ciprofloxacin Fluoroquinolones
Doxycycline Tetracyclines

Based on the paper, there should be 4 each group, but metadata says there are 8 control mice…?

meta_k %>%
    group_by(subject_id) %>%
    summarize(abx = paste(study_condition, collapse = " | ")) %>%
    group_by(abx) %>%
    summarize(n = length(unique(subject_id)))
## # A tibble: 4 × 2
##   abx               n
##   <chr>         <int>
## 1 amoxicillin       4
## 2 ciprofloxacin     4
## 3 doxycycline       4
## 4 none              8

4 Raymond dataset

4.1 Overview

dim(meta_r)
## [1]  72 136
length(unique(meta_r$subject_id)) # 24 subjects
## [1] 24

4.3 ABX summary

All 24 participants have control at the baseline. 6 participants didn’t treated with any antibiotic.

meta_r %>%
    group_by(subject_id) %>%
    summarize(trt = paste(study_condition, collapse = " | "))
## # A tibble: 24 × 2
##    subject_id trt                                      
##    <chr>      <chr>                                    
##  1 P10E       control | cephalosporins | cephalosporins
##  2 P11E       control | cephalosporins | cephalosporins
##  3 P12E       control | cephalosporins | cephalosporins
##  4 P13E       control | cephalosporins | cephalosporins
##  5 P14E       control | cephalosporins | cephalosporins
##  6 P15E       control | cephalosporins | cephalosporins
##  7 P17E       control | cephalosporins | cephalosporins
##  8 P18E       control | cephalosporins | cephalosporins
##  9 P19E       control | cephalosporins | cephalosporins
## 10 P1E        control | cephalosporins | cephalosporins
## # … with 14 more rows

5 Vatanen dataset

5.1 Overview

Metadata is collected from the Supplementary Table 1 of the manuscript.

va_explanation <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 1)
va_med <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 2)
va_ab <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 3)
va_feeding <- xlsx::read.xlsx(file.path(dir, "Vatanen_mmc2.xlsx"), 4)

Each row represents a unique 222 participant.

dim(va_med)
## [1] 222 110
length(unique(va_med$Participant)) # The number of participants
## [1] 222

5.2 ABX summary

5.2.0.1 Aggregate individual ABXs into their class

class_mapping <- list(Amoxicillin = "Penicillins",
                      Azithromycin = "Macrolides",
                      `Clavulanic acid` = "Beta-lactamase inhibitor",
                      Trimethoprim = "Sulfonamides",
                      Sulfadiazine = "Sulfonamides", # Sulfadiazine is a short-acting sulfonamide. 
                      Cefalexin = "Cephalosporins", # typo for Cephalexin?
                      Phenoxymethylpenicillin = "Penicillins",
                      Cefuroxime = "Cephalosporins",
                      Clarithromycin = "Macrolides",
                      Gentamicin = "Aminoglycosides",
                      Sulfamethoxazole = "Sulfonamides",
                      Cefprozil = "Cephalosporins",
                      Benzylpenicillin = "Penicillins",
                      Cefazolin = "Cephalosporins",
                      Ceftriaxone = "Cephalosporins",
                      Nifuroxazide = "Nitrofurans",
                      Azitromycin = "Macrolides", # it looks like a typo in metadata
                      `Systemic antibiotic NAS` = "Systemic antibiotic NAS",
                      Ampicillin = "Penicillins",
                      Cefadroxil = "Cephalosporins",
                      Cefexime = "Cephalosporins", # typo for Cefixime?
                      Cefotaxim = "Cephalosporins", # typo for Cefotaxime?
                      Cefotaxime = "Cephalosporins", 
                      Furazidin = "Hydantoins",
                      Isoniazid = "antituberculosis agents",
                      Midecamycin = "Macrolides",
                      Nitrofurantoin = "Nitrofurans",
                      Trimetoprim = "Timethoprim") # not sure

table(unlist(class_mapping))
## 
##          Aminoglycosides  antituberculosis agents Beta-lactamase inhibitor 
##                        1                        1                        1 
##           Cephalosporins               Hydantoins               Macrolides 
##                        9                        1                        4 
##              Nitrofurans              Penicillins             Sulfonamides 
##                        2                        4                        3 
##  Systemic antibiotic NAS              Timethoprim 
##                        1                        1

5.2.0.2 Add the ABX summary column

I combined all the antibiotics exposures, considering following cases as independent data points:
1) multiple ABX treated at the same time
2) same ABX treated at the different time points

abx_names <- grep("Name", colnames(va_med), ignore.case = TRUE, value = TRUE)
sub <- va_med[,abx_names]

sub$abx_all <- NA
sub$abx_unique <- NA
sub$abx_unique_n <- 0
sub$abx_class_all <- NA

for (i in seq_len(nrow(sub))) {
    
    ## Collect all ABX info per participant
    x <- sub[i, abx_names]
    sub$abx_all[i] <- paste(x[!is.na(x)], collapse = " | ")
    sub$abx_all[i] <- gsub("and", "|", sub$abx_all[i])
    
    ## Unique ABXs
    unique_abx <- strsplit(sub$abx_all[i], " | ", fixed = TRUE) %>% 
        unlist %>% 
        str_replace("^\\w{1}", toupper) %>% 
        unique
    
    sub$abx_unique[i] <- paste(unique_abx, collapse = " | ")
    sub$abx_unique_n[i] <- length(unique_abx)
    
    # Aggregate all ABX to the class level
    if (length(unique_abx) != 0) {
        abx_class <- unique(unlist(class_mapping[unique_abx]))
        sub$abx_class_all[i] <- paste(abx_class, collapse = " | ")
    }
}

Summary of all the ABX used in this study

strsplit(sub$abx_class_all, " | ", fixed = TRUE) %>% 
    unlist %>% 
    table %>% 
    sort(decreasing = TRUE)
## .
##              Penicillins           Cephalosporins               Macrolides 
##                      122                       37                       36 
## Beta-lactamase inhibitor             Sulfonamides          Aminoglycosides 
##                       28                       24                        7 
##              Nitrofurans  Systemic antibiotic NAS  antituberculosis agents 
##                        4                        2                        1 
##               Hydantoins              Timethoprim 
##                        1                        1

5.2.0.3 How frequently each participants received the antibiotics?

61% of the participants (136 out of 222) received antibiotics in the first 3 years of their lives. This is when we consider the following cases as a single independent exposure:
1) the same antibiotic were administered at multiple different time points
2) individual was treated with multiple types of antibiotics at the same time

abx_freq <- table(rowSums(!is.na(sub)))
plotMultiABX(abx_freq)

6 Vincent dataset

There is an error in cMD metadata for this dataset. We clean and update ABX information using the new metadata and metadata dictionary.

6.1 Overview

ABX exposure information has prior hospitalization (e.g.PRIOR_CEPH), three different time points prior to stool sample collection, and dichotomized version of it (e.g.CEPH_bRF).

grep("CEPH", colnames(meta_vi), ignore.case = TRUE, value = TRUE)
## [1] "PRIOR_CEPH"         "CEPH"               "CEPH.1To3Days_RF"  
## [4] "CEPH.4To14Days_RF"  "CEPH.15To30Days_RF" "CEPH_bRF"
head(meta_vi[,c(grep("CEPH", colnames(meta_vi), ignore.case = TRUE, value = TRUE))])
##   PRIOR_CEPH       CEPH CEPH.1To3Days_RF CEPH.4To14Days_RF CEPH.15To30Days_RF
## 1          0   1To3Days                1                 0                  0
## 2          0  4To14Days                0                 1                  0
## 3          0  4To14Days                0                 1                  0
## 4          0 15To30Days                0                 0                  1
## 5          0   1To3Days                1                 0                  0
## 6          0   1To3Days                1                 0                  0
##   CEPH_bRF
## 1        1
## 2        1
## 3        1
## 4        1
## 5        1
## 6        1

6.2 ABX summary

## Subset to only ABX information
abx_classes <- c("CEPH", "FLUORO", "CARBA", "PENICI", "VANCOIV", "AZITHRO", 
                 "METRO", "COTRIM", "GENTA", "CLINDA", "DAPTO")
abx_binary <- paste0(abx_classes, "_bRF")
meta_vi_abx <- meta_vi[, c("PATIENTID", abx_binary)]

## Quick look
colSums(meta_vi_abx[,abx_binary]) # the number of patients exposed to a specific ABX
##    CEPH_bRF  FLUORO_bRF   CARBA_bRF  PENICI_bRF VANCOIV_bRF AZITHRO_bRF 
##         134          35          34          10          37          16 
##   METRO_bRF  COTRIM_bRF   GENTA_bRF  CLINDA_bRF   DAPTO_bRF 
##          19           2           3           5           5
table(rowSums(meta_vi_abx[,abx_binary])) # the number of ABXs each patient exposed to (0 to 6 ABX)
## 
##   0   1   2   3   4   5   6 
##  44 136  19  11   3  15   1
meta_vi_abx[which.max(rowSums(meta_vi_abx[,abx_binary])),] # patient exposed to the maximum number (=6) of ABX
##     PATIENTID CEPH_bRF FLUORO_bRF CARBA_bRF PENICI_bRF VANCOIV_bRF AZITHRO_bRF
## 213     MM098        1          1         1          0           1           1
##     METRO_bRF COTRIM_bRF GENTA_bRF CLINDA_bRF DAPTO_bRF
## 213         1          0         0          0         0

## Subset only to 1) NoTreatment and 2) exposure to only one class of ABX
simple_ind <- which(rowSums(meta_vi_abx[,abx_binary]) %in% c(0, 1))
x <- meta_vi_abx[simple_ind,] # This step removes 12 patients from 98 total patients 

## Make `abx` column that concatenates dichotomized ABX exposure variables
for (i in seq_len(nrow(x))) {
    ind <- which(x[i,] == 1)
    abx <- gsub("_bRF", "", colnames(x)[ind])
    x$abx[i] <- paste(abx, collapse = " | ") # this matters when I keep patients exposed to multiple ABX
    if (length(ind) == 0) {x$abx[i] <- "Control"} # fill NoTreatment
}

This is the summary of the exposure groups. 12 participants exposed to more than one family of antibiotics are excluded in this summary, so there are total of 76 participant’s antibiotics exposure data is summarized.

## Summary 
x %>% 
    group_by(abx) %>%
    summarise(n = length(unique(PATIENTID)))
## # A tibble: 9 × 2
##   abx         n
##   <chr>   <int>
## 1 AZITHRO     1
## 2 CARBA       2
## 3 CEPH       60
## 4 Control    29
## 5 COTRIM      2
## 6 FLUORO      3
## 7 GENTA       1
## 8 METRO       1
## 9 PENICI      1

7 Yassour datasets

7.1 Overview

dim(meta_y)
## [1] 1136   33
length(unique(meta_y$subject_id)) # 39 subjects
## [1] 39

7.2 ABX summary

# ## The number of subjects for each ABX exposure
# meta_y %>%
#     group_by(antibiotics_current_use) %>%
#     summarise(n = length(unique(subject_id)))

## The number of ABX treatments for each subject
## Multiple different ABX were used
a <- meta_y %>% 
    filter(!is.na(antibiotics_current_use)) %>%
    group_by(subject_id) %>%
    summarise(abx_trt = length(unique(antibiotics_current_use)))
plotMultiABX(table(a$abx_trt))

sub_y <- meta_y %>% filter(!is.na(antibiotics_current_use))
sub_y$abx_all <- NA
sub_y$abx_unique <- NA
sub_y$abx_unique_n <- 0
sub_y$abx_class_all <- NA

for (i in seq_len(nrow(sub_y))) {
    
    ## Collect all ABX info per participant
    sub_y$abx_all[i] <- gsub("and", "|", sub_y$antibiotics_current_use[i])
    sub_y$abx_all[i] <- gsub(",", "|", sub_y$abx_all[i])
    
    ## Unique ABXs
    unique_abx <- strsplit(sub_y$abx_all[i], " | ", fixed = TRUE) %>% 
        unlist %>% 
        str_replace("^\\w{1}", toupper) %>% 
        unique
    
    sub_y$abx_unique[i] <- paste(unique_abx, collapse = " | ")
    sub_y$abx_unique_n[i] <- length(unique_abx)
    
    # Aggregate all ABX to the class level
    if (length(unique_abx) != 0) {
        abx_class <- unique(unlist(class_mapping[unique_abx]))
        sub_y$abx_class_all[i] <- paste(abx_class, collapse = " | ")
    }
}

Summary of all the ABX used in this study

strsplit(sub_y$abx_class_all, " | ", fixed = TRUE) %>% 
    unlist %>% 
    table %>% 
    sort(decreasing = TRUE)
## .
##              Penicillins Beta-lactamase inhibitor             Sulfonamides 
##                       79                       40                       27 
##               Macrolides           Cephalosporins  Systemic antibiotic NAS 
##                       13                        8                        1

7.3 Supplementary Tables

yas_general <- xlsx::read.xlsx(file.path(dir, "Yassour_Table_S1.xls"), 1) %>% .[,1:6]
yas_abx <- xlsx::read.xlsx(file.path(dir, "Yassour_Table_S1.xls"), 2)
yas_feeding <- xlsx::read.xlsx(file.path(dir, "Yassour_Table_S1.xls"), 3)
dim(yas_general)
## [1] 39  6
head(yas_general, 3)
##   subject Year.of.birth Country Gender    Delivery.type Gestational.age
## 1 E000823          2008 Finland   Male Vaginal delivery            39+4
## 2 E001958          2008 Finland Female Vaginal delivery            40+1
## 3 E003188          2008 Finland Female Vaginal delivery            38+6
length(unique(yas_general$subject)) # 39 unique subjects
## [1] 39

dim(yas_abx)
## [1] 233   6
head(yas_abx, 3)
##   Subject Age..months.                 Antibiotic.type Antibiotic.code
## 1 E003188          6.5                     Amoxicillin         J01CA04
## 2 E003188            7 Amoxicillin and clavulanic acid         J01CR02
## 3 E003188           13 Amoxicillin and clavulanic acid         J01CR02
##   Duration..days.           Symptoms
## 1               7 Acute otitis media
## 2               7 Acute otitis media
## 3               7 Acute otitis media
length(unique(yas_abx$Subject)) # 21 unique ABX-exposed subjects
## [1] 21
## One subject seems missing
# total 20 subjects with ABX exposure?
length(intersect(yas_general$subject, unique(yas_abx$Subject))) 
## [1] 20

# What is this subject "E028794"? It is only in 'abx' table - not in 'general' table
setdiff(yas_abx$Subject, yas_general$subject) 
## [1] "E028794"

7.3.1 The number of ABX treatments for each subject

# ## The number of subjects for each ABX exposure
# yas_abx %>%
#     group_by(Antibiotic.type) %>%
#     summarise(n = length(unique(Subject)))

z <- yas_abx %>% 
    group_by(Subject) %>%
    summarise(abx_trt = length(unique(Antibiotic.type)))

## All participants, except one, received more than one ABX
plotMultiABX(table(z$abx_trt))

7.3.2 ABX summary

yas_abx$abx_all <- NA
yas_abx$abx_unique <- NA
yas_abx$abx_unique_n <- 0
yas_abx$abx_class_all <- NA

for (i in seq_len(nrow(yas_abx))) {
    
    ## Collect all ABX info per participant
    x <- yas_abx[i, "Antibiotic.type"]
    yas_abx$abx_all[i] <- paste(x[!is.na(x)], collapse = " | ")
    yas_abx$abx_all[i] <- gsub("and", "|", yas_abx$abx_all[i])
    
    ## Unique ABXs
    unique_abx <- strsplit(yas_abx$abx_all[i], " | ", fixed = TRUE) %>% 
        unlist %>% 
        str_replace("^\\w{1}", toupper) %>% 
        unique
    
    yas_abx$abx_unique[i] <- paste(unique_abx, collapse = " | ")
    yas_abx$abx_unique_n[i] <- length(unique_abx)
    
    # Aggregate all ABX to the class level
    if (length(unique_abx) != 0) {
        abx_class <- unique(unlist(class_mapping[unique_abx]))
        yas_abx$abx_class_all[i] <- paste(abx_class, collapse = " | ")
    }
}

Summary of all the ABX used in this study

strsplit(yas_abx$abx_class_all, " | ", fixed = TRUE) %>% 
    unlist %>% 
    table %>% 
    sort(decreasing = TRUE)
## .
##              Penicillins Beta-lactamase inhibitor             Sulfonamides 
##                      129                       57                       41 
##               Macrolides           Cephalosporins  Systemic antibiotic NAS 
##                       30                       22                        2

8 Zaura dataset

8.1 Overview

dim(meta_z)
## [1] 389  25
head(meta_z, 3)
##        Description  study_name        sample_id subject_id assessment_day
## 1 HP1_faeces_amox2 ZauraE_2015 HP1_faeces_amox2        HP1          0.000
## 2 HP1_faeces_amox3 ZauraE_2015 HP1_faeces_amox3        HP1          7.000
## 3 HP1_faeces_amox4 ZauraE_2015 HP1_faeces_amox4        HP1         30.437
##   antibiotics_current_use study_condition disease age infant_age age_category
## 1                    amox            amox      NA  NA         NA           NA
## 2                    amox            amox      NA  NA         NA           NA
## 3                    amox            amox      NA  NA         NA           NA
##          country non_westernized sequencing_platform DNA_extraction_kit
## 1 United Kingdom              NA               LS454                 NA
## 2 United Kingdom              NA               LS454                 NA
## 3 United Kingdom              NA               LS454                 NA
##       PMID number_reads number_bases minimum_read_length median_read_length
## 1 26556275           NA      2159067                  NA                 NA
## 2 26556275           NA      3129754                  NA                 NA
## 3 26556275           NA      2404940                  NA                 NA
##   NCBI_accession pregnant lactating       curator type
## 1             NA       NA        NA Chloe Mirzayi  16S
## 2             NA       NA        NA Chloe Mirzayi  16S
## 3             NA       NA        NA Chloe Mirzayi  16S

8.2 ABX summary

Antibiotic class abbreviation in this dataset:

Label Full name Class
amox Amoxicillin Penicillins
cipro Ciprofloxacin Fluoroquinolones
clinda Clindamycin Lincomycins
minoc Minocycline Hydrochloride Tetracyclines
meta_z %>% 
    group_by(antibiotics_current_use) %>%   # `study_condition` is identical
    summarise(n = length(unique(subject_id)))
## # A tibble: 6 × 2
##   antibiotics_current_use     n
##   <chr>                   <int>
## 1 amox                       14
## 2 cipro                      10
## 3 clinda                      9
## 4 minoc                      10
## 5 minoc, minoc                1
## 6 placebo                    23
meta_z$abx <- NA
class_mapping <- list(amox = "Penicillins", 
                      cipro = "Fluoroquinolones",
                      clinda = "Lincomycins",
                      minoc = "Tetracyclines", 
                      placebo = "Placebo") 

for (class_abbr in names(class_mapping)) {
    ind <- which(meta_z$antibiotics_current_use == class_abbr)
    meta_z$abx[ind] <- class_mapping[[class_abbr]] 
}

# One data point with `minoc, minoc` label
ind <- which(meta_z$antibiotics_current_use == "minoc, minoc")
meta_z$abx[ind] <- "Tetracyclines"

meta_z %>% 
    group_by(abx) %>%
    summarise(n = length(unique(subject_id)))
## # A tibble: 5 × 2
##   abx                  n
##   <chr>            <int>
## 1 Fluoroquinolones    10
## 2 Lincomycins          9
## 3 Penicillins         14
## 4 Placebo             23
## 5 Tetracyclines       10