R Notebook: Provides the reproducible Count Files analysis for the following manuscript:
Citation: Romanowicz KJ, Resnick C, Hinton SR, Plesa C. (2025) Exploring antibiotic resistance in diverse homologs of the dihydrofolate reductase protein family through broad mutational scanning. Science Advances. https://doi.org/10.1126/sciadv.adw9178
GitHub Repository: https://github.com/PlesaLab/DHFR
NCBI BioProject: https://www.ncbi.nlm.nih.gov/bioproject/1189478
This pipeline processes a library of 1,536 DHFR homologs and their associated mutants, with two-fold redundancy (two codon variants per sequence). Fitness scores are derived from a multiplexed in vivo assay across a trimethoprim concentration gradient, assessing the ability of these homologs and their mutants to complement function in an E. coli knockout strain and to tolerate trimethoprim treatment. This analysis provides insight into how antibiotic resistance evolves across a range of evolutionary starting points. Sequence data were generated on the Illumina NovaSeq platform with 100 bp paired-end sequencing of amplicons.
The following R packages must be installed before they can be loaded into the R session. See the Reproducibility tab for a complete list of packages and the versions used in this workflow.
# Load Python (3.10.14) via reticulate for downstream use:
library(reticulate)
use_python("/Users/krom/miniforge3/bin/python3")
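As a quick check, reticulate can report which interpreter it actually bound to; a minimal sketch:
# Verify that reticulate bound to the intended Python interpreter
py_config()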
# Make a vector of required packages
required.packages <- c("ape", "bio3d", "Biostrings", "castor", "cowplot", "devtools", "dplyr", "ggExtra", "ggnewscale", "ggplot2", "ggridges", "ggtree", "ggtreeExtra", "glmnet", "gridExtra", "igraph", "knitr", "matrixStats", "patchwork", "pheatmap", "purrr", "pscl", "RColorBrewer", "reshape", "reshape2", "ROCR", "seqinr", "scales", "stringr", "stringi", "tidyr", "tidytree", "viridis")
# Load required packages with error handling
loaded.packages <- lapply(required.packages, function(package) {
  if (!require(package, character.only = TRUE)) {
    install.packages(package, dependencies = TRUE)
    if (!require(package, character.only = TRUE)) {
      message("Package ", package, " could not be installed and loaded.")
      return(NULL)
    }
  }
  return(package)
})
# Remove NULL entries from loaded packages
loaded.packages <- loaded.packages[!sapply(loaded.packages, is.null)]
Loaded packages: ape, bio3d, Biostrings, castor, cowplot, devtools, dplyr, ggExtra, ggnewscale, ggplot2, ggridges, ggtree, ggtreeExtra, glmnet, gridExtra, igraph, knitr, matrixStats, patchwork, pheatmap, purrr, pscl, RColorBrewer, reshape, reshape2, ROCR, seqinr, scales, stringr, stringi, tidyr, tidytree, viridis
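The package versions behind the Reproducibility tab can be captured directly from the loaded set; a minimal sketch (the object name pkg.versions is an assumption):
# Record the version of each successfully loaded package
pkg.versions <- sapply(unlist(loaded.packages),
                       function(p) as.character(packageVersion(p)))
pkg.versions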
Import the MAPPING files generated by DHFR.1.Mapping.RMD that are needed for downstream analysis.
### BCs_mutID (both codon versions):
# Lib15 (Codon 1)
BCs_mutID_15 <- read.csv("Mapping/map_files_formatted/BCs_mutID_15.csv",
header = TRUE, stringsAsFactors = FALSE)
# Lib16 (Codon 2)
BCs_mutID_16 <- read.csv("Mapping/map_files_formatted/BCs_mutID_16.csv",
header = TRUE, stringsAsFactors = FALSE)
### mutIDinfo (both codon versions):
# Lib15 (Codon 1)
mutIDinfo15 <- read.csv("Mapping/map_files_formatted/mutIDinfo15.csv",
header = TRUE, stringsAsFactors = FALSE)
# Lib16 (Codon 2)
mutIDinfo16 <- read.csv("Mapping/map_files_formatted/mutIDinfo16.csv",
header = TRUE, stringsAsFactors = FALSE)
Import the count data (sequences) for each sampling condition in Library 15 (D01, D03, D05:D11) and Library 16 (D02, D04, D12, E01:E06). Each count file contains two columns: (1) [BC], the unique barcode recovered from the sampling condition, and (2) [D0X], the sequence counts matching each barcode within that sampling condition.
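Every import below follows the same pattern, so the repetition could optionally be collapsed into a small helper; a sketch assuming the Count/<ID>_collapse_d1.tsv naming convention used throughout (the per-sample blocks are kept below for transparency):
# Optional helper: read one collapsed count file and label its columns
read_counts <- function(id) {
  read.csv(paste0("Count/", id, "_collapse_d1.tsv"),
           header = FALSE, sep = "\t") %>%
    select(-V3) %>%
    setNames(c("BC", id))
}
# e.g., D01 <- read_counts("D01")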
Import all Library 15 count files:
### Library 15
## Library 15 - T0
# D01 - Lib15 - Overnight Growth on LB - T0
D01 <- read.csv("Count/D01_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D01) <- c("BC","D01")
# D03 - Lib15 - Growth on M9 - Full Supplement - T0
D03 <- read.csv("Count/D03_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D03) <- c("BC","D03")
## Library 15 - T1
# D05 - Lib15 - Growth on M9 - 0 ug/ml TMP - T1
D05 <- read.csv("Count/D05_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D05) <- c("BC","D05")
# D06 - Lib15 - Growth on M9 - 0.058 ug/ml TMP - T1
D06 <- read.csv("Count/D06_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D06) <- c("BC","D06")
# D07 - Lib15 - Growth on M9 - 0.5 ug/ml TMP - T1
D07 <- read.csv("Count/D07_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D07) <- c("BC","D07")
# D08 - Lib15 - Growth on M9 - 1.0 ug/ml TMP - T1
D08 <- read.csv("Count/D08_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D08) <- c("BC","D08")
# D09 - Lib15 - Growth on M9 - 10.0 ug/ml TMP - T1
D09 <- read.csv("Count/D09_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D09) <- c("BC","D09")
# D10 - Lib15 - Growth on M9 - 50.0 ug/ml TMP - T1
D10 <- read.csv("Count/D10_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D10) <- c("BC","D10")
# D11 - Lib15 - Growth on M9 - 200.0 ug/ml TMP - T1
D11 <- read.csv("Count/D11_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D11) <- c("BC","D11")
Import all Library 16 count files:
### Library 16
## Library 16 - T0
# D02 - Lib16 - Overnight Growth on LB - T0
D02 <- read.csv("Count/D02_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D02) <- c("BC","D02")
# D04 - Lib16 - Growth on M9 - Full Supplement - T0
D04 <- read.csv("Count/D04_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D04) <- c("BC","D04")
## Library 16 - T1
# D12 - Lib16 - Growth on M9 - 0 ug/ml TMP - T1
D12 <- read.csv("Count/D12_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(D12) <- c("BC","D12")
# E01 - Lib16 - Growth on M9 - 0.058 ug/ml TMP - T1
E01 <- read.csv("Count/E01_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(E01) <- c("BC","E01")
# E02 - Lib16 - Growth on M9 - 0.5 ug/ml TMP - T1
E02 <- read.csv("Count/E02_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(E02) <- c("BC","E02")
# E03 - Lib16 - Growth on M9 - 1.0 ug/ml TMP - T1
E03 <- read.csv("Count/E03_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(E03) <- c("BC","E03")
# E04 - Lib16 - Growth on M9 - 10.0 ug/ml TMP - T1
E04 <- read.csv("Count/E04_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(E04) <- c("BC","E04")
# E05 - Lib16 - Growth on M9 - 50.0 ug/ml TMP - T1
E05 <- read.csv("Count/E05_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(E05) <- c("BC","E05")
# E06 - Lib16 - Growth on M9 - 200.0 ug/ml TMP - T1
E06 <- read.csv("Count/E06_collapse_d1.tsv", header = FALSE, sep = "\t") %>%
  select(-V3)
colnames(E06) <- c("BC","E06")
Summarize the imported count data by sampling condition for each codon-version library. Begin by combining the count data from each sampling condition into a single data frame per library. Next, sum the count values for each sampling condition (the total number of sequences recovered). Then count the number of unique barcodes recovered from each condition, and define the sampling condition associated with each sample code. Finally, write the data summary to a .csv file in the working directory.
Summarize Library 15
## Library 15
# Merge data objects into a single data frame
norm_vals_15 <- data.frame(sample=c("D01","D03","D05","D06","D07","D08","D09","D10","D11"),
counts=c(sum(D01$D01),sum(D03$D03),sum(D05$D05),sum(D06$D06),sum(D07$D07),sum(D08$D08),
sum(D09$D09),sum(D10$D10),sum(D11$D11)),
numBCs=c(length(D01$D01),length(D03$D03),length(D05$D05),length(D06$D06),length(D07$D07),
length(D08$D08),length(D09$D09),length(D10$D10),length(D11$D11)),
sampledesc=c("D01 - Lib15 - T0 - Overnight Growth on LB",
"D03 - Lib15 - T0 - Growth on M9 - Full Supplement",
"D05 - Lib15 - T1 - Growth on M9 - 0.0 ug/ml TMP",
"D06 - Lib15 - T1 - Growth on M9 - 0.058 ug/ml TMP",
"D07 - Lib15 - T1 - Growth on M9 - 0.5 ug/ml TMP",
"D08 - Lib15 - T1 - Growth on M9 - 1.0 ug/ml TMP",
"D09 - Lib15 - T1 - Growth on M9 - 10.0 ug/ml TMP",
"D10 - Lib15 - T1 - Growth on M9 - 50.0 ug/ml TMP",
"D11 - Lib15 - T1 - Growth on M9 - 200.0 ug/ml TMP"),
TMP=c("Overnight LB", "Full Supplement", "0-tmp", "0.058-tmp", "0.5-tmp",
"1.0-tmp", "10-tmp", "50-tmp", "200-tmp"))
Library 15 Summarized Count Data for Sequences and Barcodes (BC)
| Sample Conditions | Sample ID | Seq Counts | BC Counts |
|---|---|---|---|
| D01 - Lib15 - T0 - Overnight Growth on LB | D01 | 83,770,481 | 332,234 |
| D03 - Lib15 - T0 - Growth on M9 - Full Supplement | D03 | 108,604,061 | 359,595 |
| D05 - Lib15 - T1 - Growth on M9 - 0.0 ug/ml TMP | D05 | 54,119,812 | 246,911 |
| D06 - Lib15 - T1 - Growth on M9 - 0.058 ug/ml TMP | D06 | 47,847,019 | 174,073 |
| D07 - Lib15 - T1 - Growth on M9 - 0.5 ug/ml TMP | D07 | 40,802,227 | 161,495 |
| D08 - Lib15 - T1 - Growth on M9 - 1.0 ug/ml TMP | D08 | 39,746,862 | 171,737 |
| D09 - Lib15 - T1 - Growth on M9 - 10.0 ug/ml TMP | D09 | 53,156,432 | 103,600 |
| D10 - Lib15 - T1 - Growth on M9 - 50.0 ug/ml TMP | D10 | 49,744,711 | 179,548 |
| D11 - Lib15 - T1 - Growth on M9 - 200.0 ug/ml TMP | D11 | 38,565,223 | 46,363 |
Summarize Library 16
## Library 16
# Merge data objects into a single data frame
norm_vals_16 <- data.frame(sample=c("D02","D04",
"D12","E01","E02","E03","E04","E05","E06"),
counts=c(sum(D02$D02),sum(D04$D04),sum(D12$D12),
sum(E01$E01),sum(E02$E02),sum(E03$E03),sum(E04$E04),sum(E05$E05),sum(E06$E06)),
numBCs=c(length(D02$D02),length(D04$D04),length(D12$D12),length(E01$E01),length(E02$E02),
length(E03$E03),length(E04$E04),length(E05$E05),length(E06$E06)),
sampledesc=c("D02 - Lib16 - T0 - Overnight Growth on LB",
"D04 - Lib16 - T0 - Growth on M9 - Full Supplement",
"D12 - Lib16 - T1 - Growth on M9 - 0.0 ug/ml TMP",
"E01 - Lib16 - T1 - Growth on M9 - 0.058 ug/ml TMP",
"E02 - Lib16 - T1 - Growth on M9 - 0.5 ug/ml TMP",
"E03 - Lib16 - T1 - Growth on M9 - 1.0 ug/ml TMP",
"E04 - Lib16 - T1 - Growth on M9 - 10.0 ug/ml TMP",
"E05 - Lib16 - T1 - Growth on M9 - 50.0 ug/ml TMP",
"E06 - Lib16 - T1 - Growth on M9 - 200.0 ug/ml TMP"),
TMP=c("Overnight LB", "Full Supplement", "0-tmp", "0.058-tmp", "0.5-tmp",
"1.0-tmp", "10-tmp", "50-tmp", "200-tmp"))
Library 16 Summarized Count Data for Sequences and Barcodes (BC)
| Sample Conditions | Sample ID | Seq Counts | BC Counts |
|---|---|---|---|
| D02 - Lib16 - T0 - Overnight Growth on LB | D02 | 48,821,977 | 428,302 |
| D04 - Lib16 - T0 - Growth on M9 - Full Supplement | D04 | 65,871,632 | 409,052 |
| D12 - Lib16 - T1 - Growth on M9 - 0.0 ug/ml TMP | D12 | 68,405,404 | 291,387 |
| E01 - Lib16 - T1 - Growth on M9 - 0.058 ug/ml TMP | E01 | 43,914,011 | 215,066 |
| E02 - Lib16 - T1 - Growth on M9 - 0.5 ug/ml TMP | E02 | 43,752,442 | 190,020 |
| E03 - Lib16 - T1 - Growth on M9 - 1.0 ug/ml TMP | E03 | 49,596,583 | 181,809 |
| E04 - Lib16 - T1 - Growth on M9 - 10.0 ug/ml TMP | E04 | 58,144,156 | 151,536 |
| E05 - Lib16 - T1 - Growth on M9 - 50.0 ug/ml TMP | E05 | 52,530,782 | 134,374 |
| E06 - Lib16 - T1 - Growth on M9 - 200.0 ug/ml TMP | E06 | 63,525,568 | 170,613 |
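As described above, each library's summary is written to a .csv file in the working directory; a minimal sketch (the file names are assumptions):
# Write the per-library summaries to the working directory (file names assumed)
write.csv(norm_vals_15, "norm_vals_15_summary.csv", row.names = FALSE)
write.csv(norm_vals_16, "norm_vals_16_summary.csv", row.names = FALSE)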
Merge and prune BCs from the imported count data by sampling condition for each codon-version library. Begin by combining the BC count data from each sampling condition into a single data frame per library. Next, prune the BCs, retaining only those exactly 20 bp in length. Then map the remaining BCs to the BC mapping file to determine how many BCs were mapped relative to all BCs recovered. Finally, write the data summary to a .csv file in the working directory.
Merge the count data from each library for each sampling condition by barcode for downstream analysis.
# Library 15
BCs15 <- D01 %>%
full_join(D03,by="BC") %>%
full_join(D05,by="BC") %>%
full_join(D06,by="BC") %>%
full_join(D07,by="BC") %>%
full_join(D08,by="BC") %>%
full_join(D09,by="BC") %>%
full_join(D10,by="BC") %>%
full_join(D11,by="BC")
# Library 16
BCs16 <- D02 %>%
full_join(D04,by="BC") %>%
full_join(D12,by="BC") %>%
full_join(E01,by="BC") %>%
full_join(E02,by="BC") %>%
full_join(E03,by="BC") %>%
full_join(E04,by="BC") %>%
full_join(E05,by="BC") %>%
full_join(E06,by="BC")
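Since purrr is loaded, the join chains above can be written more compactly with reduce(); an equivalent sketch, not the code used here:
# Equivalent to the full_join chains above
BCs15 <- purrr::reduce(list(D01, D03, D05, D06, D07, D08, D09, D10, D11),
                       full_join, by = "BC")
BCs16 <- purrr::reduce(list(D02, D04, D12, E01, E02, E03, E04, E05, E06),
                       full_join, by = "BC")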
Preview of the merged BC count data (NA for missing values)
Total BCs: Count the number of unique barcodes for each library
# Count the number of unique barcodes for Library 15
BCs15.count <- length(unique(BCs15$BC))
format(BCs15.count, big.mark = ",")
[1] "674,517"
# Count the number of unique barcodes for Library 16
BCs16.count <- length(unique(BCs16$BC))
format(BCs16.count, big.mark = ",")
[1] "740,357"
Total BCs at Complementation: Count the number of unique barcodes for each library at complementation
# Lib15
# Filter rows where D05 is not NA
filtered_BCs15 <- BCs15[!is.na(BCs15$D05), ]
# Count unique values in the BC column of the filtered data
BCs15.D05.count <- length(unique(filtered_BCs15$BC))
format(BCs15.D05.count, big.mark = ",")
[1] "246,911"
# Lib 16
# Filter rows where D12 is not NA
filtered_BCs16 <- BCs16[!is.na(BCs16$D12), ]
# Count unique values in the BC column of the filtered data
BCs16.D12.count <- length(unique(filtered_BCs16$BC))
format(BCs16.D12.count, big.mark = ",")
[1] "291,387"
Remove unique BCs less than 20 nucleotides in length, as these do not represent true BCs from the designed pool.
# Lib15: Tabulate the lengths of the unique BCs in the BCs15 object
BCs15.unique.table <- table(str_length(BCs15$BC))
BCs15.unique.table
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 3 11 30 49 71 100 140 197 236 326 402 491 716 1007 1263 1704 5340
19 20
8816 653614
# Lib16: Tabulate the lengths of the unique BCs in the BCs16 object
BCs16.unique.table <- table(str_length(BCs16$BC))
BCs16.unique.table
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
1 5 12 20 48 66 78 123 118 209 243 289 389 557 770 1143 1347 5044
19 20
8392 721503
# Filter rows where the length of BC is greater than or equal to 20
# Lib15
BCs15.prune <- BCs15 %>%
filter(str_length(BC) >= 20)
# Lib16
BCs16.prune <- BCs16 %>%
filter(str_length(BC) >= 20)
# Lib15: Tabulate the lengths of the unique BCs in the BCs15.prune object
BCs15.prune.unique.table <- table(str_length(BCs15.prune$BC))
BCs15.prune.unique.table
20
653614
# Lib16: Tabulate the lengths of the unique BCs in the BCs16.prune object
BCs16.prune.unique.table <- table(str_length(BCs16.prune$BC))
BCs16.prune.unique.table
20
721503
Total BCs: Count the number of unique barcodes for each library after pruning by BC length
# Count the number of unique barcodes for Library 15
BCs15.prune.count <- length(unique(BCs15.prune$BC))
format(BCs15.prune.count, big.mark = ",")
[1] "653,614"
# Count the number of unique barcodes for Library 16
BCs16.prune.count <- length(unique(BCs16.prune$BC))
format(BCs16.prune.count, big.mark = ",")
[1] "721,503"
Unique BCs between Codon Versions: Count the numbers of shared and unique barcodes between the two libraries after pruning by BC length
# Merge by shared "BC" (1BC)
BCs.prune.shared <- merge(BCs15.prune, BCs16.prune, by = "BC", all = FALSE)
# Count the number of unique BCs shared between libraries
BCs.prune.shared.count <- length(unique(BCs.prune.shared$BC))
format(BCs.prune.shared.count, big.mark = ",")
[1] "238,237"
# Create a new dataset retaining unique values from both datasets (1BC)
BCs.prune.unique <- bind_rows(
anti_join(BCs15.prune, BCs16.prune, by = "BC"),
anti_join(BCs16.prune, BCs15.prune, by = "BC"))
# Count the number of unique BCs unique to one library or the other
BCs.prune.unique.count <- length(unique(BCs.prune.unique$BC))
format(BCs.prune.unique.count, big.mark = ",")
[1] "898,643"
# Count number of shared and unique BCs
BCs.prune.all.count <- BCs.prune.shared.count + BCs.prune.unique.count
format(BCs.prune.all.count, big.mark = ",")
[1] "1,136,880"
Total BCs at Complementation: Count the number of unique barcodes for each library at complementation after pruning
# Lib15
# Filter rows where D05 is not NA
prune_filtered_BCs15 <- BCs15.prune[!is.na(BCs15.prune$D05), ]
# Count unique values in the BC column of the filtered data
BCs15.D05.prune.count <- length(unique(prune_filtered_BCs15$BC))
format(BCs15.D05.prune.count, big.mark = ",")
[1] "242,884"
# Lib 16
# Filter rows where D12 is not NA
prune_filtered_BCs16 <- BCs16.prune[!is.na(BCs16.prune$D12), ]
# Count unique values in the BC column of the filtered data
BCs16.D12.prune.count <- length(unique(prune_filtered_BCs16$BC))
format(BCs16.D12.prune.count, big.mark = ",")
[1] "286,031"
Unique BCs between Codon Versions at Complementation: Count the numbers of shared and unique barcodes between the two libraries at complementation, after pruning by BC length
# Merge by shared "BC" (1BC)
BCs.comp.prune.shared <- merge(prune_filtered_BCs15, prune_filtered_BCs16, by = "BC", all = FALSE)
# Count the number of unique BCs shared between libraries
BCs.comp.prune.shared.count <- length(unique(BCs.comp.prune.shared$BC))
format(BCs.comp.prune.shared.count, big.mark = ",")
[1] "125,761"
# Create a new dataset retaining unique values from both datasets (1BC)
BCs.comp.prune.unique <- bind_rows(
anti_join(prune_filtered_BCs15, prune_filtered_BCs16, by = "BC"),
anti_join(prune_filtered_BCs16, prune_filtered_BCs15, by = "BC"))
# Count the number of unique BCs unique to one library or the other
BCs.comp.prune.unique.count <- length(unique(BCs.comp.prune.unique$BC))
format(BCs.comp.prune.unique.count, big.mark = ",")
[1] "277,393"
# Count number of shared and unique BCs
BCs.comp.prune.all.count <- BCs.comp.prune.shared.count + BCs.comp.prune.unique.count
format(BCs.comp.prune.all.count, big.mark = ",")
[1] "403,154"
Map each pruned BC to its corresponding BC in the BCs_mutID mapping file. This links each BC (normalized and pruned) recovered in each count file with its identifying sequence information from the mapping file.
# Library 15
BCs15_map <- inner_join(BCs_mutID_15,BCs15.prune,by="BC")
# Library 16
BCs16_map <- inner_join(BCs_mutID_16,BCs16.prune,by="BC")
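As a quick check of mapping efficiency at the BC level, the mapped objects can be compared against the pruned inputs; a sketch (assumes one mapping-file row per BC):
# Fraction of pruned BCs that matched the mapping file
nrow(BCs15_map) / nrow(BCs15.prune)
nrow(BCs16_map) / nrow(BCs16.prune)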
Calculate the total number of reads and unique barcodes recovered from each sampling condition.
# Lib15
BCs15_map_frac <- BCs15_map
# Sum the total reads recovered from each sampling condition:
norm_vals_15$mappedcounts <- c(
sum(BCs15_map_frac$D01, na.rm=T),sum(BCs15_map_frac$D03, na.rm=T),sum(BCs15_map_frac$D05, na.rm=T),
sum(BCs15_map_frac$D06, na.rm=T),sum(BCs15_map_frac$D07, na.rm=T),sum(BCs15_map_frac$D08, na.rm=T),
sum(BCs15_map_frac$D09, na.rm=T),sum(BCs15_map_frac$D10, na.rm=T),sum(BCs15_map_frac$D11, na.rm=T))
# Determine the number of unique barcodes recovered from each sampling condition:
norm_vals_15$mappedBCs <- c(
sum(!is.na(BCs15_map_frac$D01)),
sum(!is.na(BCs15_map_frac$D03)),
sum(!is.na(BCs15_map_frac$D05)),
sum(!is.na(BCs15_map_frac$D06)),
sum(!is.na(BCs15_map_frac$D07)),
sum(!is.na(BCs15_map_frac$D08)),
sum(!is.na(BCs15_map_frac$D09)),
sum(!is.na(BCs15_map_frac$D10)),
sum(!is.na(BCs15_map_frac$D11)))
Table of Summarized Count Data including Mapped Counts and Mapped BCs for each Sample Condition
| Sample Conditions | Sample ID | Seq Counts | BC Counts | Mapped Counts | Mapped BCs |
|---|---|---|---|---|---|
| D01 - Lib15 - T0 - Overnight Growth on LB | D01 | 83,770,481 | 332,234 | 61,036,972 | 197,558 |
| D03 - Lib15 - T0 - Growth on M9 - Full Supplement | D03 | 108,604,061 | 359,595 | 79,808,887 | 145,597 |
| D05 - Lib15 - T1 - Growth on M9 - 0.0 ug/ml TMP | D05 | 54,119,812 | 246,911 | 39,528,966 | 95,345 |
| D06 - Lib15 - T1 - Growth on M9 - 0.058 ug/ml TMP | D06 | 47,847,019 | 174,073 | 34,955,335 | 87,057 |
| D07 - Lib15 - T1 - Growth on M9 - 0.5 ug/ml TMP | D07 | 40,802,227 | 161,495 | 29,804,232 | 76,336 |
| D08 - Lib15 - T1 - Growth on M9 - 1.0 ug/ml TMP | D08 | 39,746,862 | 171,737 | 28,977,937 | 71,182 |
| D09 - Lib15 - T1 - Growth on M9 - 10.0 ug/ml TMP | D09 | 53,156,432 | 103,600 | 38,534,158 | 51,972 |
| D10 - Lib15 - T1 - Growth on M9 - 50.0 ug/ml TMP | D10 | 49,744,711 | 179,548 | 36,333,615 | 55,599 |
| D11 - Lib15 - T1 - Growth on M9 - 200.0 ug/ml TMP | D11 | 38,565,223 | 46,363 | 29,013,111 | 15,161 |
# Library 16
BCs16_map_frac <- BCs16_map
# Sum the total reads recovered from each sampling condition:
norm_vals_16$mappedcounts <- c(
sum(BCs16_map_frac$D02, na.rm=T),sum(BCs16_map_frac$D04, na.rm=T),sum(BCs16_map_frac$D12, na.rm=T),
sum(BCs16_map_frac$E01, na.rm=T),sum(BCs16_map_frac$E02, na.rm=T),sum(BCs16_map_frac$E03, na.rm=T),
sum(BCs16_map_frac$E04, na.rm=T),sum(BCs16_map_frac$E05, na.rm=T),sum(BCs16_map_frac$E06, na.rm=T))
# Determine the number of unique barcodes recovered from each sampling condition:
norm_vals_16$mappedBCs <- c(
sum(!is.na(BCs16_map_frac$D02)),
sum(!is.na(BCs16_map_frac$D04)),
sum(!is.na(BCs16_map_frac$D12)),
sum(!is.na(BCs16_map_frac$E01)),
sum(!is.na(BCs16_map_frac$E02)),
sum(!is.na(BCs16_map_frac$E03)),
sum(!is.na(BCs16_map_frac$E04)),
sum(!is.na(BCs16_map_frac$E05)),
sum(!is.na(BCs16_map_frac$E06)))
| Sample Conditions | Sample ID | Seq Counts | BC Counts | Mapped Counts | Mapped BCs |
|---|---|---|---|---|---|
| D02 - Lib16 - T0 - Overnight Growth on LB | D02 | 48,821,977 | 428,302 | 34,177,971 | 266,916 |
| D04 - Lib16 - T0 - Growth on M9 - Full Supplement | D04 | 65,871,632 | 409,052 | 45,234,084 | 191,349 |
| D12 - Lib16 - T1 - Growth on M9 - 0.0 ug/ml TMP | D12 | 68,405,404 | 291,387 | 47,717,303 | 141,835 |
| E01 - Lib16 - T1 - Growth on M9 - 0.058 ug/ml TMP | E01 | 43,914,011 | 215,066 | 30,754,811 | 126,987 |
| E02 - Lib16 - T1 - Growth on M9 - 0.5 ug/ml TMP | E02 | 43,752,442 | 190,020 | 30,367,898 | 108,523 |
| E03 - Lib16 - T1 - Growth on M9 - 1.0 ug/ml TMP | E03 | 49,596,583 | 181,809 | 34,229,832 | 101,164 |
| E04 - Lib16 - T1 - Growth on M9 - 10.0 ug/ml TMP | E04 | 58,144,156 | 151,536 | 38,659,435 | 76,505 |
| E05 - Lib16 - T1 - Growth on M9 - 50.0 ug/ml TMP | E05 | 52,530,782 | 134,374 | 33,688,875 | 67,666 |
| E06 - Lib16 - T1 - Growth on M9 - 200.0 ug/ml TMP | E06 | 63,525,568 | 170,613 | 39,388,556 | 66,503 |
Calculate the fraction of mapped counts and BCs relative to total counts and BCs, respectively
# Library 15
# Calculate the fraction of mapped counts or barcodes relative to total counts or barcodes, respectively
norm_vals_15 <- norm_vals_15 %>%
mutate(frac_mapped_counts=mappedcounts/counts,
frac_mapped_BC=mappedBCs/numBCs)
Table of Summarized Count Data including Mapped Counts, Mapped BCs, and Mapped Fractions for each Sample Condition
| Sample Conditions | Sample ID | Seq Counts | BC Counts | Mapped Counts | Mapped BCs | Frac Mapped Counts | Frac Mapped BCs |
|---|---|---|---|---|---|---|---|
| D01 - Lib15 - T0 - Overnight Growth on LB | D01 | 83,770,481 | 332,234 | 61,036,972 | 197,558 | 0.73 | 0.59 |
| D03 - Lib15 - T0 - Growth on M9 - Full Supplement | D03 | 108,604,061 | 359,595 | 79,808,887 | 145,597 | 0.73 | 0.40 |
| D05 - Lib15 - T1 - Growth on M9 - 0.0 ug/ml TMP | D05 | 54,119,812 | 246,911 | 39,528,966 | 95,345 | 0.73 | 0.39 |
| D06 - Lib15 - T1 - Growth on M9 - 0.058 ug/ml TMP | D06 | 47,847,019 | 174,073 | 34,955,335 | 87,057 | 0.73 | 0.50 |
| D07 - Lib15 - T1 - Growth on M9 - 0.5 ug/ml TMP | D07 | 40,802,227 | 161,495 | 29,804,232 | 76,336 | 0.73 | 0.47 |
| D08 - Lib15 - T1 - Growth on M9 - 1.0 ug/ml TMP | D08 | 39,746,862 | 171,737 | 28,977,937 | 71,182 | 0.73 | 0.41 |
| D09 - Lib15 - T1 - Growth on M9 - 10.0 ug/ml TMP | D09 | 53,156,432 | 103,600 | 38,534,158 | 51,972 | 0.72 | 0.50 |
| D10 - Lib15 - T1 - Growth on M9 - 50.0 ug/ml TMP | D10 | 49,744,711 | 179,548 | 36,333,615 | 55,599 | 0.73 | 0.31 |
| D11 - Lib15 - T1 - Growth on M9 - 200.0 ug/ml TMP | D11 | 38,565,223 | 46,363 | 29,013,111 | 15,161 | 0.75 | 0.33 |
# Library 16
# Calculate the fraction of mapped counts or barcodes relative to total counts or barcodes, respectively
norm_vals_16 <- norm_vals_16 %>%
mutate(frac_mapped_counts=mappedcounts/counts,
frac_mapped_BC=mappedBCs/numBCs)
Table of Summarized Count Data including Mapped Counts, Mapped BCs, and Mapped Fractions for each Sample Condition
| Sample Conditions | Sample ID | Seq Counts | BC Counts | Mapped Counts | Mapped BCs | Frac Mapped Counts | Frac Mapped BCs |
|---|---|---|---|---|---|---|---|
| D02 - Lib16 - T0 - Overnight Growth on LB | D02 | 48,821,977 | 428,302 | 34,177,971 | 266,916 | 0.70 | 0.62 |
| D04 - Lib16 - T0 - Growth on M9 - Full Supplement | D04 | 65,871,632 | 409,052 | 45,234,084 | 191,349 | 0.69 | 0.47 |
| D12 - Lib16 - T1 - Growth on M9 - 0.0 ug/ml TMP | D12 | 68,405,404 | 291,387 | 47,717,303 | 141,835 | 0.70 | 0.49 |
| E01 - Lib16 - T1 - Growth on M9 - 0.058 ug/ml TMP | E01 | 43,914,011 | 215,066 | 30,754,811 | 126,987 | 0.70 | 0.59 |
| E02 - Lib16 - T1 - Growth on M9 - 0.5 ug/ml TMP | E02 | 43,752,442 | 190,020 | 30,367,898 | 108,523 | 0.69 | 0.57 |
| E03 - Lib16 - T1 - Growth on M9 - 1.0 ug/ml TMP | E03 | 49,596,583 | 181,809 | 34,229,832 | 101,164 | 0.69 | 0.56 |
| E04 - Lib16 - T1 - Growth on M9 - 10.0 ug/ml TMP | E04 | 58,144,156 | 151,536 | 38,659,435 | 76,505 | 0.66 | 0.50 |
| E05 - Lib16 - T1 - Growth on M9 - 50.0 ug/ml TMP | E05 | 52,530,782 | 134,374 | 33,688,875 | 67,666 | 0.64 | 0.50 |
| E06 - Lib16 - T1 - Growth on M9 - 200.0 ug/ml TMP | E06 | 63,525,568 | 170,613 | 39,388,556 | 66,503 | 0.62 | 0.39 |
Plot the mapped reads (those matching a barcode) as a fraction of total reads.
# Merge the two datasets and add the "Lib" column
merged_norm_vals <- bind_rows(
norm_vals_15 %>% mutate(Lib = "Lib15"),
norm_vals_16 %>% mutate(Lib = "Lib16"))
merged_norm_vals_sub <- merged_norm_vals %>%
  select(5:10)  # TMP, mappedcounts, mappedBCs, frac_mapped_counts, frac_mapped_BC, Lib
# Drop the T0 conditions (overnight LB, full supplement) for plotting
merged_norm_vals_sub <- merged_norm_vals_sub %>%
filter(!grepl("Full Supplement|Overnight LB", TMP))
# Convert the "frac_mapped_counts" and "frac_mapped_BC" columns to percentages
merged_norm_vals_sub$frac_mapped_counts <- merged_norm_vals_sub$frac_mapped_counts * 100
merged_norm_vals_sub$frac_mapped_BC <- merged_norm_vals_sub$frac_mapped_BC * 100
# Place the TMP conditions in order for plotting
frac_order <- c("0-tmp", "0.058-tmp", "0.5-tmp", "1.0-tmp", "10-tmp", "50-tmp", "200-tmp")
merged_norm_vals_sub$TMP <- factor(merged_norm_vals_sub$TMP, levels = frac_order)
# Create custom labels for the x-axis (must match the level order above)
frac_labels <- c("0", "0.058", "0.5", "1.0", "10", "50", "200")
# Create the bar plot
merged_norm_frac_read_plot <- ggplot(merged_norm_vals_sub, aes(x = TMP, y = frac_mapped_counts, fill = Lib)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.85), alpha = 0.8) +
xlab("Trimethoprim (μg/mL)") +
ylab("Fraction of Total Reads Mapped (%)") +
theme_minimal() +
theme(
axis.line = element_line(colour = "black", linewidth = 0.5),
axis.ticks = element_line(colour = "black", linewidth = 0.5),
axis.text.x = element_text(size = 16),
axis.text.y = element_text(size = 16),
panel.background = element_blank(),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none"
) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) +
scale_x_discrete(labels = unique(frac_labels)) +
scale_fill_manual(values = c("#0072B2", "#E69F00"))
merged_norm_frac_read_plot
# Create the bar plot
merged_norm_frac_BC_plot <- ggplot(merged_norm_vals_sub, aes(x = TMP, y = frac_mapped_BC, fill = Lib)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.85), alpha = 0.75) +
xlab("Trimethoprim (μg/mL)") +
ylab("Fraction of Total BCs Mapped (%)") +
theme_minimal() +
theme(
axis.line = element_line(colour = "black", linewidth = 0.5),
axis.ticks = element_line(colour = "black", linewidth = 0.5),
axis.text.x = element_text(size = 16),
axis.text.y = element_text(size = 16),
panel.background = element_blank(),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none"
) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) +
scale_x_discrete(labels = unique(frac_labels)) +
scale_fill_manual(values = c("#0072B2", "#E69F00"))
merged_norm_frac_BC_plot
# Create the bar plot
merged_norm_reads_plot <- ggplot(merged_norm_vals_sub, aes(x = TMP, y = mappedcounts, fill = Lib)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.85), alpha = 0.75) +
xlab("Trimethoprim (μg/mL)") +
ylab("Total Reads Mapped") +
theme_minimal() +
theme(
axis.line = element_line(colour = "black", linewidth = 0.5),
axis.ticks = element_line(colour = "black", linewidth = 0.5),
axis.text.x = element_text(size = 16),
axis.text.y = element_text(size = 16),
panel.background = element_blank(),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none"
) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 50000000), labels = label_comma()) +
scale_x_discrete(labels = unique(frac_labels)) +
scale_fill_manual(values = c("#0072B2", "#E69F00"))
merged_norm_reads_plot
# Create the bar plot
merged_norm_BC_plot <- ggplot(merged_norm_vals_sub, aes(x = TMP, y = mappedBCs, fill = Lib)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.85), alpha = 0.75) +
xlab("Trimethoprim (μg/mL)") +
ylab("Total BCs Mapped") +
theme_minimal() +
theme(
axis.line = element_line(colour = "black", linewidth = 0.5),
axis.ticks = element_line(colour = "black", linewidth = 0.5),
axis.text.x = element_text(size = 16),
axis.text.y = element_text(size = 16),
panel.background = element_blank(),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none"
) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 155000), labels = label_comma()) +
scale_x_discrete(labels = unique(frac_labels)) +
scale_fill_manual(values = c("#0072B2", "#E69F00"))
merged_norm_BC_plot
With the count data from both libraries imported and merged, we can perform some basic checks to confirm that everything has been incorporated properly.
BCs matching mapping file (min. 1 Seq/BC): How many of our barcodes (from count files) are in the mapping files for each library?
# Lib15 - Count number of barcodes (rows)
format(nrow(BCs15_map), big.mark = ",")
[1] "200,790"
# Lib16 - Count number of barcodes (rows)
format(nrow(BCs16_map), big.mark = ",")
[1] "273,999"
BCs matching mapping file for Complementation (min. 1 Seq/BC):
# Lib15 - Filter rows where D05 is not NA
BCs15_map_filtered <- BCs15_map[!is.na(BCs15_map$D05), ]
# Count unique values in the BC column of the filtered data
BCs15_map_filtered.count <- length(unique(BCs15_map_filtered$BC))
format(BCs15_map_filtered.count, big.mark = ",")
[1] "95,345"
# Lib16 - Filter rows where D12 is not NA
BCs16_map_filtered <- BCs16_map[!is.na(BCs16_map$D12), ]
# Count unique values in the BC column of the filtered data
BCs16_map_filtered.count <- length(unique(BCs16_map_filtered$BC))
format(BCs16_map_filtered.count, big.mark = ",")
[1] "141,835"
Unique BCs matching mapping file between Codon Versions at Complementation: Count the numbers of shared and unique barcodes between the two libraries at complementation after mapping to the BC mapping file
# Merge by shared "BC" (1BC)
BCs.comp.map.shared <- merge(BCs15_map_filtered, BCs16_map_filtered, by = "BC", all = FALSE)
# Count the number of unique BCs shared between libraries
BCs.comp.map.shared.count <- length(unique(BCs.comp.map.shared$BC))
format(BCs.comp.map.shared.count, big.mark = ",")
[1] "107"
# Create a new dataset retaining unique values from both datasets (1BC)
BCs.comp.map.unique <- bind_rows(
anti_join(BCs15_map_filtered, BCs16_map_filtered, by = "BC"),
anti_join(BCs16_map_filtered, BCs15_map_filtered, by = "BC"))
# Count the number of unique BCs unique to one library or the other
BCs.comp.map.unique.count <- length(unique(BCs.comp.map.unique$BC))
format(BCs.comp.map.unique.count, big.mark = ",")
[1] "236,966"
# Count number of shared and unique BCs
BCs.comp.map.all.count <- BCs.comp.map.shared.count + BCs.comp.map.unique.count
format(BCs.comp.map.all.count, big.mark = ",")
[1] "237,073"
Perfect BCs matching mapping file (min. 1 Seq/BC): How many of our barcodes are associated with perfects (perfect sequence match with a designed homolog) for each library after filtering to the BC mapping file?
# Library 15
BCs15_perf <- BCs15_map %>%
filter(mutations==0)
# Count number of perfects (rows)
format(nrow(BCs15_perf), big.mark = ",")
[1] "81,053"
# Library 16
BCs16_perf <- BCs16_map %>%
filter(mutations==0)
# Count number of perfects (rows)
format(nrow(BCs16_perf), big.mark = ",")
[1] "129,671"
Perfect BCs matching mapping file for Complementation:
# Library 15
BCs15_perf_filtered <- BCs15_map_filtered %>%
filter(mutations==0)
# Count unique values in the BC column of the filtered data
BCs15_perf_filtered.count <- length(unique(BCs15_perf_filtered$BC))
format(BCs15_perf_filtered.count, big.mark = ",")
[1] "59,466"
# Library 16
BCs16_perf_filtered <- BCs16_map_filtered %>%
filter(mutations==0)
# Count unique values in the BC column of the filtered data
BCs16_perf_filtered.count <- length(unique(BCs16_perf_filtered$BC))
format(BCs16_perf_filtered.count, big.mark = ",")
[1] "91,515"
Mutant BCs matching mapping file (min. 1 Seq/BC): How many of our barcodes are associated with mutants (sequence contains at least 1 mutation from a designed homolog) for each library after filtering to the BC mapping file?
# Library 15
BCs15_mutant <- BCs15_map %>%
filter(mutations!=0)
# Count number of mutants (rows)
format(nrow(BCs15_mutant), big.mark = ",")
[1] "119,737"
# Library 16
BCs16_mutant <- BCs16_map %>%
filter(mutations!=0)
# Count number of mutants (rows)
format(nrow(BCs16_mutant), big.mark = ",")
[1] "144,328"
Mutant BCs matching mapping file for Complementation:
# Library 15
BCs15_mut_filtered <- BCs15_map_filtered %>%
filter(mutations!=0)
# Count unique values in the BC column of the filtered data
BCs15_mut_filtered.count <- length(unique(BCs15_mut_filtered$BC))
format(BCs15_mut_filtered.count, big.mark = ",")
[1] "35,879"
# Library 16
BCs16_mut_filtered <- BCs16_map_filtered %>%
filter(mutations!=0)
# Count unique values in the BC column of the filtered data
BCs16_mut_filtered.count <- length(unique(BCs16_mut_filtered$BC))
format(BCs16_mut_filtered.count, big.mark = ",")
[1] "50,320"
Mutant Counts by Distance (min. 1 Seq/BC): Summarize the sequence counts across mapped barcodes within several mutation-distance bins:
# Define the columns we want to summarize
L15.columns_to_summarize <- c("D01", "D03", "D05", "D06", "D07", "D08", "D09", "D10", "D11")
# Create a function to sum values for multiple columns
L15.sum_columns <- function(data, condition_name) {
  data %>%
    summarise(across(all_of(L15.columns_to_summarize), ~sum(., na.rm = TRUE))) %>%
    mutate(condition = condition_name) %>%
    select(condition, everything())
}
# Sum values for each condition
L15.summary_all <- bind_rows(
BCs15_map_filtered %>% filter(mutations == 0) %>% L15.sum_columns("mutations == 0"),
BCs15_map_filtered %>% filter(mutations == 1) %>% L15.sum_columns("mutations == 1"),
BCs15_map_filtered %>% filter(mutations >= 2 & mutations <= 5) %>% L15.sum_columns("mutations 2-5"),
BCs15_map_filtered %>% filter(mutations >= 6 & mutations <= 50) %>% L15.sum_columns("mutations 6-50"),
BCs15_map_filtered %>% filter(mutations >= 51 & mutations <= 100) %>% L15.sum_columns("mutations 51-100"),
BCs15_map_filtered %>% filter(mutations > 100) %>% L15.sum_columns("mutations > 100")
)
# Add a total row to the sum table
L15.summary_all_with_total <- L15.summary_all %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Calculate the percentage of total sum for each column
L15.summary_percentage <- L15.summary_all %>%
mutate(across(all_of(L15.columns_to_summarize),
~. / sum(., na.rm = TRUE) * 100,
.names = "{col}_pct"))
# Add a total row to the percentage table (will sum to 100 for each column)
L15.summary_percentage_with_total <- L15.summary_percentage %>%
select(condition, ends_with("_pct")) %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Round the values for better readability
L15.summary_all_rounded <- L15.summary_all_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
L15.summary_percentage_rounded <- L15.summary_percentage_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
# Print the sum table
print(L15.summary_all_rounded, n = Inf, width = Inf)
print(L15.summary_percentage_rounded, n = Inf, width = Inf)
# Define the columns we want to summarize
L16.columns_to_summarize <- c("D02", "D04", "D12", "E01", "E02", "E03", "E04", "E05", "E06")
# Create a function to sum values for multiple columns
L16.sum_columns <- function(data, condition_name) {
  data %>%
    summarise(across(all_of(L16.columns_to_summarize), ~sum(., na.rm = TRUE))) %>%
    mutate(condition = condition_name) %>%
    select(condition, everything())
}
# Sum values for each condition
L16.summary_all <- bind_rows(
BCs16_map_filtered %>% filter(mutations == 0) %>% L16.sum_columns("mutations == 0"),
BCs16_map_filtered %>% filter(mutations == 1) %>% L16.sum_columns("mutations == 1"),
BCs16_map_filtered %>% filter(mutations >= 2 & mutations <= 5) %>% L16.sum_columns("mutations 2-5"),
BCs16_map_filtered %>% filter(mutations >= 6 & mutations <= 50) %>% L16.sum_columns("mutations 6-50"),
BCs16_map_filtered %>% filter(mutations >= 51 & mutations <= 100) %>% L16.sum_columns("mutations 51-100"),
BCs16_map_filtered %>% filter(mutations > 100) %>% L16.sum_columns("mutations > 100")
)
# Add a total row to the sum table
L16.summary_all_with_total <- L16.summary_all %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Calculate the percentage of total sum for each column
L16.summary_percentage <- L16.summary_all %>%
mutate(across(all_of(L16.columns_to_summarize),
~. / sum(., na.rm = TRUE) * 100,
.names = "{col}_pct"))
# Add a total row to the percentage table (will sum to 100 for each column)
L16.summary_percentage_with_total <- L16.summary_percentage %>%
select(condition, ends_with("_pct")) %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Round the values for better readability
L16.summary_all_rounded <- L16.summary_all_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
L16.summary_percentage_rounded <- L16.summary_percentage_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
# Print the sum table
print(L16.summary_all_rounded, n = Inf, width = Inf)
print(L16.summary_percentage_rounded, n = Inf, width = Inf)
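The same mutation-distance bins can also be generated with cut() rather than repeated filter() calls; an equivalent sketch for Library 15, assuming integer mutation counts:
# Compact alternative: bin mutation distance with cut(), then sum per bin
L15.summary_cut <- BCs15_map_filtered %>%
  mutate(bin = cut(mutations,
                   breaks = c(-Inf, 0, 1, 5, 50, 100, Inf),
                   labels = c("mutations == 0", "mutations == 1",
                              "mutations 2-5", "mutations 6-50",
                              "mutations 51-100", "mutations > 100"))) %>%
  group_by(bin) %>%
  summarise(across(all_of(L15.columns_to_summarize), ~sum(., na.rm = TRUE)))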
Unique Perfects (min. 1 Seq/BC): How many unique perfects did we recover from all sampling conditions for each library?
# Library 15
perf_uniq_15 <- BCs15_map %>%
filter(mutations==0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(perf_uniq_15, big.mark = ",")
[1] "973"
# Library 16
perf_uniq_16 <- BCs16_map %>%
filter(mutations==0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(perf_uniq_16, big.mark = ",")
[1] "840"
Unique Mutants (min. 1 Seq/BC): How many unique mutants did we recover from all sampling conditions for each library?
# Library 15
mutant_15 <- BCs15_map %>%
filter(mutations!=0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(mutant_15, big.mark = ",")
[1] "94,125"
# Library 16
mutant_16 <- BCs16_map %>%
filter(mutations!=0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(mutant_16, big.mark = ",")
[1] "93,645"
The following section merges Lib15 and Lib16 based on shared “mutID” and sums the number of unique perfects shared between libraries. It also merges Lib15 and Lib16 based on “mutIDs” unique to each library and sums the number of perfects unique to one library or the other. Finally, it sums the total number of unique perfects recovered that are either shared between libraries or unique to one library or the other.
NOTE: This section is suppressed by default to save processing time and resources. Re-evaluate as needed.
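For reference, the suppressed block defines BCs_mutID_perfects.shared.count, which the sum at the end of this section relies on; a sketch mirroring the filtered version near the end of this notebook:
# Merge the mapped datasets but keep only the mutIDs shared between libraries
BCs_mutID_perfects.shared <- merge(BCs15_map, BCs16_map, by = "mutID", all = FALSE)
# Retain only the perfect sequences (merge suffixes shared columns, hence mutations.x)
raw_data.shared <- BCs_mutID_perfects.shared %>%
  filter(mutations.x == 0)
# Count the number of perfects (mutations == 0) shared between both libraries
BCs_mutID_perfects.shared.count <- length(unique(raw_data.shared$mutID))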
Also count the number of perfect sequences unique to one or the other library (mutations == 0).
# Combine the mapped datasets but retain only the mutIDs unique to one library or the other
BCs_mutID_perfects.unique <- bind_rows(
anti_join(BCs15_map, BCs16_map, by = "mutID"),
anti_join(BCs16_map, BCs15_map, by = "mutID"))
# Retain only the perfect sequences (mutations == 0)
raw_data.unique <- BCs_mutID_perfects.unique %>%
filter(mutations == 0)
# Count the number of perfects (mutations == 0) unique to one library or the other
BCs_mutID_perfects.unique.count <- length(unique(raw_data.unique$mutID))
format(BCs_mutID_perfects.unique.count, big.mark = ",")
Count the number of shared and unique perfects recovered for both codon version libraries.
# Sum the number of shared and unique perfects recovered for both codon version libraries
BCs_mutID_perfects.15.16.all.count <- BCs_mutID_perfects.shared.count + BCs_mutID_perfects.unique.count
format(BCs_mutID_perfects.15.16.all.count, big.mark = ",")
Up to this point, raw BCs have been pruned by BC length and all remaining BC counts have been normalized against sequence reads at "T0 - M9 - Full Supplement". Here, we'll start by adding the raw count values, normalized count values, and log2-FC values for each BC to the BCmut object, which represents the pre-filtered BC counts. Then, we'll filter BCs from our library datasets based on a minimum sequence count threshold across sampling conditions for use in all downstream analyses. We use a minimum cutoff of 10 sequence reads per barcode.
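The normalization itself is performed elsewhere in the notebook; a minimal sketch of the computation described above for Library 15, assuming D03 (T0, M9 full supplement) as the reference and hypothetical *_norm/*_log2FC column names:
# Sketch: per-condition read fractions, then log2-FC versus the T0 reference
to_freq <- function(x) x / sum(x, na.rm = TRUE)
BCmut_15_sketch <- BCs15_map %>%
  mutate(D03_norm = to_freq(D03),
         across(all_of(c("D05", "D06", "D07", "D08", "D09", "D10", "D11")),
                to_freq, .names = "{col}_norm")) %>%
  mutate(across(c(D05_norm, D06_norm, D07_norm, D08_norm,
                  D09_norm, D10_norm, D11_norm),
                ~ log2(. / D03_norm), .names = "{col}_log2FC"))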
Count the number of unique BCs prior to filtering by minimum sequence count:
# Pre-Filtered BC Counts by Library
# Count the number of BCs prior to filtering by minimum sequence count
BCmut_15.pre.count <- BCs15_map %>% nrow()
format(BCmut_15.pre.count, big.mark = ",")
[1] "200,790"
# Count the number of BCs prior to filtering by minimum sequence count
BCmut_16.pre.count <- BCs16_map %>% nrow()
format(BCmut_16.pre.count, big.mark = ",")
[1] "273,999"
Determine the number of unique barcodes based on a minimum sequence count threshold across sampling conditions. Filter the barcodes by a minimum threshold of sequence reads per unique barcode. In this case, we use a minimum cutoff of 10 sequence reads per barcode:
Lib15 BC Count
# Filter unique barcodes by minimum sequence count. This code uses "or" logic: if any sampling condition contains at least 10 sequence counts/barcode, the barcode is retained in the full dataset.
# Lib15
BCs15_map <- BCs15_map %>%
filter(D01>9 | D03>9 | D05>9 | D06>9 | D07>9 | D08>9 | D09>9 | D10>9 | D11>9)
# Count the number of BCs remaining after filtering by minimum sequence count
BCs15_map.count <- BCs15_map %>% nrow()
format(BCs15_map.count, big.mark = ",")
[1] "143,706"
# Lib16
BCs16_map <- BCs16_map %>%
filter(D02>9 | D04>9 | D12>9 | E01>9 | E02>9 | E03>9 | E04>9 | E05>9 | E06>9)
# Count the number of BCs remaining after filtering by minimum sequence count
BCs16_map.count <- BCs16_map %>% nrow()
format(BCs16_map.count, big.mark = ",")
[1] "179,550"
Confirm that all unique BCs match the 20-bp length requirement after filtering by minimum sequence count
# Lib15: Sum the count of unique BCs that match the 20-bp length requirement in the BCs15_map object
BC_filter15.unique.table <- table(str_length(BCs15_map$BC))
format(BC_filter15.unique.table, big.mark = ",")
20
"143,706"
# Lib16: Sum the count of unique BCs that match the 20-bp length requirement in the BCs16_map object
BC_filter16.unique.table <- table(str_length(BCs16_map$BC))
format(BC_filter16.unique.table, big.mark = ",")
20
"179,550"
BCs matching mapping file for Complementation (min. 10 Seq/BC):
# Lib15 - Filter rows where D05 is not NA
BCs15_map_filtered <- BCs15_map[!is.na(BCs15_map$D05), ]
# Count unique values in the BC column of the filtered data
BCs15_map_filtered.count <- length(unique(BCs15_map_filtered$BC))
format(BCs15_map_filtered.count, big.mark = ",")
[1] "92,273"
# Lib16 - Filter rows where D12 is not NA
BCs16_map_filtered <- BCs16_map[!is.na(BCs16_map$D12), ]
# Count unique values in the BC column of the filtered data
BCs16_map_filtered.count <- length(unique(BCs16_map_filtered$BC))
format(BCs16_map_filtered.count, big.mark = ",")
[1] "135,228"
Unique BCs matching mapping file between Codon Versions at Complementation: Count the numbers of shared and unique barcodes between the two libraries at complementation after filtering by minimum sequence count
# Merge by shared "BC" (1BC)
BCs.comp.map.shared <- merge(BCs15_map_filtered, BCs16_map_filtered, by = "BC", all = FALSE)
# Count the number of unique BCs shared between libraries
BCs.comp.map.shared.count <- length(unique(BCs.comp.map.shared$BC))
format(BCs.comp.map.shared.count, big.mark = ",")
[1] "50"
# Create a new dataset retaining unique values from both datasets (1BC)
BCs.comp.map.unique <- bind_rows(
anti_join(BCs15_map_filtered, BCs16_map_filtered, by = "BC"),
anti_join(BCs16_map_filtered, BCs15_map_filtered, by = "BC"))
# Count the number of unique BCs unique to one library or the other
BCs.comp.map.unique.count <- length(unique(BCs.comp.map.unique$BC))
format(BCs.comp.map.unique.count, big.mark = ",")
[1] "227,401"
# Count number of shared and unique BCs
BCs.comp.map.all.count <- BCs.comp.map.shared.count + BCs.comp.map.unique.count
format(BCs.comp.map.all.count, big.mark = ",")
[1] "227,451"
Filtered Unique Perfect BCs: How many unique perfect BCs did we recover from all sampling conditions for each library after filtering by minimum sequence count?
# Lib15: Count the number of BCs remaining after filtering by minimum sequence count
BCs15_map.BC.perfect.count <- BCs15_map %>%
filter(mutations == 0) %>%
distinct(BC) %>%
nrow()
format(BCs15_map.BC.perfect.count, big.mark = ",")
[1] "72,952"
# Lib16: Count the number of BCs remaining after filtering by minimum sequence count
BCs16_map.BC.perfect.count <- BCs16_map %>%
filter(mutations == 0) %>%
distinct(BC) %>%
nrow()
format(BCs16_map.BC.perfect.count, big.mark = ",")
[1] "107,303"
Perfect BCs matching mapping file for Complementation (min. 10 Seq/BC):
# Library 15
BCs15_perf_filtered <- BCs15_map_filtered %>%
filter(mutations==0)
# Count unique values in the BC column of the filtered data
BCs15_perf_filtered.count <- length(unique(BCs15_perf_filtered$BC))
format(BCs15_perf_filtered.count, big.mark = ",")
[1] "58,806"
# Library 16
BCs16_perf_filtered <- BCs16_map_filtered %>%
filter(mutations==0)
# Count unique values in the BC column of the filtered data
BCs16_perf_filtered.count <- length(unique(BCs16_perf_filtered$BC))
format(BCs16_perf_filtered.count, big.mark = ",")
[1] "89,072"
Unique Perfect BCs matching mapping file between Codon Versions at Complementation:
# Merge by shared "BC" (1BC)
BCs.comp.perf.shared <- merge(BCs15_perf_filtered, BCs16_perf_filtered, by = "BC", all = FALSE)
# Count the number of unique BCs shared between libraries
BCs.comp.perf.shared.count <- length(unique(BCs.comp.perf.shared$BC))
format(BCs.comp.perf.shared.count, big.mark = ",")
[1] "10"
# Create a new dataset retaining unique values from both datasets (1BC)
BCs.comp.perf.unique <- bind_rows(
anti_join(BCs15_perf_filtered, BCs16_perf_filtered, by = "BC"),
anti_join(BCs16_perf_filtered, BCs15_perf_filtered, by = "BC"))
# Count the number of unique BCs unique to one library or the other
BCs.comp.perf.unique.count <- length(unique(BCs.comp.perf.unique$BC))
format(BCs.comp.perf.unique.count, big.mark = ",")
[1] "147,858"
# Count number of shared and unique BCs
BCs.comp.perf.all.count <- BCs.comp.perf.shared.count + BCs.comp.perf.unique.count
format(BCs.comp.perf.all.count, big.mark = ",")
[1] "147,868"
Filtered Unique Mutant BCs: How many unique mutant BCs did we recover from all sampling conditions for each library after filtering by minimum sequence count?
# Library 15
BCs15_mut_filtered <- BCs15_map_filtered %>%
filter(mutations!=0)
# Count unique values in the BC column of the filtered data
BCs15_mut_filtered.count <- length(unique(BCs15_mut_filtered$BC))
format(BCs15_mut_filtered.count, big.mark = ",")
[1] "33,467"
# Library 16
BCs16_mut_filtered <- BCs16_map_filtered %>%
filter(mutations!=0)
# Count unique values in the BC column of the filtered data
BCs16_mut_filtered.count <- length(unique(BCs16_mut_filtered$BC))
format(BCs16_mut_filtered.count, big.mark = ",")
[1] "46,156"
Unique Mutant BCs matching mapping file between Codon Versions at Complementation:
# Merge by shared "BC" (1BC)
BCs.comp.mut.shared <- merge(BCs15_mut_filtered, BCs16_mut_filtered, by = "BC", all = FALSE)
# Count the number of unique BCs shared between libraries
BCs.comp.mut.shared.count <- length(unique(BCs.comp.mut.shared$BC))
format(BCs.comp.mut.shared.count, big.mark = ",")
[1] "6"
# Create a new dataset retaining unique values from both datasets (1BC)
BCs.comp.mut.unique <- bind_rows(
anti_join(BCs15_mut_filtered, BCs16_mut_filtered, by = "BC"),
anti_join(BCs16_mut_filtered, BCs15_mut_filtered, by = "BC"))
# Count the number of unique BCs unique to one library or the other
BCs.comp.mut.unique.count <- length(unique(BCs.comp.mut.unique$BC))
format(BCs.comp.mut.unique.count, big.mark = ",")
[1] "79,611"
# Count number of shared and unique BCs
BCs.comp.mut.all.count <- BCs.comp.mut.shared.count + BCs.comp.mut.unique.count
format(BCs.comp.mut.all.count, big.mark = ",")
[1] "79,617"
# Define the columns we want to summarize (both libraries, since the merged object spans Lib15 and Lib16)
L15.columns_to_summarize <- c("D01", "D03", "D05", "D06", "D07", "D08", "D09", "D10", "D11",
                              "D02", "D04", "D12", "E01", "E02", "E03", "E04", "E05", "E06")
# Create a function to count unique BCs for multiple columns
L15.count_unique_BC <- function(data, condition_name) {
  data %>%
    summarise(across(all_of(L15.columns_to_summarize),
                     ~sum(. > 0, na.rm = TRUE))) %>%
    mutate(condition = condition_name) %>%
    select(condition, everything())
}
# Count unique BCs for each condition
L15.summary_all <- bind_rows(
BCs.comp.mut.unique %>% filter(mutations == 0) %>% L15.count_unique_BC("mutations == 0"),
BCs.comp.mut.unique %>% filter(mutations == 1) %>% L15.count_unique_BC("mutations == 1"),
BCs.comp.mut.unique %>% filter(mutations >= 2 & mutations <= 5) %>% L15.count_unique_BC("mutations 2-5"),
BCs.comp.mut.unique %>% filter(mutations >= 6 & mutations <= 50) %>% L15.count_unique_BC("mutations 6-50"),
BCs.comp.mut.unique %>% filter(mutations >= 51 & mutations <= 100) %>% L15.count_unique_BC("mutations 51-100"),
BCs.comp.mut.unique %>% filter(mutations > 100) %>% L15.count_unique_BC("mutations > 100")
)
# Add a total row to the count table
L15.summary_all_with_total <- L15.summary_all %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Calculate the percentage of total count for each column
L15.summary_percentage <- L15.summary_all %>%
mutate(across(all_of(L15.columns_to_summarize),
~. / sum(., na.rm = TRUE) * 100,
.names = "{col}_pct"))
# Add a total row to the percentage table (will sum to 100 for each column)
L15.summary_percentage_with_total <- L15.summary_percentage %>%
select(condition, ends_with("_pct")) %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Round the values for better readability
L15.summary_all_rounded <- L15.summary_all_with_total %>%
mutate(across(where(is.numeric), ~round(., 0)))
L15.summary_percentage_rounded <- L15.summary_percentage_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
# Print the count table
print(L15.summary_all_rounded, n = Inf, width = Inf)
# Print the percentage table
print(L15.summary_percentage_rounded, n = Inf, width = Inf)
Mutant Counts by Distance (min. 10 Seq/BC): Summarize the sequence counts across mapped barcodes within several mutation-distance bins:
# Define the columns we want to summarize
L15.columns_to_summarize <- c("D01", "D03", "D05", "D06", "D07", "D08", "D09", "D10", "D11")
# Create a function to sum values for multiple columns
L15.sum_columns <- function(data, condition_name) {
  data %>%
    summarise(across(all_of(L15.columns_to_summarize), ~sum(., na.rm = TRUE))) %>%
    mutate(condition = condition_name) %>%
    select(condition, everything())
}
# Sum values for each condition
L15.summary_all <- bind_rows(
BCs15_map_filtered %>% filter(mutations == 0) %>% L15.sum_columns("mutations == 0"),
BCs15_map_filtered %>% filter(mutations == 1) %>% L15.sum_columns("mutations == 1"),
BCs15_map_filtered %>% filter(mutations >= 2 & mutations <= 5) %>% L15.sum_columns("mutations 2-5"),
BCs15_map_filtered %>% filter(mutations >= 6 & mutations <= 50) %>% L15.sum_columns("mutations 6-50"),
BCs15_map_filtered %>% filter(mutations >= 51 & mutations <= 100) %>% L15.sum_columns("mutations 51-100"),
BCs15_map_filtered %>% filter(mutations > 100) %>% L15.sum_columns("mutations > 100")
)
# Add a total row to the sum table
L15.summary_all_with_total <- L15.summary_all %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Calculate the percentage of total sum for each column
L15.summary_percentage <- L15.summary_all %>%
mutate(across(all_of(L15.columns_to_summarize),
~. / sum(., na.rm = TRUE) * 100,
.names = "{col}_pct"))
# Add a total row to the percentage table (will sum to 100 for each column)
L15.summary_percentage_with_total <- L15.summary_percentage %>%
select(condition, ends_with("_pct")) %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Round the values for better readability
L15.summary_all_rounded <- L15.summary_all_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
L15.summary_percentage_rounded <- L15.summary_percentage_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
# Print the sum table
print(L15.summary_all_rounded, n = Inf, width = Inf)
print(L15.summary_percentage_rounded, n = Inf, width = Inf)
# Define the columns we want to summarize
L16.columns_to_summarize <- c("D02", "D04", "D12", "E01", "E02", "E03", "E04", "E05", "E06")
# Create a function to sum values for multiple columns
L16.sum_columns <- function(data, condition_name) {
  data %>%
    summarise(across(all_of(L16.columns_to_summarize), ~sum(., na.rm = TRUE))) %>%
    mutate(condition = condition_name) %>%
    select(condition, everything())
}
# Sum values for each condition
L16.summary_all <- bind_rows(
BCs16_map_filtered %>% filter(mutations == 0) %>% L16.sum_columns("mutations == 0"),
BCs16_map_filtered %>% filter(mutations == 1) %>% L16.sum_columns("mutations == 1"),
BCs16_map_filtered %>% filter(mutations >= 2 & mutations <= 5) %>% L16.sum_columns("mutations 2-5"),
BCs16_map_filtered %>% filter(mutations >= 6 & mutations <= 50) %>% L16.sum_columns("mutations 6-50"),
BCs16_map_filtered %>% filter(mutations >= 51 & mutations <= 100) %>% L16.sum_columns("mutations 51-100"),
BCs16_map_filtered %>% filter(mutations > 100) %>% L16.sum_columns("mutations > 100")
)
# Add a total row to the sum table
L16.summary_all_with_total <- L16.summary_all %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Calculate the percentage of total sum for each column
L16.summary_percentage <- L16.summary_all %>%
mutate(across(all_of(L16.columns_to_summarize),
~. / sum(., na.rm = TRUE) * 100,
.names = "{col}_pct"))
# Add a total row to the percentage table (will sum to 100 for each column)
L16.summary_percentage_with_total <- L16.summary_percentage %>%
select(condition, ends_with("_pct")) %>%
bind_rows(summarise(., across(where(is.numeric), sum), condition = "Total"))
# Round the values for better readability
L16.summary_all_rounded <- L16.summary_all_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
L16.summary_percentage_rounded <- L16.summary_percentage_with_total %>%
mutate(across(where(is.numeric), ~round(., 2)))
# Print the sum table
print(L16.summary_all_rounded, n = Inf, width = Inf)
print(L16.summary_percentage_rounded, n = Inf, width = Inf)
Unique Perfect mutIDs: How many unique perfects (designed homologs with 0 mutations) did we recover from all sampling conditions for each library after filtering by minimum sequence count?
# Lib15
perf_filtered_uniq_15 <- BCs15_map %>%
filter(mutations==0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(perf_filtered_uniq_15, big.mark = ",")
[1] "961"
# Lib16
perf_filtered_uniq_16 <- BCs16_map %>%
filter(mutations==0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(perf_filtered_uniq_16, big.mark = ",")
[1] "818"
Unique Perfect mutIDs by TMP Treatment: How many unique perfects did we recover from each TMP sampling treatment for each library after filtering by minimum sequence count?
## Library 15
# Columns to count distinct values for
columns_to_count15 <- c("D05", "D06", "D07", "D08", "D09", "D10", "D11")
# Count unique values in "mutID" associated with each column
counts_table15 <- BCs15_map %>%
filter(mutations == 0) %>%
summarise(across(all_of(columns_to_count15), ~ n_distinct(mutID[. > 0])))
# Display the counts table
print(counts_table15)
## Library 16
# Columns to count distinct values for
columns_to_count16 <- c("D12", "E01", "E02", "E03", "E04", "E05", "E06")
# Count unique values in "mutID" associated with each column
counts_table16 <- BCs16_map %>%
filter(mutations == 0) %>%
summarise(across(all_of(columns_to_count16), ~ n_distinct(mutID[. > 0])))
# Display the counts table
print(counts_table16)
Unique Mutants: How many unique mutants did we recover from all sampling conditions for each library after filtering by minimum sequence count?
# Library 15
mutant_filtered_15 <- BCs15_map %>%
filter(mutations!=0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(mutant_filtered_15, big.mark = ",")
[1] "59,763"
# Library 16
mutant_filtered_16 <- BCs16_map %>%
filter(mutations!=0) %>%
select(mutID) %>%
distinct() %>%
nrow()
format(mutant_filtered_16, big.mark = ",")
[1] "49,691"
The following section merges the filtered versions of Lib15 and Lib16 on shared “mutID” and counts the number of unique perfects shared between the libraries. It then identifies the “mutIDs” unique to each library and counts the number of perfects found in only one library. Finally, it sums the shared and unique counts to give the total number of unique perfects recovered across both libraries.
NOTE: This section is suppressed by default to save processing time and resources. Re-evaluate as needed.
Count the number of shared perfects (mutations == 0) recovered between libraries after filtering by minimum sequence count.
# Merge the filtered BC maps, keeping only the mutIDs shared between libraries
BCs_mutID_filtered_perfects.shared <- merge(BCs15_map, BCs16_map, by = "mutID", all = FALSE)
# Retain only the rows representing perfect sequences (mutations == 0)
filtered_data.shared <- BCs_mutID_filtered_perfects.shared %>%
filter(mutations.x == 0)
# Count the number of perfects (mutations == 0) shared between both libraries
BCs_mutID_filtered_perfects.shared.count <- length(unique(filtered_data.shared$mutID))
format(BCs_mutID_filtered_perfects.shared.count, big.mark = ",")
Also count the number of perfect sequences unique to one or the other library (mutations == 0) after filtering by minimum sequence count.
# Combine the filtered BC maps, retaining only the mutIDs unique to one library or the other
BCs_mutID_filtered_perfects.unique <- bind_rows(
anti_join(BCs15_map, BCs16_map, by = "mutID"),
anti_join(BCs16_map, BCs15_map, by = "mutID"))
# Retain only the rows representing perfect sequences (mutations == 0)
filtered_data.unique <- BCs_mutID_filtered_perfects.unique %>%
filter(mutations == 0)
# Count the number of perfects (mutations == 0) unique to one library or the other
BCs_mutID_filtered_perfects.unique.count <- length(unique(filtered_data.unique$mutID))
format(BCs_mutID_filtered_perfects.unique.count, big.mark = ",")
Sum the shared and unique counts for total perfect sequences recovered after filtering by minimum sequence count.
# Sum the number of shared and unique perfects recovered for both codon version libraries
BCs_mutID_filtered_perfects.15.16.all.count <- BCs_mutID_filtered_perfects.shared.count + BCs_mutID_filtered_perfects.unique.count
format(BCs_mutID_filtered_perfects.15.16.all.count, big.mark = ",")
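As a cross-check (an illustrative sketch; like the rest of this section, it is not run by default), the shared and unique partitions together should cover the union of perfect mutIDs observed across the two filtered BC maps:
# Set-based sanity check: shared + unique should equal the size of the
# union of perfect mutIDs across the two libraries.
perf15 <- unique(BCs15_map$mutID[BCs15_map$mutations == 0])
perf16 <- unique(BCs16_map$mutID[BCs16_map$mutations == 0])
length(union(perf15, perf16))  # expected to match BCs_mutID_filtered_perfects.15.16.all.count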
Count data must be normalized across sampling conditions prior to making any downstream comparisons. Here, we scale the sequence counts against the “T0 - Growth on M9 - Full Supplement” sampling condition, which serves as the reference (scaling = 1.00). This corresponds to “D03” for Lib15 and “D04” for Lib16.
Lib15 Scaling: Create a scaling value to normalize all sequence count data by the total number of sequences at “T0 - Growth on M9 - Full Supplement” sample condition (Lib15: D03; Lib16: D04).
# Calculating the scaling factor for each sampling condition based on D03 (T0-M9-Full Supplement) as the standard
norm_scaling_15 <- rep.int(norm_vals_15$counts[2], nrow(norm_vals_15)) / norm_vals_15$counts
# Add the scaling values as a column to the "norm_vals_15" dataframe
norm_vals_15$scaling <- norm_scaling_15
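As a quick sanity check (a minimal sketch, assuming norm_vals_15 holds one row per sampling condition with D03 in row 2, as used above), the reference condition should have a scaling factor of exactly 1, and every condition's scaled total should equal the D03 total:
# The reference condition (row 2 = D03) must scale to itself:
stopifnot(isTRUE(all.equal(norm_vals_15$scaling[2], 1)))
# After scaling, every condition's total matches the D03 total:
stopifnot(isTRUE(all.equal(norm_vals_15$counts * norm_vals_15$scaling,
                           rep(norm_vals_15$counts[2], nrow(norm_vals_15)))))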
Table of Summarized Count Data including the Scaling Value for each Sample Condition
| Sample Conditions | Sample ID | Seq Counts | BC Counts | Mapped Counts | Mapped BCs | Scaling |
|---|---|---|---|---|---|---|
| D01 - Lib15 - T0 - Overnight Growth on LB | D01 | 83,770,481 | 332,234 | 61,036,972 | 197,558 | 1.30 |
| D03 - Lib15 - T0 - Growth on M9 - Full Supplement | D03 | 108,604,061 | 359,595 | 79,808,887 | 145,597 | 1.00 |
| D05 - Lib15 - T1 - Growth on M9 - 0.0 ug/ml TMP | D05 | 54,119,812 | 246,911 | 39,528,966 | 95,345 | 2.01 |
| D06 - Lib15 - T1 - Growth on M9 - 0.058 ug/ml TMP | D06 | 47,847,019 | 174,073 | 34,955,335 | 87,057 | 2.27 |
| D07 - Lib15 - T1 - Growth on M9 - 0.5 ug/ml TMP | D07 | 40,802,227 | 161,495 | 29,804,232 | 76,336 | 2.66 |
| D08 - Lib15 - T1 - Growth on M9 - 1.0 ug/ml TMP | D08 | 39,746,862 | 171,737 | 28,977,937 | 71,182 | 2.73 |
| D09 - Lib15 - T1 - Growth on M9 - 10.0 ug/ml TMP | D09 | 53,156,432 | 103,600 | 38,534,158 | 51,972 | 2.04 |
| D10 - Lib15 - T1 - Growth on M9 - 50.0 ug/ml TMP | D10 | 49,744,711 | 179,548 | 36,333,615 | 55,599 | 2.18 |
| D11 - Lib15 - T1 - Growth on M9 - 200.0 ug/ml TMP | D11 | 38,565,223 | 46,363 | 29,013,111 | 15,161 | 2.82 |
Lib16 Scaling
# Calculate scaling factor for each sampling condition based on D04 (T0 - M9 - Full Supplement) as the standard
norm_scaling_16 <- rep.int(norm_vals_16$counts[2], nrow(norm_vals_16)) / norm_vals_16$counts
# Add the scaling values as a column to the "norm_vals_16" dataframe
norm_vals_16$scaling <- norm_scaling_16
Table of Summarized Count Data including the Scaling Value for each Sample Condition
| Sample Conditions | Sample ID | Seq Counts | BC Counts | Mapped Counts | Mapped BCs | Scaling |
|---|---|---|---|---|---|---|
| D02 - Lib16 - T0 - Overnight Growth on LB | D02 | 48,821,977 | 428,302 | 34,177,971 | 266,916 | 1.35 |
| D04 - Lib16 - T0 - Growth on M9 - Full Supplement | D04 | 65,871,632 | 409,052 | 45,234,084 | 191,349 | 1.00 |
| D12 - Lib16 - T1 - Growth on M9 - 0.0 ug/ml TMP | D12 | 68,405,404 | 291,387 | 47,717,303 | 141,835 | 0.96 |
| E01 - Lib16 - T1 - Growth on M9 - 0.058 ug/ml TMP | E01 | 43,914,011 | 215,066 | 30,754,811 | 126,987 | 1.50 |
| E02 - Lib16 - T1 - Growth on M9 - 0.5 ug/ml TMP | E02 | 43,752,442 | 190,020 | 30,367,898 | 108,523 | 1.51 |
| E03 - Lib16 - T1 - Growth on M9 - 1.0 ug/ml TMP | E03 | 49,596,583 | 181,809 | 34,229,832 | 101,164 | 1.33 |
| E04 - Lib16 - T1 - Growth on M9 - 10.0 ug/ml TMP | E04 | 58,144,156 | 151,536 | 38,659,435 | 76,505 | 1.13 |
| E05 - Lib16 - T1 - Growth on M9 - 50.0 ug/ml TMP | E05 | 52,530,782 | 134,374 | 33,688,875 | 67,666 | 1.25 |
| E06 - Lib16 - T1 - Growth on M9 - 200.0 ug/ml TMP | E06 | 63,525,568 | 170,613 | 39,388,556 | 66,503 | 1.04 |
Lib15 BC Scaling
The following table displays a preview of the original count values by unique barcode:
| BC | D01 | D03 | D05 | D06 | D07 | D08 | D09 | D10 | D11 |
|---|---|---|---|---|---|---|---|---|---|
| AACACCATATTGCTTCTAAT | 424 | 510 | 235 | 134 | 28 | 13 | NA | 5 | NA |
| CGACATTTTGCCAAGTCCCT | 2,197 | 324 | 2 | 21 | 1 | 3 | NA | NA | NA |
| GATTAATTTATGCGCCGCTT | 17 | 3 | 3 | NA | 1 | NA | NA | NA | NA |
| TGTGTTCGAATTGCTGTTCG | 47 | 36 | 3 | 2 | 3 | NA | NA | NA | NA |
| CCCACACCCCTTATGGTCTC | 513 | 1,539 | 861 | 386 | 93 | 54 | 4 | 5 | NA |
| CGTTGTCCGACCAGTCTCAG | 30 | 35 | 1 | 1 | NA | NA | NA | 2 | NA |
For each sample, compute a normalized count value that accounts for sequencing depth, using the scaling values derived for each sampling condition in the table above.
# Make new columns for the normalized counts in the BCs object:
oldnames15 <- colnames(BCs15_map)
newnamesfrac15 <- c('D01n', 'D03n', 'D05n', 'D06n', 'D07n', 'D08n', 'D09n', 'D10n', 'D11n')
# Add the new columns to the BCs object based on the number of original columns for sampling conditions:
numcol_15 <- ncol(BCs15_map)
D01index <- which(colnames(BCs15_map) == "D01")
# For each sample, re-scale the number of reads by relative sequencing depth of the sample using the scaling values in the norm_vals object:
for (i in 1:9) {
  BCs15_map[, numcol_15 + i] <- BCs15_map[, D01index + i - 1] * as.numeric(norm_vals_15$scaling[i])
}
# Add the new column names to the BCs object:
colnames(BCs15_map) <- c(oldnames15, newnamesfrac15)
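An equivalent tidyverse formulation (a sketch; like the loop above, it assumes the sample columns appear in the same order as the rows of norm_vals_15) replaces the positional loop with mutate(across()):
# dplyr sketch: multiply each raw-count column by its condition-specific
# scaling factor and store the result with an "n" suffix.
sample_cols_15 <- c("D01", "D03", "D05", "D06", "D07", "D08", "D09", "D10", "D11")
scaling_15 <- setNames(norm_vals_15$scaling, sample_cols_15)
BCs15_map_alt <- BCs15_map %>%
  mutate(across(all_of(sample_cols_15),
                ~ .x * scaling_15[[cur_column()]],
                .names = "{col}n"))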
The following table displays a preview of the normalized count values by unique barcode:
| BC | D01n | D03n | D05n | D06n | D07n | D08n | D09n | D10n | D11n |
|---|---|---|---|---|---|---|---|---|---|
| AACACCATATTGCTTCTAAT | 550 | 510 | 472 | 304 | 75 | 36 | NA | 11 | NA |
| CGACATTTTGCCAAGTCCCT | 2,848 | 324 | 4 | 48 | 3 | 8 | NA | NA | NA |
| GATTAATTTATGCGCCGCTT | 22 | 3 | 6 | NA | 3 | NA | NA | NA | NA |
| TGTGTTCGAATTGCTGTTCG | 61 | 36 | 6 | 5 | 8 | NA | NA | NA | NA |
| CCCACACCCCTTATGGTCTC | 665 | 1,539 | 1,728 | 876 | 248 | 148 | 8 | 11 | NA |
| CGTTGTCCGACCAGTCTCAG | 39 | 35 | 2 | 2 | NA | NA | NA | 4 | NA |
Lib16 BC Scaling
The following table displays a preview of the original count values by unique barcode:
| BC | D02 | D04 | D12 | E01 | E02 | E03 | E04 | E05 | E06 |
|---|---|---|---|---|---|---|---|---|---|
| CTTGACTAGGCGTGACAAGG | 16 | 63 | 51 | 6 | 1 | NA | NA | NA | NA |
| GATGTAACAATGCAGGAACC | 25 | 14 | NA | NA | NA | NA | NA | NA | NA |
| GGTGGACTAGGGGTAAGTCG | 246 | 624 | 848 | 655 | 882 | 1,059 | 626 | 49 | 7 |
| GCAATCTTTAGTAACTGCTT | 201 | 306 | 443 | 399 | 540 | 446 | 104 | 29 | 5 |
| AGGCATCCAAGCAATTCATA | 17 | NA | NA | NA | NA | NA | NA | NA | NA |
| ATTTCTACTCGGTGCTCACA | 130 | 510 | 854 | 788 | 1,366 | 1,314 | 4,771 | 1,794 | 592 |
For each sample, compute a normalized count value that accounts for sequencing depth, using the scaling values derived for each sampling condition in the table above.
# Make new columns for the normalized counts in the BCs object:
oldnames16 <- colnames(BCs16_map)
newnamesfrac16 <- c('D02n', 'D04n', 'D12n', 'E01n', 'E02n', 'E03n', 'E04n', 'E05n', 'E06n')
# Add the new columns to the BCs object based on the number of original columns for sampling conditions:
numcol_16 <- ncol(BCs16_map)
D02index <- which(colnames(BCs16_map) == "D02")
# For each sample, re-scale the number of reads by relative sequencing depth of the sample using the scaling values in the norm_vals object:
for (i in 1:9) {
  BCs16_map[, numcol_16 + i] <- BCs16_map[, D02index + i - 1] * as.numeric(norm_vals_16$scaling[i])
}
# Add the new column names to the BCs object:
colnames(BCs16_map) <- c(oldnames16, newnamesfrac16)
The following table displays a preview of the normalized count values by unique barcode:
| BC | D02n | D04n | D12n | E01n | E02n | E03n | E04n | E05n | E06n |
|---|---|---|---|---|---|---|---|---|---|
| CTTGACTAGGCGTGACAAGG | 22 | 63 | 49 | 9 | 2 | NA | NA | NA | NA |
| GATGTAACAATGCAGGAACC | 34 | 14 | NA | NA | NA | NA | NA | NA | NA |
| GGTGGACTAGGGGTAAGTCG | 332 | 624 | 817 | 983 | 1,328 | 1,407 | 709 | 61 | 7 |
| GCAATCTTTAGTAACTGCTT | 271 | 306 | 427 | 599 | 813 | 592 | 118 | 36 | 5 |
| AGGCATCCAAGCAATTCATA | 23 | NA | NA | NA | NA | NA | NA | NA | NA |
| ATTTCTACTCGGTGCTCACA | 175 | 510 | 822 | 1,182 | 2,057 | 1,745 | 5,405 | 2,250 | 614 |
Library 15: Determine the log2-fold changes for each barcode relative to the M9 Full-Supplement condition (Lib15: D03 | Lib16: D04).
pseudocount = 1
#Library 15
BCs15_map <- BCs15_map %>%
mutate(D03D01fc=log2(D03n+pseudocount)-log2(D01n+pseudocount),
D05D03fc=log2(D05n+pseudocount)-log2(D03n+pseudocount),
D06D03fc=log2(D06n+pseudocount)-log2(D03n+pseudocount),
D07D03fc=log2(D07n+pseudocount)-log2(D03n+pseudocount),
D08D03fc=log2(D08n+pseudocount)-log2(D03n+pseudocount),
D09D03fc=log2(D09n+pseudocount)-log2(D03n+pseudocount),
D10D03fc=log2(D10n+pseudocount)-log2(D03n+pseudocount),
D11D03fc=log2(D11n+pseudocount)-log2(D03n+pseudocount))
Library 16:
#Library 16
BCs16_map <- BCs16_map %>%
mutate(D04D02fc=log2(D04n+pseudocount)-log2(D02n+pseudocount),
D12D04fc=log2(D12n+pseudocount)-log2(D04n+pseudocount),
E01D04fc=log2(E01n+pseudocount)-log2(D04n+pseudocount),
E02D04fc=log2(E02n+pseudocount)-log2(D04n+pseudocount),
E03D04fc=log2(E03n+pseudocount)-log2(D04n+pseudocount),
E04D04fc=log2(E04n+pseudocount)-log2(D04n+pseudocount),
E05D04fc=log2(E05n+pseudocount)-log2(D04n+pseudocount),
E06D04fc=log2(E06n+pseudocount)-log2(D04n+pseudocount))
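Every mutate() term above applies the same formula; a small helper (optional sketch) makes that pattern explicit:
# Helper sketch: log2 fold change between two normalized count vectors,
# using the same pseudocount as above.
log2fc <- function(num, denom, pc = pseudocount) {
  log2(num + pc) - log2(denom + pc)
}
# e.g., BCs15_map %>% mutate(D05D03fc = log2fc(D05n, D03n))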
Lib15 Log2-FC: The following table displays a preview of the log2-fold change values between sampling treatments:
| BC | D03-D01fc | D05-D03fc | D06-D03fc | D07-D03fc | D08-D03fc | D09-D03fc | D10-D03fc | D11-D03fc |
|---|---|---|---|---|---|---|---|---|
| AACACCATATTGCTTCTAAT | 0 | 0 | -1 | -3 | -4 | NA | -5 | NA |
| CGACATTTTGCCAAGTCCCT | -3 | -6 | -3 | -6 | -5 | NA | NA | NA |
| GATTAATTTATGCGCCGCTT | -3 | 1 | NA | 0 | NA | NA | NA | NA |
| TGTGTTCGAATTGCTGTTCG | -1 | -2 | -3 | -2 | NA | NA | NA | NA |
| CCCACACCCCTTATGGTCTC | 1 | 0 | -1 | -3 | -3 | -7 | -7 | NA |
| CGTTGTCCGACCAGTCTCAG | 0 | -4 | -3 | NA | NA | NA | -3 | NA |
Lib16 Log2-FC: The following table displays a preview of the log2-fold change values between sampling treatments:
| BC | D04-D02fc | D12-D04fc | E01-D04fc | E02-D04fc | E03-D04fc | E04-D04fc | E05-D04fc | E06-D04fc |
|---|---|---|---|---|---|---|---|---|
| CTTGACTAGGCGTGACAAGG | 2 | 0 | -3 | -5 | NA | NA | NA | NA |
| GATGTAACAATGCAGGAACC | -1 | NA | NA | NA | NA | NA | NA | NA |
| GGTGGACTAGGGGTAAGTCG | 1 | 0 | 1 | 1 | 1 | 0 | -3 | -6 |
| GCAATCTTTAGTAACTGCTT | 0 | 0 | 1 | 1 | 1 | -1 | -3 | -6 |
| AGGCATCCAAGCAATTCATA | NA | NA | NA | NA | NA | NA | NA | NA |
| ATTTCTACTCGGTGCTCACA | 2 | 1 | 1 | 2 | 2 | 3 | 2 | 0 |
First, update the mutIDinfo object to group BCs by mutID
while also adding a new column called “numprunedBCs” that counts the
number of unique BCs associated with each unique mutID that passed the
filtering threshold in the BCs_map object.
# Lib15
mutIDinfo15 <- BCs15_map %>%
group_by(mutID) %>%
summarise(numprunedBCs = n()) %>%
right_join(mutIDinfo15, by="mutID")
# Lib16
mutIDinfo16 <- BCs16_map %>%
group_by(mutID) %>%
summarise(numprunedBCs = n()) %>%
right_join(mutIDinfo16, by="mutID")
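mutIDs that were designed but never observed in the pruned BC maps receive NA from the right_join(); these are the values replaced with 0 below. A quick illustrative tally:
# Designed variants with no surviving barcodes carry NA counts after the right_join():
sum(is.na(mutIDinfo15$numprunedBCs))
sum(is.na(mutIDinfo16$numprunedBCs))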
Total: Count the number of unique mutIDs prior to filtering by minimum numprunedBCs (>0 BC):
# Count the number of unique mutants prior to filtering:
# Lib15
format(length(unique(mutIDinfo15$mutID)), big.mark = ",")
[1] "473,854"
# Lib16
format(length(unique(mutIDinfo16$mutID)), big.mark = ",")
[1] "469,672"
Perfects: Count the number of unique perfect variants (mutations == 0) prior to filtering:
# Count the number of unique perfect variants prior to filtering:
# Lib15
format(length(unique(mutIDinfo15$mutID[mutIDinfo15$mutations == 0])), big.mark = ",")
[1] "1,048"
# Lib16
format(length(unique(mutIDinfo16$mutID[mutIDinfo16$mutations == 0])), big.mark = ",")
[1] "904"
Mutants: Count the number of unique mutant variants (mutations != 0) prior to filtering:
# Count the number of unique mutant variants prior to filtering:
# Lib15
format(length(unique(mutIDinfo15$mutID[mutIDinfo15$mutations != 0])), big.mark = ",")
[1] "472,806"
# Lib16
format(length(unique(mutIDinfo16$mutID[mutIDinfo16$mutations != 0])), big.mark = ",")
[1] "468,768"
Replace all NAs with 0 in the “numprunedBCs” column
#replace NAs with 0
# Lib15
mutIDinfo15$numprunedBCs[is.na(mutIDinfo15$numprunedBCs)] <- 0
# Lib16
mutIDinfo16$numprunedBCs[is.na(mutIDinfo16$numprunedBCs)] <- 0
Filter the mutIDinfo objects by minimum number of
numprunedBCs (>0 BC):
#variants must have at least 1 BC after pruning:
# Lib15
mutIDinfo15 <- mutIDinfo15 %>%
filter(numprunedBCs>0)
# Lib16
mutIDinfo16 <- mutIDinfo16 %>%
filter(numprunedBCs>0)
Total: Count the number of unique variants remaining after filtering:
# Count the number of unique variants remaining after filtering:
# Lib15
format(length(unique(mutIDinfo15$mutID)), big.mark = ",")
[1] "60,724"
# Lib16
format(length(unique(mutIDinfo16$mutID)), big.mark = ",")
[1] "50,509"
Perfects: Count the number of unique perfect variants (mutations == 0) remaining after filtering:
# Count the number of unique variants remaining after filtering:
# Lib15
format(length(unique(mutIDinfo15$mutID[mutIDinfo15$mutations == 0])), big.mark = ",")
[1] "961"
# Lib16
format(length(unique(mutIDinfo16$mutID[mutIDinfo16$mutations == 0])), big.mark = ",")
[1] "818"
Mutants: Count the number of unique homologs (IDalign) associated with mutant variants (mutations != 0) after filtering:
# Count the number of unique homologs remaining after filtering:
# Lib15
format(length(unique(mutIDinfo15$IDalign[mutIDinfo15$mutations != 0])), big.mark = ",")
[1] "1,041"
# Lib16
format(length(unique(mutIDinfo16$IDalign[mutIDinfo16$mutations != 0])), big.mark = ",")
[1] "907"
Mutants: Count the number of unique mutant variants (mutations != 0) remaining after filtering:
# Count the number of unique variants remaining after filtering:
# Lib15
format(length(unique(mutIDinfo15$mutID[mutIDinfo15$mutations != 0])), big.mark = ",")
[1] "59,763"
# Lib16
format(length(unique(mutIDinfo16$mutID[mutIDinfo16$mutations != 0])), big.mark = ",")
[1] "49,691"
Mutants (1-5AA): Count the number of unique mutant variants with 1-5 amino acid substitutions remaining after filtering:
# Lib15
unique_mutIDinfo15_5AA_count <- mutIDinfo15 %>%
filter(mutations > 0 & mutations < 6) %>%
distinct(mutID) %>%
nrow()
format(unique_mutIDinfo15_5AA_count, big.mark = ",")
[1] "12,274"
# Lib16
unique_mutIDinfo16_5AA_count <- mutIDinfo16 %>%
filter(mutations > 0 & mutations < 6) %>%
distinct(mutID) %>%
nrow()
format(unique_mutIDinfo16_5AA_count, big.mark = ",")
[1] "16,060"
Mutants (>5AA): Count the number of unique mutant variants with more than 5 amino acid substitutions remaining after filtering:
# Lib15
unique_mutIDinfo15_6AA_count <- mutIDinfo15 %>%
filter(mutations > 5) %>%
distinct(mutID) %>%
nrow()
format(unique_mutIDinfo15_6AA_count, big.mark = ",")
[1] "47,489"
# Lib16
unique_mutIDinfo16_6AA_count <- mutIDinfo16 %>%
filter(mutations > 5) %>%
distinct(mutID) %>%
nrow()
format(unique_mutIDinfo16_6AA_count, big.mark = ",")
[1] "33,631"
Shared and Unique mutIDs between Codon Versions at Complementation:
# Merge by shared "mutID" (1BC)
mutIDinfo15.16.shared <- merge(mutIDinfo15, mutIDinfo16, by = "mutID", all = FALSE)
# Count the number of unique mutIDs shared between libraries
mutIDinfo15.16.shared.count <- length(unique(mutIDinfo15.16.shared$mutID))
format(mutIDinfo15.16.shared.count, big.mark = ",")
[1] "1,124"
# Create a new dataset retaining unique values from both datasets (1BC)
mutIDinfo15.16.unique <- bind_rows(
anti_join(mutIDinfo15, mutIDinfo16, by = "mutID"),
anti_join(mutIDinfo16, mutIDinfo15, by = "mutID"))
# Count the number of mutIDs unique to one library or the other
mutIDinfo15.16.unique.count <- length(unique(mutIDinfo15.16.unique$mutID))
format(mutIDinfo15.16.unique.count, big.mark = ",")
[1] "108,985"
# Sum the shared and unique mutID counts
mutIDinfo15.16.all.count <- mutIDinfo15.16.shared.count + mutIDinfo15.16.unique.count
format(mutIDinfo15.16.all.count, big.mark = ",")
[1] "110,109"
Count the number of shared and unique mutant mutIDs within 1-5 AA distance:
# Filter the shared dataset to include only rows where mutations > 0 and < 6
filtered_shared_data <- mutIDinfo15.16.shared %>%
filter(mutations.x > 0 & mutations.x < 6)
# Count the number of unique mutIDs in the filtered shared dataset
mutIDinfo15.16.5AA.shared.count <- length(unique(filtered_shared_data$mutID))
format(mutIDinfo15.16.5AA.shared.count, big.mark = ",")
[1] "308"
# Filter the dataset to include only rows where mutations > 0 and < 6
filtered_unique_data <- mutIDinfo15.16.unique %>%
filter(mutations > 0 & mutations < 6)
# Count the number of unique mutIDs in the filtered dataset
mutIDinfo15.16.5AA.unique.count <- length(unique(filtered_unique_data$mutID))
format(mutIDinfo15.16.5AA.unique.count, big.mark = ",")
[1] "27,718"
# Sum the shared and unique mutID counts within 1-5 AA distance
mutIDinfo15.16.5AA.all.count <- mutIDinfo15.16.5AA.shared.count + mutIDinfo15.16.5AA.unique.count
format(mutIDinfo15.16.5AA.all.count, big.mark = ",")
[1] "28,026"
Average Mutants per Homolog: Calculate the average number of unique mutIDs associated with each unique IDalign:
# Lib15 - Calculate the average number of unique mutIDs per IDalign
average_mutIDs_per_IDalign15 <- mutIDinfo15 %>%
group_by(IDalign) %>%
summarise(unique_mutIDs = n_distinct(mutID)) %>%
summarise(avg_unique_mutIDs = mean(unique_mutIDs))
# Print the result
print(paste("Lib15: Average number of unique mutIDs per IDalign:", round(average_mutIDs_per_IDalign15$avg_unique_mutIDs, 2)))
[1] "Lib15: Average number of unique mutIDs per IDalign: 56.96"
# Lib16 - Calculate the average number of unique mutIDs per IDalign
average_mutIDs_per_IDalign16 <- mutIDinfo16 %>%
group_by(IDalign) %>%
summarise(unique_mutIDs = n_distinct(mutID)) %>%
summarise(avg_unique_mutIDs = mean(unique_mutIDs))
# Print the result
print(paste("Lib16: Average number of unique mutIDs per IDalign:", round(average_mutIDs_per_IDalign16$avg_unique_mutIDs, 2)))
[1] "Lib16: Average number of unique mutIDs per IDalign: 52.56"
Median Mutants per Homolog: Calculate the median number of unique mutIDs associated with each unique IDalign:
# Lib15 - Calculate the number of unique mutIDs per IDalign
mutIDs_per_IDalign15 <- mutIDinfo15 %>%
group_by(IDalign) %>%
summarise(unique_mutIDs = n_distinct(mutID))
# Calculate the median number of unique mutIDs per IDalign
median_mutIDs_per_IDalign15 <- median(mutIDs_per_IDalign15$unique_mutIDs)
# Print the result
print(paste("Lib15: Median number of unique mutIDs per IDalign:", median_mutIDs_per_IDalign15))
[1] "Lib15: Median number of unique mutIDs per IDalign: 29.5"
# Lib16 - Calculate the number of unique mutIDs per IDalign
mutIDs_per_IDalign16 <- mutIDinfo16 %>%
group_by(IDalign) %>%
summarise(unique_mutIDs = n_distinct(mutID))
# Calculate the median number of unique mutIDs per IDalign
median_mutIDs_per_IDalign16 <- median(mutIDs_per_IDalign16$unique_mutIDs)
# Print the result
print(paste("Lib16: Median number of unique mutIDs per IDalign:", median_mutIDs_per_IDalign16))
[1] "Lib16: Median number of unique mutIDs per IDalign: 19"
All Shared: Merge the two datasets based on shared mutIDs and count all unique IDalign:
# Merge by shared "mutID"
mutIDinfo15.16.shared <- merge(mutIDinfo15, mutIDinfo16, by = "mutID", all = FALSE)
# Count the number of unique homologs (IDalign) shared between both libraries
mutIDinfo15.16.shared.filtered.count <- length(unique(mutIDinfo15.16.shared$IDalign.x))
format(mutIDinfo15.16.shared.filtered.count, big.mark = ",")
[1] "646"
All Unique: Combine the mutIDs unique to each dataset and count all unique IDalign:
# Create a new dataset retaining unique mutIDs from both datasets
mutIDinfo15.16.unique <- bind_rows(
anti_join(mutIDinfo15, mutIDinfo16, by = "mutID"),
anti_join(mutIDinfo16, mutIDinfo15, by = "mutID"))
# Count the number of unique homologs (IDalign) found in only one library
mutIDinfo15.16.unique.count <- length(unique(mutIDinfo15.16.unique$IDalign))
format(mutIDinfo15.16.unique.count, big.mark = ",")
[1] "1,219"
Perfects Shared: Subset for perfects shared between both libraries into a single dataframe:
# Retain only the shared rows representing perfect sequences (mutations == 0)
mutIDinfo15.16.shared.filtered.perfects <- mutIDinfo15.16.shared %>%
filter(mutations.x == 0)
# Count the number of perfects (mutations == 0) shared between both libraries
mutIDinfo15.16.shared.filtered.perfects.count <- length(unique(mutIDinfo15.16.shared.filtered.perfects$IDalign.x))
format(mutIDinfo15.16.shared.filtered.perfects.count, big.mark = ",")
[1] "643"
Perfects Unique: Subset the fitness scores for perfects unique to one library or the other into a single dataframe:
# Retain only the library-unique rows representing perfect sequences (mutations == 0)
mutIDinfo15.16.unique.filtered.perfects <- mutIDinfo15.16.unique %>%
filter(mutations == 0)
# Count the number of perfects (mutations == 0) unique between both libraries
mutIDinfo15.16.unique.filtered.perfects.count <- length(unique(mutIDinfo15.16.unique.filtered.perfects$IDalign))
format(mutIDinfo15.16.unique.filtered.perfects.count, big.mark = ",")
[1] "493"
Summarize the number of perfects shared or unique between both libraries.
mutIDinfo15.16.perfect.all.count <- mutIDinfo15.16.shared.filtered.perfects.count +
  mutIDinfo15.16.unique.filtered.perfects.count
format(mutIDinfo15.16.perfect.all.count, big.mark = ",")
[1] "1,136"
Mutants Shared: Subset for mutants shared between both libraries into a single dataframe:
# Retain only the shared rows representing mutant sequences (mutations != 0)
mutIDinfo15.16.shared.filtered.mutants <- mutIDinfo15.16.shared %>%
filter(mutations.x != 0)
# Count the number of mutants (mutations != 0) shared between both libraries
mutIDinfo15.16.shared.filtered.mutants.count <- length(unique(mutIDinfo15.16.shared.filtered.mutants$IDalign.x))
format(mutIDinfo15.16.shared.filtered.mutants.count, big.mark = ",")
[1] "196"
Mutants Unique: Subset the fitness scores for mutants unique to one library or the other into a single dataframe:
# Retain only the library-unique rows representing mutant sequences (mutations != 0)
mutIDinfo15.16.unique.filtered.mutants <- mutIDinfo15.16.unique %>%
filter(mutations != 0)
# Count the number of mutants (mutations != 0) unique between both libraries
mutIDinfo15.16.unique.filtered.mutants.count <- length(unique(mutIDinfo15.16.unique.filtered.mutants$IDalign))
format(mutIDinfo15.16.unique.filtered.mutants.count, big.mark = ",")
[1] "1,181"
Summarize the number of mutants shared or unique between both libraries.
mutIDinfo15.16.mutants.all.count <- mutIDinfo15.16.shared.filtered.mutants.count +
  mutIDinfo15.16.unique.filtered.mutants.count
format(mutIDinfo15.16.mutants.all.count, big.mark = ",")
[1] "1,377"
Now, generate a median fitness value for each unique “mutID” based on its associated BCs. IMPORTANT: We must remove “NA” values from the BCs associated with each “mutID” when calculating median fitness values; otherwise, median() returns NA for any “mutID” whose associated BCs include an “NA” value, effectively discarding that “mutID”.
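A minimal illustration of the default behavior:
# median() propagates NA unless na.rm = TRUE:
median(c(-1.2, -0.8, NA))                # returns NA
median(c(-1.2, -0.8, NA), na.rm = TRUE)  # returns -1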
# For each ID at each TMP treatment, calculate median fitness based on BCs associated with each ID:
# Lib15
mutIDinfo15 <- BCs15_map %>%
group_by(mutID) %>%
summarise(fitD03D01=median(D03D01fc,na.rm=T), fitD05D03=median(D05D03fc,na.rm=T),
fitD06D03=median(D06D03fc,na.rm=T), fitD07D03=median(D07D03fc,na.rm=T),
fitD08D03=median(D08D03fc,na.rm=T), fitD09D03=median(D09D03fc,na.rm=T),
fitD10D03=median(D10D03fc,na.rm=T), fitD11D03=median(D11D03fc,na.rm=T)) %>%
right_join(mutIDinfo15, by="mutID")
# Lib16
mutIDinfo16 <- BCs16_map %>%
group_by(mutID) %>%
summarise(fitD04D02=median(D04D02fc,na.rm=T), fitD12D04=median(D12D04fc,na.rm=T),
fitE01D04=median(E01D04fc,na.rm=T), fitE02D04=median(E02D04fc,na.rm=T),
fitE03D04=median(E03D04fc,na.rm=T), fitE04D04=median(E04D04fc,na.rm=T),
fitE05D04=median(E05D04fc,na.rm=T), fitE06D04=median(E06D04fc,na.rm=T)) %>%
right_join(mutIDinfo16, by="mutID")
Subset library mutIDinfo objects to retain only the
fitness scores for all TMP treatments at Time Point 1. Then, merge
libraries based on shared “mutID”.
Create a new dataframe that subsets all BCs and their fitness scores for all TMP treatments at Time Point 1:
# Select descriptive data columns and fitness scores at time point 1 from mutIDinfo
# Lib15
BCmutfitness_15 <- mutIDinfo15[, c(1,3:17)]
# Lib16
BCmutfitness_16 <- mutIDinfo16[, c(1,3:17)]
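Positional indexing (c(1, 3:17)) is compact but brittle if the column order ever changes. A name-based alternative (a sketch only; verify the names against the actual mutIDinfo15 layout before substituting it, noting that the positional subset drops the LB comparison fitD03D01/fitD04D02):
# Name-based sketch for Lib15; Lib16 is analogous with its fit columns.
BCmutfitness_15_alt <- mutIDinfo15 %>%
  select(mutID, fitD05D03:fitD11D03, numprunedBCs, IDalign,
         numBCs, mutations, seq, pct_ident)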
Unique Perfects IDalign up to 5 a.a. Distance: Calculate the number of unique IDalign (homologs) recovered when considering variants from 0 to 5 a.a. distance:
# Lib15
# Count unique IDalign across mutation levels 0 to 5 combined
Lib15.IDalign.unique_counts <- BCmutfitness_15 %>%
filter(mutations %in% 0:5) %>%
summarise(Lib15.IDalign.unique_counts = n_distinct(IDalign))
# Display the results
print(Lib15.IDalign.unique_counts)
# Lib16
# Count unique IDalign across mutation levels 0 to 5 combined
Lib16.IDalign.unique_counts <- BCmutfitness_16 %>%
filter(mutations %in% 0:5) %>%
summarise(Lib16.IDalign.unique_counts = n_distinct(IDalign))
# Display the results
print(Lib16.IDalign.unique_counts)
All Shared w/ 5 a.a. Distance: Merge the fitness scores derived from each library into a single dataframe based on shared mutIDs between the two datasets. Summarize the number of unique IDalign with up to 5 a.a. distance mutants:
# Merge by shared "mutID"
BCmut_15.16.mutID.fitness.shared <- merge(BCmutfitness_15, BCmutfitness_16, by = "mutID", all = FALSE)
# Filter up to 5 a.a. mutants
BCmut_15.16.mutID.fitness.shared.filtered.5AA <- BCmut_15.16.mutID.fitness.shared[BCmut_15.16.mutID.fitness.shared$mutations.x < 6 & BCmut_15.16.mutID.fitness.shared$mutations.y < 6, ]
# Count the number of unique homologs (IDalign) shared between both libraries after filtering to 5 mutations
BCmut_15.16.mutID.fitness.shared.filtered.5AA.count <- length(unique(BCmut_15.16.mutID.fitness.shared.filtered.5AA$IDalign.x))
format(BCmut_15.16.mutID.fitness.shared.filtered.5AA.count, big.mark = ",")
[1] "643"
All Unique w/ 5 a.a. Distance: Subset the fitness scores for all unique mutIDs derived from one library or the other into a single dataframe. Summarize the number of unique IDalign with up to 5 a.a. distance mutants:
# Create a new dataset retaining unique mutIDs from both datasets
BCmut_15.16.mutID.fitness.unique <- bind_rows(
anti_join(BCmutfitness_15, BCmutfitness_16, by = "mutID"),
anti_join(BCmutfitness_16, BCmutfitness_15, by = "mutID"))
BCmut_15.16.mutID.fitness.unique.filtered.5AA <- BCmut_15.16.mutID.fitness.unique[BCmut_15.16.mutID.fitness.unique$mutations < 6,]
# Count the number of unique homologs (IDalign) found in only one library after filtering to 5 mutations
BCmut_15.16.mutID.fitness.unique.filtered.5AA.count <- length(unique(BCmut_15.16.mutID.fitness.unique.filtered.5AA$IDalign))
format(BCmut_15.16.mutID.fitness.unique.filtered.5AA.count, big.mark = ",")
[1] "1,146"
Summarize the number of homologs shared or unique between both libraries w/ 5 a.a. distance.
BCmut_15.16.mutID.fitness.filtered.5AA.all.count <- BCmut_15.16.mutID.fitness.shared.filtered.5AA.count +
  BCmut_15.16.mutID.fitness.unique.filtered.5AA.count
format(BCmut_15.16.mutID.fitness.filtered.5AA.all.count, big.mark = ",")
[1] "1,789"
The mutID for wild-type E. coli is NP_414590.
Start by pulling out the wildtype E. coli mutID from the full dataset:
# Lib15
BCmut15_NP_414590 <- BCs15_map %>%
filter(mutID=="NP_414590") %>%
select(-cigar)
# Lib16
BCmut16_NP_414590 <- BCs16_map %>%
filter(mutID=="NP_414590") %>%
select(-cigar)
# Lib15
BCmut15_NP_414590_logfc <- BCmut15_NP_414590 %>%
select(BC, D05D03fc, D06D03fc, D07D03fc, D08D03fc, D09D03fc, D10D03fc, D11D03fc)
print(BCmut15_NP_414590_logfc)
# Lib16
BCmut16_NP_414590_logfc <- BCmut16_NP_414590 %>%
select(BC, D12D04fc, E01D04fc, E02D04fc, E03D04fc, E04D04fc, E05D04fc, E06D04fc)
print(BCmut16_NP_414590_logfc)
Subset the logFC data columns for Time Point 1 and define the treatment levels for each comparison:
# Lib15
BCmut15_NP_414590_logfc <- BCmut15_NP_414590 %>%
select(BC, D05D03fc, D06D03fc, D07D03fc, D08D03fc, D09D03fc, D10D03fc, D11D03fc) %>%
pivot_longer(!BC, names_to = "fc", values_to = "val") %>%
mutate(Lib = "Lib15",
TMP = case_when(
fc == "D05D03fc" ~ "0-TMP",
fc == "D06D03fc" ~ "0.058-TMP",
fc == "D07D03fc" ~ "0.5-TMP",
fc == "D08D03fc" ~ "1.0-TMP",
fc == "D09D03fc" ~ "10-TMP",
fc == "D10D03fc" ~ "50-TMP",
fc == "D11D03fc" ~ "200-TMP",
TRUE ~ NA_character_))
# Lib16
BCmut16_NP_414590_logfc <- BCmut16_NP_414590 %>%
select(BC, D12D04fc, E01D04fc, E02D04fc, E03D04fc, E04D04fc, E05D04fc, E06D04fc) %>%
pivot_longer(!BC, names_to = "fc", values_to = "val") %>%
mutate(Lib = "Lib16",
TMP = case_when(
fc == "D12D04fc" ~ "0-TMP",
fc == "E01D04fc" ~ "0.058-TMP",
fc == "E02D04fc" ~ "0.5-TMP",
fc == "E03D04fc" ~ "1.0-TMP",
fc == "E04D04fc" ~ "10-TMP",
fc == "E05D04fc" ~ "50-TMP",
fc == "E06D04fc" ~ "200-TMP",
TRUE ~ NA_character_))
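The case_when() ladders are one-to-one recodes, so a named lookup vector gives the same result more compactly (an equivalent sketch for Lib15; Lib16 is analogous):
# Lookup sketch: map fold-change column names to TMP concentration labels.
tmp_levels_15 <- c(D05D03fc = "0-TMP",   D06D03fc = "0.058-TMP",
                   D07D03fc = "0.5-TMP", D08D03fc = "1.0-TMP",
                   D09D03fc = "10-TMP",  D10D03fc = "50-TMP",
                   D11D03fc = "200-TMP")
# e.g., mutate(TMP = unname(tmp_levels_15[fc]))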
Remove “NA” values
# Lib15
BCmut15_NP_414590_logfc <- na.omit(BCmut15_NP_414590_logfc)
# Lib16
BCmut16_NP_414590_logfc <- na.omit(BCmut16_NP_414590_logfc)
Combine the library dataframes and remove the BC column
# Combine the data frames
BCmut_15_16_NP_414590_logfc <- rbind(BCmut15_NP_414590_logfc %>% select(-BC),
BCmut16_NP_414590_logfc %>% select(-BC))
Generate the boxplot with both libraries
# Define the plotting order
BCmut_15_16_NP_414590_logfc_order <- c("0-TMP", "0.058-TMP", "0.5-TMP", "1.0-TMP", "10-TMP", "50-TMP", "200-TMP")
# Generate the boxplot
BCmut_15_16_NP_414590_logfc_plot <- ggplot(BCmut_15_16_NP_414590_logfc, aes(x = TMP, y = val, fill = Lib)) +
geom_boxplot(position = "dodge") +
scale_x_discrete(limits = BCmut_15_16_NP_414590_logfc_order, drop = FALSE) +
xlab("") +
ylab(expression(atop("Wild Type "~italic("E. coli")~" Fitness", "Log2-Fold Change"))) +
#ggtitle("Wild-Type (WT) E. coli Homolog Fitness") +
theme_minimal() +
theme(axis.line = element_line(colour = 'black', size = 0.5),
axis.ticks = element_line(colour = "black", size = 0.5),
#plot.title = element_text(size = 16, hjust = 0, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
panel.background = element_blank(),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 12),
legend.position = "inside",
legend.position.inside = c(1, 1),
legend.justification = c(1, 1),
legend.box.background = element_rect(colour = "black")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_manual(values = c("Lib15" = "#0072B2", "Lib16" = "#E69F00")) +
scale_y_continuous(expand = c(0, 0), limits = c(-6, 3))
BCmut_15_16_NP_414590_logfc_plot
The following section summarizes the log2-fold changes in CONTROL barcodes across sampling conditions, relative to the M9 Full-Supplement reference (Lib15: D03; Lib16: D04).
Load in the controls and their associated barcodes:
#load controls
controls <- read.csv("Count/control_BCs.csv", header = FALSE)
colnames(controls) <- c("ctrlname","colnum","BClong","BC","BClen")
The following code corrects an error in the control BCs before mapping to the Count files: the barcode used for mapping is recovered by trimming the last three characters from BClong.
controls <- controls %>%
mutate(BCstub=str_sub(BClong,1,-4)) %>%
dplyr::rename(BCold=BC) %>%
dplyr::rename(BC=BCstub)
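str_sub(BClong, 1, -4) keeps everything up to the fourth-from-last character, i.e., it drops the final three characters of BClong. A toy example (hypothetical 23-character string; the actual BClong values live in control_BCs.csv):
library(stringr)
# Trimming the last three characters yields the 20-nt barcode used for mapping:
str_sub("AACACCATATTGCTTCTAATXYZ", 1, -4)
[1] "AACACCATATTGCTTCTAAT"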
Summarize the number of control barcodes from the full pruned
BCs dataset:
# Library 15
# Map the Control BCs to the BCs.prune dataset for all samples:
BCcontrols_15 <- inner_join(BCs15.prune,controls,by="BC")
# Re-arrange the resulting dataframe to place the descriptive columns at the beginning:
BCcontrols_15 <- BCcontrols_15[, c(1, 11, 15, 2:10)]
# Count the number of unique control barcodes mapped to the full pruned BCs dataset:
BCcontrols_15.count <- nrow(BCcontrols_15)
BCcontrols_15.count
[1] 21
# Library 16
# Map the Control BCs to the BCs.prune dataset for all samples:
BCcontrols_16 <- inner_join(BCs16.prune,controls,by="BC")
# Re-arrange the resulting dataframe to place the descriptive columns at the beginning:
BCcontrols_16 <- BCcontrols_16[, c(1, 11, 15, 2:10)]
# Count the number of unique control barcodes mapped to the full pruned BCs dataset:
BCcontrols_16.count <- inner_join(BCs16.prune,controls,by="BC") %>%
nrow()
BCcontrols_16.count
[1] 21
Lib15: Scale the control BC counts for each sampling condition (D05n, D06n, etc) based on the scaling values provided:
# Lib15
# Make new columns for the normalized counts in the BCs object:
control.oldnames15 <- colnames(BCcontrols_15)
control.newnamesfrac15 <- c('D01n', 'D03n', 'D05n', 'D06n', 'D07n', 'D08n', 'D09n', 'D10n', 'D11n')
# Add the new columns to the BCs object based on the number of original columns for sampling conditions:
control.numcol_15 <- ncol(BCcontrols_15)
control.D01index <- which(colnames(BCcontrols_15) == "D01")
# For each sample, re-scale the number of reads by relative sequencing depth of the sample using the scaling values in the norm_vals object:
for (i in 1:9){
BCcontrols_15[,control.numcol_15+i] <- BCcontrols_15[,control.D01index+i-1]*as.numeric(norm_vals_15$scaling[i])
}
# Add the new column names to the BCs object:
colnames(BCcontrols_15) <- c(control.oldnames15,control.newnamesfrac15)
Lib16: Scale the control BC counts for each sampling condition (D12n, E01n, etc) based on the scaling values provided:
# Lib16
# Make new columns for the normalized counts in the BCs object:
control.oldnames16 <- colnames(BCcontrols_16)
control.newnamesfrac16 <- c('D02n', 'D04n', 'D12n', 'E01n', 'E02n', 'E03n', 'E04n', 'E05n', 'E06n')
# Add the new columns to the BCs object based on the number of original columns for sampling conditions:
control.numcol_16 <- ncol(BCcontrols_16)
control.D02index <- which(colnames(BCcontrols_16) == "D02")
# For each sample, re-scale the number of reads by relative sequencing depth of the sample using the scaling values in the norm_vals object:
for (i in 1:9){
BCcontrols_16[,control.numcol_16+i] <- BCcontrols_16[,control.D02index+i-1]*as.numeric(norm_vals_16$scaling[i])
}
# Add the new column names to the BCs object:
colnames(BCcontrols_16) <- c(control.oldnames16,control.newnamesfrac16)
Standard Approach: Determine the log2-fold changes for each control BC relative to the M9 Full-Supplement condition:
# Lib 15
BCcontrols_15 <- BCcontrols_15 %>%
mutate(D03D01fc=log2(D03n+pseudocount)-log2(D01n+pseudocount),
D05D03fc=log2(D05n+pseudocount)-log2(D03n+pseudocount),
D06D03fc=log2(D06n+pseudocount)-log2(D03n+pseudocount),
D07D03fc=log2(D07n+pseudocount)-log2(D03n+pseudocount),
D08D03fc=log2(D08n+pseudocount)-log2(D03n+pseudocount),
D09D03fc=log2(D09n+pseudocount)-log2(D03n+pseudocount),
D10D03fc=log2(D10n+pseudocount)-log2(D03n+pseudocount),
D11D03fc=log2(D11n+pseudocount)-log2(D03n+pseudocount))
# Lib 16
BCcontrols_16 <- BCcontrols_16 %>%
mutate(D04D02fc=log2(D04n+pseudocount)-log2(D02n+pseudocount),
D12D04fc=log2(D12n+pseudocount)-log2(D04n+pseudocount),
E01D04fc=log2(E01n+pseudocount)-log2(D04n+pseudocount),
E02D04fc=log2(E02n+pseudocount)-log2(D04n+pseudocount),
E03D04fc=log2(E03n+pseudocount)-log2(D04n+pseudocount),
E04D04fc=log2(E04n+pseudocount)-log2(D04n+pseudocount),
E05D04fc=log2(E05n+pseudocount)-log2(D04n+pseudocount),
E06D04fc=log2(E06n+pseudocount)-log2(D04n+pseudocount))
### Lib15
# Create summary table of WT barcode fitness values:
BCcontrols_15_WT <- BCcontrols_15 %>%
filter(ctrlname == "WT") %>%
select(BC, ctrlname, D05D03fc, D06D03fc, D07D03fc, D08D03fc, D09D03fc, D10D03fc, D11D03fc)
print(BCcontrols_15_WT)
### Lib16
# Create summary table of WT barcode fitness values:
BCcontrols_16_WT <- BCcontrols_16 %>%
filter(ctrlname == "WT") %>%
select(BC, ctrlname, D12D04fc, E01D04fc, E02D04fc, E03D04fc, E04D04fc, E05D04fc, E06D04fc)
print(BCcontrols_16_WT)
# Merge Lib15 and Lib16 Controls by BC
BCcontrol_15_16_WT <- inner_join(BCcontrols_15, BCcontrols_16, by = "BC")
# Remove Neg. Ctrls (D27N, mCherry)
BCcontrol_15_16_WT <- BCcontrol_15_16_WT %>%
filter(!(ctrlname.x %in% c("D27N", "mCherry")))
# Keep only select columns
BCcontrol_15_16_WT <- BCcontrol_15_16_WT %>%
select(ctrlname.x, D05D03fc, D06D03fc, D07D03fc, D08D03fc, D09D03fc, D10D03fc, D11D03fc,
D12D04fc, E01D04fc, E02D04fc, E03D04fc, E04D04fc, E05D04fc, E06D04fc)
# Save a copy for Supplemental
write.csv(BCcontrol_15_16_WT, file = "Count/count_files_formatted/BCcontrols_15_16_WT.csv", row.names = FALSE)
#Lib15
BCcontrols_15_median <- BCcontrols_15 %>%
group_by(ctrlname) %>%
summarise(fitD05D03 = median(D05D03fc, na.rm = TRUE),
fitD06D03 = median(D06D03fc, na.rm = TRUE),
fitD07D03 = median(D07D03fc, na.rm = TRUE),
fitD08D03 = median(D08D03fc, na.rm = TRUE),
fitD09D03 = median(D09D03fc, na.rm = TRUE),
fitD10D03 = median(D10D03fc, na.rm = TRUE),
fitD11D03 = median(D11D03fc, na.rm = TRUE))
#Lib16
BCcontrols_16_median <- BCcontrols_16 %>%
group_by(ctrlname) %>%
summarise(fitD12D04 = median(D12D04fc, na.rm = TRUE),
fitE01D04 = median(E01D04fc, na.rm = TRUE),
fitE02D04 = median(E02D04fc, na.rm = TRUE),
fitE03D04 = median(E03D04fc, na.rm = TRUE),
fitE04D04 = median(E04D04fc, na.rm = TRUE),
fitE05D04 = median(E05D04fc, na.rm = TRUE),
fitE06D04 = median(E06D04fc, na.rm = TRUE))
# Merged shared values from both libraries
BCcontrols_15_16_shared_median <- inner_join(BCcontrols_15_median, BCcontrols_16_median, by = "ctrlname")
# Keep a copy of only shared WT (Pos Ctrl) median fitness
BCcontrols_15_16_shared_median_WT <- BCcontrols_15_16_shared_median %>%
filter(!(ctrlname %in% c("D27N", "mCherry")))
# Keep a copy of shared D27N and mCherry (Neg Ctrls) median fitness
BCcontrols_15_16_shared_median_Neg <- BCcontrols_15_16_shared_median %>%
filter(!(ctrlname %in% "WT"))
Plot the WT control fitness across the TMP gradient using Time Point 1 variables only
# Reshape the data from wide to long format
BCcontrol_15_16_WT_long <- BCcontrol_15_16_WT %>%
select(ctrlname.x, ends_with("fc")) %>%
pivot_longer(cols = -ctrlname.x,
names_to = "condition",
values_to = "fitness") %>%
mutate(
Lib = case_when(
grepl("D03fc", condition) ~ "Codon1",
grepl("D04fc", condition) ~ "Codon2"
),
concentration = case_when(
condition %in% c("D05D03fc", "D12D04fc") ~ "0",
condition %in% c("D06D03fc", "E01D04fc") ~ "0.058",
condition %in% c("D07D03fc", "E02D04fc") ~ "0.5",
condition %in% c("D08D03fc", "E03D04fc") ~ "1.0",
condition %in% c("D09D03fc", "E04D04fc") ~ "10",
condition %in% c("D10D03fc", "E05D04fc") ~ "50",
condition %in% c("D11D03fc", "E06D04fc") ~ "200"
)
) %>%
filter(!is.na(Lib))
# Create the plot
BCcontrol_15_16_WT_plot <- ggplot(BCcontrol_15_16_WT_long, aes(x = concentration, y = fitness, fill = Lib)) +
#geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.5) +
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.8) +
xlab("Trimethoprim (ug/mL)") +
ylab(expression(paste("Median Fitness (LogFC)"))) +
scale_x_discrete(limits = c("0", "0.058", "0.5", "1.0", "10", "50", "200")) +
theme_minimal() +
theme(
axis.line = element_line(colour = 'black', size = 1.0),
axis.ticks = element_line(colour = "black", size = 1.0),
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
axis.text.x = element_text(size = 14),
axis.text.y = element_text(size = 14),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 12),
legend.position = "bottom"
) +
scale_y_continuous(expand = c(0, 0), limits = c(-15, 1)) +
scale_fill_manual(values = c("Codon1" = "#0072B2", "Codon2" = "#E69F00"))
# Display the plot
print(BCcontrol_15_16_WT_plot)
Lib15 Controls: Plot the control barcodes as the log2-fold change comparison between D05 vs. D03:
# Plot control fitness at complementation (D05 vs. D03)
ctrl_comp_plot_L15 <- ggplot(data = BCcontrols_15, aes(x = ctrlname, y = D05D03fc, fill = ctrlname)) +
geom_boxplot(alpha = 0.8) +
geom_point() +
xlab("") +
ylab("Control Fitness at Complementation \n(Log2-Fold Change)") +
ggtitle("Library 15 Control Fitness") +
theme_minimal() +
theme(axis.line = element_line(colour = 'black', size = 0.5),
axis.ticks = element_line(colour = "black", size = 0.5),
plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
panel.background = element_blank(),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.position = "none") +
scale_fill_manual(values = c("D27N" = "#00800080", "mCherry" = "#FF000080", "WT" = "#0000FF80")) +
guides(linetype = "none") +
scale_y_continuous(expand = c(0, 0), limits = c(-3, 0))
ctrl_comp_plot_L15
Lib16 Controls: Plot the control barcodes as the log2-fold change comparison between D12 vs. D04:
# Plot control fitness at complementation (D12 vs. D04)
ctrl_comp_plot_L16 <- ggplot(data = BCcontrols_16, aes(x = ctrlname, y = D12D04fc, fill = ctrlname)) +
geom_boxplot(alpha = 0.8) +
geom_point() +
xlab("") +
ylab("") +
ggtitle("Library 16 Control Fitness") +
theme_minimal() +
theme(axis.line = element_line(colour = 'black', size = 0.5),
axis.ticks = element_line(colour = "black", size = 0.5),
plot.title = element_text(size = 14, hjust = 0.5, face = "bold"),
axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 12),
panel.background = element_blank(),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position = "none") +
scale_fill_manual(values = c("D27N" = "#00800080", "mCherry" = "#FF000080", "WT" = "#0000FF80")) +
guides(linetype = "none") +
scale_y_continuous(expand = c(0, 0), limits = c(-3, 0))
ctrl_comp_plot_L16
Subset the logFC data columns
# Lib15
BCcontrols_15_logfc <- BCcontrols_15 %>%
select(BC, ctrlname, D03D01fc, D05D03fc, D06D03fc, D07D03fc, D08D03fc, D09D03fc, D10D03fc, D11D03fc) %>%
pivot_longer(cols = -c(BC, ctrlname),
names_to = "fc",
values_to = "val") %>%
select(BC, Ctrl = ctrlname, fc, val)
# Lib16
BCcontrols_16_logfc <- BCcontrols_16 %>%
select(BC, ctrlname, D04D02fc, D12D04fc, E01D04fc, E02D04fc, E03D04fc, E04D04fc, E05D04fc, E06D04fc) %>%
pivot_longer(cols = -c(BC, ctrlname),
names_to = "fc",
values_to = "val") %>%
select(BC, Ctrl = ctrlname, fc, val)
Remove “NA” values
# Lib15
BCcontrols_15_logfc <- na.omit(BCcontrols_15_logfc)
# Lib16
BCcontrols_16_logfc <- na.omit(BCcontrols_16_logfc)
Place sample IDs in desired order for plotting
# Lib15
Lib15_CTRL_level_order <- c("D03D01fc", "D05D03fc", "D06D03fc", "D07D03fc", "D08D03fc", "D09D03fc", "D10D03fc", "D11D03fc")
# Lib16
Lib16_CTRL_level_order <- c("D04D02fc", "D12D04fc", "E01D04fc", "E02D04fc", "E03D04fc", "E04D04fc", "E05D04fc", "E06D04fc")
Lib15
Lib15_TMP_level_order <- c("D03D01fc", "D05D03fc", "D06D03fc", "D07D03fc", "D08D03fc", "D09D03fc", "D10D03fc", "D11D03fc")
ctrl_tmp_plot_L15 <- ggplot(BCcontrols_15_logfc, aes(x = factor(fc, levels = Lib15_TMP_level_order), y = val, fill = Ctrl)) +
geom_boxplot(color = "black", width = 0.5, alpha = 0.8) +
geom_point() +
ggtitle("Control Fitness by Trimethoprim Treatment") +
ylab("Codon 1 Control Fitness\n(Log2-Fold Change)") +
xlab("") +
theme_minimal() +
theme(axis.line = element_line(colour = 'black', size = 0.5),
axis.ticks = element_line(colour = "black", size = 0.5),
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
axis.text.y = element_text(size = 12),
panel.background = element_blank(),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position = "none") +
guides(linetype = "none") +
scale_x_discrete(labels = c("LB", "0-tmp", "0.058-tmp", "0.5-tmp", "1.0-tmp", "10-tmp", "50-tmp", "200-tmp")) +
scale_fill_manual(values = c("D27N" = "#00800080", "mCherry" = "#FF000080", "WT" = "#0000FF80"))
ctrl_tmp_plot_L15
Lib16
Lib16_TMP_level_order <- c("D04D02fc", "D12D04fc", "E01D04fc", "E02D04fc", "E03D04fc", "E04D04fc", "E05D04fc", "E06D04fc")
ctrl_tmp_plot_L16 <- ggplot(BCcontrols_16_logfc, aes(x = factor(fc, levels = Lib16_TMP_level_order), y = val, fill = Ctrl)) +
geom_boxplot(color = "black", width = 0.5, alpha = 0.8) +
geom_point() +
#ggtitle("Library 16 Control Fitness") +
ylab("Codon 2 Control Fitness\n(Log2-Fold Change)") +
xlab("") +
theme_minimal() +
theme(axis.line = element_line(colour = 'black', size = 0.5),
axis.ticks = element_line(colour = "black", size = 0.5),
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
axis.text.y = element_text(size = 12),
panel.background = element_blank(),
axis.title.x = element_text(size = 12),
axis.title.y = element_text(size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position = "bottom") +
guides(linetype = "none") +
scale_x_discrete(labels = c("LB", "0-tmp", "0.058-tmp", "0.5-tmp", "1.0-tmp", "10-tmp", "50-tmp", "200-tmp")) +
scale_fill_manual(values = c("D27N" = "#00800080", "mCherry" = "#FF000080", "WT" = "#0000FF80"))
ctrl_tmp_plot_L16
Save the formatted count files to import for downstream analyses
# BCs15_map (62.6 MB)
write.csv(BCs15_map, "Count/count_files_formatted/BCs15_map.csv", row.names = FALSE)
# BCs16_map (80.5 MB)
write.csv(BCs16_map, "Count/count_files_formatted/BCs16_map.csv", row.names = FALSE)
###------------------------------------
# mutIDinfo15 (17 MB)
# Select columns of interest to remove duplicates
mutIDinfo15.filtered <- mutIDinfo15 %>%
select(mutID,fitD03D01,fitD05D03,fitD06D03,fitD07D03,fitD08D03,fitD09D03,fitD10D03,fitD11D03,
numprunedBCs,IDalign,numBCs,mutations,seq,pct_ident)
write.csv(mutIDinfo15.filtered, "Count/count_files_formatted/mutIDinfo15.csv", row.names = FALSE)
# mutIDinfo16 (14.7 MB)
# Select columns of interest to remove duplicates
mutIDinfo16.filtered <- mutIDinfo16 %>%
select(mutID,fitD04D02,fitD12D04,fitE01D04,fitE02D04,fitE03D04,fitE04D04,fitE05D04,fitE06D04,
numprunedBCs,IDalign,numBCs,mutations,seq,pct_ident)
write.csv(mutIDinfo16.filtered, "Count/count_files_formatted/mutIDinfo16.csv", row.names = FALSE)
###------------------------------------
# BCcontrols_15_16_shared_median_WT (435 Bytes)
write.csv(BCcontrols_15_16_shared_median_WT, "Count/count_files_formatted/BCcontrols_15_16_shared_median_WT.csv", row.names = FALSE)
# BCcontrols_15_16_shared_median_Neg (697 Bytes)
write.csv(BCcontrols_15_16_shared_median_Neg, "Count/count_files_formatted/BCcontrols_15_16_shared_median_Neg.csv", row.names = FALSE)
###------------------------------------
# BCcontrols_15_median
write.csv(BCcontrols_15_median, "Count/count_files_formatted/BCcontrols_15_median.csv", row.names = FALSE)
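These formatted files are the inputs to the downstream notebooks; a quick round-trip check (an illustrative sketch) confirms that a saved file re-imports with the expected dimensions:
# Round-trip sketch: re-import one formatted file and confirm its row count.
mutIDinfo15.check <- read.csv("Count/count_files_formatted/mutIDinfo15.csv",
                              header = TRUE, stringsAsFactors = FALSE)
stopifnot(nrow(mutIDinfo15.check) == nrow(mutIDinfo15.filtered))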
The session information is provided for full reproducibility.
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────
setting value
version R version 4.3.2 (2023-10-31)
os macOS 15.5
system aarch64, darwin20
ui RStudio
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Los_Angeles
date 2025-08-14
rstudio 2024.09.0+375 Cranberry Hibiscus (desktop)
pandoc 3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
quarto 1.6.42 @ /usr/local/bin/quarto
─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
ade4 1.7-23 2025-02-14 [1] CRAN (R 4.3.3)
ape * 5.8-1 2024-12-16 [1] CRAN (R 4.3.3)
aplot 0.2.5 2025-02-27 [1] CRAN (R 4.3.3)
bio3d * 2.4-5 2024-10-29 [1] CRAN (R 4.3.3)
BiocGenerics * 0.46.0 2023-06-04 [1] Bioconductor
Biostrings * 2.68.1 2023-05-21 [1] Bioconductor
bitops 1.0-9 2024-10-03 [1] CRAN (R 4.3.3)
bslib 0.9.0 2025-01-30 [1] CRAN (R 4.3.3)
cachem 1.1.0 2024-05-16 [1] CRAN (R 4.3.3)
castor * 1.8.3 2024-11-17 [1] CRAN (R 4.3.3)
cli 3.6.4 2025-02-13 [1] CRAN (R 4.3.3)
codetools 0.2-20 2024-03-31 [1] CRAN (R 4.3.1)
colorspace 2.1-1 2024-07-26 [1] CRAN (R 4.3.3)
cowplot * 1.1.3 2024-01-22 [1] CRAN (R 4.3.1)
crayon 1.5.3 2024-06-20 [1] CRAN (R 4.3.3)
devtools * 2.4.5 2022-10-11 [1] CRAN (R 4.3.0)
digest 0.6.37 2024-08-19 [1] CRAN (R 4.3.3)
dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.3.1)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.0)
evaluate 1.0.3 2025-01-10 [1] CRAN (R 4.3.3)
farver 2.1.2 2024-05-13 [1] CRAN (R 4.3.3)
fastmap 1.2.0 2024-05-15 [1] CRAN (R 4.3.3)
foreach 1.5.2 2022-02-02 [1] CRAN (R 4.3.0)
fs 1.6.5 2024-10-30 [1] CRAN (R 4.3.3)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
GenomeInfoDb * 1.36.4 2023-10-08 [1] Bioconductor
GenomeInfoDbData 1.2.10 2023-09-13 [1] Bioconductor
ggExtra * 0.10.1 2023-08-21 [1] CRAN (R 4.3.0)
ggfun 0.1.8 2024-12-03 [1] CRAN (R 4.3.3)
ggnewscale * 0.5.1 2025-02-24 [1] CRAN (R 4.3.3)
ggplot2 * 3.5.1 2024-04-23 [1] CRAN (R 4.3.1)
ggplotify 0.1.2 2023-08-09 [1] CRAN (R 4.3.0)
ggridges * 0.5.6 2024-01-23 [1] CRAN (R 4.3.1)
ggtree * 3.8.2 2023-07-30 [1] Bioconductor
ggtreeExtra * 1.10.0 2023-04-25 [1] Bioconductor
glmnet * 4.1-8 2023-08-22 [1] CRAN (R 4.3.0)
glue 1.8.0 2024-09-30 [1] CRAN (R 4.3.3)
gridExtra * 2.3 2017-09-09 [1] CRAN (R 4.3.0)
gridGraphics 0.5-1 2020-12-13 [1] CRAN (R 4.3.0)
gtable 0.3.6 2024-10-25 [1] CRAN (R 4.3.3)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.3.1)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.3.1)
httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.3.1)
igraph * 2.1.4 2025-01-23 [1] CRAN (R 4.3.3)
IRanges * 2.34.1 2023-07-02 [1] Bioconductor
iterators 1.0.14 2022-02-05 [1] CRAN (R 4.3.0)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.0)
jsonlite 1.9.1 2025-03-03 [1] CRAN (R 4.3.3)
knitr * 1.50 2025-03-16 [1] CRAN (R 4.3.3)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.3.0)
later 1.4.1 2024-11-27 [1] CRAN (R 4.3.3)
lattice 0.22-6 2024-03-20 [1] CRAN (R 4.3.1)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.3.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.1)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
MASS 7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.1)
Matrix * 1.6-5 2024-01-11 [1] CRAN (R 4.3.1)
matrixStats * 1.5.0 2025-01-07 [1] CRAN (R 4.3.3)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0)
mime 0.13 2025-03-17 [1] CRAN (R 4.3.3)
miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.0)
munsell 0.5.1 2024-04-01 [1] CRAN (R 4.3.1)
naturalsort 0.1.3 2016-08-30 [1] CRAN (R 4.3.0)
nlme 3.1-167 2025-01-27 [1] CRAN (R 4.3.3)
patchwork * 1.3.0 2024-09-16 [1] CRAN (R 4.3.3)
pheatmap * 1.0.12 2019-01-04 [1] CRAN (R 4.3.0)
pillar 1.10.1 2025-01-07 [1] CRAN (R 4.3.3)
pkgbuild 1.4.6 2025-01-16 [1] CRAN (R 4.3.3)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
pkgload 1.4.0 2024-06-28 [1] CRAN (R 4.3.3)
plyr 1.8.9 2023-10-02 [1] CRAN (R 4.3.1)
png 0.1-8 2022-11-29 [1] CRAN (R 4.3.0)
profvis 0.4.0 2024-09-20 [1] CRAN (R 4.3.3)
promises 1.3.2 2024-11-28 [1] CRAN (R 4.3.3)
pscl * 1.5.9 2024-01-31 [1] CRAN (R 4.3.1)
purrr * 1.0.4 2025-02-05 [1] CRAN (R 4.3.3)
R6 2.6.1 2025-02-15 [1] CRAN (R 4.3.3)
ragg 1.3.3 2024-09-11 [1] CRAN (R 4.3.3)
RColorBrewer * 1.1-3 2022-04-03 [1] CRAN (R 4.3.0)
Rcpp * 1.0.14 2025-01-12 [1] CRAN (R 4.3.3)
RCurl 1.98-1.17 2025-03-22 [1] CRAN (R 4.3.3)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.3.1)
reshape * 0.8.9 2022-04-12 [1] CRAN (R 4.3.0)
reshape2 * 1.4.4 2020-04-09 [1] CRAN (R 4.3.0)
reticulate * 1.41.0.1 2025-03-09 [1] CRAN (R 4.3.3)
rlang 1.1.5 2025-01-17 [1] CRAN (R 4.3.3)
rmarkdown 2.29 2024-11-04 [1] CRAN (R 4.3.3)
ROCR * 1.0-11 2020-05-02 [1] CRAN (R 4.3.0)
RSpectra 0.16-2 2024-07-18 [1] CRAN (R 4.3.3)
rstudioapi 0.17.1 2024-10-22 [1] CRAN (R 4.3.3)
S4Vectors * 0.38.2 2023-09-24 [1] Bioconductor
sass 0.4.9 2024-03-15 [1] CRAN (R 4.3.1)
scales * 1.3.0 2023-11-28 [1] CRAN (R 4.3.1)
seqinr * 4.2-36 2023-12-08 [1] CRAN (R 4.3.1)
sessioninfo 1.2.3 2025-02-05 [1] CRAN (R 4.3.3)
shape 1.4.6.1 2024-02-23 [1] CRAN (R 4.3.1)
shiny 1.10.0 2024-12-14 [1] CRAN (R 4.3.3)
stringi * 1.8.4 2024-05-06 [1] CRAN (R 4.3.3)
stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.3.1)
survival 3.8-3 2024-12-17 [1] CRAN (R 4.3.3)
systemfonts 1.1.0 2024-05-15 [1] CRAN (R 4.3.3)
textshaping 1.0.0 2025-01-20 [1] CRAN (R 4.3.3)
tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)
tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.3.1)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.3.1)
tidytree * 0.4.6 2023-12-12 [1] CRAN (R 4.3.1)
treeio 1.24.3 2023-07-30 [1] Bioconductor
urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.0)
usethis * 3.1.0 2024-11-26 [1] CRAN (R 4.3.3)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.1)
viridis * 0.6.5 2024-01-29 [1] CRAN (R 4.3.1)
viridisLite * 0.4.2 2023-05-02 [1] CRAN (R 4.3.0)
withr 3.0.2 2024-10-28 [1] CRAN (R 4.3.3)
xfun 0.51 2025-02-19 [1] CRAN (R 4.3.3)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.0)
XVector * 0.40.0 2023-05-08 [1] Bioconductor
yaml 2.3.10 2024-07-26 [1] CRAN (R 4.3.3)
yulab.utils 0.2.0 2025-01-29 [1] CRAN (R 4.3.3)
zlibbioc 1.46.0 2023-05-08 [1] Bioconductor
[1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
* ── Packages attached to the search path.
──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────