A quick list of the available groups in the datasets.
plot_list_counts <- list()
for (dname in names(meta_list)) {
meta <- meta_list[[dname]]
gcol <- group_cols[[dname]]
# For Malabirade_2018, show panels for each of the new grouping columns
if (dname == "Malabirade_2018" && !is.null(meta)) {
mala_groups <- c("growth_medium", "growth_phase", "rna_sample_type")
for (mg in mala_groups) {
if (mg %in% names(meta)) {
p <- ggplot(meta, aes(x = .data[[mg]])) +
geom_bar(fill = "#4C78A8") +
labs(x = NULL, y = "Count", title = paste("Malabirade_2018:", mg)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
plot_list_counts[[paste0(dname, "_", mg)]] <- p
}
}
next
}
if (!is.null(meta) && gcol %in% names(meta)) {
p <- ggplot(meta, aes(x = .data[[gcol]])) +
geom_bar(fill = "#4C78A8") +
labs(x = NULL, y = "Count", title = dname) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
plot_list_counts[[dname]] <- p
}
}
ggarrange(plotlist = plot_list_counts, ncol = 3, nrow = 3)On Malabirade_2018, the SPI-1ind-LB was only sequenced in HighOD condition thus the plots lack LowOD samples.
We obtained read lengths directly from the per-sample fastp QC JSON files. The summary statistics is after filtering (type == “after_filtering”), where the length column corresponds to the mean read length reported by fastp.
plot_list_lengths <- list()
for (dname in names(meta_list)) {
meta <- meta_list[[dname]]
gcol <- group_cols[[dname]]
# For Malabirade_2018, show separate panels per new grouping column
if (dname == "Malabirade_2018" && !is.null(meta)) {
mala_groups <- c("growth_medium", "growth_phase", "rna_sample_type")
for (mg in mala_groups) {
if (!mg %in% names(meta)) next
lengths_df <- fastp_lengths(meta, mg)
if (nrow(lengths_df) == 0) next
if ("type" %in% names(lengths_df) && any(lengths_df$type == "distribution")) {
p <- lengths_df %>%
filter(type == "distribution") %>%
ggplot(aes(x = length, y = count, color = condition)) +
geom_line(alpha = 0.7) +
scale_y_continuous(labels = scales::comma) +
labs(x = "Length (bp)", y = "Count", title = paste("Malabirade_2018:", mg), color = mg) +
theme(plot.title = element_text(size = 10))
} else {
len_summ <- lengths_df %>%
filter(type == "after_filtering") %>%
group_by(sample_id, condition) %>%
summarise(mean_length = first(length), .groups = "drop")
p <- ggplot(len_summ, aes(x = condition, y = mean_length)) +
geom_boxplot(alpha = 0.7, fill = "grey90") +
geom_jitter(width = 0.2, alpha = 0.6, color = "#2ca02c") +
labs(x = NULL, y = "Mean Length (bp)", title = paste("Malabirade_2018:", mg)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
}
plot_list_lengths[[paste0(dname, "_", mg)]] <- p
}
next
}
if (!is.null(meta)) {
lengths_df <- fastp_lengths(meta, gcol)
if (nrow(lengths_df) > 0) {
if ("type" %in% names(lengths_df) && any(lengths_df$type == "distribution")) {
# Distribution plot
p <- lengths_df %>%
filter(type == "distribution") %>%
ggplot(aes(x = length, y = count, color = condition)) +
geom_line(alpha = 0.7) +
scale_y_continuous(labels = scales::comma) +
labs(x = "Length (bp)", y = "Count", title = dname, color = "Group") +
theme(plot.title = element_text(size = 10))
} else {
# Mean length plot
len_summ <- lengths_df %>%
filter(type == "after_filtering") %>%
group_by(sample_id, condition) %>%
summarise(mean_length = first(length), .groups = "drop")
p <- ggplot(len_summ, aes(x = condition, y = mean_length)) +
geom_boxplot(alpha = 0.7, fill = "grey90") +
geom_jitter(width = 0.2, alpha = 0.6, color = "#2ca02c") +
labs(x = NULL, y = "Mean Length (bp)", title = dname) +
theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
}
plot_list_lengths[[dname]] <- p
}
}
}
ggarrange(plotlist = plot_list_lengths, ncol = 3, nrow = 2)## $`1`
##
## $`2`
##
## attr(,"class")
## [1] "list" "ggarrange"
We filtered sRNAs by requiring RPKM ≥ 1 in at least 60% of samples (frac_gt1 >= 0.6) within any biological group before including them in downstream analyses.
filter_summary_list <- list()
pass_ids_list <- list()
for (dname in names(meta_list)) {
meta <- meta_list[[dname]]
gcol <- group_cols[[dname]]
# For Malabirade_2018, produce summaries for each grouping column
if (dname == "Malabirade_2018" && !is.null(meta)) {
counts <- load_counts_long(meta)
mala_groups <- c("growth_medium", "growth_phase", "rna_sample_type")
for (mg in mala_groups) {
if (!mg %in% names(meta)) next
res <- filter_srna(counts, meta, mg)
pass_ids_list[[paste0(dname, "_", mg)]] <- res$pass_ids
if (length(res$pass_ids) > 0) {
summ <- res$per_group %>%
group_by(group) %>%
summarise(n_srna_pass = n_distinct(srna_id), .groups = "drop") %>%
mutate(dataset = paste(dname, mg, sep = "_"))
filter_summary_list[[paste0(dname, "_", mg)]] <- summ
}
}
next
}
if (!is.null(meta)) {
counts <- load_counts_long(meta)
res <- filter_srna(counts, meta, gcol)
pass_ids_list[[dname]] <- res$pass_ids
if (length(res$pass_ids) > 0) {
summ <- res$per_group %>%
group_by(group) %>%
summarise(n_srna_pass = n_distinct(srna_id), .groups = "drop") %>%
mutate(dataset = dname)
filter_summary_list[[dname]] <- summ
}
}
}
all_filter_summary <- bind_rows(filter_summary_list)
ggplot(all_filter_summary, aes(x = group, y = n_srna_pass, fill = dataset)) +
geom_col() +
facet_wrap(~dataset, scales = "free_x", ncol = 3) +
labs(x = "Group", y = "Number of passing sRNAs", title = "sRNAs with stable expression") +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")Fan_2023 had to be dropped off from the analysis as the pipeline didnt detected any sRNAs The methodology mentioned in the paper states that ” mRNA was purified from total RNA using poly(T) oligonucleotide-attached magnetic beads” which would only retain RNA sequences with poly-A tail (eukaryotic/human mRNAs).
blen_data <- NULL
if ("Blenkiron_2016" %in% names(meta_list)) {
meta <- meta_list$Blenkiron_2016
counts <- load_counts_long(meta)
# Relaxed filter: keep sRNAs with mean RPKM > 0.6 in ANY library size
sRNA_keep <- counts %>%
left_join(meta %>% select(sample_id, library_size), by = "sample_id") %>%
filter(library_size %in% c("small", "medium", "large")) %>%
group_by(srna_id, library_size) %>%
summarise(mean_expr = mean(RPKM, na.rm = TRUE), .groups = "drop") %>%
group_by(srna_id) %>%
filter(any(mean_expr > 0.6)) %>%
pull(srna_id)
# Median per library size (small / medium / large)
med <- counts %>%
filter(srna_id %in% sRNA_keep) %>%
left_join(meta %>% select(sample_id, library_size), by = "sample_id") %>%
filter(library_size %in% c("small", "medium", "large")) %>%
group_by(srna_id, library_size) %>%
summarise(median_RPKM = median(RPKM, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = library_size, values_from = median_RPKM) %>%
mutate(across(where(is.numeric), ~ replace_na(., 0.01))) # avoid zeros for log scales
blen_data <- med
}
p_blen <- if (!is.null(blen_data) && nrow(blen_data) > 0) {
# Build pairwise comparisons: small–large, medium–large, small–medium
blen_pairs <- bind_rows(
blen_data %>%
transmute(x = small, y = large, comparison = "Small vs Large"),
blen_data %>%
transmute(x = medium, y = large, comparison = "Medium vs Large"),
blen_data %>%
transmute(x = small, y = medium, comparison = "Small vs Medium")
)
ggplot(blen_pairs, aes(x = x, y = y)) +
geom_point(alpha = 0.6, color = "#2ca02c") +
scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
labs(
title = "Blenkiron_2016: Pairwise library-size comparisons",
x = "Median RPKM (X library)",
y = "Median RPKM (Y library)"
) +
geom_abline(slope = 1, linetype = "dashed") +
facet_wrap(~ comparison)
} else {
ggplot() + labs(title = "Blenkiron_2016: No Data")
}
p_blensahr2_data <- NULL
if ("Sahr_2022_2" %in% names(meta_list)) {
meta <- meta_list$Sahr_2022_2
counts <- load_counts_long(meta)
sRNA_keep <- counts %>%
left_join(meta %>% select(sample_id, condition), by = "sample_id") %>%
group_by(srna_id, condition) %>%
summarise(mean_expr = mean(RPKM, na.rm = TRUE), .groups = "drop") %>%
group_by(srna_id) %>%
filter(any(mean_expr > 0.6)) %>%
pull(srna_id)
med <- counts %>%
filter(srna_id %in% sRNA_keep) %>%
left_join(meta %>% select(sample_id, condition), by = "sample_id") %>%
group_by(srna_id, condition) %>%
summarise(median_RPKM = median(RPKM, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = condition, values_from = median_RPKM) %>%
mutate(across(where(is.numeric), ~replace_na(., 0.01)))
sahr2_data <- med
}
p_sahr2 <- if (!is.null(sahr2_data) && nrow(sahr2_data) > 0) {
ggplot(sahr2_data, aes(x = RNAseq, y = MVseq)) +
geom_point(alpha = 0.6, color = "#d62728") +
scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
labs(title = "Sahr_2022_2: EV vs Total", x = "Total RNA (RPKM)", y = "EV RNA (RPKM)") +
geom_abline(slope = 1, linetype = "dashed")
} else { ggplot() + labs(title = "Sahr_2022_2: No Data") }
p_sahr2mala_data_list <- list()
if ("Malabirade_2018" %in% names(meta_list)) {
meta <- meta_list$Malabirade_2018
counts <- load_counts_long(meta)
meta_proc <- meta %>%
mutate(
type = case_when(
str_detect(condition, "OMV-RNAs") ~ "OMV",
str_detect(condition, "intracellularRNAs") ~ "Intracellular",
TRUE ~ NA_character_
),
base_condition = condition %>%
str_remove("_OMV-RNAs") %>%
str_remove("_intracellularRNAs") %>%
str_remove(" ") %>% # Remove stray spaces
str_trim()
) %>%
filter(!is.na(type))
conditions <- unique(meta_proc$base_condition)
for (cond in conditions) {
# Subset metadata for this condition
sub_meta <- meta_proc %>% filter(base_condition == cond)
# Mean > 0.6
sRNA_keep <- counts %>%
filter(sample_id %in% sub_meta$sample_id) %>%
left_join(sub_meta %>% select(sample_id, type), by = "sample_id") %>%
group_by(srna_id, type) %>%
summarise(mean_expr = mean(RPKM, na.rm = TRUE), .groups = "drop") %>%
group_by(srna_id) %>%
filter(any(mean_expr > 0.6)) %>%
pull(srna_id)
if (length(sRNA_keep) > 0) {
med <- counts %>%
filter(sample_id %in% sub_meta$sample_id) %>%
filter(srna_id %in% sRNA_keep) %>%
left_join(sub_meta %>% select(sample_id, type), by = "sample_id") %>%
group_by(srna_id, type) %>%
summarise(median_RPKM = median(RPKM, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = type, values_from = median_RPKM) %>%
mutate(across(where(is.numeric), ~replace_na(., 0.01))) %>%
mutate(condition = cond) %>%
left_join(srna_lengths, by = "srna_id")
mala_data_list[[cond]] <- med
}
}
}
if (length(mala_data_list) > 0) {
all_mala <- bind_rows(mala_data_list)
# Annotate with growth metadata for downstream plots
cond_annotations <- meta_proc %>%
distinct(base_condition, growth_medium, growth_phase, rna_sample_type)
all_mala <- all_mala %>%
left_join(cond_annotations, by = c("condition" = "base_condition"))
ggplot(all_mala, aes(x = Intracellular, y = OMV, color = seq_len)) +
geom_point(alpha = 0.6, size = 2) +
scale_color_gradient(low = "blue", high = "red", na.value = "grey70") +
scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
geom_abline(slope = 1, linetype = "dashed") +
facet_wrap(~condition, scales = "fixed") +
labs(title = "Malabirade_2018: OMV vs Intracellular by Condition",
x = "Intracellular (RPKM)", y = "OMV (RPKM)", color = "sRNA length (nt)") +
theme(strip.text = element_text(size = 8))
} else {
ggplot() + labs(title = "Malabirade_2018: No Data")
}# Check if the data frame exists from the previous chunk
if (exists("all_mala")) {
# Get unique conditions (facets)
conditions <- unique(all_mala$condition)
for (cond in conditions) {
# Filter for sRNAs enriched in OMV (OMV > Intracellular)
omv_enriched <- all_mala %>%
filter(condition == cond) %>%
filter(OMV > Intracellular) %>%
select(srna_id, seq_len, OMV, Intracellular) %>%
distinct() %>% # Deduplicate entries created by the metadata join
arrange(desc(OMV))
# Create a Markdown header for each condition
cat(paste("\n####", cond, "\n\n"))
# Print the table if data exists
if (nrow(omv_enriched) > 0) {
print(knitr::kable(omv_enriched, caption = paste("sRNAs with OMV > Intracellular in", cond)))
} else {
cat("No sRNAs found with higher OMV expression in this condition.\n")
}
# Add newlines for spacing between tables
cat("\n")
}
}| srna_id | seq_len | OMV | Intracellular |
|---|---|---|---|
| sRNA-Sente-37 | 46 | 35.45908 | 11.82353 |
| srna_id | seq_len | OMV | Intracellular |
|---|---|---|---|
| sRNA-Sente-37 | 46 | 139.1449 | 4.086566 |
| srna_id | seq_len | OMV | Intracellular |
|---|---|---|---|
| sRNA-Sente-36 | 108 | 8405.86870 | 438.5577361 |
| sRNA-Styph-1 | 296 | 6145.35312 | 10.5823070 |
| sRNA-Sente-46 | 266 | 3876.27595 | 26.6404897 |
| sRNA-Sente-52 | 1236 | 2627.36197 | 410.7585078 |
| sRNA-Sente-33 | 82 | 1965.82992 | 1135.2027459 |
| sRNA-Ecoli-61 | 106 | 1538.10064 | 38.0156916 |
| sRNA-Sflex-21 | 105 | 1511.47483 | 37.6797771 |
| sRNA-Sente-5 | 32 | 1384.16287 | 261.9766245 |
| sRNA-Sente-35 | 90 | 924.38397 | 124.7045432 |
| sRNA-Ecoli-46 | 93 | 788.35502 | 8.9652078 |
| sRNA-Sflex-15 | 92 | 780.72819 | 8.9504852 |
| sRNA-Sente-50 | 191 | 720.88289 | 309.8276334 |
| sRNA-Sflex-41 | 94 | 582.97100 | 79.9381348 |
| manual_IsrA | NA | 578.47832 | 0.5131556 |
| sRNA-Sente-41 | 146 | 564.70392 | 406.5214043 |
| sRNA-Ecoli-60 | 73 | 491.71249 | 141.3234982 |
| sRNA-Ecoli-13 | 84 | 474.64201 | 362.3001297 |
| sRNA-Sente-1 | 144 | 467.73287 | 122.0943470 |
| sRNA-Sente-44 | 144 | 467.73287 | 122.0943470 |
| sRNA-Ecoli-103 | 114 | 443.73451 | 136.1333928 |
| sRNA-Sflex-6 | 114 | 443.73451 | 136.1333928 |
| sRNA-Sente-51 | 284 | 402.64798 | 38.8113518 |
| sRNA-Ecoli-70 | 95 | 402.30762 | 63.8035681 |
| sRNA-Sflex-23 | 79 | 385.80073 | 147.6851742 |
| sRNA-Ecoli-64 | 79 | 357.78880 | 159.8833597 |
| sRNA-Ecoli-17 | 246 | 355.48712 | 29.4636840 |
| sRNA-Sflex-3 | 245 | 355.48712 | 29.4636840 |
| sRNA-Sente-49 | 85 | 349.02792 | 16.3039797 |
| sRNA-Sflex-46 | 109 | 263.61919 | 188.3603141 |
| sRNA-Ecoli-86 | 111 | 263.03041 | 190.3157689 |
| sRNA-Sente-55 | 486 | 254.93458 | 127.3269480 |
| sRNA-Ecoli-87 | 57 | 234.16588 | 63.3461872 |
| sRNA-Sflex-47 | 57 | 234.16588 | 63.3461872 |
| sRNA-Sente-28 | 113 | 200.04483 | 36.4781997 |
| sRNA-Sente-37 | 46 | 183.23240 | 4.0336417 |
| sRNA-Sente-13 | 162 | 180.74064 | 127.6364763 |
| sRNA-Ecoli-2 | 105 | 161.98028 | 44.5074169 |
| sRNA-Sflex-5 | 87 | 151.47334 | 20.7369153 |
| sRNA-Sente-25 | 211 | 134.01291 | 128.7029000 |
| sRNA-Ecoli-52 | 88 | 124.07863 | 18.8340761 |
| sRNA-Sflex-33 | 88 | 124.07863 | 18.4647805 |
| sRNA-Sente-23 | 119 | 108.63634 | 87.9016456 |
| sRNA-Sflex-13 | 77 | 102.32459 | 31.5357441 |
| sRNA-Sente-48 | 196 | 99.52849 | 80.8043643 |
| sRNA-Ecoli-49 | 184 | 93.69258 | 14.6060289 |
| srna_id | seq_len | OMV | Intracellular |
|---|---|---|---|
| sRNA-Ecoli-13 | 84 | 46.593704 | 36.861068 |
| sRNA-Sente-37 | 46 | 33.538715 | 1.304677 |
| sRNA-Styph-1 | 296 | 27.037588 | 9.544384 |
| sRNA-Sente-46 | 266 | 20.178142 | 9.568794 |
| sRNA-Ecoli-46 | 93 | 9.323033 | 8.858069 |
| sRNA-Sflex-15 | 92 | 9.272199 | 8.993306 |
| manual_IsrA | NA | 3.743399 | 0.000000 |
| srna_id | seq_len | OMV | Intracellular |
|---|---|---|---|
| sRNA-Ecoli-13 | 84 | 6266.6253 | 146.18883 |
| sRNA-Sente-37 | 46 | 282.3882 | 17.99624 |
| sRNA-Sente-48 | 196 | 105.1909 | 47.86915 |
# Save OMV-enriched tables to files
if (exists("all_mala")) {
output_dir <- path_from_root("explore/summary/output")
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
conditions <- unique(all_mala$condition)
for (cond in conditions) {
omv_enriched <- all_mala %>%
filter(condition == cond) %>%
filter(OMV > Intracellular) %>%
select(srna_id, seq_len, OMV, Intracellular) %>%
distinct() %>%
arrange(desc(OMV))
# Write CSV per condition
fn <- file.path(output_dir, paste0("omv_enriched_", cond, ".csv"))
readr::write_csv(omv_enriched, fn)
}
}# List sRNAs with OMV RPKM >= 10 per condition (export for downstream analysis)
if (exists("all_mala")) {
output_dir <- path_from_root("explore/summary/output")
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Use the same wide table created in the scatter plot chunk; rebuild if missing
if (!exists("mala_wide")) {
cond_ctrl <- "ControlPCN_HighOD"
cond_spi1 <- "SPI-1ind-LB_HighOD"
cond_spi2 <- "SPI-2ind_HighOD"
cond_ctrl_low <- "ControlPCN_LowOD"
cond_spi2_low <- "SPI-2ind_LowOD"
required_conditions <- c(cond_ctrl, cond_spi1, cond_spi2, cond_ctrl_low, cond_spi2_low)
mala_base <- all_mala %>%
filter(condition %in% required_conditions) %>%
select(srna_id, seq_len, condition, Intracellular, OMV)
mala_wide <- mala_base %>%
group_by(srna_id, seq_len, condition) %>%
summarise(
Intracellular = median(Intracellular, na.rm = TRUE),
OMV = median(OMV, na.rm = TRUE),
.groups = "drop"
) %>%
pivot_wider(
id_cols = c(srna_id, seq_len),
names_from = condition,
values_from = c(Intracellular, OMV),
names_glue = "{.value}_{condition}"
)
}
# Long table with both OMV and Intracellular per condition
mala_long <- mala_wide %>%
pivot_longer(
cols = -c(srna_id, seq_len),
names_to = c("type", "condition"),
names_pattern = "(Intracellular|OMV)_(.*)",
values_to = "RPKM"
)
omv_candidates <- mala_long %>%
filter(type == "OMV", RPKM >= 10) %>%
select(srna_id, seq_len, condition, OMV_RPKM = RPKM) %>%
left_join(
mala_long %>%
filter(type == "Intracellular") %>%
select(srna_id, seq_len, condition, Intracellular_RPKM = RPKM),
by = c("srna_id", "seq_len", "condition")
) %>%
arrange(condition, desc(OMV_RPKM))
# Save combined list
readr::write_csv(omv_candidates, file.path(output_dir, "omv_candidates_rpkm_ge10.csv"))
# Show a concise preview
head_preview <- omv_candidates %>%
group_by(condition) %>%
slice_max(order_by = OMV_RPKM, n = 10, with_ties = FALSE) %>%
ungroup()
knitr::kable(head_preview, caption = "Top OMV-packed sRNAs (RPKM >= 10) by condition")
}| srna_id | seq_len | condition | OMV_RPKM | Intracellular_RPKM |
|---|---|---|---|---|
| manual_10Sa | NA | ControlPCN_HighOD | 180.54065 | 6.543296e+05 |
| sRNA-Sflex-59 | 456 | ControlPCN_HighOD | 158.27740 | 5.254134e+05 |
| sRNA-Sente-36 | 108 | ControlPCN_HighOD | 137.29876 | 3.758984e+02 |
| sRNA-Paeru-10 | 209 | ControlPCN_HighOD | 91.48866 | 2.943083e+05 |
| sRNA-Apleu-10 | 176 | ControlPCN_HighOD | 87.36320 | 6.110659e+02 |
| sRNA-Sente-52 | 1236 | ControlPCN_HighOD | 62.79487 | 4.475461e+02 |
| sRNA-Ecoli-13 | 84 | ControlPCN_HighOD | 46.59370 | 3.686107e+01 |
| sRNA-Sente-29 | 109 | ControlPCN_HighOD | 33.74556 | 2.775994e+02 |
| manual_CsrC | NA | ControlPCN_HighOD | 33.56022 | 2.219529e+04 |
| sRNA-Sente-37 | 46 | ControlPCN_HighOD | 33.53872 | 1.304677e+00 |
| manual_10Sa | NA | ControlPCN_LowOD | 413.62867 | 4.132594e+05 |
| sRNA-Sflex-59 | 456 | ControlPCN_LowOD | 328.70732 | 3.286413e+05 |
| sRNA-Sflex-60 | 184 | ControlPCN_LowOD | 184.28206 | 4.696713e+04 |
| sRNA-Paeru-10 | 209 | ControlPCN_LowOD | 123.74163 | 1.261786e+05 |
| sRNA-Sente-29 | 109 | ControlPCN_LowOD | 97.31942 | 7.213708e+02 |
| manual_SsrS | NA | ControlPCN_LowOD | 93.35581 | 2.438831e+04 |
| sRNA-Sente-34 | 62 | ControlPCN_LowOD | 40.93169 | 3.841463e+03 |
| sRNA-Sente-37 | 46 | ControlPCN_LowOD | 35.45908 | 1.182353e+01 |
| sRNA-Sente-32 | 53 | ControlPCN_LowOD | 16.31676 | 7.743698e+02 |
| manual_CsrC | NA | ControlPCN_LowOD | 15.18939 | 1.064690e+04 |
| sRNA-Sente-36 | 108 | SPI-1ind-LB_HighOD | 8405.86870 | 4.385577e+02 |
| sRNA-Styph-1 | 296 | SPI-1ind-LB_HighOD | 6145.35312 | 1.058231e+01 |
| sRNA-Sente-46 | 266 | SPI-1ind-LB_HighOD | 3876.27595 | 2.664049e+01 |
| manual_10Sa | NA | SPI-1ind-LB_HighOD | 3519.38426 | 1.324867e+06 |
| sRNA-Sflex-59 | 456 | SPI-1ind-LB_HighOD | 3333.01371 | 1.065598e+06 |
| sRNA-Paeru-10 | 209 | SPI-1ind-LB_HighOD | 2939.72955 | 2.347986e+05 |
| sRNA-Sente-52 | 1236 | SPI-1ind-LB_HighOD | 2627.36197 | 4.107585e+02 |
| sRNA-Sente-33 | 82 | SPI-1ind-LB_HighOD | 1965.82992 | 1.135203e+03 |
| sRNA-Sflex-60 | 184 | SPI-1ind-LB_HighOD | 1887.11978 | 6.176864e+04 |
| sRNA-Ecoli-61 | 106 | SPI-1ind-LB_HighOD | 1538.10064 | 3.801569e+01 |
| manual_10Sa | NA | SPI-2ind_HighOD | 11648.83476 | 6.199890e+05 |
| sRNA-Sflex-59 | 456 | SPI-2ind_HighOD | 9320.62878 | 4.938366e+05 |
| sRNA-Ecoli-13 | 84 | SPI-2ind_HighOD | 6266.62528 | 1.461888e+02 |
| sRNA-Paeru-10 | 209 | SPI-2ind_HighOD | 5083.34974 | 2.084876e+05 |
| sRNA-Sflex-60 | 184 | SPI-2ind_HighOD | 978.25380 | 6.537325e+04 |
| manual_SsrS | NA | SPI-2ind_HighOD | 495.56124 | 3.702340e+04 |
| sRNA-Sente-37 | 46 | SPI-2ind_HighOD | 282.38818 | 1.799624e+01 |
| manual_CsrC | NA | SPI-2ind_HighOD | 273.16379 | 1.917958e+04 |
| sRNA-Sente-29 | 109 | SPI-2ind_HighOD | 176.22329 | 2.518915e+02 |
| sRNA-Sente-52 | 1236 | SPI-2ind_HighOD | 164.10322 | 2.584162e+02 |
| manual_10Sa | NA | SPI-2ind_LowOD | 3878.13087 | 3.681235e+05 |
| sRNA-Sflex-59 | 456 | SPI-2ind_LowOD | 3280.31683 | 3.317412e+05 |
| sRNA-Paeru-10 | 209 | SPI-2ind_LowOD | 1792.78847 | 1.072380e+05 |
| sRNA-Sflex-60 | 184 | SPI-2ind_LowOD | 1213.94850 | 7.409502e+04 |
| manual_SsrS | NA | SPI-2ind_LowOD | 611.16085 | 3.802551e+04 |
| sRNA-Sente-29 | 109 | SPI-2ind_LowOD | 540.78830 | 1.286075e+03 |
| sRNA-Sente-34 | 62 | SPI-2ind_LowOD | 168.39499 | 3.445253e+03 |
| sRNA-Sente-37 | 46 | SPI-2ind_LowOD | 139.14487 | 4.086566e+00 |
| manual_CsrC | NA | SPI-2ind_LowOD | 85.24838 | 6.005684e+03 |
| sRNA-Sente-52 | 1236 | SPI-2ind_LowOD | 67.12749 | 1.568392e+02 |
To establish the baseline profile of “normal” sRNA packaging, we examine both ControlPCN_LowOD (log phase) and ControlPCN_HighOD (stationary phase).
sRNA-Sente-37 (This sRNA is
enriched across all conditions)Does general stress increase RNA expression?
Yes.
Does general stress increase selective RNA packaging into
OMVs?
Yes.
We compare SPI-1ind-LB_HighOD against ControlPCN_HighOD.
Biological context:
SPI-1 induction in LB medium mimics the early intestinal infection stage
by activating Salmonella Pathogenicity Island 1.
Key observations: - A greater number of points fall left of the diagonal compared to the control, indicating more sRNAs enriched in OMVs during the invasion-like condition. - Overall expression of sRNAs increases in both intracellular and OMV fractions compared to the control. - These OMV-enriched sRNAs may represent candidates involved in the intestinal invasion phase.
Does sRNA expression increase during intestinal
invasion?
Yes.
Does selective sRNA packaging increase during invasion
compared to the control?
Yes.
Biological context:
SPI-2 medium mimics the intracellular macrophage environment by inducing
SPI-2 genes under acidic (pH 5.8), phosphate-limited PCN conditions.
Key observations: - Three sRNAs are located to the left of the diagonal, indicating OMV-specific enrichment relative to intracellular abundance. - These may represent candidates important for Salmonella survival or interaction with host (human) pathways during the macrophage phase.
if (exists("all_mala")) {
# Define condition names mapping to what is in all_mala$condition
cond_ctrl <- "ControlPCN_HighOD"
cond_spi1 <- "SPI-1ind-LB_HighOD"
cond_spi2 <- "SPI-2ind_HighOD"
cond_ctrl_low <- "ControlPCN_LowOD"
cond_spi2_low <- "SPI-2ind_LowOD"
output_dir <- path_from_root("explore/summary/output")
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
avail_conds <- unique(all_mala$condition)
required_conditions <- c(cond_ctrl, cond_spi1, cond_spi2, cond_ctrl_low, cond_spi2_low)
if (all(required_conditions %in% avail_conds)) {
# Prepare wide data
mala_wide <- all_mala %>%
filter(condition %in% required_conditions) %>%
select(srna_id, seq_len, condition, Intracellular, OMV) %>%
group_by(srna_id, seq_len, condition) %>%
summarise(Intracellular = median(Intracellular, na.rm = TRUE),
OMV = median(OMV, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(id_cols = c(srna_id, seq_len), names_from = condition,
values_from = c(Intracellular, OMV), names_glue = "{.value}_{condition}")
# Uniform axis limits
all_vals <- unlist(mala_wide[, grep("^(OMV|Intracellular)_", names(mala_wide))], use.names = FALSE)
axis_max <- 10^ceiling(log10(max(all_vals, na.rm = TRUE)))
# Ultra-compact scatter plot for 4x3 combined figure
make_scatter <- function(data, x_var, y_var) {
plot_data <- data %>% filter(!is.na(.data[[x_var]]) & !is.na(.data[[y_var]]))
n_pts <- nrow(plot_data)
ggplot(plot_data, aes(x = .data[[x_var]], y = .data[[y_var]])) +
geom_point(alpha = 0.55, color = "#2166AC", size = 0.5, stroke = 0) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50", linewidth = 0.3) +
annotate("text", x = 0, y = axis_max * 0.6, label = paste0("n=", n_pts),
hjust = 0, vjust = 1, size = 1.8, color = "grey30") +
scale_x_continuous(trans = "log1p", breaks = c(0, 1000, 10000),
labels = c("0", "1k", "10k"), limits = c(0, axis_max)) +
scale_y_continuous(trans = "log1p", breaks = c(0, 1000, 10000),
labels = c("0", "1k", "10k"), limits = c(0, axis_max)) +
coord_fixed(ratio = 1) +
theme_bw(base_size = 6) +
theme(axis.title = element_blank(),
axis.text = element_text(size = 3),
axis.ticks = element_line(linewidth = 0.2),
axis.ticks.length = unit(0.5, "mm"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(linewidth = 0.15, color = "grey88"),
panel.border = element_rect(linewidth = 0.25),
plot.margin = margin(1, 1, 1, 1))
}
# Layout: 5 rows x 2 cols
# Row 1: Ctrl vs SPI-1 (Stationary) - treatment effect
# Row 2: Ctrl vs SPI-2 (Stationary) - treatment effect
# Row 3: Ctrl vs SPI-2 (Log phase) - treatment effect in log phase
# Row 4: Control: Stationary vs Log - growth phase effect on baseline
# Row 5: SPI-2: Stationary vs Log - growth phase effect on SPI-2
# Row 1: Ctrl vs SPI-1 (Stationary)
p1 <- make_scatter(mala_wide, paste0("Intracellular_", cond_ctrl),
paste0("Intracellular_", cond_spi1))
p2 <- make_scatter(mala_wide, paste0("OMV_", cond_ctrl),
paste0("OMV_", cond_spi1))
# Row 2: Ctrl vs SPI-2 (Stationary)
p3 <- make_scatter(mala_wide, paste0("Intracellular_", cond_ctrl),
paste0("Intracellular_", cond_spi2))
p4 <- make_scatter(mala_wide, paste0("OMV_", cond_ctrl),
paste0("OMV_", cond_spi2))
# Row 3: Ctrl vs SPI-2 (Log phase)
p5 <- make_scatter(mala_wide, paste0("Intracellular_", cond_ctrl_low),
paste0("Intracellular_", cond_spi2_low))
p6 <- make_scatter(mala_wide, paste0("OMV_", cond_ctrl_low),
paste0("OMV_", cond_spi2_low))
# Row 4: Control: Stationary vs Log (x = stationary, y = log)
p7 <- make_scatter(mala_wide, paste0("Intracellular_", cond_ctrl),
paste0("Intracellular_", cond_ctrl_low))
p8 <- make_scatter(mala_wide, paste0("OMV_", cond_ctrl),
paste0("OMV_", cond_ctrl_low))
# Row 5: SPI-2: Stationary vs Log (x = stationary, y = log)
p9 <- make_scatter(mala_wide, paste0("Intracellular_", cond_spi2),
paste0("Intracellular_", cond_spi2_low))
p10 <- make_scatter(mala_wide, paste0("OMV_", cond_spi2),
paste0("OMV_", cond_spi2_low))
# ============================================================
# COMBINED FIGURE: Panels A & B side by side in 4x3 inches
# ============================================================
# Row labels - full words, no shorthand
row_labs_a <- c("Stationary: Control vs SPI-1",
"Stationary: Control vs SPI-2",
"Log phase: Control vs SPI-2")
row_labs_b <- c("Control: Stationary vs Log",
"SPI-2: Stationary vs Log")
# Helper to make a row (label on top, 2 plots below)
make_row <- function(lab, p_left, p_right, lab_size = 4.5) {
plots <- cowplot::plot_grid(p_left, p_right, ncol = 2)
label <- cowplot::ggdraw() + cowplot::draw_label(lab, size = lab_size)
cowplot::plot_grid(label, plots, ncol = 1, rel_heights = c(0.12, 1))
}
# --- PANEL A: Treatment Effects (3 rows) ---
header_a <- cowplot::plot_grid(
cowplot::ggdraw() + cowplot::draw_label("Intracellular", size = 5, fontface = "bold"),
cowplot::ggdraw() + cowplot::draw_label("OMV", size = 5, fontface = "bold"),
ncol = 2
)
title_a <- cowplot::ggdraw() +
cowplot::draw_label("A. Treatment Effects", size = 6, fontface = "bold", hjust = 0, x = 0.02, y = 0.3)
panel_a <- cowplot::plot_grid(
NULL,
title_a,
NULL,
header_a,
make_row(row_labs_a[1], p1, p2),
make_row(row_labs_a[2], p3, p4),
make_row(row_labs_a[3], p5, p6),
ncol = 1, rel_heights = c(0.02, 0.05, 0.05, 0.04, 1, 1, 1)
)
# --- PANEL B: Growth Phase Effects (2 rows) ---
header_b <- cowplot::plot_grid(
cowplot::ggdraw() + cowplot::draw_label("Intracellular", size = 5, fontface = "bold"),
cowplot::ggdraw() + cowplot::draw_label("OMV", size = 5, fontface = "bold"),
ncol = 2
)
title_b <- cowplot::ggdraw() +
cowplot::draw_label("B. Phase Effects", size = 6, fontface = "bold", hjust = 0, x = 0.02, y = 0.3)
panel_b <- cowplot::plot_grid(
NULL,
title_b,
NULL,
header_b,
make_row(row_labs_b[1], p7, p8),
make_row(row_labs_b[2], p9, p10),
NULL, # empty row to align with Panel A's 3 rows
ncol = 1, rel_heights = c(0.02, 0.05, 0.05, 0.04, 1, 1, 1)
)
# Combine panels side by side
main_grid <- cowplot::plot_grid(panel_a, panel_b, ncol = 2, rel_widths = c(1, 1))
# Shared axis labels
x_lab <- cowplot::ggdraw() + cowplot::draw_label("RPKM (condition 1)", size = 5, hjust = 0.5)
y_lab <- cowplot::ggdraw() + cowplot::draw_label("RPKM (condition 2)", size = 5, angle = 90)
# Assemble final plot
with_y <- cowplot::plot_grid(y_lab, main_grid, ncol = 2, rel_widths = c(0.03, 1))
combined_plot <- cowplot::plot_grid(with_y, x_lab, ncol = 1, rel_heights = c(1, 0.04))
print(combined_plot)
# Save combined figure at 4x3 inches, 600 dpi (all formats)
ggplot2::ggsave(filename = file.path(output_dir, "scatter_combined.png"),
plot = combined_plot, width = 4, height = 3, dpi = 600)
ggplot2::ggsave(filename = file.path(output_dir, "scatter_combined.svg"),
plot = combined_plot, width = 4, height = 3, dpi = 600)
ggplot2::ggsave(filename = file.path(output_dir, "scatter_combined.pdf"),
plot = combined_plot, width = 4, height = 3, dpi = 600)
} else {
message("Missing conditions. Available: ", paste(avail_conds, collapse = ", "))
}
}Because the corresponding intracellular panel is stable, it may mean selective export/packaging preference of the sRNA during intestinal infection mimic stage.
if (exists("all_mala") && exists("mala_wide")) {
output_dir <- path_from_root("explore/summary/output")
if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
# Pivot wide data back to long for filtering
mala_long <- mala_wide %>%
pivot_longer(
cols = -c(srna_id, seq_len),
names_to = c("type", "condition"),
names_pattern = "(Intracellular|OMV)_(.*)",
values_to = "RPKM"
)
# Filter sRNAs with OMV RPKM >= 10, include matched intracellular RPKM
omv_candidates <- mala_long %>%
filter(type == "OMV", RPKM >= 10) %>%
select(srna_id, seq_len, condition, OMV_RPKM = RPKM) %>%
left_join(
mala_long %>%
filter(type == "Intracellular") %>%
select(srna_id, seq_len, condition, Intracellular_RPKM = RPKM),
by = c("srna_id", "seq_len", "condition")
) %>%
arrange(condition, desc(OMV_RPKM))
# Save full list
readr::write_csv(omv_candidates, file.path(output_dir, "omv_candidates_rpkm_ge10.csv"))
# Preview: top 10 per condition
head_preview <- omv_candidates %>%
group_by(condition) %>%
slice_max(order_by = OMV_RPKM, n = 10, with_ties = FALSE) %>%
ungroup()
knitr::kable(head_preview, caption = "Top OMV-packed sRNAs (RPKM >= 10) by condition")
}| srna_id | seq_len | condition | OMV_RPKM | Intracellular_RPKM |
|---|---|---|---|---|
| manual_10Sa | NA | ControlPCN_HighOD | 180.54065 | 6.543296e+05 |
| sRNA-Sflex-59 | 456 | ControlPCN_HighOD | 158.27740 | 5.254134e+05 |
| sRNA-Sente-36 | 108 | ControlPCN_HighOD | 137.29876 | 3.758984e+02 |
| sRNA-Paeru-10 | 209 | ControlPCN_HighOD | 91.48866 | 2.943083e+05 |
| sRNA-Apleu-10 | 176 | ControlPCN_HighOD | 87.36320 | 6.110659e+02 |
| sRNA-Sente-52 | 1236 | ControlPCN_HighOD | 62.79487 | 4.475461e+02 |
| sRNA-Ecoli-13 | 84 | ControlPCN_HighOD | 46.59370 | 3.686107e+01 |
| sRNA-Sente-29 | 109 | ControlPCN_HighOD | 33.74556 | 2.775994e+02 |
| manual_CsrC | NA | ControlPCN_HighOD | 33.56022 | 2.219529e+04 |
| sRNA-Sente-37 | 46 | ControlPCN_HighOD | 33.53872 | 1.304677e+00 |
| manual_10Sa | NA | ControlPCN_LowOD | 413.62867 | 4.132594e+05 |
| sRNA-Sflex-59 | 456 | ControlPCN_LowOD | 328.70732 | 3.286413e+05 |
| sRNA-Sflex-60 | 184 | ControlPCN_LowOD | 184.28206 | 4.696713e+04 |
| sRNA-Paeru-10 | 209 | ControlPCN_LowOD | 123.74163 | 1.261786e+05 |
| sRNA-Sente-29 | 109 | ControlPCN_LowOD | 97.31942 | 7.213708e+02 |
| manual_SsrS | NA | ControlPCN_LowOD | 93.35581 | 2.438831e+04 |
| sRNA-Sente-34 | 62 | ControlPCN_LowOD | 40.93169 | 3.841463e+03 |
| sRNA-Sente-37 | 46 | ControlPCN_LowOD | 35.45908 | 1.182353e+01 |
| sRNA-Sente-32 | 53 | ControlPCN_LowOD | 16.31676 | 7.743698e+02 |
| manual_CsrC | NA | ControlPCN_LowOD | 15.18939 | 1.064690e+04 |
| sRNA-Sente-36 | 108 | SPI-1ind-LB_HighOD | 8405.86870 | 4.385577e+02 |
| sRNA-Styph-1 | 296 | SPI-1ind-LB_HighOD | 6145.35312 | 1.058231e+01 |
| sRNA-Sente-46 | 266 | SPI-1ind-LB_HighOD | 3876.27595 | 2.664049e+01 |
| manual_10Sa | NA | SPI-1ind-LB_HighOD | 3519.38426 | 1.324867e+06 |
| sRNA-Sflex-59 | 456 | SPI-1ind-LB_HighOD | 3333.01371 | 1.065598e+06 |
| sRNA-Paeru-10 | 209 | SPI-1ind-LB_HighOD | 2939.72955 | 2.347986e+05 |
| sRNA-Sente-52 | 1236 | SPI-1ind-LB_HighOD | 2627.36197 | 4.107585e+02 |
| sRNA-Sente-33 | 82 | SPI-1ind-LB_HighOD | 1965.82992 | 1.135203e+03 |
| sRNA-Sflex-60 | 184 | SPI-1ind-LB_HighOD | 1887.11978 | 6.176864e+04 |
| sRNA-Ecoli-61 | 106 | SPI-1ind-LB_HighOD | 1538.10064 | 3.801569e+01 |
| manual_10Sa | NA | SPI-2ind_HighOD | 11648.83476 | 6.199890e+05 |
| sRNA-Sflex-59 | 456 | SPI-2ind_HighOD | 9320.62878 | 4.938366e+05 |
| sRNA-Ecoli-13 | 84 | SPI-2ind_HighOD | 6266.62528 | 1.461888e+02 |
| sRNA-Paeru-10 | 209 | SPI-2ind_HighOD | 5083.34974 | 2.084876e+05 |
| sRNA-Sflex-60 | 184 | SPI-2ind_HighOD | 978.25380 | 6.537325e+04 |
| manual_SsrS | NA | SPI-2ind_HighOD | 495.56124 | 3.702340e+04 |
| sRNA-Sente-37 | 46 | SPI-2ind_HighOD | 282.38818 | 1.799624e+01 |
| manual_CsrC | NA | SPI-2ind_HighOD | 273.16379 | 1.917958e+04 |
| sRNA-Sente-29 | 109 | SPI-2ind_HighOD | 176.22329 | 2.518915e+02 |
| sRNA-Sente-52 | 1236 | SPI-2ind_HighOD | 164.10322 | 2.584162e+02 |
| manual_10Sa | NA | SPI-2ind_LowOD | 3878.13087 | 3.681235e+05 |
| sRNA-Sflex-59 | 456 | SPI-2ind_LowOD | 3280.31683 | 3.317412e+05 |
| sRNA-Paeru-10 | 209 | SPI-2ind_LowOD | 1792.78847 | 1.072380e+05 |
| sRNA-Sflex-60 | 184 | SPI-2ind_LowOD | 1213.94850 | 7.409502e+04 |
| manual_SsrS | NA | SPI-2ind_LowOD | 611.16085 | 3.802551e+04 |
| sRNA-Sente-29 | 109 | SPI-2ind_LowOD | 540.78830 | 1.286075e+03 |
| sRNA-Sente-34 | 62 | SPI-2ind_LowOD | 168.39499 | 3.445253e+03 |
| sRNA-Sente-37 | 46 | SPI-2ind_LowOD | 139.14487 | 4.086566e+00 |
| manual_CsrC | NA | SPI-2ind_LowOD | 85.24838 | 6.005684e+03 |
| sRNA-Sente-52 | 1236 | SPI-2ind_LowOD | 67.12749 | 1.568392e+02 |