Combined Analysis Summary

1. Sample Counts by Dataset and Group

A quick list of the available groups in the datasets.

plot_list_counts <- list()

for (dname in names(meta_list)) {
  meta <- meta_list[[dname]]
  gcol <- group_cols[[dname]]
  
  # For Malabirade_2018, show panels for each of the new grouping columns
  if (dname == "Malabirade_2018" && !is.null(meta)) {
    mala_groups <- c("growth_medium", "growth_phase", "rna_sample_type")
    for (mg in mala_groups) {
      if (mg %in% names(meta)) {
        p <- ggplot(meta, aes(x = .data[[mg]])) +
          geom_bar(fill = "#4C78A8") +
          labs(x = NULL, y = "Count", title = paste("Malabirade_2018:", mg)) +
          theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
        plot_list_counts[[paste0(dname, "_", mg)]] <- p
      }
    }
    next
  }
  
  if (!is.null(meta) && gcol %in% names(meta)) {
    p <- ggplot(meta, aes(x = .data[[gcol]])) +
      geom_bar(fill = "#4C78A8") +
      labs(x = NULL, y = "Count", title = dname) +
      theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
    plot_list_counts[[dname]] <- p
  }
}

ggarrange(plotlist = plot_list_counts, ncol = 3, nrow = 3)

On Malabirade_2018, the SPI-1ind-LB was only sequenced in HighOD condition thus the plots lack LowOD samples.

2. Read Length Distributions

We obtained read lengths directly from the per-sample fastp QC JSON files. The summary statistics is after filtering (type == “after_filtering”), where the length column corresponds to the mean read length reported by fastp.

plot_list_lengths <- list()

for (dname in names(meta_list)) {
  meta <- meta_list[[dname]]
  gcol <- group_cols[[dname]]
  
  # For Malabirade_2018, show separate panels per new grouping column
  if (dname == "Malabirade_2018" && !is.null(meta)) {
    mala_groups <- c("growth_medium", "growth_phase", "rna_sample_type")
    for (mg in mala_groups) {
      if (!mg %in% names(meta)) next
      lengths_df <- fastp_lengths(meta, mg)
      if (nrow(lengths_df) == 0) next
      
      if ("type" %in% names(lengths_df) && any(lengths_df$type == "distribution")) {
        p <- lengths_df %>%
          filter(type == "distribution") %>%
          ggplot(aes(x = length, y = count, color = condition)) +
          geom_line(alpha = 0.7) +
          scale_y_continuous(labels = scales::comma) +
          labs(x = "Length (bp)", y = "Count", title = paste("Malabirade_2018:", mg), color = mg) +
          theme(plot.title = element_text(size = 10))
      } else {
        len_summ <- lengths_df %>%
          filter(type == "after_filtering") %>%
          group_by(sample_id, condition) %>%
          summarise(mean_length = first(length), .groups = "drop")
        
        p <- ggplot(len_summ, aes(x = condition, y = mean_length)) +
          geom_boxplot(alpha = 0.7, fill = "grey90") +
          geom_jitter(width = 0.2, alpha = 0.6, color = "#2ca02c") +
          labs(x = NULL, y = "Mean Length (bp)", title = paste("Malabirade_2018:", mg)) +
          theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
      }
      plot_list_lengths[[paste0(dname, "_", mg)]] <- p
    }
    next
  }
  
  if (!is.null(meta)) {
    lengths_df <- fastp_lengths(meta, gcol)
    
    if (nrow(lengths_df) > 0) {
      if ("type" %in% names(lengths_df) && any(lengths_df$type == "distribution")) {
        # Distribution plot
        p <- lengths_df %>%
          filter(type == "distribution") %>%
          ggplot(aes(x = length, y = count, color = condition)) +
          geom_line(alpha = 0.7) +
          scale_y_continuous(labels = scales::comma) +
          labs(x = "Length (bp)", y = "Count", title = dname, color = "Group") +
          theme(plot.title = element_text(size = 10))
      } else {
        # Mean length plot
        len_summ <- lengths_df %>%
          filter(type == "after_filtering") %>%
          group_by(sample_id, condition) %>%
          summarise(mean_length = first(length), .groups = "drop")
        
        p <- ggplot(len_summ, aes(x = condition, y = mean_length)) +
          geom_boxplot(alpha = 0.7, fill = "grey90") +
          geom_jitter(width = 0.2, alpha = 0.6, color = "#2ca02c") +
          labs(x = NULL, y = "Mean Length (bp)", title = dname) +
          theme(axis.text.x = element_text(angle = 45, hjust = 1), plot.title = element_text(size = 10))
      }
      plot_list_lengths[[dname]] <- p
    }
  }
}

ggarrange(plotlist = plot_list_lengths, ncol = 3, nrow = 2)
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

3. sRNA Filtering Summary (>1 RPKM in ≥60% samples)

We filtered sRNAs by requiring RPKM ≥ 1 in at least 60% of samples (frac_gt1 >= 0.6) within any biological group before including them in downstream analyses.

filter_summary_list <- list()
pass_ids_list <- list()

for (dname in names(meta_list)) {
  meta <- meta_list[[dname]]
  gcol <- group_cols[[dname]]
  
  # For Malabirade_2018, produce summaries for each grouping column
  if (dname == "Malabirade_2018" && !is.null(meta)) {
    counts <- load_counts_long(meta)
    mala_groups <- c("growth_medium", "growth_phase", "rna_sample_type")
    for (mg in mala_groups) {
      if (!mg %in% names(meta)) next
      res <- filter_srna(counts, meta, mg)
      pass_ids_list[[paste0(dname, "_", mg)]] <- res$pass_ids
      
      if (length(res$pass_ids) > 0) {
        summ <- res$per_group %>%
          group_by(group) %>%
          summarise(n_srna_pass = n_distinct(srna_id), .groups = "drop") %>%
          mutate(dataset = paste(dname, mg, sep = "_"))
        filter_summary_list[[paste0(dname, "_", mg)]] <- summ
      }
    }
    next
  }
  
  if (!is.null(meta)) {
    counts <- load_counts_long(meta)
    res <- filter_srna(counts, meta, gcol)
    
    pass_ids_list[[dname]] <- res$pass_ids
    
    if (length(res$pass_ids) > 0) {
      summ <- res$per_group %>%
        group_by(group) %>%
        summarise(n_srna_pass = n_distinct(srna_id), .groups = "drop") %>%
        mutate(dataset = dname)
      filter_summary_list[[dname]] <- summ
    }
  }
}

all_filter_summary <- bind_rows(filter_summary_list)

ggplot(all_filter_summary, aes(x = group, y = n_srna_pass, fill = dataset)) +
  geom_col() +
  facet_wrap(~dataset, scales = "free_x", ncol = 3) +
  labs(x = "Group", y = "Number of passing sRNAs", title = "sRNAs with stable expression") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")

4. Scatterplots: Enrichment Analysis

Fan_2023:

Fan_2023 had to be dropped off from the analysis as the pipeline didnt detected any sRNAs The methodology mentioned in the paper states that ” mRNA was purified from total RNA using poly(T) oligonucleotide-attached magnetic beads” which would only retain RNA sequences with poly-A tail (eukaryotic/human mRNAs).

Blenkiron_2016: Small vs Large Fraction

blen_data <- NULL
if ("Blenkiron_2016" %in% names(meta_list)) {
  meta   <- meta_list$Blenkiron_2016
  counts <- load_counts_long(meta)
  
  # Relaxed filter: keep sRNAs with mean RPKM > 0.6 in ANY library size
  sRNA_keep <- counts %>%
    left_join(meta %>% select(sample_id, library_size), by = "sample_id") %>%
    filter(library_size %in% c("small", "medium", "large")) %>%
    group_by(srna_id, library_size) %>%
    summarise(mean_expr = mean(RPKM, na.rm = TRUE), .groups = "drop") %>%
    group_by(srna_id) %>%
    filter(any(mean_expr > 0.6)) %>%
    pull(srna_id)
  
  # Median per library size (small / medium / large)
  med <- counts %>%
    filter(srna_id %in% sRNA_keep) %>%
    left_join(meta %>% select(sample_id, library_size), by = "sample_id") %>%
    filter(library_size %in% c("small", "medium", "large")) %>%
    group_by(srna_id, library_size) %>%
    summarise(median_RPKM = median(RPKM, na.rm = TRUE), .groups = "drop") %>%
    pivot_wider(names_from = library_size, values_from = median_RPKM) %>%
    mutate(across(where(is.numeric), ~ replace_na(., 0.01)))  # avoid zeros for log scales
  
  blen_data <- med
}

p_blen <- if (!is.null(blen_data) && nrow(blen_data) > 0) {
  
  # Build pairwise comparisons: small–large, medium–large, small–medium
  blen_pairs <- bind_rows(
    blen_data %>%
      transmute(x = small,  y = large,  comparison = "Small vs Large"),
    blen_data %>%
      transmute(x = medium, y = large,  comparison = "Medium vs Large"),
    blen_data %>%
      transmute(x = small,  y = medium, comparison = "Small vs Medium")
  )
  
  ggplot(blen_pairs, aes(x = x, y = y)) +
    geom_point(alpha = 0.6, color = "#2ca02c") +
    scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
    scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
    labs(
      title = "Blenkiron_2016: Pairwise library-size comparisons",
      x = "Median RPKM (X library)",
      y = "Median RPKM (Y library)"
    ) +
    geom_abline(slope = 1, linetype = "dashed") +
    facet_wrap(~ comparison)
  
} else {
  ggplot() + labs(title = "Blenkiron_2016: No Data")
}

p_blen

Sahr_2022_2: MVseq vs RNAseq

sahr2_data <- NULL
if ("Sahr_2022_2" %in% names(meta_list)) {
  meta <- meta_list$Sahr_2022_2
  counts <- load_counts_long(meta)
  
  sRNA_keep <- counts %>%
    left_join(meta %>% select(sample_id, condition), by = "sample_id") %>%
    group_by(srna_id, condition) %>%
    summarise(mean_expr = mean(RPKM, na.rm = TRUE), .groups = "drop") %>%
    group_by(srna_id) %>%
    filter(any(mean_expr > 0.6)) %>%
    pull(srna_id)
  
  med <- counts %>%
    filter(srna_id %in% sRNA_keep) %>%
    left_join(meta %>% select(sample_id, condition), by = "sample_id") %>%
    group_by(srna_id, condition) %>%
    summarise(median_RPKM = median(RPKM, na.rm = TRUE), .groups = "drop") %>%
    pivot_wider(names_from = condition, values_from = median_RPKM) %>%
    mutate(across(where(is.numeric), ~replace_na(., 0.01)))
  
  sahr2_data <- med
}

p_sahr2 <- if (!is.null(sahr2_data) && nrow(sahr2_data) > 0) {
  ggplot(sahr2_data, aes(x = RNAseq, y = MVseq)) +
    geom_point(alpha = 0.6, color = "#d62728") +
    scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) + 
    scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
    labs(title = "Sahr_2022_2: EV vs Total", x = "Total RNA (RPKM)", y = "EV RNA (RPKM)") +
    geom_abline(slope = 1, linetype = "dashed")
} else { ggplot() + labs(title = "Sahr_2022_2: No Data") }
p_sahr2

Malabirade_2018: Comparison by Biological Condition

mala_data_list <- list()
if ("Malabirade_2018" %in% names(meta_list)) {
  meta <- meta_list$Malabirade_2018
  counts <- load_counts_long(meta)
  
  meta_proc <- meta %>%
    mutate(
      type = case_when(
        str_detect(condition, "OMV-RNAs") ~ "OMV",
        str_detect(condition, "intracellularRNAs") ~ "Intracellular",
        TRUE ~ NA_character_
      ),

      base_condition = condition %>%
        str_remove("_OMV-RNAs") %>%
        str_remove("_intracellularRNAs") %>%
        str_remove(" ") %>% # Remove stray spaces
        str_trim()
    ) %>%
    filter(!is.na(type))
  
  conditions <- unique(meta_proc$base_condition)
  
  for (cond in conditions) {
    # Subset metadata for this condition
    sub_meta <- meta_proc %>% filter(base_condition == cond)

    # Mean > 0.6
    sRNA_keep <- counts %>%
      filter(sample_id %in% sub_meta$sample_id) %>%
      left_join(sub_meta %>% select(sample_id, type), by = "sample_id") %>%
      group_by(srna_id, type) %>%
      summarise(mean_expr = mean(RPKM, na.rm = TRUE), .groups = "drop") %>%
      group_by(srna_id) %>%
      filter(any(mean_expr > 0.6)) %>%
      pull(srna_id)
    
    if (length(sRNA_keep) > 0) {
      med <- counts %>%
        filter(sample_id %in% sub_meta$sample_id) %>%
        filter(srna_id %in% sRNA_keep) %>%
        left_join(sub_meta %>% select(sample_id, type), by = "sample_id") %>%
        group_by(srna_id, type) %>%
        summarise(median_RPKM = median(RPKM, na.rm = TRUE), .groups = "drop") %>%
        pivot_wider(names_from = type, values_from = median_RPKM) %>%
        mutate(across(where(is.numeric), ~replace_na(., 0.01))) %>%
        mutate(condition = cond) %>%
        left_join(srna_lengths, by = "srna_id")
      
      mala_data_list[[cond]] <- med
    }
  }
}

if (length(mala_data_list) > 0) {
  all_mala <- bind_rows(mala_data_list)
  
  # Annotate with growth metadata for downstream plots
  cond_annotations <- meta_proc %>%
    distinct(base_condition, growth_medium, growth_phase, rna_sample_type)
  all_mala <- all_mala %>%
    left_join(cond_annotations, by = c("condition" = "base_condition"))
  
  ggplot(all_mala, aes(x = Intracellular, y = OMV, color = seq_len)) +
    geom_point(alpha = 0.6, size = 2) +
    scale_color_gradient(low = "blue", high = "red", na.value = "grey70") +
    scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
    scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000)) +
    geom_abline(slope = 1, linetype = "dashed") +
    facet_wrap(~condition, scales = "fixed") +
    labs(title = "Malabirade_2018: OMV vs Intracellular by Condition",
         x = "Intracellular (RPKM)", y = "OMV (RPKM)", color = "sRNA length (nt)") +
    theme(strip.text = element_text(size = 8))
} else {
  ggplot() + labs(title = "Malabirade_2018: No Data")
}

Tables containing OMV-enriched RNAs per condition

# Check if the data frame exists from the previous chunk
if (exists("all_mala")) {
  # Get unique conditions (facets)
  conditions <- unique(all_mala$condition)
  
  for (cond in conditions) {
    # Filter for sRNAs enriched in OMV (OMV > Intracellular)
    omv_enriched <- all_mala %>%
      filter(condition == cond) %>%
      filter(OMV > Intracellular) %>%
      select(srna_id, seq_len, OMV, Intracellular) %>%
      distinct() %>% # Deduplicate entries created by the metadata join
      arrange(desc(OMV))
    
    # Create a Markdown header for each condition
    cat(paste("\n####", cond, "\n\n"))
    
    # Print the table if data exists
    if (nrow(omv_enriched) > 0) {
      print(knitr::kable(omv_enriched, caption = paste("sRNAs with OMV > Intracellular in", cond)))
    } else {
      cat("No sRNAs found with higher OMV expression in this condition.\n")
    }
    
    # Add newlines for spacing between tables
    cat("\n")
  }
}

ControlPCN_LowOD

sRNAs with OMV > Intracellular in ControlPCN_LowOD
srna_id seq_len OMV Intracellular
sRNA-Sente-37 46 35.45908 11.82353

SPI-2ind_LowOD

sRNAs with OMV > Intracellular in SPI-2ind_LowOD
srna_id seq_len OMV Intracellular
sRNA-Sente-37 46 139.1449 4.086566

SPI-1ind-LB_HighOD

sRNAs with OMV > Intracellular in SPI-1ind-LB_HighOD
srna_id seq_len OMV Intracellular
sRNA-Sente-36 108 8405.86870 438.5577361
sRNA-Styph-1 296 6145.35312 10.5823070
sRNA-Sente-46 266 3876.27595 26.6404897
sRNA-Sente-52 1236 2627.36197 410.7585078
sRNA-Sente-33 82 1965.82992 1135.2027459
sRNA-Ecoli-61 106 1538.10064 38.0156916
sRNA-Sflex-21 105 1511.47483 37.6797771
sRNA-Sente-5 32 1384.16287 261.9766245
sRNA-Sente-35 90 924.38397 124.7045432
sRNA-Ecoli-46 93 788.35502 8.9652078
sRNA-Sflex-15 92 780.72819 8.9504852
sRNA-Sente-50 191 720.88289 309.8276334
sRNA-Sflex-41 94 582.97100 79.9381348
manual_IsrA NA 578.47832 0.5131556
sRNA-Sente-41 146 564.70392 406.5214043
sRNA-Ecoli-60 73 491.71249 141.3234982
sRNA-Ecoli-13 84 474.64201 362.3001297
sRNA-Sente-1 144 467.73287 122.0943470
sRNA-Sente-44 144 467.73287 122.0943470
sRNA-Ecoli-103 114 443.73451 136.1333928
sRNA-Sflex-6 114 443.73451 136.1333928
sRNA-Sente-51 284 402.64798 38.8113518
sRNA-Ecoli-70 95 402.30762 63.8035681
sRNA-Sflex-23 79 385.80073 147.6851742
sRNA-Ecoli-64 79 357.78880 159.8833597
sRNA-Ecoli-17 246 355.48712 29.4636840
sRNA-Sflex-3 245 355.48712 29.4636840
sRNA-Sente-49 85 349.02792 16.3039797
sRNA-Sflex-46 109 263.61919 188.3603141
sRNA-Ecoli-86 111 263.03041 190.3157689
sRNA-Sente-55 486 254.93458 127.3269480
sRNA-Ecoli-87 57 234.16588 63.3461872
sRNA-Sflex-47 57 234.16588 63.3461872
sRNA-Sente-28 113 200.04483 36.4781997
sRNA-Sente-37 46 183.23240 4.0336417
sRNA-Sente-13 162 180.74064 127.6364763
sRNA-Ecoli-2 105 161.98028 44.5074169
sRNA-Sflex-5 87 151.47334 20.7369153
sRNA-Sente-25 211 134.01291 128.7029000
sRNA-Ecoli-52 88 124.07863 18.8340761
sRNA-Sflex-33 88 124.07863 18.4647805
sRNA-Sente-23 119 108.63634 87.9016456
sRNA-Sflex-13 77 102.32459 31.5357441
sRNA-Sente-48 196 99.52849 80.8043643
sRNA-Ecoli-49 184 93.69258 14.6060289

ControlPCN_HighOD

sRNAs with OMV > Intracellular in ControlPCN_HighOD
srna_id seq_len OMV Intracellular
sRNA-Ecoli-13 84 46.593704 36.861068
sRNA-Sente-37 46 33.538715 1.304677
sRNA-Styph-1 296 27.037588 9.544384
sRNA-Sente-46 266 20.178142 9.568794
sRNA-Ecoli-46 93 9.323033 8.858069
sRNA-Sflex-15 92 9.272199 8.993306
manual_IsrA NA 3.743399 0.000000

SPI-2ind_HighOD

sRNAs with OMV > Intracellular in SPI-2ind_HighOD
srna_id seq_len OMV Intracellular
sRNA-Ecoli-13 84 6266.6253 146.18883
sRNA-Sente-37 46 282.3882 17.99624
sRNA-Sente-48 196 105.1909 47.86915
# Save OMV-enriched tables to files
if (exists("all_mala")) {
  output_dir <- path_from_root("explore/summary/output")
  if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  
  conditions <- unique(all_mala$condition)
  
  for (cond in conditions) {
    omv_enriched <- all_mala %>%
      filter(condition == cond) %>%
      filter(OMV > Intracellular) %>%
      select(srna_id, seq_len, OMV, Intracellular) %>%
      distinct() %>%
      arrange(desc(OMV))
    
    # Write CSV per condition
    fn <- file.path(output_dir, paste0("omv_enriched_", cond, ".csv"))
    readr::write_csv(omv_enriched, fn)
  }
}

Observations

1. Baseline Condition (ControlPCN)

To establish the baseline profile of “normal” sRNA packaging, we examine both ControlPCN_LowOD (log phase) and ControlPCN_HighOD (stationary phase).

(a) LowOD: Logarithmic growth phase

  • sRNA RPKMs are strongly skewed to the right (higher intracellular abundance).
  • Intracellular sRNAs generally have higher RPKM values than OMV-associated sRNAs.
  • Only a single point lies to the left of the diagonal, indicating OMV enrichment which is sRNA-Sente-37 (This sRNA is enriched across all conditions)
    This point may represent an sRNA preferentially packaged into OMVs even during active growth. (See table above)

(b) HighOD: Stationary phase

  • The global distribution shifts leftward, and many points move closer to the diagonal.
  • Most sRNAs remain on the right side, confirming higher intracellular abundance.
  • Approximately five sRNAs fall to the left of the diagonal, indicating OMV enrichment under stationary-phase stress.
  • These OMV-enriched sRNAs may be associated with general stress responses (e.g., nutrient limitation).

Does general stress increase RNA expression?
Yes.

Does general stress increase selective RNA packaging into OMVs?
Yes.


2. “Invasion” Mimic Condition (SPI-1 Induction)

We compare SPI-1ind-LB_HighOD against ControlPCN_HighOD.

Biological context:
SPI-1 induction in LB medium mimics the early intestinal infection stage by activating Salmonella Pathogenicity Island 1.

Key observations: - A greater number of points fall left of the diagonal compared to the control, indicating more sRNAs enriched in OMVs during the invasion-like condition. - Overall expression of sRNAs increases in both intracellular and OMV fractions compared to the control. - These OMV-enriched sRNAs may represent candidates involved in the intestinal invasion phase.

Does sRNA expression increase during intestinal invasion?
Yes.

Does selective sRNA packaging increase during invasion compared to the control?
Yes.


3. “Macrophage Survival” Mimic (SPI-2 Induction)

Biological context:
SPI-2 medium mimics the intracellular macrophage environment by inducing SPI-2 genes under acidic (pH 5.8), phosphate-limited PCN conditions.

Key observations: - Three sRNAs are located to the left of the diagonal, indicating OMV-specific enrichment relative to intracellular abundance. - These may represent candidates important for Salmonella survival or interaction with host (human) pathways during the macrophage phase.

Scatter Plots comparing conditions

if (exists("all_mala")) {
  
  # Define condition names mapping to what is in all_mala$condition
  # High OD (stationary) comparisons and Low OD (log phase) control vs SPI-2
  cond_ctrl      <- "ControlPCN_HighOD"
  cond_spi1      <- "SPI-1ind-LB_HighOD"
  cond_spi2      <- "SPI-2ind_HighOD"
  cond_ctrl_low  <- "ControlPCN_LowOD"
  cond_spi2_low  <- "SPI-2ind_LowOD"
  
  # Where to save outputs
  output_dir <- path_from_root("explore/summary/output")
  if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  
  # Check if these conditions exist in the data
  avail_conds <- unique(all_mala$condition)
  required_conditions <- c(cond_ctrl, cond_spi1, cond_spi2, cond_ctrl_low, cond_spi2_low)
  has_conds <- required_conditions %in% avail_conds
  
  if (all(has_conds)) {
    
    # --- Pairwise scatter panels ---
    
    conditions_keep <- required_conditions
    
    # Collapse any duplicates per (srna_id, seq_len, condition) to avoid list-cols
    mala_base <- all_mala %>%
      filter(condition %in% conditions_keep) %>%
      select(srna_id, seq_len, condition, Intracellular, OMV)
    
    mala_wide <- mala_base %>%
      group_by(srna_id, seq_len, condition) %>%
      summarise(
        Intracellular = median(Intracellular, na.rm = TRUE),
        OMV = median(OMV, na.rm = TRUE),
        .groups = "drop"
      ) %>%
      pivot_wider(
        id_cols = c(srna_id, seq_len),
        names_from = condition,
        values_from = c(Intracellular, OMV),
        names_glue = "{.value}_{condition}"
      )
    
    # Helper for consistency
    plot_pairwise <- function(data, x_var, y_var, title_str) {
      ggplot(data, aes(x = .data[[x_var]], y = .data[[y_var]])) +
        geom_point(alpha = 0.6, color = "#4C78A8") +
        scale_x_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000, 10000)) +
        scale_y_continuous(trans = "log1p", breaks = c(0, 1, 10, 100, 1000, 10000)) +
        geom_abline(slope = 1, linetype = "dashed", color = "grey50") +
        labs(title = title_str, x = x_var, y = y_var) +
        theme_minimal() +
        theme(plot.title = element_text(size = 11))
    }
    
    # 1. Intracellular: Control vs SPI1
    p_ic_spi1 <- plot_pairwise(mala_wide, 
                               paste0("Intracellular_", cond_ctrl), 
                               paste0("Intracellular_", cond_spi1), 
                               "Intracellular: Control vs SPI1")
    
    # 2. OMV: Control vs SPI1
    p_omv_spi1 <- plot_pairwise(mala_wide, 
                                paste0("OMV_", cond_ctrl), 
                                paste0("OMV_", cond_spi1), 
                                "OMV: Control vs SPI1")
    
    # 3. Intracellular: Control vs SPI2
    p_ic_spi2 <- plot_pairwise(mala_wide, 
                               paste0("Intracellular_", cond_ctrl), 
                               paste0("Intracellular_", cond_spi2), 
                               "Intracellular: Control vs SPI2")
    
    # 4. OMV: Control vs SPI2
    p_omv_spi2 <- plot_pairwise(mala_wide, 
                                paste0("OMV_", cond_ctrl), 
                                paste0("OMV_", cond_spi2), 
                                "OMV: Control vs SPI2")
    
    # 5. Intracellular: Control LowOD vs SPI2 LowOD
    p_ic_low_spi2 <- plot_pairwise(mala_wide,
                                   paste0("Intracellular_", cond_ctrl_low),
                                   paste0("Intracellular_", cond_spi2_low),
                                   "Intracellular: Control LowOD vs SPI2 LowOD")
    
    # 6. OMV: Control LowOD vs SPI2 LowOD
    p_omv_low_spi2 <- plot_pairwise(mala_wide,
                                    paste0("OMV_", cond_ctrl_low),
                                    paste0("OMV_", cond_spi2_low),
                                    "OMV: Control LowOD vs SPI2 LowOD")
    
    combined_plot <- ggpubr::ggarrange(
      p_ic_spi1, p_omv_spi1,
      p_ic_spi2, p_omv_spi2,
      p_ic_low_spi2, p_omv_low_spi2,
      ncol = 2, nrow = 3
    )
    
    print(combined_plot)
    
    # Save combined panel (PNG)
    ggplot2::ggsave(
      filename = file.path(output_dir, "malabirade_pairwise_scatter.png"),
      plot = combined_plot,
      width = 10, height = 16, dpi = 300
    )
  } else {
    print(paste("Missing required conditions for plots. Available:", paste(avail_conds, collapse=", ")))
  }
}

Observation:

    1. Intracellular baseline is generally stable accross conditions. SPI-1 and SPI-2 induction do not strongly change intracellular sRNA levels. There are no clear upward or downward shift at both log phase and stationary phase.
    1. SPI-1 causes a global increase of sRNAs in OMVs. All points are above the diagonal compared to Control stationary phase.

Because the corresponding intracellular panel is stable, it may mean selective export/packaging preference of the sRNA during intestinal infection mimic stage.

    1. SPI-2 heterogenously increases/decreases OMV sRNAs at stationary phase. Many points lie above the diagonal, but the pattern is dispersed. There are a few sRNAs which are expressed in the control but NOT expressed at all on the SPI-2ind samples at stationary phase.
    1. Most sRNA abundance increases in SPI-2ind samples compared to the Control samples during log growth phase. There are few sRNAs that are only detected in the SPI-2ind samples but NOT in the control samples.