Overview

Purpose: Identify differentially abundant lipids across leaf developmental stages and phosphorus treatments using total ion current (TIC) normalization.

Approach: 1. Filter lipids by missingness (<20% missing values) 2. Filter samples by total ion current (library size) 3. Normalize to molar ion fractions (TIC normalization) 4. Apply limma-voom for differential abundance analysis 5. Model spatial and technical covariates

Experimental factors: - Leaf tissue position: Continuous gradient (leaf 1-4) - Phosphorus treatment: High P (+P) vs Low P (-P) - Genotype: Control (CTRL) vs Inversion 4m (INV4M)

Effect size thresholds: - Leaf predictors: |logFC| > 0.5 - Categorical predictors: |logFC| > 1.5

Libraries

library(edgeR)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyr)
library(forcats)
library(ggrepel)
library(ggpubr)
library(ggtext)

Data Loading

Define Exclusions

Purpose: Remove internal standards and odd-chain lipids from analysis.

internal_standards <- c( 
  "CUDA", "LPE_17_1", "Sphingosine_17_1", "PC_25_0", "DG_18_1",
  "PG_34_0", "DG_12_0", "TG_17_0", "LPC_17_0", "PE_34", "Cholesterol"
)

odd_carbon <- c("PG_17_0", "SM_35_1", "TG_57_6")
to_exclude <- c(internal_standards, odd_carbon)

Load Raw MS Data

Purpose: Load MS-DIAL peak areas with internal standard integration.

lipid_csv <- "../data/PSU_RawData_MSDial_NewStdInt_240422.csv"

raw <- read.csv(lipid_csv, na.strings = c("", "#N/A", "NA", "Inf")) %>%
  select(-all_of(to_exclude[to_exclude %in% colnames(.)])) %>%
  filter(!grepl("Methanol|Pool", sampleID))

cat("Loaded samples:", nrow(raw), "\n")

## Loaded samples: 43

Load Sample Metadata

Purpose: Merge phenotypic data with RNA-seq metadata and MS injection order.

plant_csv <- "/Users/fvrodriguez/Desktop/Desktop/22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv"

psu <- read.csv(plant_csv) %>%
  dplyr::rename(Genotype = "Who.What", rowid = "P22.") %>%
  filter(Genotype %in% c("CTRL", "INV4M")) %>%
  droplevels()

sampleInfo <- read.csv('../data/inv4mRNAseq_metadata.csv') %>%
  dplyr::rename(Genotype = genotype, rowid = row)

sampleInfo <- psu %>%
  select(rowid, Rep, Plot_Row, Plot_Column, DTA, DTS) %>%
  inner_join(sampleInfo) %>%
  filter(!is.na(DTA))

sampleInfo$leaf_group <- NA
sampleInfo$leaf_group[sampleInfo$leaf_tissue < 3] <- "apical"
sampleInfo$leaf_group[sampleInfo$leaf_tissue > 2] <- "basal"

sampleInfo$tube <- gsub("L|R", "S", sampleInfo$tube, perl = TRUE)

ms_order <- read.csv("../data/PSU-PHO22_ms_order.csv")
ms_order$tube <- gsub("L|R", "S", ms_order$top_tag, perl = TRUE)

sampleInfo <- sampleInfo %>%
  dplyr::right_join(ms_order) %>%
  distinct()

cat("Sample info loaded:", nrow(sampleInfo), "samples\n")

## Sample info loaded: 53 samples

Merge Lipid Data with Metadata

Purpose: Combine MS peak areas with sample metadata.

raw$tube <- gsub(".*_|.raw", "", raw$sampleID, perl = TRUE)
raw$tube <- gsub("L|R", "S", raw$tube, perl = TRUE)

pheno <- sampleInfo %>%
  select(rowid, tube, Rep, Plot:Plot_Row, Treatment:leaf_tissue, 
         leaf_group, injection_order) %>%
  inner_join(raw %>% select(tube, DGDG_34_0:TG_58_5)) %>%
  mutate(
    block = as.factor(Rep),
    Treatment = factor(Treatment, levels = c("High_P", "Low_P"))
  ) %>%
  pivot_wider(
    names_from = Rep,
    values_from = Rep,
    names_prefix = "block_",
    values_fn = function(x) 1,
    values_fill = 0
  ) %>%
  select(rowid, tube:Plot_Row, block, block_6:block_12, 
         injection_order, everything())

levels(pheno$Treatment) <- c("+P", "-P")

cat("Final dataset:", nrow(pheno), "samples\n")

## Final dataset: 43 samples

Define Lipid Classifications

Purpose: Annotate lipids by head group and class for downstream analysis.

m <- pheno %>%
  select(DGDG_34_0:TG_58_5) %>%
  as.matrix()

head_group <- c(
  DG = "neutral", DGDG = "glycolipid", DGGA = "glycolipid",
  LPC = "phospholipid", LPE = "phospholipid", MGDG = "glycolipid",
  PC = "phospholipid", PE = "phospholipid", PG = "phospholipid",
  PI = "phospholipid", SQDG = "glycolipid", TG = "neutral"
)

sp <- data.frame(
  colname = colnames(m),
  class = gsub("_.*", "", colnames(m), perl = TRUE)
)
sp$head_group <- head_group[sp$class]

cat("\nLipid class distribution:\n")

## 
## Lipid class distribution:

print(with(sp, table(head_group, class)))

##               class
## head_group     DG DGDG DGGA LPC LPE MGDG PC PE PG PI SQDG TG
##   glycolipid    0   12    9   0   0    8  0  0  0  0    9  0
##   neutral      22    0    0   0   0    0  0  0  0  0    0 25
##   phospholipid  0    0    0   6   3    0 19 12  6  4    0  0

Quality Control

Create Count Matrix

Purpose: Prepare lipid abundance matrix for filtering and normalization.

counts <- t(m)
colnames(counts) <- pheno$tube

cat("Count matrix dimensions:", dim(counts), "\n")

## Count matrix dimensions: 135 43

cat("Lipids:", nrow(counts), "\n")

## Lipids: 135

cat("Samples:", ncol(counts), "\n")

## Samples: 43

Raw Data Distribution

hist(counts, 
     main = "Distribution of Raw Lipid Abundances", 
     xlab = "Peak Area", 
     breaks = 50,
     col = "lightblue")

Missing Data Analysis

Purpose: Identify lipids with excessive missing values for removal.

Approach: Calculate proportion of missing/zero values per lipid and set threshold at 20%.

missing_per_lipid <- rowSums(is.na(counts) | counts == 0) / ncol(counts)

cat("Missing data summary:\n")

## Missing data summary:

print(summary(missing_per_lipid))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.01585 0.00000 0.27907

hist(missing_per_lipid * 100, 
     main = "Missing Data Distribution Across Lipids",
     xlab = "% Missing", 
     breaks = 30,
     col = "coral")
abline(v = 20, col = "red", lwd = 2, lty = 2)

Filter Lipids by Missingness

Purpose: Remove lipids with >20% missing values across samples.

missing_threshold <- 0.20
keep_lipids <- missing_per_lipid < missing_threshold

cat("Lipid filtering (< 20% missing):\n")

## Lipid filtering (< 20% missing):

cat("  Before:", nrow(counts), "\n")

##   Before: 135

cat("  After:", sum(keep_lipids), "\n")

##   After: 132

cat("  Removed:", sum(!keep_lipids), "\n")

##   Removed: 3

counts_filtered <- counts[keep_lipids, ]

Match Sample Metadata

Purpose: Ensure sample metadata matches filtered count matrix.

sampleNames <- colnames(counts_filtered)

sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]

sampleInfo_matched$Treatment <- factor(
  pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
  levels = c("+P", "-P")
)

cat("Treatment levels:\n")

## Treatment levels:

print(table(sampleInfo_matched$Treatment))

## 
## +P -P 
## 16 27

Library Size Diagnostics

Purpose: Identify samples with insufficient total ion current for removal.

Approach: Calculate total peak area per sample (library size) and visualize distribution to determine filtering threshold.

genes <- data.frame(gene = rownames(counts_filtered))

y_raw <- DGEList(counts = counts_filtered, samples = sampleInfo_matched)
y_raw$group <- interaction(y_raw$samples$Treatment, y_raw$samples$Genotype)

cat("Library size summary (before filtering):\n")

## Library size summary (before filtering):

print(summary(y_raw$samples$lib.size))

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 6.026e+09 4.078e+10 5.495e+10 5.752e+10 7.071e+10 1.110e+11

hist(
  y_raw$samples$lib.size / 1e6,
  main = "Library Size Distribution",
  xlab = "Total Ion Current (millions)",
  breaks = 30,
  col = "lightgreen"
)

lib_size_threshold <- quantile(y_raw$samples$lib.size, 0.10)
abline(v = lib_size_threshold / 1e6, col = "red", lwd = 2, lty = 2)

cat("Suggested threshold (10th percentile):", lib_size_threshold, "\n")

## Suggested threshold (10th percentile): 28898886284

Filter Samples by Library Size

Purpose: Remove samples with low total ion current that may compromise data quality.

Expected outcome: Retain samples with sufficient signal for reliable quantification.

# min_lib_size <- 3e10
min_lib_size <- 1

keep_samples <- y_raw$samples$lib.size >= min_lib_size

cat("Sample filtering (TIC >=", min_lib_size, "):\n")

## Sample filtering (TIC >= 1 ):

cat("  Before:", ncol(counts_filtered), "\n")

##   Before: 43

cat("  After:", sum(keep_samples), "\n")

##   After: 43

cat("  Removed:", sum(!keep_samples), "\n")

##   Removed: 0

if (sum(!keep_samples) > 0) {
  cat("\nRemoved samples:\n")
  print(y_raw$samples[!keep_samples, 
                       c("tube", "lib.size", "Treatment", 
                         "Genotype", "leaf_tissue")])
}

y <- y_raw[, keep_samples]
y$samples <- droplevels(y$samples)

MDS: Library Size Quality Control

Purpose: Visualize sample relationships colored by library size to assess filtering quality.

mds_all <- plotMDS(y_raw, plot = FALSE)

mds_qc_data <- y_raw$samples %>%
  mutate(
    dim1 = mds_all$x,
    dim2 = mds_all$y,
    lib_size = lib.size / 1e6,
    filtered_out = !keep_samples
  )

ggplot(mds_qc_data, 
       aes(x = dim1, y = dim2, 
           color = lib_size, 
           shape = filtered_out)) +
  geom_point(size = 4) +
  scale_color_viridis_c(option = "plasma") +
  scale_shape_manual(
    values = c("TRUE" = 4, "FALSE" = 19),
    labels = c("Kept", "Removed")
  ) +
  labs(
    x = paste0("Dim1 (", round(100 * mds_all$var.explained[1]), "%)"),
    y = paste0("Dim2 (", round(100 * mds_all$var.explained[2]), "%)"),
    color = "Library Size\n(millions)",
    shape = "Sample Status",
    title = "MDS: Library Size Quality Control"
  ) +
  theme_classic(base_size = 15)

MDS: Injection Order Effect

Purpose: Assess technical drift associated with MS injection order.

ggplot(mds_qc_data %>% filter(!filtered_out), 
       aes(x = dim1, y = dim2, 
           fill = injection_order, 
           shape = Treatment)) +
  geom_point(size = 4, color ="transparent") +
  scale_fill_viridis_c(option = "plasma") +
  scale_shape_manual(values = c(21, 25)) +
  labs(
    x = paste0("Dim1 (", round(100 * mds_all$var.explained[1]), "%)"),
    y = paste0("Dim2 (", round(100 * mds_all$var.explained[2]), "%)"),
    color = "Injection Order",
    title = "MDS: Injection Order Effect"
  ) +
  theme_classic(base_size = 15)

Total Ion Current Normalization

Calculate Molar Ion Fractions

Purpose: Normalize lipid abundances to total ion current (TIC), converting raw peak areas to molar ion fractions.

Approach: For each sample, divide each lipid’s peak area by the total peak area. This accounts for differences in total signal between samples and expresses abundances as proportions.

Expected outcome: Molar ion fractions sum to 1.0 per sample, representing each lipid’s proportion of the total detected lipid pool.

cat("=== Calculating Molar Ion Fractions ===\n")

## === Calculating Molar Ion Fractions ===

raw_counts <- y$counts

molar_ion_fractions <- sweep(raw_counts, 2, colSums(raw_counts), FUN = "/")

cat("Molar ion fraction range:", range(molar_ion_fractions), "\n")

## Molar ion fraction range: 0 0.3637007

cat("Sample sums (should be 1):", 
    unique(round(colSums(molar_ion_fractions), 10)), "\n")

## Sample sums (should be 1): 1

Scale for Voom

Purpose: Convert molar ion fractions to count-like scale for voom transformation.

Approach: Multiply fractions by 1e9 to create numerically stable values that voom can process while maintaining the TIC normalization.

scaled_fractions <- molar_ion_fractions * 1e9

y_tic <- DGEList(counts = scaled_fractions, samples = y$samples)
y_tic$samples$norm.factors <- 1

cat("Scaled molar ion fractions created\n")

## Scaled molar ion fractions created

cat("Library size summary (should be ~1e9):\n")

## Library size summary (should be ~1e9):

print(summary(y_tic$samples$lib.size))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1e+09   1e+09   1e+09   1e+09   1e+09   1e+09

Multidimensional Scaling

Compute MDS Dimensions

Purpose: Reduce dimensionality to visualize major sources of variation in lipid profiles.

Approach: Apply multidimensional scaling to TIC-normalized data, extracting first 4 dimensions.

mds <- plotMDS(y_tic, plot = FALSE)

d <- y_tic$samples
d$dim1 <- mds$x
d$dim2 <- mds$y
d$dim3 <- mds$eigen.vectors[, 3] * sqrt(mds$var.explained[3])
d$dim4 <- mds$eigen.vectors[, 4] * sqrt(mds$var.explained[4])

d$Treatment <- factor(d$Treatment)
d$Genotype <- factor(d$Genotype)

cat("Variance explained by MDS dimensions:\n")

## Variance explained by MDS dimensions:

print(tibble(
  dimension = paste("Dim", 1:4),
  var_explained = round(mds$var.explained[1:4], 4)
))

## # A tibble: 4 × 2
##   dimension var_explained
##   <chr>             <dbl>
## 1 Dim 1            0.337 
## 2 Dim 2            0.170 
## 3 Dim 3            0.116 
## 4 Dim 4            0.0753

MDS: Experimental Factors

Purpose: Identify which experimental and technical factors drive major axes of variation.

p1 <- ggplot(d, aes(x = dim2, y = dim3, color = Treatment)) +
  geom_point(size = 3) +
  theme_classic(base_size = 15) +
  ggtitle("Treatment")

p2 <- ggplot(d, aes(x = dim2, y = dim3, color = injection_order, 
                    shape = Treatment)) +
  geom_point(size = 3) +
  theme_classic(base_size = 15) +
  ggtitle("Injection Order")

p3 <- ggplot(d, aes(x = dim2, y = dim3, color = decimal_time)) +
  geom_point(size = 3) +
  theme_classic(base_size = 15) +
  ggtitle("Collection Time")

p4 <- ggplot(d, aes(x = dim2, y = dim3, color = COLLECTOR)) +
  geom_point(size = 3) +
  theme_classic(base_size = 15) +
  ggtitle("Collector")

p5 <- ggplot(d, aes(x = dim2, y = dim3, color = Genotype)) +
  geom_point(size = 3) +
  theme_classic(base_size = 15) +
  ggtitle("Genotype")

p6 <- d %>%
  mutate(leaf = factor(leaf_tissue)) %>%
  ggplot(aes(x = dim2, y = dim3, color = leaf)) +
  geom_point(size = 3) +
  theme_classic(base_size = 15) +
  ggtitle("Leaf Tissue")

ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)

MDS: Primary Plot

Purpose: Publication-quality MDS showing main biological factors.

base_size <- 30

lipid_MDS_23 <- d %>%
  mutate(leaf = factor(leaf_tissue)) %>%
  ggplot(aes(x = dim2, y = dim3)) +
  ggtitle("MDS Lipids") +
  xlab(paste0("dim2 (", round(100 * mds$var.explained[2]), "%)")) +
  ylab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
  geom_point(aes(fill = leaf, shape = Treatment), size = 4) +
  scale_fill_viridis_d() +
  scale_shape_manual(values = c(21, 25)) +
  guides(
    shape = guide_legend(
      title = element_blank(), 
      override.aes = list(fill = "black"),
      reverse=TRUE
    ),
    fill = guide_legend(
      title = "Leaf",
      order = 2,
      override.aes = list(geom = "point", shape = 22, size = 7)
    )
  ) +
  theme_classic2(base_size = base_size) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_blank(),  # Remove x-axis labels
    axis.ticks.x = element_blank(), # Remove x-axis ticks
    axis.text.y = element_blank(),  # Remove y-axis labels
    axis.ticks.y = element_blank(),  # Remove y-axis ticks
    # legend.position = "left",
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "in"),
    legend.background = element_rect(fill = "transparent")
  )

saveRDS(lipid_MDS_23 , "../results/lipid_MDS_23.rds")

cat("MDS plots saved to output/\n")

## MDS plots saved to output/

print(lipid_MDS_23)

MDS: Dimensions 3 and 4

d %>%
  ggplot(aes(x = dim3, y = dim4, fill = leaf_tissue, shape = Treatment)) +
  xlab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
  ylab(paste0("dim4 (", round(100 * mds$var.explained[4]), "%)")) +
  geom_point(size = 4) +
  scale_fill_viridis_c() +
  scale_shape_manual(values = c(21, 25)) +
  guides(
    shape = guide_legend(
      title = element_blank(), 
      override.aes = list(fill = "black")
    ),
    fill = guide_legend(
      title = "Leaf",
      order = 2,
      override.aes = list(geom = "point", shape = 22, size = 7, reverse = TRUE)
    )
  ) +
  theme_classic2(base_size = 25) +
  theme(
    legend.box = "horizontal",
    legend.position = c(0.9, 0.8),
    legend.text = element_markdown(),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line")
  )

MDS Correlation Analysis

Purpose: Quantify relationships between MDS dimensions and experimental factors.

mds_cor_results <- tibble(
  dimension = c("Dim2", "Dim3", "Dim4"),
  factor = c("Treatment", "Leaf tissue", "Leaf tissue"),
  correlation = c(
    cor(d$dim2, as.numeric(d$Treatment)),
    cor(d$dim3, as.numeric(d$leaf_tissue)),
    cor(d$dim4, as.numeric(d$leaf_tissue))
  ),
  p_value = c(
    cor.test(d$dim2, as.numeric(d$Treatment))$p.value,
    cor.test(d$dim3, d$leaf_tissue)$p.value,
    cor.test(d$dim4, d$leaf_tissue)$p.value
  )
) %>%
  mutate(
    adj_p_value = p.adjust(p_value, method = "fdr"),
    correlation = round(correlation, 3),
    p_value = signif(p_value, 3),
    adj_p_value = signif(adj_p_value, 3)
  )

print(mds_cor_results)

## # A tibble: 3 × 5
##   dimension factor      correlation      p_value adj_p_value
##   <chr>     <chr>             <dbl>        <dbl>       <dbl>
## 1 Dim2      Treatment         0.54  0.000185      0.000277  
## 2 Dim3      Leaf tissue      -0.733 0.0000000233  0.00000007
## 3 Dim4      Leaf tissue       0.346 0.0231        0.0231

Differential Abundance Analysis

Design Matrix

Purpose: Construct design matrix incorporating biological factors and technical covariates.

Approach: Model includes: - Spatial covariates: Plot_Row (field position) - Technical covariate: injection_order (MS drift) - Biological factors: leaf_tissue (centered), Treatment, Genotype - Interactions: leaf_tissue × Treatment, Genotype × Treatment, leaf_tissue × Genotype

d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
d$leaf_tissue_c <- scale(d$leaf_tissue, center = TRUE, scale = FALSE)

design <- model.matrix(
  ~ Plot_Row + injection_order + leaf_tissue_c * Treatment + 
    Genotype * Treatment + leaf_tissue_c * Genotype,
  d
)

cat("Design matrix dimensions:", dim(design), "\n")

## Design matrix dimensions: 43 9

cat("\nCoefficients in model:\n")

## 
## Coefficients in model:

print(colnames(design))

## [1] "(Intercept)"                "Plot_Row"                  
## [3] "injection_order"            "leaf_tissue_c"             
## [5] "Treatment-P"                "GenotypeINV4"              
## [7] "leaf_tissue_c:Treatment-P"  "Treatment-P:GenotypeINV4"  
## [9] "leaf_tissue_c:GenotypeINV4"

Voom Transformation

Purpose: Model mean-variance relationship and calculate precision weights for linear modeling.

Approach: Apply voom without additional normalization since data is already TIC-normalized.

voomR <- voom(y_tic, design = design, normalize.method = "none", plot = TRUE)

cat("Voom transformation complete\n")

## Voom transformation complete

Estimate Block Correlation

Purpose: Account for within-block correlation from field design.

Approach: Define blocks as Treatment × Plot_Column interaction and estimate consensus correlation.

block <- interaction(d$Treatment, d$Plot_Column)

corfit <- duplicateCorrelation(voomR, design, block = block)

cat("Consensus correlation:", corfit$consensus.correlation, "\n")

## Consensus correlation: 0.05080766

Fit Linear Model

Purpose: Fit linear model for each lipid accounting for block structure.

fit <- lmFit(voomR, design, block = block, 
             correlation = corfit$consensus.correlation)

ebfit <- eBayes(fit)

cat("Model fitted:", nrow(fit$coefficients), "lipids ×", 
    ncol(fit$coefficients), "coefficients\n")

## Model fitted: 132 lipids × 9 coefficients

cat("\nSignificant lipids per coefficient (FDR < 0.05):\n")

## 
## Significant lipids per coefficient (FDR < 0.05):

print(colSums(abs(decideTests(ebfit))))

##                (Intercept)                   Plot_Row 
##                        121                          0 
##            injection_order              leaf_tissue_c 
##                         42                          8 
##                Treatment-P               GenotypeINV4 
##                         40                          0 
##  leaf_tissue_c:Treatment-P   Treatment-P:GenotypeINV4 
##                          2                          0 
## leaf_tissue_c:GenotypeINV4 
##                          0

Extract Results

Extract Predictor Effects

Purpose: Extract differential abundance statistics for biological predictors of interest.

#' Extract differential expression results for specified predictors
#'
#' @param ebfit An eBayes fitted model object from limma
#' @param predictor_map Named vector mapping coefficient names to display names
#'
#' @return Data frame with DE results for all specified predictors
extract_predictor_effects <- function(ebfit, predictor_map) {
  coef_names <- colnames(coef(ebfit))
  coef_indices <- match(names(predictor_map), coef_names)
  
  if (any(is.na(coef_indices))) {
    missing <- names(predictor_map)[is.na(coef_indices)]
    stop("Coefficients not found: ", paste(missing, collapse = ", "),
         "\nAvailable: ", paste(coef_names, collapse = ", "))
  }
  
  result_list <- lapply(seq_along(coef_indices), function(i) {
    idx <- coef_indices[i]
    tt <- topTable(ebfit, coef = idx, sort.by = "none", n = Inf)
    
    crit_value <- qt(0.975, ebfit$df.residual + ebfit$df.prior)
    std_errors <- ebfit$stdev.unscaled[, idx] * sqrt(ebfit$s2.post)
    
    tt$upper <- tt$logFC + crit_value * std_errors
    tt$lower <- tt$logFC - crit_value * std_errors
    tt$predictor <- predictor_map[i]
    tt$response <- rownames(tt)
    tt$std_err <- std_errors
    tt
  })
  
  results <- do.call(rbind, result_list)
  rownames(results) <- NULL
  
  results %>%
    select(predictor, response, everything()) %>%
    arrange(adj.P.Val)
}

Map Coefficients to Predictors

predictor_map <- c(
  "leaf_tissue_c" = "Leaf",
  "Treatment-P" = "-P",
  "leaf_tissue_c:Treatment-P" = "Leaf:-P"
)

cat("Available coefficients:\n")

## Available coefficients:

print(colnames(coef(ebfit)))

## [1] "(Intercept)"                "Plot_Row"                  
## [3] "injection_order"            "leaf_tissue_c"             
## [5] "Treatment-P"                "GenotypeINV4"              
## [7] "leaf_tissue_c:Treatment-P"  "Treatment-P:GenotypeINV4"  
## [9] "leaf_tissue_c:GenotypeINV4"

effects_df <- extract_predictor_effects(ebfit, predictor_map)

cat("\nTotal tests:", nrow(effects_df), "\n")

## 
## Total tests: 396

cat("Tests per predictor:\n")

## Tests per predictor:

print(table(effects_df$predictor))

## 
##      -P    Leaf Leaf:-P 
##     132     132     132

cat("\nSignificant per predictor (FDR < 0.05):\n")

## 
## Significant per predictor (FDR < 0.05):

print(table(effects_df$predictor[effects_df$adj.P.Val < 0.05]))

## 
##      -P    Leaf Leaf:-P 
##      40       8       2

Classify Differential Lipids

Purpose: Apply two-tier classification system for differential lipids.

Approach: - Tier 1: Significant (FDR < 0.05) - Tier 2: High-confidence (significant + effect size threshold)

Effect size thresholds: - Leaf predictors: |logFC| > 0.5 - Categorical predictors: |logFC| > 1.5

#' Classify differential lipids with predictor-specific thresholds
classify_differential_lipids <- function(effects_df) {
  leaf_predictors <- c("Leaf", "Leaf:-P")
  
  effects_df %>%
    mutate(
      is_DL = adj.P.Val < 0.05,
      is_hiconf_DL = case_when(
        predictor %in% leaf_predictors & is_DL & abs(logFC) > 0.5 ~ TRUE,
        predictor == "-P" & is_DL & abs(logFC) > 1.5 ~ TRUE,
        .default = FALSE
      ),
      regulation = case_when(
        !is_hiconf_DL ~ "Not significant",
        logFC > 0 ~ "Upregulated",
        logFC < 0 ~ "Downregulated",
        TRUE ~ "Not significant"
      ),
      neglogP = -log10(adj.P.Val),
      label = if_else(is_DL, response, "")
    )
}

effect_order <- c("Leaf", "-P", "Leaf:-P")

effects_df <- effects_df %>%
  mutate(predictor = factor(predictor, levels = effect_order)) %>%
  classify_differential_lipids()

effects_df$label <- sub("_", "", effects_df$label, perl = TRUE)
effects_df$label <- sub("_", ":", effects_df$label, perl = TRUE)

Summary Statistics

cat("=== Classification Summary ===\n")

## === Classification Summary ===

cat("Total tests:", nrow(effects_df), "\n\n")

## Total tests: 396

cat("Tier 1 - Significant DLs (FDR < 0.05):\n")

## Tier 1 - Significant DLs (FDR < 0.05):

print(table(effects_df$predictor, effects_df$is_DL))

##          
##           FALSE TRUE
##   Leaf      124    8
##   -P         92   40
##   Leaf:-P   130    2

cat("\nTier 2 - High-confidence DLs:\n")

## 
## Tier 2 - High-confidence DLs:

print(table(effects_df$predictor, effects_df$is_hiconf_DL))

##          
##           FALSE TRUE
##   Leaf      124    8
##   -P        105   27
##   Leaf:-P   130    2

cat("\nDirection of significant effects:\n")

## 
## Direction of significant effects:

print(with(effects_df %>% filter(is_DL), 
           table(predictor, regulation)))

##          regulation
## predictor Downregulated Not significant Upregulated
##   Leaf                3               0           5
##   -P                 12              13          15
##   Leaf:-P             1               0           1

Export Results

write.csv(
  effects_df,
  file = "~/Desktop/lipid_differential_abundance_TIC_normalized.csv",
  row.names = FALSE
)

write.csv(
  molar_ion_fractions,
  file = "~/Desktop/lipid_molar_ion_fractions.csv",
  row.names = TRUE
)

write.csv(
  voomR$E,
  file = "~/Desktop/voom_log2_TIC_normalized_lipids.csv",
  row.names = TRUE
)

cat("=== Analysis Complete ===\n")
cat("Results exported to Desktop\n")

Volcano Plots

Custom Legend Function

Purpose: Create custom legend key displaying line segments instead of points.

#' Custom key glyph for legend
#'
#' @param data Aesthetic data for the legend key
#' @param params Additional parameters
#' @param size Size of the key
#'
#' @return A grid grob object
draw_key_custom <- function(data, params, size) {
  colour <- data$colour
  data <- data[data$colour == colour, ]
  grid::segmentsGrob(
    0, 1 / 2, 1, 1 / 2,
    gp = grid::gpar(
      col = data$colour,
      lwd = data$linewidth * 4
    )
  )
}

Prepare Plot Data

Purpose: Join differential abundance results with lipid class annotations.

plot_data <- effects_df %>%
  droplevels() %>%
  inner_join(sp, by = c(response = "colname"))

label_data <- plot_data %>%
  filter(is_hiconf_DL)

force <- 1
label_size <- 6
base_size <- 30

Three-Panel Volcano Plot

Purpose: Visualize differential abundance across all three predictors with faceted layout.

Approach: Create volcano plots for Leaf, -P, and Leaf:-P predictors with staged label placement to avoid overlaps. Downregulated lipids labeled on left, upregulated on right.

Expected outcome: Publication-quality three-panel figure showing effect sizes and significance levels with lipid class color coding.

force <- 3

predictor_labels <- c( "Leaf", "-P", "Leaf × P") 
names(predictor_labels) <- levels(plot_data$predictor)
 

lipid_volcano_3 <- plot_data %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Not significant", "Upregulated")
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "Leaf", regulation == "Downregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    segment.color = "grey",
    min.segment.length = 0,
    xlim = c(NA, -0.5),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "Leaf", regulation == "Upregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    segment.color = "grey",
    min.segment.length = 0,
    xlim = c(0.5, NA),
    #max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "-P", regulation == "Downregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    direction = "y",
    segment.color = "grey",
    xlim = c(NA, -5),
    #max.overlaps = 0,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "-P", regulation == "Upregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    direction = "y",
    segment.color = "grey",
    xlim = c(3, NA),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "Leaf:-P", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    segment.color = "grey",
    min.segment.length = 0,
    max.overlaps = 2,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("darkblue", "orange2", "darkred"),
    labels = c("Glycoglycerolipid", "Neutral lipid", "Glycerophospholipid")
  ) +
  facet_wrap(. ~ predictor,
             labeller = labeller(predictor = predictor_labels), 
             scales = "free_x", ncol = 3) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5)),
    fill = "none"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  scale_x_continuous(expand = expansion(mult = c(0.5, 0.3))) +
  theme_classic2(base_size = base_size) +
  theme(
    plot.title = element_blank(),
    strip.text = element_markdown(size = 35),
    strip.background = element_blank(),
    legend.position = c(0.85, 0.95),
    legend.background = element_rect(fill = "transparent"),
    axis.title.x = element_blank(),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 20),
    axis.title.y = element_text(margin = margin(r = 5,"pt")),
    # plot.margin = margin(5.5, 5.5, 0, 5.5, "pt"),
  )
ggsave(
  filename = "~/Desktop/lipid_volcano_3_TIC.png",
  plot = lipid_volcano_3,
  height = 6,
  width = 12,
  units = "in",
  dpi = 150
)

saveRDS(lipid_volcano_3, "../results/lipid_volcano_3_TIC.rds")

print(lipid_volcano_3)

Two-Panel Volcano Plot

Purpose: Focused visualization of Leaf and -P effects without interaction term.

Approach: Subset to primary biological predictors and create two-panel layout with optimized label placement.

plot_data_2 <- effects_df %>%
  filter(predictor != "Leaf:-P") %>%
  droplevels() %>%
  inner_join(sp, by = c(response = "colname"))

volcano_2 <- plot_data_2 %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Bellow threshold", "Upregulated")
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "Leaf", regulation == "Downregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    segment.color = "grey",
    min.segment.length = 0,
    xlim = c(NA, -0.5),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "Leaf", regulation == "Upregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    segment.color = "grey",
    min.segment.length = 0,
    xlim = c(0.5, NA),
    #max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "-P", regulation == "Downregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    direction = "y",
    segment.color = "grey",
    xlim = c(NA, -5),
    #max.overlaps = 0,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = label_data %>% 
      filter(predictor == "-P", regulation == "Upregulated", 
             !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = label_size,
    direction = "y",
    segment.color = "grey",
    xlim = c(3, NA),
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("darkblue", "orange2", "darkred"),
    labels = c("Glycoglycerolipid", "Neutral lipid", "Glycerophospholipid")
  ) +
  facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5), 
                         reverse = TRUE),
    fill = "none"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.3))) +
  scale_x_continuous(expand = expansion(mult = c(0.3, 0.02))) +
  theme_classic2(base_size = base_size) +
  theme(
    plot.title = element_blank(),
    strip.text = element_markdown(size = 35),
    strip.background = element_blank(),
    axis.title = element_text(size = 25),
    legend.position = c(0.19, 0.99),
    legend.background = element_rect(fill = "transparent"),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 20),
  )

print(volcano_2)

saveRDS(volcano_2, "../results/lipid_volcano_2_TIC.rds")
ggsave(
  filename = "~/Desktop/lipid_volcano_2_TIC.png",
  plot = volcano_2,
  height = 6,
  width = 10,
  units = "in",
  dpi = 150
)

One-Panel Volcano Plot: Interaction Term

Purpose: Highlight lipids showing differential response to phosphorus treatment across leaf developmental stages.

Approach: Isolate Leaf:-P interaction term and create single volcano plot without x-axis label restrictions.

plot_data_leafP <- effects_df %>%
  filter(predictor == "Leaf:-P") %>%
  droplevels() %>%
  inner_join(sp, by = c(response = "colname"))

volcano_leafP <- plot_data_leafP %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  ylim(0, 5) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Bellow threshold", "Upregulated")
  ) +
  geom_text_repel(
    data = plot_data_leafP %>% 
      filter(regulation == "Downregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = 7,
    segment.color = "grey",
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  geom_text_repel(
    data = plot_data_leafP %>% 
      filter(regulation == "Upregulated", !is.na(label), label != ""),
    force = force,
    fontface = "bold",
    size = 7,
    segment.color = "grey",
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("orange2", "darkred", "darkblue"),
    labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
  ) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5), 
                         reverse = TRUE),
    fill = "none"
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.3))) +
  theme_classic2(base_size = base_size) +
  theme(
    plot.title = element_blank(),
    strip.text = element_markdown(size = 35),
    legend.position = "none",
    legend.background = element_rect(fill = "transparent"),
    axis.title = element_text(size = 25),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 20),
    axis.title.y = element_text(margin = margin(r = -5)),
    strip.background = element_blank()
  )

print(volcano_leafP)

ggsave(
  filename = "~/Desktop/lipid_volcano_leafP_TIC.png",
  plot = volcano_leafP,
  height = 7,
  width = 7,
  units = "in",
  dpi = 150
)

Export High-Confidence Lipids

write.csv(
  label_data,
  file = "~/Desktop/high_confidence_DLs_TIC.csv",
  row.names = FALSE
)

cat("Volcano plots saved:\n")

## Volcano plots saved:

cat("  - lipid_volcano_3_TIC.png (3-panel, 12×6 in, 300 dpi)\n")

##   - lipid_volcano_3_TIC.png (3-panel, 12×6 in, 300 dpi)

cat("  - lipid_volcano_2_TIC.png (2-panel, 5×3 in, 150 dpi)\n")

##   - lipid_volcano_2_TIC.png (2-panel, 5×3 in, 150 dpi)

cat("  - lipid_volcano_leafP_TIC.png (1-panel, 7×7 in, 150 dpi)\n")

##   - lipid_volcano_leafP_TIC.png (1-panel, 7×7 in, 150 dpi)

cat("  - high_confidence_DLs_TIC.csv\n")

##   - high_confidence_DLs_TIC.csv

Interaction Plots: Leaf × Phosphorus

Identify Leaf:-P Interaction Lipids

Purpose: Extract high-confidence lipids showing significant interaction between leaf developmental stage and phosphorus treatment.

interaction_lipids <- effects_df %>%
  filter(predictor == "Leaf:-P", is_hiconf_DL) %>%
  arrange(adj.P.Val)

cat("High-confidence Leaf:-P interaction lipids:", 
    nrow(interaction_lipids), "\n")

## High-confidence Leaf:-P interaction lipids: 2

print(interaction_lipids %>% 
        select(response, logFC, adj.P.Val, regulation))

##   response      logFC   adj.P.Val    regulation
## 1  PC_34_2 -0.5753961 0.008235879 Downregulated
## 2  TG_54_9  1.2513465 0.008235879   Upregulated

interaction_lipid_ids <- interaction_lipids$response

Data Transformation for Visualization

Purpose: Convert analysis-ready data back to biologically interpretable units for visualization.

Approach: The statistical analysis uses voom-normalized log2(CPM) values. For visualization, we convert these back to total ion current (TIC) fractions, which represent each lipid’s proportion of the total detected lipid pool.

Three possible input formats: 1. Raw counts: Peak areas directly from MS-DIAL 2. TIC fractions: Proportions summing to 1.0 per sample (molar ion fractions) 3. log2(CPM): Voom-normalized values used in statistical models

cat("=== Available Data Formats ===\n")

## === Available Data Formats ===

cat("1. Raw counts (y$counts): Peak areas from MS detector\n")

## 1. Raw counts (y$counts): Peak areas from MS detector

cat("2. TIC fractions: Proportions of total signal per sample\n")

## 2. TIC fractions: Proportions of total signal per sample

cat("3. log2(CPM) (voomR$E): Voom-normalized for statistics\n\n")

## 3. log2(CPM) (voomR$E): Voom-normalized for statistics

cat("Statistical analysis used:", class(voomR$E), "\n")

## Statistical analysis used: matrix array

cat("For visualization, we convert to TIC fractions\n")

## For visualization, we convert to TIC fractions

Conversion Functions

Purpose: Provide explicit functions to convert between data formats.

#' Convert raw counts to TIC fractions
#'
#' @param count_matrix Matrix with raw peak areas (lipids × samples)
#'
#' @return Matrix of TIC fractions (0-1 scale)
counts_to_TIC_fraction <- function(count_matrix) {
  TIC_fractions <- sweep(count_matrix, 2, colSums(count_matrix), FUN = "/")
  
  sample_sums <- colSums(TIC_fractions)
  if (!all(abs(sample_sums - 1) < 1e-10)) {
    warning("TIC fractions don't sum to 1. Range: ", 
            paste(range(sample_sums), collapse = " - "))
  }
  
  TIC_fractions
}

#' Convert log2(CPM) to TIC fractions
#'
#' @param log2_cpm_matrix Matrix with log2(CPM) values from voom
#' @param pseudo_count Pseudo-count added before log transformation
#'
#' @return Matrix of TIC fractions (0-1 scale)
log2cpm_to_TIC_fraction <- function(log2_cpm_matrix, pseudo_count = 1e-9) {
  # Back-transform from log2(CPM)
  cpm <- 2^log2_cpm_matrix - pseudo_count
  
  # CPM to counts (proportional, scaling doesn't matter for fractions)
  # Since CPM = (counts/library_size) * 1e6
  # We can work directly with CPM as pseudo-counts
  counts_proxy <- cpm
  
  # Convert to TIC fractions
  TIC_fractions <- sweep(counts_proxy, 2, colSums(counts_proxy), FUN = "/")
  
  TIC_fractions
}

#' Convert TIC fractions to log10 for visualization
#'
#' @param TIC_fraction_matrix Matrix of TIC fractions (0-1 scale)
#' @param offset Small value to avoid log(0)
#'
#' @return Matrix of log10(TIC fraction)
TIC_fraction_to_log10 <- function(TIC_fraction_matrix, offset = 1e-9) {
  log10(TIC_fraction_matrix + offset)
}

Apply Conversions

Purpose: Transform voom-normalized data to TIC fractions for interpretable visualization.

Approach: 1. Start with voomR$E (log2 CPM from statistical analysis) 2. Back-transform to pseudo-counts 3. Convert to TIC fractions (proportions) 4. Apply log10 transformation for plotting

cat("=== Data Transformation Pipeline ===\n")

## === Data Transformation Pipeline ===

cat("Input: voomR$E (log2 CPM)\n")

## Input: voomR$E (log2 CPM)

# Step 1: Convert log2(CPM) to TIC fractions
TIC_fraction <- log2cpm_to_TIC_fraction(
  log2_cpm_matrix = voomR$E,
  pseudo_count = 0
)

cat("Intermediate: TIC fractions\n")

## Intermediate: TIC fractions

cat("  Range:", range(TIC_fraction, na.rm = TRUE), "\n")

##   Range: 5e-10 0.3637006

cat("  Sample sums:", 
    range(colSums(TIC_fraction, na.rm = TRUE)), "\n")

##   Sample sums: 1 1

# Step 2: Log10 transform for visualization
log10_TIC <- TIC_fraction_to_log10(
  TIC_fraction_matrix = TIC_fraction,
  offset = 1e-9
)

cat("Output: log10(TIC fraction)\n")

## Output: log10(TIC fraction)

cat("  Range:", range(log10_TIC, na.rm = TRUE), "\n")

##   Range: -8.823909 -0.4392559

Plotting Function: Lipid Trajectories

Purpose: Visualize lipid abundance trajectories across leaf stages, separated by phosphorus treatment.

Input format: The function accepts any of: - log10(TIC fraction) - recommended for visualization - log2(CPM) - directly from voom - TIC fraction - will apply log10 internally

#' Plot lipid trajectories by treatment
#'
#' @param lipid_data Matrix or data frame with lipid abundances
#'   Can be: log10(TIC fraction), log2(CPM), or TIC fraction
#' @param lipid_ids Character vector of lipid IDs to plot
#' @param metadata Data frame with sample information (tube, leaf_tissue, 
#'   Treatment columns required)
#' @param data_type Character: "log10_TIC", "log2_CPM", or "TIC_fraction"
#'
#' @return ggplot object with treatment-colored trajectories
plot_lipid_by_treatment <- function(lipid_data,
                                     lipid_ids,
                                     metadata,
                                     data_type = "log10_TIC") {
  
  # Validate data type
  stopifnot(
    data_type %in% c("log10_TIC", "log2_CPM", "TIC_fraction")
  )
  
  # Extract and format data
  plot_data <- lipid_data %>%
    as.data.frame() %>%
    tibble::rownames_to_column("lipid") %>%
    filter(lipid %in% lipid_ids) %>%
    pivot_longer(
      cols = -lipid,
      names_to = "tube", 
      values_to = "abundance"
    ) %>%
    left_join(
      metadata %>% select(tube, leaf_tissue, Treatment),
      by = "tube"
    ) %>%
    filter(!is.na(Treatment)) %>%
    mutate(
      Treatment = factor(
        Treatment,
        levels = c("High_P", "Low_P"),
        labels = c("+P", "-P")
      )
    )
  
  # Format lipid labels: PC_34_2 -> PC 34:2
  plot_data$lipid_label <- sub("_", "", plot_data$lipid, perl = TRUE)
  plot_data$lipid_label <- sub("_", ":", plot_data$lipid_label, perl = TRUE)
  
  # Maintain input order
  lipid_levels <- sub("_", "", lipid_ids, perl = TRUE)
  lipid_levels <- sub("_", ":", lipid_levels, perl = TRUE)
  
  plot_data$lipid_label <- factor(
    plot_data$lipid_label,
    levels = lipid_levels
  )
  
  # Calculate summary statistics
  summary_data <- plot_data %>%
    group_by(lipid, lipid_label, leaf_tissue, Treatment) %>%
    summarise(
      mean = mean(abundance, na.rm = TRUE),
      se = sd(abundance, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    )
  
  # Set y-axis label based on input data type
  y_label <- switch(
    data_type,
    "log10_TIC" = expression(log[10]("TIC fraction")),
    "log2_CPM" = expression(log[2]("CPM")),
    "TIC_fraction" = "TIC fraction"
  )
  
  # Plot parameters
  base_size <- 35
  pd <- position_dodge(width = 0.2)
  
  # Create plot
  summary_data %>%
    ggplot(aes(
      x = leaf_tissue,
      y = mean,
      color = Treatment,
      fill = Treatment,
      linetype = Treatment,
      shape = Treatment,
      group = Treatment
    )) +
    geom_line(linewidth = 1.5) +
    geom_point(position = pd, size = 5) +
    geom_errorbar(
      aes(ymin = mean - se, ymax = mean + se),
      position = pd,
      width = 0.2,
      linewidth = 1,
      linetype = "solid"
    ) +
    scale_color_manual(
      values = c("+P" = "gold", "-P" = "purple4")
    ) +
    scale_fill_manual(
      values = c("+P" = "gold", "-P" = "purple4")
    ) +
    scale_linetype_manual(
      values = c("+P" = "solid", "-P" = "dashed")
    ) +
    scale_shape_manual(
      values = c("+P" = 19, "-P" = 25)
    ) +
    facet_wrap(~ lipid_label, scales = "free_y", ncol = 2) +
    labs(
      x = "Leaf",
      y = y_label
    ) +
    scale_y_continuous(
      expand = expansion(add = c(0.1, 0.1))
    ) +
    guides(
      fill = "none",
      linetype = "none",
      shape = guide_legend(
        override.aes = list(
          fill = c("gold","purple4"),
          color= c("gold","purple4"),
          linetype = "solid",
          size = 5)
      ),
    ) +
    theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      strip.text = element_text(face = "bold", size = 25),
      # legend.position = c(0.1, 0.95),
      legend.title = element_blank(),
      legend.background = element_rect(fill = "transparent")
    )
}

Plot Leaf:-P Interaction Lipids

Purpose: Visualize high-confidence lipids showing significant interaction between leaf stage and phosphorus treatment.

Approach: Plot log10(TIC fraction) to show proportions on interpretable scale. Divergent trajectories between treatments indicate interaction.

Expected outcome: Treatment-specific responses across leaf developmental gradient, revealing context-dependent regulation.

#' Create horizontal 6-panel lipid trajectory plot with lipid class colors
#'
#' @param lipid_data Matrix with lipid abundances (log10 TIC fraction)
#' @param effects_df Data frame with differential abundance results
#' @param metadata Sample metadata with tube, leaf_tissue, Treatment
#' @param sp Data frame with lipid class annotations
#' @param data_type Character indicating input data format
#'
#' @return ggplot object with 6-panel horizontal layout
create_trajectory_6panel <- function(lipid_data,
                                      effects_df,
                                      metadata,
                                      sp,
                                      data_type = "log10_TIC",
                                     nrow=1,
                                     ncol=6) {
  
  # Select top lipid per predictor×regulation combination
  selected_lipids <- effects_df %>%
    filter(is_hiconf_DL) %>%
    group_by(predictor, regulation) %>%
    slice_min(adj.P.Val, n = 1, with_ties = FALSE) %>%
    ungroup() %>%
    mutate(
      panel_label = case_when(
        predictor == "Leaf" & regulation == "Downregulated" ~ "Leaf Down",
        predictor == "Leaf" & regulation == "Upregulated" ~ "Leaf Up",
        predictor == "-P" & regulation == "Downregulated" ~ "-P Down",
        predictor == "-P" & regulation == "Upregulated" ~ "-P Up",
        predictor == "Leaf:-P" & regulation == "Downregulated" ~ 
          "Negative\nLeaf × -P",
        predictor == "Leaf:-P" & regulation == "Upregulated" ~ 
          "Positive\nLeaf × -P"
      )
    ) %>%
    arrange(predictor, desc(regulation)) %>%
    select(response, predictor, regulation, panel_label, logFC, adj.P.Val)
  
  # Join with lipid class annotations
  selected_lipids <- selected_lipids %>%
    left_join(sp, by = c("response" = "colname"))
  
  # Format lipid labels for titles
  selected_lipids$lipid_label <- sub("_", "", selected_lipids$response, 
                                       perl = TRUE)
  selected_lipids$lipid_label <- sub("_", ":", selected_lipids$lipid_label, 
                                       perl = TRUE)
  
  # Combine panel label with lipid name
  selected_lipids$full_label <- paste0(selected_lipids$panel_label, "\n", 
                                        selected_lipids$lipid_label)
  
  # Extract abundances and join with metadata
  plot_data <- lipid_data %>%
    as.data.frame() %>%
    tibble::rownames_to_column("lipid") %>%
    filter(lipid %in% selected_lipids$response) %>%
    pivot_longer(
      cols = -lipid,
      names_to = "tube",
      values_to = "abundance"
    ) %>%
    left_join(
      metadata %>% select(tube, leaf_tissue, Treatment),
      by = "tube"
    ) %>%
    filter(!is.na(Treatment)) %>%
    mutate(
      Treatment = factor(
        Treatment,
        levels = c("High_P", "Low_P"),
        labels = c("+P", "-P")
      )
    ) %>%
    left_join(
      selected_lipids %>% select(response, full_label, head_group),
      by = c("lipid" = "response")
    )
  
  # Set panel order
  panel_order <- c(
    paste0("Leaf Down\n", unique(selected_lipids$lipid_label[
      selected_lipids$panel_label == "Leaf Down"])),
    paste0("Leaf Up\n", unique(selected_lipids$lipid_label[
      selected_lipids$panel_label == "Leaf Up"])),
    paste0("-P Down\n", unique(selected_lipids$lipid_label[
      selected_lipids$panel_label == "-P Down"])),
    paste0("-P Up\n", unique(selected_lipids$lipid_label[
      selected_lipids$panel_label == "-P Up"])),
    paste0("Negative\nLeaf × -P\n", unique(selected_lipids$lipid_label[
      selected_lipids$panel_label == "Negative\nLeaf × -P"])),
    paste0("Positive\nLeaf × -P\n", unique(selected_lipids$lipid_label[
      selected_lipids$panel_label == "Positive\nLeaf × -P"]))
  )
  
  plot_data$full_label <- factor(
    plot_data$full_label,
    levels = panel_order
  )
  
  # Calculate summary statistics
  summary_data <- plot_data %>%
    group_by(lipid, full_label, head_group, leaf_tissue, Treatment) %>%
    summarise(
      mean = mean(abundance, na.rm = TRUE),
      se = sd(abundance, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    )
  
  # Y-axis label
  y_label <- switch(
    data_type,
    "log10_TIC" = expression(log[10]("fraction")),
    "log2_CPM" = expression(log[2]("CPM")),
    "TIC_fraction" = "TIC fraction"
  )
  
  # Plot
  base_size <- 30
  pd <- position_dodge(width = 0.2)
  
  summary_data %>%
    ggplot(aes(
      x = leaf_tissue,
      y = mean,
      color = head_group,
      fill = head_group,
      linetype = Treatment,
      shape = Treatment,
      group = Treatment
    )) +
    geom_line(linewidth = 1.2) +
    geom_point(position = pd, size = 4) +
    geom_errorbar(
      aes(ymin = mean - se, ymax = mean + se),
      position = pd,
      width = 0.2,
      linewidth = 0.8,
      linetype = "solid"
    ) +
    scale_color_manual(
      name = "",
      values = c("glycolipid" = "darkblue", "neutral" = "orange2", 
                 "phospholipid" = "darkred"),
      labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
    ) +
    scale_fill_manual(
      name = "",
      values = c("glycolipid" = "darkblue", "neutral" = "orange2", 
                 "phospholipid" = "darkred"),
      labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
    ) +
    scale_linetype_manual(
      values = c("+P" = "solid", "-P" = "dashed")
    ) +
    scale_shape_manual(
      values = c("+P" = 19, "-P" = 25)
    ) +
    facet_wrap(
      ~ full_label,
      scales = "free_y",
      nrow = nrow,
      ncol = ncol
    ) +
    labs(
      x = "Leaf",
      y = y_label
    ) +
    scale_y_continuous(
      expand = expansion(mult = c(0.1, 0.1))
    ) +
    guides(
      color = "none",
      fill = "none",
      linetype = guide_legend(
        title = element_blank(),
        override.aes = list(color = "black", linewidth = 1.5)
      ),
      shape = guide_legend(
        title = element_blank(),
        override.aes = list(fill = "black", color = "black", size = 5)
      )
    ) +
    theme_classic(base_size = base_size) +
    theme(
      strip.background = element_blank(),
      strip.text = element_text( size = 25),
      legend.position = "top",
      legend.title = element_blank(),
      legend.background = element_rect(fill = "transparent"),
      panel.spacing = unit(0.5, "lines")
    )
}


p_trajectory_6 <- create_trajectory_6panel(
  lipid_data = log10_TIC,
  effects_df = effects_df,
  metadata = sampleInfo,
  sp = sp,
  data_type = "log10_TIC"
)

ggsave(
  filename = "~/Desktop/lipid_trajectory_6panel.png",
  plot = p_trajectory_6,
  width = 18,
  height = 6,
  dpi = 300
)

saveRDS(p_trajectory_6, "../results/lipid_trajectory_6panel.rds")

# Print Selected Lipids
selected_for_plot <- effects_df %>%
  filter(is_hiconf_DL) %>%
  group_by(predictor, regulation) %>%
  slice_min(adj.P.Val, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  left_join(sp, by = c("response" = "colname")) %>%
  arrange(predictor, desc(regulation)) %>%
  select(predictor, regulation, response, class, head_group, logFC, adj.P.Val)

cat("\n=== Lipids Selected for 6-Panel Plot ===\n")

## 
## === Lipids Selected for 6-Panel Plot ===

print(selected_for_plot)

## # A tibble: 6 × 7
##   predictor regulation    response  class head_group    logFC adj.P.Val
##   <fct>     <chr>         <chr>     <chr> <chr>         <dbl>     <dbl>
## 1 Leaf      Upregulated   LPC_18_3  LPC   phospholipid  1.47   0.00200 
## 2 Leaf      Downregulated DG_36_4   DG    neutral      -0.560  0.00164 
## 3 -P        Upregulated   DGGA_36_4 DGGA  glycolipid    2.03   0.00139 
## 4 -P        Downregulated LPC_18_2  LPC   phospholipid -3.77   0.000567
## 5 Leaf:-P   Upregulated   TG_54_9   TG    neutral       1.25   0.00824 
## 6 Leaf:-P   Downregulated PC_34_2   PC    phospholipid -0.575  0.00824

create_trajectory_6panel(
  lipid_data = log10_TIC,
  effects_df = effects_df,
  metadata = sampleInfo,
  nrow=2,
  ncol=3,
  sp = sp,
  data_type = "log10_TIC"
)

# Session Info

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggtext_0.1.2  ggpubr_0.6.2  ggrepel_0.9.6 forcats_1.0.1 tidyr_1.3.1  
## [6] dplyr_1.1.4   ggplot2_4.0.0 edgeR_4.7.6   limma_3.65.7 
## 
## loaded via a namespace (and not attached):
##  [1] utf8_1.2.6         sass_0.4.10        generics_0.1.4     rstatix_0.7.3     
##  [5] xml2_1.4.0         stringi_1.8.7      lattice_0.22-7     digest_0.6.37     
##  [9] magrittr_2.0.4     evaluate_1.0.5     grid_4.5.1         RColorBrewer_1.1-3
## [13] fastmap_1.2.0      jsonlite_2.0.0     backports_1.5.0    Formula_1.2-5     
## [17] purrr_1.1.0        viridisLite_0.4.2  scales_1.4.0       textshaping_1.0.4 
## [21] jquerylib_0.1.4    abind_1.4-8        cli_3.6.5          rlang_1.1.6       
## [25] litedown_0.7       commonmark_2.0.0   cowplot_1.2.0      withr_3.0.2       
## [29] cachem_1.1.0       yaml_2.3.10        tools_4.5.1        ggsignif_0.6.4    
## [33] locfit_1.5-9.12    broom_1.0.10       vctrs_0.6.5        R6_2.6.1          
## [37] lifecycle_1.0.4    stringr_1.5.2      car_3.1-3          ragg_1.5.0        
## [41] pkgconfig_2.0.3    pillar_1.11.1      bslib_0.9.0        gtable_0.3.6      
## [45] glue_1.8.0         Rcpp_1.1.0         systemfonts_1.3.1  statmod_1.5.1     
## [49] xfun_0.53          tibble_3.3.0       tidyselect_1.2.1   rstudioapi_0.17.1 
## [53] knitr_1.50         farver_2.1.2       htmltools_0.5.8.1  labeling_0.4.3    
## [57] rmarkdown_2.30     carData_3.0-5      compiler_4.5.1     S7_0.2.0          
## [61] markdown_2.0       gridtext_0.1.5

Lipid Differential Abundance Analysis

Maize Genetics Analysis

2025-11-10