Overview

This analysis performs differential expression analysis on lipid profiles from maize leaf samples across multiple experimental factors:

  • Leaf tissue position: Continuous gradient from apical to basal (leaf 1-4)
  • Phosphorus treatment: High P (+P) vs Low P (-P)
  • Genotype: Control (CTRL) vs Inversion 4m (INV4M)

The analysis uses limma-voom to identify lipids that respond to:

  1. leaf: Leaf tissue position effect
  2. -P: Phosphorus deficiency effect
  3. Inv4m: Genotype effect
  4. -P:Inv4m: Interaction between P treatment and genotype

Fold change thresholds:

  • Leaf effect: ±0.7 log2FC
  • Other effects: ±2 log2FC

Libraries

library(edgeR)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyr)
library(forcats)
library(ggrepel)
library(ggpubr)
library(ggtext)

Data Loading and Preprocessing

Define Internal Standards to Exclude

# Internal standards and odd-chain lipids to exclude
to_exclude <- c(
  "DG_12_0", "LPC_17_0", "LPE_17_1",
  "PC_25_0", "PG_17_0", "SM_35_1",
  "Sphingosine_17_1", "TG_17_0",
  "TG_57_6"  # exclude the odd number lipid
)

Load Raw Lipid Data

# Load MS-Dial raw data with new internal standard integration
lipid_csv <- "../data/PSU_RawData_MSDial_NewStdInt_240422.csv"

raw <- read.csv(lipid_csv, na.strings = c("", "#N/A", "NA", "Inf"))

# Remove internal standards and quality control samples
to_exclude <- to_exclude[to_exclude %in% colnames(raw)]

raw <- raw %>%
  select(-all_of(to_exclude)) %>%
  filter(!grepl("Methanol", sampleID)) %>%
  filter(!grepl("Pool", sampleID))

cat("Loaded", nrow(raw), "samples\n")
## Loaded 43 samples

Load Sample Metadata

# Load plant phenotypic data
plant_csv <- "/Users/fvrodriguez/Desktop/Desktop/22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv"

psu <- read.csv(plant_csv) %>%
  rename(Genotype = "Who.What", rowid = "P22.") %>%
  filter(Genotype %in% c("CTRL", "INV4M")) %>%
  droplevels()

# Load RNA-seq metadata
sampleInfo <- read.csv('../data/inv4mRNAseq_metadata.csv') %>%
  rename(Genotype = genotype) %>%
  rename(rowid = row)

# Merge metadata
sampleInfo <- psu %>%
  select(rowid, Rep, Plot_Row, Plot_Column, DTA, DTS) %>%
  inner_join(sampleInfo) %>%
  filter(!is.na(DTA))

# Create leaf position groups
sampleInfo$leaf_group <- NA
sampleInfo$leaf_group[sampleInfo$leaf_tissue < 3] <- "apical"
sampleInfo$leaf_group[sampleInfo$leaf_tissue > 2] <- "basal"

# Standardize sample IDs
sampleInfo$tube <- gsub("L|R", "S", sampleInfo$tube, perl = TRUE)
sampleInfo$rowid <- sampleInfo$row

cat("Sample info loaded for", nrow(sampleInfo), "samples\n")
## Sample info loaded for 64 samples

Merge Lipid Data with Metadata

# Extract tube ID from raw data
raw$tube <- gsub(".*_|.raw", "", raw$sampleID, perl = TRUE)
raw$tube <- gsub("L|R", "S", raw$tube, perl = TRUE)

# Merge phenotypic data with lipid measurements
pheno <- sampleInfo %>%
  select(rowid, tube, Rep, Plot:Plot_Row, Treatment:leaf_tissue, leaf_group) %>%
  inner_join(
    raw %>%
      select(tube, DGDG_34_0:TG_58_5)
  ) %>%
  mutate(block = as.factor(Rep)) %>%
  droplevels() %>%
  pivot_wider(
    names_from = Rep,
    values_from = Rep,
    names_prefix = "block_",
    values_fn = function(x) 1,
    values_fill = 0
  ) %>%
  select(rowid, tube:Plot_Row, block, block_6:block_12, everything()) %>%
  mutate(Treatment = factor(Treatment, levels = c("High_P", "Low_P")))

# Simplify treatment labels
levels(pheno$Treatment) <- c("+P", "-P")

# Remove problematic sample
pheno <- pheno[-29, ]  # what's wrong with this sample?

cat("Final dataset:", nrow(pheno), "samples\n")
## Final dataset: 42 samples

Define Lipid Classifications

# Extract lipid measurements matrix
m <- pheno %>%
  select(DGDG_34_0:TG_58_5) %>%
  as.matrix()

# Define lipid head groups
head_group <- c(
  DG = "neutral",
  DGDG = "glycolipid",
  DGGA = "glycolipid",
  LPC = "phospholipid",
  LPE = "phospholipid",
  MGDG = "glycolipid",
  PC = "phospholipid",
  PE = "phospholipid",
  PG = "phospholipid",
  PI = "phospholipid",
  SQDG = "glycolipid",
  TG = "neutral"
)

# Create lipid species annotation
sp <- data.frame(
  colname = colnames(m),
  class = gsub("_.*", "", colnames(m), perl = TRUE)
)

sp$head_group <- head_group[sp$class]

# Summary of lipid classes
cat("\nLipid class distribution:\n")
## 
## Lipid class distribution:
print(with(sp, table(head_group, class)))
##               class
## head_group     DG DGDG DGGA LPC LPE MGDG PC PE PG PI SQDG TG
##   glycolipid    0   12    9   0   0    8  0  0  0  0    9  0
##   neutral      23    0    0   0   0    0  0  0  0  0    0 25
##   phospholipid  0    0    0   6   3    0 19 12  7  4    0  0

Quality Control

Create DGEList Object

Critical step: The counts matrix comes from pheno, but we need to ensure the sample metadata in the DGEList matches the processed pheno data, especially for Treatment levels which were renamed from “High_P”/“Low_P” to “+P”/“-P”.

# Prepare count matrix
counts <- pheno[, colnames(m)] %>%
  as.matrix() %>%
  t()

# Visualize data distribution
hist(counts, main = "Distribution of Lipid Abundances", xlab = "Abundance")

# Set column names to match sample IDs
colnames(counts) <- pheno$tube

cat("\nCount matrix dimensions:", dim(counts), "\n")
## 
## Count matrix dimensions: 137 42
cat("Lipids:", nrow(counts), "\n")
## Lipids: 137
cat("Samples:", ncol(counts), "\n")
## Samples: 42

Match Sample Metadata to Counts

# Extract sample names from counts matrix
sampleNames <- pheno$tube

# Verify all sample names are present
cat("\nAll samples in counts?", all(sampleNames %in% sampleInfo$tube), "\n")
## 
## All samples in counts? TRUE
# Match sampleInfo to the order of samples in counts matrix
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]

# CRITICAL: Update Treatment levels to match pheno
# The pheno object has Treatment renamed to "+P"/"-P"
sampleInfo_matched$Treatment <- factor(
  pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
  levels = c("+P", "-P")
)

cat("\nTreatment levels after update:\n")
## 
## Treatment levels after update:
print(levels(sampleInfo_matched$Treatment))
## [1] "+P" "-P"
print(table(sampleInfo_matched$Treatment))
## 
## +P -P 
## 15 27

Create DGEList with Matched Metadata

# Create gene annotation
genes <- data.frame(gene = rownames(counts))

# Create DGEList with matched sample information
y <- DGEList(counts = counts, samples = sampleInfo_matched)

# Create sample groups from Treatment and Genotype
y$group <- interaction(y$samples$Treatment, y$samples$Genotype)

# Identify low count samples (library size < 1)
y$samples$lowCount <- y$samples$lib.size < 1

cat("\nDGEList created successfully\n")
## 
## DGEList created successfully
cat("Groups:", levels(y$group), "\n")
## Groups: +P.CTRL -P.CTRL +P.INV4 -P.INV4
cat("\nLibrary size summary:\n")
## 
## Library size summary:
print(summary(y$samples$lib.size))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 1.242e+10 4.205e+10 5.682e+10 5.905e+10 7.104e+10 1.114e+11
cat("\nLow count samples:\n")
## 
## Low count samples:
print(table(y$samples$lowCount))
## 
## FALSE 
##    42

Initial MDS Plot

# Compute MDS on all samples
mds <- plotMDS(y, pch = 21, label = y$samples$side_tag, plot = TRUE)
title("Initial MDS - All Samples")

Filter Samples by Library Size

# Create filtered object
y_filtered <- y

cat("\nSamples before filtering:", ncol(y_filtered), "\n")
## 
## Samples before filtering: 42
# Filter by sample (remove low count samples)
y_filtered_bySample <- y_filtered[, !y_filtered$samples$lowCount]

cat("Samples after filtering:", ncol(y_filtered_bySample), "\n")
## Samples after filtering: 42
# Diagnostic tables - check sample distribution
cat("\n=== Diagnostic Tables ===\n")
## 
## === Diagnostic Tables ===
cat("\nTreatment by Leaf Tissue:\n")
## 
## Treatment by Leaf Tissue:
print(table(
  y_filtered_bySample$samples$Treatment,
  y_filtered_bySample$samples$leaf_tissue
))
##     
##      1 2 3 4
##   +P 4 3 4 4
##   -P 8 4 8 7
cat("\nGenotype by Leaf Tissue:\n")
## 
## Genotype by Leaf Tissue:
print(table(
  y_filtered_bySample$samples$Genotype,
  y_filtered_bySample$samples$leaf_tissue
))
##       
##        1 2 3 4
##   CTRL 6 3 6 5
##   INV4 6 4 6 6
cat("\nTreatment by Genotype by Leaf Tissue:\n")
## 
## Treatment by Genotype by Leaf Tissue:
print(table(
  y_filtered_bySample$samples$Treatment,
  y_filtered_bySample$samples$Genotype,
  y_filtered_bySample$samples$leaf_tissue
))
## , ,  = 1
## 
##     
##      CTRL INV4
##   +P    2    2
##   -P    4    4
## 
## , ,  = 2
## 
##     
##      CTRL INV4
##   +P    1    2
##   -P    2    2
## 
## , ,  = 3
## 
##     
##      CTRL INV4
##   +P    2    2
##   -P    4    4
## 
## , ,  = 4
## 
##     
##      CTRL INV4
##   +P    2    2
##   -P    3    4

MDS After Filtering

# Compute MDS on filtered samples
mds2 <- plotMDS(
  y_filtered_bySample,
  pch = 21,
  label = y_filtered_bySample$samples$side_tag,
  bg = (as.factor(y_filtered_bySample$samples$lowCount) == TRUE) + 1,
  plot = TRUE
)
title("MDS After Filtering")

Prepare Data Frame for ggplot MDS Visualizations

CRITICAL: We need to use the ORIGINAL y_filtered object for the metadata to ensure all samples are included in the MDS visualization, but use the coordinates from the MDS computed on all samples.

# Extract sample metadata from filtered object
d <- y_filtered$samples

# Add MDS coordinates from initial MDS
d$x <- mds$x
d$y <- mds$y

# Verify metadata is present
cat("\n=== Verifying Metadata ===\n")
## 
## === Verifying Metadata ===
cat("\nTreatment levels in d:\n")
## 
## Treatment levels in d:
print(levels(d$Treatment))
## [1] "+P" "-P"
cat("\nTreatment counts:\n")
## 
## Treatment counts:
print(table(d$Treatment))
## 
## +P -P 
## 15 27
cat("\nGenotype levels in d:\n")
## 
## Genotype levels in d:
print(levels(factor(d$Genotype)))
## [1] "CTRL" "INV4"
cat("\nGenotype counts:\n")
## 
## Genotype counts:
print(table(d$Genotype))
## 
## CTRL INV4 
##   20   22
cat("\nLeaf tissue levels in d:\n")
## 
## Leaf tissue levels in d:
print(levels(factor(d$leaf_tissue)))
## [1] "1" "2" "3" "4"
cat("\nLeaf tissue counts:\n")
## 
## Leaf tissue counts:
print(table(d$leaf_tissue))
## 
##  1  2  3  4 
## 12  7 12 11
# Ensure factors are properly set
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
d$Rep <- factor(d$Rep)

MDS Plots with ggplot

# MDS by Treatment
p1 <- ggplot(d, aes(x = x, y = y)) +
  geom_point(aes(color = Treatment), size = 3) +
  ggtitle("MDS Plot: Treatment") +
  xlab("Leading logFC dim 1") +
  ylab("Leading logFC dim 2") +
  theme_classic(base_size = 14)

# MDS by decimal time (collection time)
p2 <- ggplot(d, aes(x = x, y = y)) +
  geom_point(aes(color = decimal_time), size = 3) +
  ggtitle("MDS Plot: Collection Time") +
  xlab("Leading logFC dim 1") +
  ylab("Leading logFC dim 2") +
  theme_classic(base_size = 14)

# MDS by Collector
p3 <- ggplot(d, aes(x = x, y = y)) +
  geom_point(aes(color = COLLECTOR), size = 3) +
  ggtitle("MDS Plot: Collector") +
  xlab("Leading logFC dim 1") +
  ylab("Leading logFC dim 2") +
  theme_classic(base_size = 14)

# MDS by row ID and Treatment
p4 <- ggplot(d, aes(x = x, y = y)) +
  geom_point(aes(color = as.factor(rowid), shape = Treatment), size = 3) +
  ggtitle("MDS Plot: Row ID and Treatment") +
  xlab("Leading logFC dim 1") +
  ylab("Leading logFC dim 2") +
  scale_color_discrete(name = "Row ID") +
  theme_classic(base_size = 14) +
  theme(legend.position = "none")

# MDS by Genotype
p5 <- ggplot(d, aes(x = x, y = y)) +
  geom_point(aes(color = Genotype), size = 3) +
  ggtitle("MDS Plot: Genotype") +
  xlab("Leading logFC dim 1") +
  ylab("Leading logFC dim 2") +
  theme_classic(base_size = 14)

# MDS by Leaf Tissue and Treatment
p6 <- ggplot(d, aes(x = x, y = y)) +
  geom_point(aes(color = factor(leaf_tissue), shape = Treatment), size = 3) +
  ggtitle("MDS Plot: Leaf Tissue and Treatment") +
  scale_color_discrete(name = "Leaf Tissue") +
  xlab("Leading logFC dim 1") +
  ylab("Leading logFC dim 2") +
  theme_classic(base_size = 14)

ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)

Base R MDS Plots for Verification

par(mfrow = c(1, 2))

# MDS colored by Treatment
plot(
  mds2$x, mds2$y,
  pch = 21,
  bg = factor(y_filtered_bySample$samples$Treatment),
  col = 0,
  main = "MDS: Treatment",
  xlab = "Leading logFC dim 1",
  ylab = "Leading logFC dim 2"
)
legend(
  "topright",
  legend = levels(factor(y_filtered_bySample$samples$Treatment)),
  pch = 21,
  pt.bg = 1:nlevels(factor(y_filtered_bySample$samples$Treatment))
)

# MDS colored by Genotype
plot(
  mds2$x, mds2$y,
  col = factor(y_filtered_bySample$samples$Genotype),
  pch = 19,
  main = "MDS: Genotype",
  xlab = "Leading logFC dim 1",
  ylab = "Leading logFC dim 2"
)
legend(
  "topright",
  legend = levels(factor(y_filtered_bySample$samples$Genotype)),
  col = 1:nlevels(factor(y_filtered_bySample$samples$Genotype)),
  pch = 19
)

Differential Expression Analysis

Final Data Preparation for Model

CRITICAL: Before creating the design matrix, ensure Treatment levels are correctly set to match the original factor levels for proper interpretation.

# Verify Plot_Row is available
cat("\nPlot_Row present?", "Plot_Row" %in% colnames(d), "\n")
## 
## Plot_Row present? TRUE
cat("Plot_Row values:\n")
## Plot_Row values:
print(table(d$Plot_Row))
## 
##  1  2  3  4  5  6  7  8 
##  3  4  4  4 10  9  4  4
# Set Treatment with original levels for model matrix
# This ensures the contrast is interpreted correctly
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))

cat("\nTreatment levels for model:\n")
## 
## Treatment levels for model:
print(levels(d$Treatment))
## [1] "+P" "-P"
print(table(d$Treatment))
## 
## +P -P 
## 15 27
# IMPORTANT: Reset y_filtered_bySample to original y for modeling
# This ensures we use all samples (not just the filtered ones)
y_filtered_bySample <- y

Design Matrix and Normalization

# Create design matrix
# Model accounts for:
# - Plot_Row: spatial variation (row in field)
# - Plot_Column: spatial variation (column in field)  
# - Rep: block effects
# - leaf_tissue: continuous leaf position effect (1-4, apical to basal)
# - Treatment: P treatment (+P vs -P)
# - Genotype: CTRL vs INV4M
# - Treatment:Genotype: interaction between P and genotype
design <- model.matrix(
  ~ Plot_Row + Plot_Column + Rep + leaf_tissue + Treatment * Genotype,
  d
)

cat("\nDesign matrix dimensions:", dim(design), "\n")
## 
## Design matrix dimensions: 42 11
cat("\nCoefficients in model:\n")
## 
## Coefficients in model:
print(colnames(design))
##  [1] "(Intercept)"              "Plot_Row"                
##  [3] "Plot_Column"              "Rep6"                    
##  [5] "Rep7"                     "Rep11"                   
##  [7] "Rep12"                    "leaf_tissue"             
##  [9] "Treatment-P"              "GenotypeINV4"            
## [11] "Treatment-P:GenotypeINV4"
cat("\nDesign matrix summary:\n")
## 
## Design matrix summary:
print(head(design))
##     (Intercept) Plot_Row Plot_Column Rep6 Rep7 Rep11 Rep12 leaf_tissue
## S01           1        3           3    1    0     0     0           1
## S02           1        3           3    1    0     0     0           2
## S03           1        3           3    1    0     0     0           3
## S04           1        3           3    1    0     0     0           4
## S05           1        4           3    1    0     0     0           1
## S06           1        4           3    1    0     0     0           2
##     Treatment-P GenotypeINV4 Treatment-P:GenotypeINV4
## S01           1            0                        0
## S02           1            0                        0
## S03           1            0                        0
## S04           1            0                        0
## S05           1            1                        1
## S06           1            1                        1
# Calculate normalization factors
y_filtered_bySample <- calcNormFactors(y_filtered_bySample)

cat("\nNormalization factors calculated\n")
## 
## Normalization factors calculated
cat("Norm factors summary:\n")
## Norm factors summary:
print(summary(y_filtered_bySample$samples$norm.factors))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.5275  0.8574  1.0529  1.0237  1.1859  1.5273

Voom Transformation and Linear Modeling

# Apply voom transformation
voomR <- voom(y_filtered_bySample, design = design, plot = TRUE)

# Fit linear model
fit <- lmFit(voomR)
ebfit <- eBayes(fit)

cat("\nModel fitted successfully\n")
## 
## Model fitted successfully
cat("Number of lipids:", nrow(ebfit$coefficients), "\n")
## Number of lipids: 137

Extract Results for Each Predictor

# Extract results for coefficients 8-11
# Coef 8: leaf_tissue
# Coef 9: Treatment-P
# Coef 10: GenotypeINV4M
# Coef 11: Treatment-P:GenotypeINV4M

r <- list()

for (x in 8:11) {
  # Get top table for coefficient
  r[[as.character(x)]] <- topTable(
    ebfit,
    coef = x,
    sort.by = 'none',
    n = Inf
  )
  
  # Calculate confidence intervals
  cr <- qt(0.975, ebfit$df.residual + ebfit$df.prior) *
    ebfit$stdev.unscaled[, x] * sqrt(ebfit$s2.post)
  
  r[[as.character(x)]][["upper"]] <- r[[as.character(x)]][["logFC"]] + cr
  r[[as.character(x)]][["lower"]] <- r[[as.character(x)]][["logFC"]] - cr
}

# Assign predictor names and lipid IDs
r$`8`["predictor"] <- "leaf"
r$`8`["Response"] <- rownames(counts)
r$`9`["predictor"] <- "-P"
r$`9`["Response"] <- rownames(counts)
r$`10`["predictor"] <- "Inv4m"
r$`10`["Response"] <- rownames(counts)
r$`11`["predictor"] <- "-P:Inv4m"
r$`11`["Response"] <- rownames(counts)

# Combine all results
effects <- rbind(r$`8`, r$`9`, r$`10`, r$`11`)

cat("\nTotal number of tests:", nrow(effects), "\n")
## 
## Total number of tests: 548

Process Effects and Define Significance

# Define predictor order
effect_order <- c("leaf", "-P", "Inv4m", "-P:Inv4m")

# Apply predictor-specific fold change thresholds
effects <- effects %>%
  mutate(predictor = factor(predictor, levels = effect_order)) %>%
  mutate(neglogP = -log10(adj.P.Val)) %>%
  mutate(is_significant = adj.P.Val < 0.05) %>%
  mutate(upregulated = case_when(
    (predictor != "leaf") & (logFC >= 2) & is_significant ~ TRUE,
    (predictor == "leaf") & (logFC >= 0.7) & is_significant ~ TRUE,
    .default = FALSE
  )) %>%
  mutate(downregulated = case_when(
    (predictor != "leaf") & (logFC <= -2) & is_significant ~ TRUE,
    (predictor == "leaf") & (logFC <= -0.7) & is_significant ~ TRUE,
    .default = FALSE
  )) %>%
  mutate(
    regulation = case_when(
      is_significant & upregulated ~ "Upregulated",
      is_significant & downregulated ~ "Downregulated",
      .default = "Unregulated"
    ),
    is_DEG = is_significant & regulation != "Unregulated",
    label = ifelse(is_DEG, Response, "")
  )

# Format labels for plotting
effects$label <- sub("_", "", effects$label, perl = TRUE)
effects$label <- sub("_", ":", effects$label, perl = TRUE)

# Summary of differential lipids
cat("\nDifferentially expressed lipids per predictor:\n")
## 
## Differentially expressed lipids per predictor:
print(with(effects, table(predictor, regulation)))
##           regulation
## predictor  Downregulated Unregulated Upregulated
##   leaf                 4         124           9
##   -P                   9         122           6
##   Inv4m                0         137           0
##   -P:Inv4m             0         137           0

Volcano Plots

Custom Legend Key Function

# Custom key glyph for legend
draw_key_custom <- function(data, params, size) {
  colour <- data$colour
  data <- data[data$colour == colour, ]
  grid::segmentsGrob(
    0, 1 / 2, 1, 1 / 2,
    gp = grid::gpar(
      col = data$colour,
      lwd = data$linewidth * 2
    )
  )
}

Volcano Plot: All Three Predictors (Excluding Interaction)

volcano_all <- effects %>%
  filter(predictor != "-P:Inv4m") %>%
  droplevels() %>%
  inner_join(sp, by = c(Response = "colname")) %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  ylim(0, 10) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Not significant", "Upregulated")
  ) +
  geom_text_repel(
    force = 3,
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("darkblue", "orange2", "darkred"),
    labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
  ) +
  facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5)),
    fill = "none"
  ) +
  theme_classic2(base_size = 25) +
  theme(
    plot.title = element_blank(),
    strip.text = element_markdown(),
    legend.position = c(0.5, 0.95),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 15),
    legend.direction = "horizontal",
    strip.background = element_blank()
  )

print(volcano_all)

Volcano Plot: Leaf and -P Only (Excluding Inv4m)

This focused plot excludes Inv4m results to better visualize differential lipids in response to leaf tissue position and phosphorus deficiency.

volcano_leaf_p <- effects %>%
  filter(predictor %in% c("leaf", "-P")) %>%
  droplevels() %>%
  inner_join(sp, by = c(Response = "colname")) %>%
  ggplot(aes(
    x = logFC,
    y = -log10(adj.P.Val),
    color = head_group,
    fill = regulation,
    label = label
  )) +
  ylab(expression(-log[10](italic(FDR)))) +
  xlab(expression(log[2]("Fold Change"))) +
  ylim(0, 10) +
  geom_point(alpha = 0.7, shape = 21, color = "white") +
  scale_fill_manual(
    name = "",
    values = c("#00AFBB", "grey", "#bb0c00"),
    labels = c("Downregulated", "Not significant", "Upregulated")
  ) +
  geom_text_repel(
    force = 3,
    min.segment.length = 0,
    max.overlaps = 20,
    key_glyph = draw_key_custom
  ) +
  scale_color_manual(
    name = "",
    values = c("darkblue", "orange2", "darkred"),
    labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
  ) +
  facet_wrap(. ~ predictor, scales = "free_x", ncol = 2) +
  guides(
    color = guide_legend(override.aes = list(linewidth = 1.5)),
    fill = "none"
  ) +
  theme_classic2(base_size = 25) +
  theme(
    plot.title = element_blank(),
    strip.text = element_markdown(),
    legend.position = c(0.5, 0.95),
    legend.spacing = unit(0, "line"),
    legend.box.spacing = unit(0, "line"),
    legend.text = element_text(size = 15),
    legend.direction = "horizontal",
    strip.background = element_blank()
  )

print(volcano_leaf_p)

Effect Size Plots

Prepare Data for Effect Plots

# Filter significant effects and prepare for plotting
to_plot <- effects %>%
  mutate(Response = gsub("_glyco", "", Response)) %>%
  mutate(predictor = factor(
    predictor,
    levels = c("leaf", "-P", "Inv4m", "-P:Inv4m")
  )) %>%
  filter(P.Value < 0.05) %>%
  inner_join(sp, by = c(Response = "colname")) %>%
  mutate(sign = sign(logFC)) %>%
  ungroup() %>%
  group_by(Response) %>%
  mutate(max_effect = max(abs(logFC))) %>%
  ungroup() %>%
  group_by(predictor, head_group, class) %>%
  mutate(head_group = fct_reorder(head_group, adj.P.Val)) %>%
  ungroup() %>%
  group_by(predictor) %>%
  arrange(predictor, head_group, class, adj.P.Val) %>%
  mutate(.r = row_number()) %>%
  ungroup()

cat("\nSignificant effects per predictor:\n")
## 
## Significant effects per predictor:
print(with(to_plot, table(predictor, head_group)))
##           head_group
## predictor  glycolipid neutral phospholipid
##   leaf             20      15           32
##   -P               15      10           28
##   Inv4m             5       4           11
##   -P:Inv4m          6       9            9

Effect Size Plot: All Predictors

pd <- position_dodge(0.4)

effect_plot_all <- to_plot %>%
  droplevels() %>%
  mutate(Response = fct_reorder(Response, .r)) %>%
  ungroup() %>%
  ggplot(aes(
    x = logFC,
    y = Response,
    col = head_group
  )) +
  xlab("Effect (log2 Fold Change)") +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(
    aes(xmin = upper, xmax = lower),
    position = pd,
    width = 0.2,
    size = 0.7
  ) +
  guides(
    shape = guide_legend(title = NULL),
    color = guide_legend(reverse = TRUE)
  ) +
  facet_wrap(. ~ predictor, scales = "free", ncol = 4) +
  scale_y_discrete(limits = rev) +
  scale_color_manual(values = c("blue", "red", "gold")) +
  theme_light(base_size = 15) +
  theme(
    legend.position = "top",
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(color = "black", size = 15),
    axis.title.y = element_blank(),
    axis.text.y = element_text(hjust = 0, face = "bold"),
    plot.caption = element_text(hjust = 0)
  )

print(effect_plot_all)

Effect Size Plot: Leaf and -P Comparison

# Create separate plots for leaf and -P
response_leaf <- to_plot %>%
  filter(predictor == "leaf") %>%
  droplevels() %>%
  mutate(Response = fct_reorder(Response, .r)) %>%
  ungroup() %>%
  ggplot(aes(
    x = logFC,
    y = Response,
    col = head_group
  )) +
  xlab("Effect (log2 Fold Change)") +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(
    aes(xmin = upper, xmax = lower),
    position = pd,
    width = 0.2,
    size = 0.7
  ) +
  guides(
    shape = guide_legend(title = NULL),
    color = guide_legend(reverse = TRUE)
  ) +
  facet_wrap(. ~ predictor, scales = "free") +
  scale_y_discrete(limits = rev) +
  scale_color_manual(values = c("blue", "red", "gold")) +
  theme_light(base_size = 15) +
  theme(
    legend.position = "top",
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(color = "black", size = 15),
    axis.title.y = element_blank(),
    axis.text.y = element_text(hjust = 0, face = "bold"),
    plot.caption = element_text(hjust = 0)
  )

response_p <- to_plot %>%
  filter(predictor == "-P") %>%
  droplevels() %>%
  mutate(Response = fct_reorder(Response, .r)) %>%
  ungroup() %>%
  ggplot(aes(
    x = logFC,
    y = Response,
    col = head_group
  )) +
  xlab("Effect (log2 Fold Change)") +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point(position = pd, size = 3) +
  geom_errorbar(
    aes(xmin = upper, xmax = lower),
    position = pd,
    width = 0.2,
    size = 0.7
  ) +
  guides(
    shape = guide_legend(title = NULL),
    color = guide_legend(reverse = TRUE)
  ) +
  facet_wrap(. ~ predictor, scales = "free") +
  scale_y_discrete(limits = rev) +
  scale_color_manual(values = c("blue",  "gold","red")) +
  theme_light(base_size = 15) +
  theme(
    legend.position = "top",
    strip.background = element_rect(fill = "white"),
    strip.text = element_text(color = "black", size = 15),
    axis.title.y = element_blank(),
    axis.text.y = element_text(hjust = 0, face = "bold"),
    plot.caption = element_text(hjust = 0)
  )

ggarrange(response_leaf, response_p, ncol = 2, common.legend = TRUE)

Export Results

# Export differential expression results
write.csv(
  effects,
  file = "~/Desktop/lipid_differential_expression_results.csv",
  row.names = FALSE
)

# Save volcano plots
ggsave(
  volcano_all,
  file = "~/Desktop/lipid_volcano_all.svg",
  height = 6,
  width = 12
)

ggsave(
  volcano_leaf_p,
  file = "~/Desktop/lipid_volcano_leaf_P.svg",
  height = 6,
  width = 10
)

# Save effect plots
ggsave(
  effect_plot_all,
  file = "~/Desktop/lipid_effects_all.svg",
  height = 10,
  width = 12
)

Summary Statistics

# Count differential lipids by class and regulation
summary_table <- effects %>%
  filter(is_DEG) %>%
  inner_join(sp, by = c(Response = "colname")) %>%
  group_by(predictor, head_group, regulation) %>%
  summarise(n_lipids = n(), .groups = "drop") %>%
  arrange(predictor, head_group, regulation)

print(summary_table)
## # A tibble: 9 × 4
##   predictor head_group   regulation    n_lipids
##   <fct>     <chr>        <chr>            <int>
## 1 leaf      glycolipid   Downregulated        1
## 2 leaf      glycolipid   Upregulated          3
## 3 leaf      neutral      Downregulated        1
## 4 leaf      neutral      Upregulated          4
## 5 leaf      phospholipid Downregulated        2
## 6 leaf      phospholipid Upregulated          2
## 7 -P        glycolipid   Upregulated          3
## 8 -P        neutral      Upregulated          3
## 9 -P        phospholipid Downregulated        9
# Significant lipids per predictor
sig_summary <- effects %>%
  group_by(predictor) %>%
  summarise(
    total_tested = n(),
    n_significant = sum(is_significant),
    n_upregulated = sum(upregulated),
    n_downregulated = sum(downregulated),
    pct_significant = round(100 * n_significant / total_tested, 1)
  )

print(sig_summary)
## # A tibble: 4 × 6
##   predictor total_tested n_significant n_upregulated n_downregulated
##   <fct>            <int>         <int>         <int>           <int>
## 1 leaf               137            53             9               4
## 2 -P                 137            29             6               9
## 3 Inv4m              137             0             0               0
## 4 -P:Inv4m           137             0             0               0
## # ℹ 1 more variable: pct_significant <dbl>

Session Information

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggtext_0.1.2  ggpubr_0.6.2  ggrepel_0.9.6 forcats_1.0.1 tidyr_1.3.1  
## [6] dplyr_1.1.4   ggplot2_4.0.0 edgeR_4.7.6   limma_3.65.7 
## 
## loaded via a namespace (and not attached):
##  [1] utf8_1.2.6         sass_0.4.10        generics_0.1.4     rstatix_0.7.3     
##  [5] xml2_1.4.0         stringi_1.8.7      lattice_0.22-7     digest_0.6.37     
##  [9] magrittr_2.0.4     evaluate_1.0.5     grid_4.5.1         RColorBrewer_1.1-3
## [13] fastmap_1.2.0      jsonlite_2.0.0     backports_1.5.0    Formula_1.2-5     
## [17] gridExtra_2.3      purrr_1.1.0        scales_1.4.0       jquerylib_0.1.4   
## [21] abind_1.4-8        cli_3.6.5          rlang_1.1.6        litedown_0.7      
## [25] commonmark_2.0.0   cowplot_1.2.0      withr_3.0.2        cachem_1.1.0      
## [29] yaml_2.3.10        tools_4.5.1        ggsignif_0.6.4     locfit_1.5-9.12   
## [33] broom_1.0.10       vctrs_0.6.5        R6_2.6.1           lifecycle_1.0.4   
## [37] stringr_1.5.2      car_3.1-3          pkgconfig_2.0.3    pillar_1.11.1     
## [41] bslib_0.9.0        gtable_0.3.6       glue_1.8.0         Rcpp_1.1.0        
## [45] statmod_1.5.1      xfun_0.53          tibble_3.3.0       tidyselect_1.2.1  
## [49] rstudioapi_0.17.1  knitr_1.50         farver_2.1.2       htmltools_0.5.8.1 
## [53] labeling_0.4.3     rmarkdown_2.30     carData_3.0-5      compiler_4.5.1    
## [57] S7_0.2.0           markdown_2.0       gridtext_0.1.5