This analysis performs differential expression analysis on lipid profiles from maize leaf samples across multiple experimental factors:
The analysis uses limma-voom to identify lipids that respond to:
Fold change thresholds:
library(edgeR)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyr)
library(forcats)
library(ggrepel)
library(ggpubr)
library(ggtext)
# Internal standards and odd-chain lipids to exclude
to_exclude <- c(
"DG_12_0", "LPC_17_0", "LPE_17_1",
"PC_25_0", "PG_17_0", "SM_35_1",
"Sphingosine_17_1", "TG_17_0",
"TG_57_6" # exclude the odd number lipid
)
# Load MS-Dial raw data with new internal standard integration
lipid_csv <- "../data/PSU_RawData_MSDial_NewStdInt_240422.csv"
raw <- read.csv(lipid_csv, na.strings = c("", "#N/A", "NA", "Inf"))
# Remove internal standards and quality control samples
to_exclude <- to_exclude[to_exclude %in% colnames(raw)]
raw <- raw %>%
select(-all_of(to_exclude)) %>%
filter(!grepl("Methanol", sampleID)) %>%
filter(!grepl("Pool", sampleID))
cat("Loaded", nrow(raw), "samples\n")
## Loaded 43 samples
# Load plant phenotypic data
plant_csv <- "/Users/fvrodriguez/Desktop/Desktop/22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv"
psu <- read.csv(plant_csv) %>%
rename(Genotype = "Who.What", rowid = "P22.") %>%
filter(Genotype %in% c("CTRL", "INV4M")) %>%
droplevels()
# Load RNA-seq metadata
sampleInfo <- read.csv('../data/inv4mRNAseq_metadata.csv') %>%
rename(Genotype = genotype) %>%
rename(rowid = row)
# Merge metadata
sampleInfo <- psu %>%
select(rowid, Rep, Plot_Row, Plot_Column, DTA, DTS) %>%
inner_join(sampleInfo) %>%
filter(!is.na(DTA))
# Create leaf position groups
sampleInfo$leaf_group <- NA
sampleInfo$leaf_group[sampleInfo$leaf_tissue < 3] <- "apical"
sampleInfo$leaf_group[sampleInfo$leaf_tissue > 2] <- "basal"
# Standardize sample IDs
sampleInfo$tube <- gsub("L|R", "S", sampleInfo$tube, perl = TRUE)
sampleInfo$rowid <- sampleInfo$row
cat("Sample info loaded for", nrow(sampleInfo), "samples\n")
## Sample info loaded for 64 samples
# Extract tube ID from raw data
raw$tube <- gsub(".*_|.raw", "", raw$sampleID, perl = TRUE)
raw$tube <- gsub("L|R", "S", raw$tube, perl = TRUE)
# Merge phenotypic data with lipid measurements
pheno <- sampleInfo %>%
select(rowid, tube, Rep, Plot:Plot_Row, Treatment:leaf_tissue, leaf_group) %>%
inner_join(
raw %>%
select(tube, DGDG_34_0:TG_58_5)
) %>%
mutate(block = as.factor(Rep)) %>%
droplevels() %>%
pivot_wider(
names_from = Rep,
values_from = Rep,
names_prefix = "block_",
values_fn = function(x) 1,
values_fill = 0
) %>%
select(rowid, tube:Plot_Row, block, block_6:block_12, everything()) %>%
mutate(Treatment = factor(Treatment, levels = c("High_P", "Low_P")))
# Simplify treatment labels
levels(pheno$Treatment) <- c("+P", "-P")
# Remove problematic sample
pheno <- pheno[-29, ] # what's wrong with this sample?
cat("Final dataset:", nrow(pheno), "samples\n")
## Final dataset: 42 samples
# Extract lipid measurements matrix
m <- pheno %>%
select(DGDG_34_0:TG_58_5) %>%
as.matrix()
# Define lipid head groups
head_group <- c(
DG = "neutral",
DGDG = "glycolipid",
DGGA = "glycolipid",
LPC = "phospholipid",
LPE = "phospholipid",
MGDG = "glycolipid",
PC = "phospholipid",
PE = "phospholipid",
PG = "phospholipid",
PI = "phospholipid",
SQDG = "glycolipid",
TG = "neutral"
)
# Create lipid species annotation
sp <- data.frame(
colname = colnames(m),
class = gsub("_.*", "", colnames(m), perl = TRUE)
)
sp$head_group <- head_group[sp$class]
# Summary of lipid classes
cat("\nLipid class distribution:\n")
##
## Lipid class distribution:
print(with(sp, table(head_group, class)))
## class
## head_group DG DGDG DGGA LPC LPE MGDG PC PE PG PI SQDG TG
## glycolipid 0 12 9 0 0 8 0 0 0 0 9 0
## neutral 23 0 0 0 0 0 0 0 0 0 0 25
## phospholipid 0 0 0 6 3 0 19 12 7 4 0 0
Critical step: The counts matrix comes from pheno, but
we need to ensure the sample metadata in the DGEList matches the
processed pheno data, especially for Treatment levels which
were renamed from “High_P”/“Low_P” to “+P”/“-P”.
# Prepare count matrix
counts <- pheno[, colnames(m)] %>%
as.matrix() %>%
t()
# Visualize data distribution
hist(counts, main = "Distribution of Lipid Abundances", xlab = "Abundance")
# Set column names to match sample IDs
colnames(counts) <- pheno$tube
cat("\nCount matrix dimensions:", dim(counts), "\n")
##
## Count matrix dimensions: 137 42
cat("Lipids:", nrow(counts), "\n")
## Lipids: 137
cat("Samples:", ncol(counts), "\n")
## Samples: 42
# Extract sample names from counts matrix
sampleNames <- pheno$tube
# Verify all sample names are present
cat("\nAll samples in counts?", all(sampleNames %in% sampleInfo$tube), "\n")
##
## All samples in counts? TRUE
# Match sampleInfo to the order of samples in counts matrix
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]
# CRITICAL: Update Treatment levels to match pheno
# The pheno object has Treatment renamed to "+P"/"-P"
sampleInfo_matched$Treatment <- factor(
pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
levels = c("+P", "-P")
)
cat("\nTreatment levels after update:\n")
##
## Treatment levels after update:
print(levels(sampleInfo_matched$Treatment))
## [1] "+P" "-P"
print(table(sampleInfo_matched$Treatment))
##
## +P -P
## 15 27
# Create gene annotation
genes <- data.frame(gene = rownames(counts))
# Create DGEList with matched sample information
y <- DGEList(counts = counts, samples = sampleInfo_matched)
# Create sample groups from Treatment and Genotype
y$group <- interaction(y$samples$Treatment, y$samples$Genotype)
# Identify low count samples (library size < 1)
y$samples$lowCount <- y$samples$lib.size < 1
cat("\nDGEList created successfully\n")
##
## DGEList created successfully
cat("Groups:", levels(y$group), "\n")
## Groups: +P.CTRL -P.CTRL +P.INV4 -P.INV4
cat("\nLibrary size summary:\n")
##
## Library size summary:
print(summary(y$samples$lib.size))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.242e+10 4.205e+10 5.682e+10 5.905e+10 7.104e+10 1.114e+11
cat("\nLow count samples:\n")
##
## Low count samples:
print(table(y$samples$lowCount))
##
## FALSE
## 42
# Compute MDS on all samples
mds <- plotMDS(y, pch = 21, label = y$samples$side_tag, plot = TRUE)
title("Initial MDS - All Samples")
# Create filtered object
y_filtered <- y
cat("\nSamples before filtering:", ncol(y_filtered), "\n")
##
## Samples before filtering: 42
# Filter by sample (remove low count samples)
y_filtered_bySample <- y_filtered[, !y_filtered$samples$lowCount]
cat("Samples after filtering:", ncol(y_filtered_bySample), "\n")
## Samples after filtering: 42
# Diagnostic tables - check sample distribution
cat("\n=== Diagnostic Tables ===\n")
##
## === Diagnostic Tables ===
cat("\nTreatment by Leaf Tissue:\n")
##
## Treatment by Leaf Tissue:
print(table(
y_filtered_bySample$samples$Treatment,
y_filtered_bySample$samples$leaf_tissue
))
##
## 1 2 3 4
## +P 4 3 4 4
## -P 8 4 8 7
cat("\nGenotype by Leaf Tissue:\n")
##
## Genotype by Leaf Tissue:
print(table(
y_filtered_bySample$samples$Genotype,
y_filtered_bySample$samples$leaf_tissue
))
##
## 1 2 3 4
## CTRL 6 3 6 5
## INV4 6 4 6 6
cat("\nTreatment by Genotype by Leaf Tissue:\n")
##
## Treatment by Genotype by Leaf Tissue:
print(table(
y_filtered_bySample$samples$Treatment,
y_filtered_bySample$samples$Genotype,
y_filtered_bySample$samples$leaf_tissue
))
## , , = 1
##
##
## CTRL INV4
## +P 2 2
## -P 4 4
##
## , , = 2
##
##
## CTRL INV4
## +P 1 2
## -P 2 2
##
## , , = 3
##
##
## CTRL INV4
## +P 2 2
## -P 4 4
##
## , , = 4
##
##
## CTRL INV4
## +P 2 2
## -P 3 4
# Compute MDS on filtered samples
mds2 <- plotMDS(
y_filtered_bySample,
pch = 21,
label = y_filtered_bySample$samples$side_tag,
bg = (as.factor(y_filtered_bySample$samples$lowCount) == TRUE) + 1,
plot = TRUE
)
title("MDS After Filtering")
CRITICAL: We need to use the ORIGINAL y_filtered object for the metadata to ensure all samples are included in the MDS visualization, but use the coordinates from the MDS computed on all samples.
# Extract sample metadata from filtered object
d <- y_filtered$samples
# Add MDS coordinates from initial MDS
d$x <- mds$x
d$y <- mds$y
# Verify metadata is present
cat("\n=== Verifying Metadata ===\n")
##
## === Verifying Metadata ===
cat("\nTreatment levels in d:\n")
##
## Treatment levels in d:
print(levels(d$Treatment))
## [1] "+P" "-P"
cat("\nTreatment counts:\n")
##
## Treatment counts:
print(table(d$Treatment))
##
## +P -P
## 15 27
cat("\nGenotype levels in d:\n")
##
## Genotype levels in d:
print(levels(factor(d$Genotype)))
## [1] "CTRL" "INV4"
cat("\nGenotype counts:\n")
##
## Genotype counts:
print(table(d$Genotype))
##
## CTRL INV4
## 20 22
cat("\nLeaf tissue levels in d:\n")
##
## Leaf tissue levels in d:
print(levels(factor(d$leaf_tissue)))
## [1] "1" "2" "3" "4"
cat("\nLeaf tissue counts:\n")
##
## Leaf tissue counts:
print(table(d$leaf_tissue))
##
## 1 2 3 4
## 12 7 12 11
# Ensure factors are properly set
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
d$Rep <- factor(d$Rep)
# MDS by Treatment
p1 <- ggplot(d, aes(x = x, y = y)) +
geom_point(aes(color = Treatment), size = 3) +
ggtitle("MDS Plot: Treatment") +
xlab("Leading logFC dim 1") +
ylab("Leading logFC dim 2") +
theme_classic(base_size = 14)
# MDS by decimal time (collection time)
p2 <- ggplot(d, aes(x = x, y = y)) +
geom_point(aes(color = decimal_time), size = 3) +
ggtitle("MDS Plot: Collection Time") +
xlab("Leading logFC dim 1") +
ylab("Leading logFC dim 2") +
theme_classic(base_size = 14)
# MDS by Collector
p3 <- ggplot(d, aes(x = x, y = y)) +
geom_point(aes(color = COLLECTOR), size = 3) +
ggtitle("MDS Plot: Collector") +
xlab("Leading logFC dim 1") +
ylab("Leading logFC dim 2") +
theme_classic(base_size = 14)
# MDS by row ID and Treatment
p4 <- ggplot(d, aes(x = x, y = y)) +
geom_point(aes(color = as.factor(rowid), shape = Treatment), size = 3) +
ggtitle("MDS Plot: Row ID and Treatment") +
xlab("Leading logFC dim 1") +
ylab("Leading logFC dim 2") +
scale_color_discrete(name = "Row ID") +
theme_classic(base_size = 14) +
theme(legend.position = "none")
# MDS by Genotype
p5 <- ggplot(d, aes(x = x, y = y)) +
geom_point(aes(color = Genotype), size = 3) +
ggtitle("MDS Plot: Genotype") +
xlab("Leading logFC dim 1") +
ylab("Leading logFC dim 2") +
theme_classic(base_size = 14)
# MDS by Leaf Tissue and Treatment
p6 <- ggplot(d, aes(x = x, y = y)) +
geom_point(aes(color = factor(leaf_tissue), shape = Treatment), size = 3) +
ggtitle("MDS Plot: Leaf Tissue and Treatment") +
scale_color_discrete(name = "Leaf Tissue") +
xlab("Leading logFC dim 1") +
ylab("Leading logFC dim 2") +
theme_classic(base_size = 14)
ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)
par(mfrow = c(1, 2))
# MDS colored by Treatment
plot(
mds2$x, mds2$y,
pch = 21,
bg = factor(y_filtered_bySample$samples$Treatment),
col = 0,
main = "MDS: Treatment",
xlab = "Leading logFC dim 1",
ylab = "Leading logFC dim 2"
)
legend(
"topright",
legend = levels(factor(y_filtered_bySample$samples$Treatment)),
pch = 21,
pt.bg = 1:nlevels(factor(y_filtered_bySample$samples$Treatment))
)
# MDS colored by Genotype
plot(
mds2$x, mds2$y,
col = factor(y_filtered_bySample$samples$Genotype),
pch = 19,
main = "MDS: Genotype",
xlab = "Leading logFC dim 1",
ylab = "Leading logFC dim 2"
)
legend(
"topright",
legend = levels(factor(y_filtered_bySample$samples$Genotype)),
col = 1:nlevels(factor(y_filtered_bySample$samples$Genotype)),
pch = 19
)
CRITICAL: Before creating the design matrix, ensure Treatment levels are correctly set to match the original factor levels for proper interpretation.
# Verify Plot_Row is available
cat("\nPlot_Row present?", "Plot_Row" %in% colnames(d), "\n")
##
## Plot_Row present? TRUE
cat("Plot_Row values:\n")
## Plot_Row values:
print(table(d$Plot_Row))
##
## 1 2 3 4 5 6 7 8
## 3 4 4 4 10 9 4 4
# Set Treatment with original levels for model matrix
# This ensures the contrast is interpreted correctly
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
cat("\nTreatment levels for model:\n")
##
## Treatment levels for model:
print(levels(d$Treatment))
## [1] "+P" "-P"
print(table(d$Treatment))
##
## +P -P
## 15 27
# IMPORTANT: Reset y_filtered_bySample to original y for modeling
# This ensures we use all samples (not just the filtered ones)
y_filtered_bySample <- y
# Create design matrix
# Model accounts for:
# - Plot_Row: spatial variation (row in field)
# - Plot_Column: spatial variation (column in field)
# - Rep: block effects
# - leaf_tissue: continuous leaf position effect (1-4, apical to basal)
# - Treatment: P treatment (+P vs -P)
# - Genotype: CTRL vs INV4M
# - Treatment:Genotype: interaction between P and genotype
design <- model.matrix(
~ Plot_Row + Plot_Column + Rep + leaf_tissue + Treatment * Genotype,
d
)
cat("\nDesign matrix dimensions:", dim(design), "\n")
##
## Design matrix dimensions: 42 11
cat("\nCoefficients in model:\n")
##
## Coefficients in model:
print(colnames(design))
## [1] "(Intercept)" "Plot_Row"
## [3] "Plot_Column" "Rep6"
## [5] "Rep7" "Rep11"
## [7] "Rep12" "leaf_tissue"
## [9] "Treatment-P" "GenotypeINV4"
## [11] "Treatment-P:GenotypeINV4"
cat("\nDesign matrix summary:\n")
##
## Design matrix summary:
print(head(design))
## (Intercept) Plot_Row Plot_Column Rep6 Rep7 Rep11 Rep12 leaf_tissue
## S01 1 3 3 1 0 0 0 1
## S02 1 3 3 1 0 0 0 2
## S03 1 3 3 1 0 0 0 3
## S04 1 3 3 1 0 0 0 4
## S05 1 4 3 1 0 0 0 1
## S06 1 4 3 1 0 0 0 2
## Treatment-P GenotypeINV4 Treatment-P:GenotypeINV4
## S01 1 0 0
## S02 1 0 0
## S03 1 0 0
## S04 1 0 0
## S05 1 1 1
## S06 1 1 1
# Calculate normalization factors
y_filtered_bySample <- calcNormFactors(y_filtered_bySample)
cat("\nNormalization factors calculated\n")
##
## Normalization factors calculated
cat("Norm factors summary:\n")
## Norm factors summary:
print(summary(y_filtered_bySample$samples$norm.factors))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.5275 0.8574 1.0529 1.0237 1.1859 1.5273
# Apply voom transformation
voomR <- voom(y_filtered_bySample, design = design, plot = TRUE)
# Fit linear model
fit <- lmFit(voomR)
ebfit <- eBayes(fit)
cat("\nModel fitted successfully\n")
##
## Model fitted successfully
cat("Number of lipids:", nrow(ebfit$coefficients), "\n")
## Number of lipids: 137
# Extract results for coefficients 8-11
# Coef 8: leaf_tissue
# Coef 9: Treatment-P
# Coef 10: GenotypeINV4M
# Coef 11: Treatment-P:GenotypeINV4M
r <- list()
for (x in 8:11) {
# Get top table for coefficient
r[[as.character(x)]] <- topTable(
ebfit,
coef = x,
sort.by = 'none',
n = Inf
)
# Calculate confidence intervals
cr <- qt(0.975, ebfit$df.residual + ebfit$df.prior) *
ebfit$stdev.unscaled[, x] * sqrt(ebfit$s2.post)
r[[as.character(x)]][["upper"]] <- r[[as.character(x)]][["logFC"]] + cr
r[[as.character(x)]][["lower"]] <- r[[as.character(x)]][["logFC"]] - cr
}
# Assign predictor names and lipid IDs
r$`8`["predictor"] <- "leaf"
r$`8`["Response"] <- rownames(counts)
r$`9`["predictor"] <- "-P"
r$`9`["Response"] <- rownames(counts)
r$`10`["predictor"] <- "Inv4m"
r$`10`["Response"] <- rownames(counts)
r$`11`["predictor"] <- "-P:Inv4m"
r$`11`["Response"] <- rownames(counts)
# Combine all results
effects <- rbind(r$`8`, r$`9`, r$`10`, r$`11`)
cat("\nTotal number of tests:", nrow(effects), "\n")
##
## Total number of tests: 548
# Define predictor order
effect_order <- c("leaf", "-P", "Inv4m", "-P:Inv4m")
# Apply predictor-specific fold change thresholds
effects <- effects %>%
mutate(predictor = factor(predictor, levels = effect_order)) %>%
mutate(neglogP = -log10(adj.P.Val)) %>%
mutate(is_significant = adj.P.Val < 0.05) %>%
mutate(upregulated = case_when(
(predictor != "leaf") & (logFC >= 2) & is_significant ~ TRUE,
(predictor == "leaf") & (logFC >= 0.7) & is_significant ~ TRUE,
.default = FALSE
)) %>%
mutate(downregulated = case_when(
(predictor != "leaf") & (logFC <= -2) & is_significant ~ TRUE,
(predictor == "leaf") & (logFC <= -0.7) & is_significant ~ TRUE,
.default = FALSE
)) %>%
mutate(
regulation = case_when(
is_significant & upregulated ~ "Upregulated",
is_significant & downregulated ~ "Downregulated",
.default = "Unregulated"
),
is_DEG = is_significant & regulation != "Unregulated",
label = ifelse(is_DEG, Response, "")
)
# Format labels for plotting
effects$label <- sub("_", "", effects$label, perl = TRUE)
effects$label <- sub("_", ":", effects$label, perl = TRUE)
# Summary of differential lipids
cat("\nDifferentially expressed lipids per predictor:\n")
##
## Differentially expressed lipids per predictor:
print(with(effects, table(predictor, regulation)))
## regulation
## predictor Downregulated Unregulated Upregulated
## leaf 4 124 9
## -P 9 122 6
## Inv4m 0 137 0
## -P:Inv4m 0 137 0
# Custom key glyph for legend
draw_key_custom <- function(data, params, size) {
colour <- data$colour
data <- data[data$colour == colour, ]
grid::segmentsGrob(
0, 1 / 2, 1, 1 / 2,
gp = grid::gpar(
col = data$colour,
lwd = data$linewidth * 2
)
)
}
volcano_all <- effects %>%
filter(predictor != "-P:Inv4m") %>%
droplevels() %>%
inner_join(sp, by = c(Response = "colname")) %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
ylim(0, 10) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Not significant", "Upregulated")
) +
geom_text_repel(
force = 3,
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("darkblue", "orange2", "darkred"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5)),
fill = "none"
) +
theme_classic2(base_size = 25) +
theme(
plot.title = element_blank(),
strip.text = element_markdown(),
legend.position = c(0.5, 0.95),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 15),
legend.direction = "horizontal",
strip.background = element_blank()
)
print(volcano_all)
This focused plot excludes Inv4m results to better visualize differential lipids in response to leaf tissue position and phosphorus deficiency.
volcano_leaf_p <- effects %>%
filter(predictor %in% c("leaf", "-P")) %>%
droplevels() %>%
inner_join(sp, by = c(Response = "colname")) %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
ylim(0, 10) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Not significant", "Upregulated")
) +
geom_text_repel(
force = 3,
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("darkblue", "orange2", "darkred"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
facet_wrap(. ~ predictor, scales = "free_x", ncol = 2) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5)),
fill = "none"
) +
theme_classic2(base_size = 25) +
theme(
plot.title = element_blank(),
strip.text = element_markdown(),
legend.position = c(0.5, 0.95),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 15),
legend.direction = "horizontal",
strip.background = element_blank()
)
print(volcano_leaf_p)
# Filter significant effects and prepare for plotting
to_plot <- effects %>%
mutate(Response = gsub("_glyco", "", Response)) %>%
mutate(predictor = factor(
predictor,
levels = c("leaf", "-P", "Inv4m", "-P:Inv4m")
)) %>%
filter(P.Value < 0.05) %>%
inner_join(sp, by = c(Response = "colname")) %>%
mutate(sign = sign(logFC)) %>%
ungroup() %>%
group_by(Response) %>%
mutate(max_effect = max(abs(logFC))) %>%
ungroup() %>%
group_by(predictor, head_group, class) %>%
mutate(head_group = fct_reorder(head_group, adj.P.Val)) %>%
ungroup() %>%
group_by(predictor) %>%
arrange(predictor, head_group, class, adj.P.Val) %>%
mutate(.r = row_number()) %>%
ungroup()
cat("\nSignificant effects per predictor:\n")
##
## Significant effects per predictor:
print(with(to_plot, table(predictor, head_group)))
## head_group
## predictor glycolipid neutral phospholipid
## leaf 20 15 32
## -P 15 10 28
## Inv4m 5 4 11
## -P:Inv4m 6 9 9
pd <- position_dodge(0.4)
effect_plot_all <- to_plot %>%
droplevels() %>%
mutate(Response = fct_reorder(Response, .r)) %>%
ungroup() %>%
ggplot(aes(
x = logFC,
y = Response,
col = head_group
)) +
xlab("Effect (log2 Fold Change)") +
geom_vline(xintercept = 0, lty = 2) +
geom_point(position = pd, size = 3) +
geom_errorbar(
aes(xmin = upper, xmax = lower),
position = pd,
width = 0.2,
size = 0.7
) +
guides(
shape = guide_legend(title = NULL),
color = guide_legend(reverse = TRUE)
) +
facet_wrap(. ~ predictor, scales = "free", ncol = 4) +
scale_y_discrete(limits = rev) +
scale_color_manual(values = c("blue", "red", "gold")) +
theme_light(base_size = 15) +
theme(
legend.position = "top",
strip.background = element_rect(fill = "white"),
strip.text = element_text(color = "black", size = 15),
axis.title.y = element_blank(),
axis.text.y = element_text(hjust = 0, face = "bold"),
plot.caption = element_text(hjust = 0)
)
print(effect_plot_all)
# Create separate plots for leaf and -P
response_leaf <- to_plot %>%
filter(predictor == "leaf") %>%
droplevels() %>%
mutate(Response = fct_reorder(Response, .r)) %>%
ungroup() %>%
ggplot(aes(
x = logFC,
y = Response,
col = head_group
)) +
xlab("Effect (log2 Fold Change)") +
geom_vline(xintercept = 0, lty = 2) +
geom_point(position = pd, size = 3) +
geom_errorbar(
aes(xmin = upper, xmax = lower),
position = pd,
width = 0.2,
size = 0.7
) +
guides(
shape = guide_legend(title = NULL),
color = guide_legend(reverse = TRUE)
) +
facet_wrap(. ~ predictor, scales = "free") +
scale_y_discrete(limits = rev) +
scale_color_manual(values = c("blue", "red", "gold")) +
theme_light(base_size = 15) +
theme(
legend.position = "top",
strip.background = element_rect(fill = "white"),
strip.text = element_text(color = "black", size = 15),
axis.title.y = element_blank(),
axis.text.y = element_text(hjust = 0, face = "bold"),
plot.caption = element_text(hjust = 0)
)
response_p <- to_plot %>%
filter(predictor == "-P") %>%
droplevels() %>%
mutate(Response = fct_reorder(Response, .r)) %>%
ungroup() %>%
ggplot(aes(
x = logFC,
y = Response,
col = head_group
)) +
xlab("Effect (log2 Fold Change)") +
geom_vline(xintercept = 0, lty = 2) +
geom_point(position = pd, size = 3) +
geom_errorbar(
aes(xmin = upper, xmax = lower),
position = pd,
width = 0.2,
size = 0.7
) +
guides(
shape = guide_legend(title = NULL),
color = guide_legend(reverse = TRUE)
) +
facet_wrap(. ~ predictor, scales = "free") +
scale_y_discrete(limits = rev) +
scale_color_manual(values = c("blue", "gold","red")) +
theme_light(base_size = 15) +
theme(
legend.position = "top",
strip.background = element_rect(fill = "white"),
strip.text = element_text(color = "black", size = 15),
axis.title.y = element_blank(),
axis.text.y = element_text(hjust = 0, face = "bold"),
plot.caption = element_text(hjust = 0)
)
ggarrange(response_leaf, response_p, ncol = 2, common.legend = TRUE)
# Export differential expression results
write.csv(
effects,
file = "~/Desktop/lipid_differential_expression_results.csv",
row.names = FALSE
)
# Save volcano plots
ggsave(
volcano_all,
file = "~/Desktop/lipid_volcano_all.svg",
height = 6,
width = 12
)
ggsave(
volcano_leaf_p,
file = "~/Desktop/lipid_volcano_leaf_P.svg",
height = 6,
width = 10
)
# Save effect plots
ggsave(
effect_plot_all,
file = "~/Desktop/lipid_effects_all.svg",
height = 10,
width = 12
)
# Count differential lipids by class and regulation
summary_table <- effects %>%
filter(is_DEG) %>%
inner_join(sp, by = c(Response = "colname")) %>%
group_by(predictor, head_group, regulation) %>%
summarise(n_lipids = n(), .groups = "drop") %>%
arrange(predictor, head_group, regulation)
print(summary_table)
## # A tibble: 9 × 4
## predictor head_group regulation n_lipids
## <fct> <chr> <chr> <int>
## 1 leaf glycolipid Downregulated 1
## 2 leaf glycolipid Upregulated 3
## 3 leaf neutral Downregulated 1
## 4 leaf neutral Upregulated 4
## 5 leaf phospholipid Downregulated 2
## 6 leaf phospholipid Upregulated 2
## 7 -P glycolipid Upregulated 3
## 8 -P neutral Upregulated 3
## 9 -P phospholipid Downregulated 9
# Significant lipids per predictor
sig_summary <- effects %>%
group_by(predictor) %>%
summarise(
total_tested = n(),
n_significant = sum(is_significant),
n_upregulated = sum(upregulated),
n_downregulated = sum(downregulated),
pct_significant = round(100 * n_significant / total_tested, 1)
)
print(sig_summary)
## # A tibble: 4 × 6
## predictor total_tested n_significant n_upregulated n_downregulated
## <fct> <int> <int> <int> <int>
## 1 leaf 137 53 9 4
## 2 -P 137 29 6 9
## 3 Inv4m 137 0 0 0
## 4 -P:Inv4m 137 0 0 0
## # ℹ 1 more variable: pct_significant <dbl>
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggtext_0.1.2 ggpubr_0.6.2 ggrepel_0.9.6 forcats_1.0.1 tidyr_1.3.1
## [6] dplyr_1.1.4 ggplot2_4.0.0 edgeR_4.7.6 limma_3.65.7
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.6 sass_0.4.10 generics_0.1.4 rstatix_0.7.3
## [5] xml2_1.4.0 stringi_1.8.7 lattice_0.22-7 digest_0.6.37
## [9] magrittr_2.0.4 evaluate_1.0.5 grid_4.5.1 RColorBrewer_1.1-3
## [13] fastmap_1.2.0 jsonlite_2.0.0 backports_1.5.0 Formula_1.2-5
## [17] gridExtra_2.3 purrr_1.1.0 scales_1.4.0 jquerylib_0.1.4
## [21] abind_1.4-8 cli_3.6.5 rlang_1.1.6 litedown_0.7
## [25] commonmark_2.0.0 cowplot_1.2.0 withr_3.0.2 cachem_1.1.0
## [29] yaml_2.3.10 tools_4.5.1 ggsignif_0.6.4 locfit_1.5-9.12
## [33] broom_1.0.10 vctrs_0.6.5 R6_2.6.1 lifecycle_1.0.4
## [37] stringr_1.5.2 car_3.1-3 pkgconfig_2.0.3 pillar_1.11.1
## [41] bslib_0.9.0 gtable_0.3.6 glue_1.8.0 Rcpp_1.1.0
## [45] statmod_1.5.1 xfun_0.53 tibble_3.3.0 tidyselect_1.2.1
## [49] rstudioapi_0.17.1 knitr_1.50 farver_2.1.2 htmltools_0.5.8.1
## [53] labeling_0.4.3 rmarkdown_2.30 carData_3.0-5 compiler_4.5.1
## [57] S7_0.2.0 markdown_2.0 gridtext_0.1.5