Purpose: Identify differentially abundant lipids across leaf developmental stages and phosphorus treatments using total ion current (TIC) normalization.
Approach: 1. Filter lipids by missingness (<20% missing values) 2. Filter samples by total ion current (library size) 3. Normalize to molar ion fractions (TIC normalization) 4. Apply limma-voom for differential abundance analysis 5. Model spatial and technical covariates
Experimental factors: - Leaf tissue position: Continuous gradient (leaf 1-4) - Phosphorus treatment: High P (+P) vs Low P (-P) - Genotype: Control (CTRL) vs Inversion 4m (INV4M)
Effect size thresholds: - Leaf predictors: |logFC| > 0.5 - Categorical predictors: |logFC| > 1.5
library(edgeR)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyr)
library(forcats)
library(ggrepel)
library(ggpubr)
library(ggtext)
Purpose: Remove internal standards and odd-chain lipids from analysis.
internal_standards <- c(
"CUDA", "LPE_17_1", "Sphingosine_17_1", "PC_25_0", "DG_18_1",
"PG_34_0", "DG_12_0", "TG_17_0", "LPC_17_0", "PE_34", "Cholesterol"
)
odd_carbon <- c("PG_17_0", "SM_35_1", "TG_57_6")
to_exclude <- c(internal_standards, odd_carbon)
Purpose: Load MS-DIAL peak areas with internal standard integration.
lipid_csv <- "../data/PSU_RawData_MSDial_NewStdInt_240422.csv"
raw <- read.csv(lipid_csv, na.strings = c("", "#N/A", "NA", "Inf")) %>%
select(-all_of(to_exclude[to_exclude %in% colnames(.)])) %>%
filter(!grepl("Methanol|Pool", sampleID))
cat("Loaded samples:", nrow(raw), "\n")
## Loaded samples: 43
Purpose: Merge phenotypic data with RNA-seq metadata and MS injection order.
plant_csv <- "/Users/fvrodriguez/Desktop/Desktop/22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv"
psu <- read.csv(plant_csv) %>%
dplyr::rename(Genotype = "Who.What", rowid = "P22.") %>%
filter(Genotype %in% c("CTRL", "INV4M")) %>%
droplevels()
sampleInfo <- read.csv('../data/inv4mRNAseq_metadata.csv') %>%
dplyr::rename(Genotype = genotype, rowid = row)
sampleInfo <- psu %>%
select(rowid, Rep, Plot_Row, Plot_Column, DTA, DTS) %>%
inner_join(sampleInfo) %>%
filter(!is.na(DTA))
sampleInfo$leaf_group <- NA
sampleInfo$leaf_group[sampleInfo$leaf_tissue < 3] <- "apical"
sampleInfo$leaf_group[sampleInfo$leaf_tissue > 2] <- "basal"
sampleInfo$tube <- gsub("L|R", "S", sampleInfo$tube, perl = TRUE)
ms_order <- read.csv("../data/PSU-PHO22_ms_order.csv")
ms_order$tube <- gsub("L|R", "S", ms_order$top_tag, perl = TRUE)
sampleInfo <- sampleInfo %>%
dplyr::right_join(ms_order) %>%
distinct()
cat("Sample info loaded:", nrow(sampleInfo), "samples\n")
## Sample info loaded: 53 samples
Purpose: Combine MS peak areas with sample metadata.
raw$tube <- gsub(".*_|.raw", "", raw$sampleID, perl = TRUE)
raw$tube <- gsub("L|R", "S", raw$tube, perl = TRUE)
pheno <- sampleInfo %>%
select(rowid, tube, Rep, Plot:Plot_Row, Treatment:leaf_tissue,
leaf_group, injection_order) %>%
inner_join(raw %>% select(tube, DGDG_34_0:TG_58_5)) %>%
mutate(
block = as.factor(Rep),
Treatment = factor(Treatment, levels = c("High_P", "Low_P"))
) %>%
pivot_wider(
names_from = Rep,
values_from = Rep,
names_prefix = "block_",
values_fn = function(x) 1,
values_fill = 0
) %>%
select(rowid, tube:Plot_Row, block, block_6:block_12,
injection_order, everything())
levels(pheno$Treatment) <- c("+P", "-P")
cat("Final dataset:", nrow(pheno), "samples\n")
## Final dataset: 43 samples
Purpose: Annotate lipids by head group and class for downstream analysis.
m <- pheno %>%
select(DGDG_34_0:TG_58_5) %>%
as.matrix()
head_group <- c(
DG = "neutral", DGDG = "glycolipid", DGGA = "glycolipid",
LPC = "phospholipid", LPE = "phospholipid", MGDG = "glycolipid",
PC = "phospholipid", PE = "phospholipid", PG = "phospholipid",
PI = "phospholipid", SQDG = "glycolipid", TG = "neutral"
)
sp <- data.frame(
colname = colnames(m),
class = gsub("_.*", "", colnames(m), perl = TRUE)
)
sp$head_group <- head_group[sp$class]
cat("\nLipid class distribution:\n")
##
## Lipid class distribution:
print(with(sp, table(head_group, class)))
## class
## head_group DG DGDG DGGA LPC LPE MGDG PC PE PG PI SQDG TG
## glycolipid 0 12 9 0 0 8 0 0 0 0 9 0
## neutral 22 0 0 0 0 0 0 0 0 0 0 25
## phospholipid 0 0 0 6 3 0 19 12 6 4 0 0
Purpose: Prepare lipid abundance matrix for filtering and normalization.
counts <- t(m)
colnames(counts) <- pheno$tube
cat("Count matrix dimensions:", dim(counts), "\n")
## Count matrix dimensions: 135 43
cat("Lipids:", nrow(counts), "\n")
## Lipids: 135
cat("Samples:", ncol(counts), "\n")
## Samples: 43
hist(counts,
main = "Distribution of Raw Lipid Abundances",
xlab = "Peak Area",
breaks = 50,
col = "lightblue")
Purpose: Identify lipids with excessive missing values for removal.
Approach: Calculate proportion of missing/zero values per lipid and set threshold at 20%.
missing_per_lipid <- rowSums(is.na(counts) | counts == 0) / ncol(counts)
cat("Missing data summary:\n")
## Missing data summary:
print(summary(missing_per_lipid))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.00000 0.00000 0.01585 0.00000 0.27907
hist(missing_per_lipid * 100,
main = "Missing Data Distribution Across Lipids",
xlab = "% Missing",
breaks = 30,
col = "coral")
abline(v = 20, col = "red", lwd = 2, lty = 2)
Purpose: Remove lipids with >20% missing values across samples.
missing_threshold <- 0.20
keep_lipids <- missing_per_lipid < missing_threshold
cat("Lipid filtering (< 20% missing):\n")
## Lipid filtering (< 20% missing):
cat(" Before:", nrow(counts), "\n")
## Before: 135
cat(" After:", sum(keep_lipids), "\n")
## After: 132
cat(" Removed:", sum(!keep_lipids), "\n")
## Removed: 3
counts_filtered <- counts[keep_lipids, ]
Purpose: Ensure sample metadata matches filtered count matrix.
sampleNames <- colnames(counts_filtered)
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]
sampleInfo_matched$Treatment <- factor(
pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
levels = c("+P", "-P")
)
cat("Treatment levels:\n")
## Treatment levels:
print(table(sampleInfo_matched$Treatment))
##
## +P -P
## 16 27
Purpose: Identify samples with insufficient total ion current for removal.
Approach: Calculate total peak area per sample (library size) and visualize distribution to determine filtering threshold.
genes <- data.frame(gene = rownames(counts_filtered))
y_raw <- DGEList(counts = counts_filtered, samples = sampleInfo_matched)
y_raw$group <- interaction(y_raw$samples$Treatment, y_raw$samples$Genotype)
cat("Library size summary (before filtering):\n")
## Library size summary (before filtering):
print(summary(y_raw$samples$lib.size))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.026e+09 4.078e+10 5.495e+10 5.752e+10 7.071e+10 1.110e+11
hist(
y_raw$samples$lib.size / 1e6,
main = "Library Size Distribution",
xlab = "Total Ion Current (millions)",
breaks = 30,
col = "lightgreen"
)
lib_size_threshold <- quantile(y_raw$samples$lib.size, 0.10)
abline(v = lib_size_threshold / 1e6, col = "red", lwd = 2, lty = 2)
cat("Suggested threshold (10th percentile):", lib_size_threshold, "\n")
## Suggested threshold (10th percentile): 28898886284
Purpose: Remove samples with low total ion current that may compromise data quality.
Expected outcome: Retain samples with sufficient signal for reliable quantification.
# min_lib_size <- 3e10
min_lib_size <- 1
keep_samples <- y_raw$samples$lib.size >= min_lib_size
cat("Sample filtering (TIC >=", min_lib_size, "):\n")
## Sample filtering (TIC >= 1 ):
cat(" Before:", ncol(counts_filtered), "\n")
## Before: 43
cat(" After:", sum(keep_samples), "\n")
## After: 43
cat(" Removed:", sum(!keep_samples), "\n")
## Removed: 0
if (sum(!keep_samples) > 0) {
cat("\nRemoved samples:\n")
print(y_raw$samples[!keep_samples,
c("tube", "lib.size", "Treatment",
"Genotype", "leaf_tissue")])
}
y <- y_raw[, keep_samples]
y$samples <- droplevels(y$samples)
Purpose: Visualize sample relationships colored by library size to assess filtering quality.
mds_all <- plotMDS(y_raw, plot = FALSE)
mds_qc_data <- y_raw$samples %>%
mutate(
dim1 = mds_all$x,
dim2 = mds_all$y,
lib_size = lib.size / 1e6,
filtered_out = !keep_samples
)
ggplot(mds_qc_data,
aes(x = dim1, y = dim2,
color = lib_size,
shape = filtered_out)) +
geom_point(size = 4) +
scale_color_viridis_c(option = "plasma") +
scale_shape_manual(
values = c("TRUE" = 4, "FALSE" = 19),
labels = c("Kept", "Removed")
) +
labs(
x = paste0("Dim1 (", round(100 * mds_all$var.explained[1]), "%)"),
y = paste0("Dim2 (", round(100 * mds_all$var.explained[2]), "%)"),
color = "Library Size\n(millions)",
shape = "Sample Status",
title = "MDS: Library Size Quality Control"
) +
theme_classic(base_size = 15)
Purpose: Assess technical drift associated with MS injection order.
ggplot(mds_qc_data %>% filter(!filtered_out),
aes(x = dim1, y = dim2,
fill = injection_order,
shape = Treatment)) +
geom_point(size = 4, color ="transparent") +
scale_fill_viridis_c(option = "plasma") +
scale_shape_manual(values = c(21, 25)) +
labs(
x = paste0("Dim1 (", round(100 * mds_all$var.explained[1]), "%)"),
y = paste0("Dim2 (", round(100 * mds_all$var.explained[2]), "%)"),
color = "Injection Order",
title = "MDS: Injection Order Effect"
) +
theme_classic(base_size = 15)
Purpose: Normalize lipid abundances to total ion current (TIC), converting raw peak areas to molar ion fractions.
Approach: For each sample, divide each lipid’s peak area by the total peak area. This accounts for differences in total signal between samples and expresses abundances as proportions.
Expected outcome: Molar ion fractions sum to 1.0 per sample, representing each lipid’s proportion of the total detected lipid pool.
cat("=== Calculating Molar Ion Fractions ===\n")
## === Calculating Molar Ion Fractions ===
raw_counts <- y$counts
molar_ion_fractions <- sweep(raw_counts, 2, colSums(raw_counts), FUN = "/")
cat("Molar ion fraction range:", range(molar_ion_fractions), "\n")
## Molar ion fraction range: 0 0.3637007
cat("Sample sums (should be 1):",
unique(round(colSums(molar_ion_fractions), 10)), "\n")
## Sample sums (should be 1): 1
Purpose: Convert molar ion fractions to count-like scale for voom transformation.
Approach: Multiply fractions by 1e9 to create numerically stable values that voom can process while maintaining the TIC normalization.
scaled_fractions <- molar_ion_fractions * 1e9
y_tic <- DGEList(counts = scaled_fractions, samples = y$samples)
y_tic$samples$norm.factors <- 1
cat("Scaled molar ion fractions created\n")
## Scaled molar ion fractions created
cat("Library size summary (should be ~1e9):\n")
## Library size summary (should be ~1e9):
print(summary(y_tic$samples$lib.size))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1e+09 1e+09 1e+09 1e+09 1e+09 1e+09
Purpose: Reduce dimensionality to visualize major sources of variation in lipid profiles.
Approach: Apply multidimensional scaling to TIC-normalized data, extracting first 4 dimensions.
mds <- plotMDS(y_tic, plot = FALSE)
d <- y_tic$samples
d$dim1 <- mds$x
d$dim2 <- mds$y
d$dim3 <- mds$eigen.vectors[, 3] * sqrt(mds$var.explained[3])
d$dim4 <- mds$eigen.vectors[, 4] * sqrt(mds$var.explained[4])
d$Treatment <- factor(d$Treatment)
d$Genotype <- factor(d$Genotype)
cat("Variance explained by MDS dimensions:\n")
## Variance explained by MDS dimensions:
print(tibble(
dimension = paste("Dim", 1:4),
var_explained = round(mds$var.explained[1:4], 4)
))
## # A tibble: 4 × 2
## dimension var_explained
## <chr> <dbl>
## 1 Dim 1 0.337
## 2 Dim 2 0.170
## 3 Dim 3 0.116
## 4 Dim 4 0.0753
Purpose: Identify which experimental and technical factors drive major axes of variation.
p1 <- ggplot(d, aes(x = dim2, y = dim3, color = Treatment)) +
geom_point(size = 3) +
theme_classic(base_size = 15) +
ggtitle("Treatment")
p2 <- ggplot(d, aes(x = dim2, y = dim3, color = injection_order,
shape = Treatment)) +
geom_point(size = 3) +
theme_classic(base_size = 15) +
ggtitle("Injection Order")
p3 <- ggplot(d, aes(x = dim2, y = dim3, color = decimal_time)) +
geom_point(size = 3) +
theme_classic(base_size = 15) +
ggtitle("Collection Time")
p4 <- ggplot(d, aes(x = dim2, y = dim3, color = COLLECTOR)) +
geom_point(size = 3) +
theme_classic(base_size = 15) +
ggtitle("Collector")
p5 <- ggplot(d, aes(x = dim2, y = dim3, color = Genotype)) +
geom_point(size = 3) +
theme_classic(base_size = 15) +
ggtitle("Genotype")
p6 <- d %>%
mutate(leaf = factor(leaf_tissue)) %>%
ggplot(aes(x = dim2, y = dim3, color = leaf)) +
geom_point(size = 3) +
theme_classic(base_size = 15) +
ggtitle("Leaf Tissue")
ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)
Purpose: Publication-quality MDS showing main biological factors.
base_size <- 30
lipid_MDS_23 <- d %>%
mutate(leaf = factor(leaf_tissue)) %>%
ggplot(aes(x = dim2, y = dim3)) +
ggtitle("MDS Lipids") +
xlab(paste0("dim2 (", round(100 * mds$var.explained[2]), "%)")) +
ylab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
geom_point(aes(fill = leaf, shape = Treatment), size = 4) +
scale_fill_viridis_d() +
scale_shape_manual(values = c(21, 25)) +
guides(
shape = guide_legend(
title = element_blank(),
override.aes = list(fill = "black"),
reverse=TRUE
),
fill = guide_legend(
title = "Leaf",
order = 2,
override.aes = list(geom = "point", shape = 22, size = 7)
)
) +
theme_classic2(base_size = base_size) +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_blank(), # Remove x-axis labels
axis.ticks.x = element_blank(), # Remove x-axis ticks
axis.text.y = element_blank(), # Remove y-axis labels
axis.ticks.y = element_blank(), # Remove y-axis ticks
# legend.position = "left",
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "in"),
legend.background = element_rect(fill = "transparent")
)
saveRDS(lipid_MDS_23 , "../results/lipid_MDS_23.rds")
cat("MDS plots saved to output/\n")
## MDS plots saved to output/
print(lipid_MDS_23)
d %>%
ggplot(aes(x = dim3, y = dim4, fill = leaf_tissue, shape = Treatment)) +
xlab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
ylab(paste0("dim4 (", round(100 * mds$var.explained[4]), "%)")) +
geom_point(size = 4) +
scale_fill_viridis_c() +
scale_shape_manual(values = c(21, 25)) +
guides(
shape = guide_legend(
title = element_blank(),
override.aes = list(fill = "black")
),
fill = guide_legend(
title = "Leaf",
order = 2,
override.aes = list(geom = "point", shape = 22, size = 7, reverse = TRUE)
)
) +
theme_classic2(base_size = 25) +
theme(
legend.box = "horizontal",
legend.position = c(0.9, 0.8),
legend.text = element_markdown(),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line")
)
Purpose: Quantify relationships between MDS dimensions and experimental factors.
mds_cor_results <- tibble(
dimension = c("Dim2", "Dim3", "Dim4"),
factor = c("Treatment", "Leaf tissue", "Leaf tissue"),
correlation = c(
cor(d$dim2, as.numeric(d$Treatment)),
cor(d$dim3, as.numeric(d$leaf_tissue)),
cor(d$dim4, as.numeric(d$leaf_tissue))
),
p_value = c(
cor.test(d$dim2, as.numeric(d$Treatment))$p.value,
cor.test(d$dim3, d$leaf_tissue)$p.value,
cor.test(d$dim4, d$leaf_tissue)$p.value
)
) %>%
mutate(
adj_p_value = p.adjust(p_value, method = "fdr"),
correlation = round(correlation, 3),
p_value = signif(p_value, 3),
adj_p_value = signif(adj_p_value, 3)
)
print(mds_cor_results)
## # A tibble: 3 × 5
## dimension factor correlation p_value adj_p_value
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Dim2 Treatment 0.54 0.000185 0.000277
## 2 Dim3 Leaf tissue -0.733 0.0000000233 0.00000007
## 3 Dim4 Leaf tissue 0.346 0.0231 0.0231
Purpose: Construct design matrix incorporating biological factors and technical covariates.
Approach: Model includes: - Spatial covariates: Plot_Row (field position) - Technical covariate: injection_order (MS drift) - Biological factors: leaf_tissue (centered), Treatment, Genotype - Interactions: leaf_tissue × Treatment, Genotype × Treatment, leaf_tissue × Genotype
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
d$leaf_tissue_c <- scale(d$leaf_tissue, center = TRUE, scale = FALSE)
design <- model.matrix(
~ Plot_Row + injection_order + leaf_tissue_c * Treatment +
Genotype * Treatment + leaf_tissue_c * Genotype,
d
)
cat("Design matrix dimensions:", dim(design), "\n")
## Design matrix dimensions: 43 9
cat("\nCoefficients in model:\n")
##
## Coefficients in model:
print(colnames(design))
## [1] "(Intercept)" "Plot_Row"
## [3] "injection_order" "leaf_tissue_c"
## [5] "Treatment-P" "GenotypeINV4"
## [7] "leaf_tissue_c:Treatment-P" "Treatment-P:GenotypeINV4"
## [9] "leaf_tissue_c:GenotypeINV4"
Purpose: Model mean-variance relationship and calculate precision weights for linear modeling.
Approach: Apply voom without additional normalization since data is already TIC-normalized.
voomR <- voom(y_tic, design = design, normalize.method = "none", plot = TRUE)
cat("Voom transformation complete\n")
## Voom transformation complete
Purpose: Account for within-block correlation from field design.
Approach: Define blocks as Treatment × Plot_Column interaction and estimate consensus correlation.
block <- interaction(d$Treatment, d$Plot_Column)
corfit <- duplicateCorrelation(voomR, design, block = block)
cat("Consensus correlation:", corfit$consensus.correlation, "\n")
## Consensus correlation: 0.05080766
Purpose: Fit linear model for each lipid accounting for block structure.
fit <- lmFit(voomR, design, block = block,
correlation = corfit$consensus.correlation)
ebfit <- eBayes(fit)
cat("Model fitted:", nrow(fit$coefficients), "lipids ×",
ncol(fit$coefficients), "coefficients\n")
## Model fitted: 132 lipids × 9 coefficients
cat("\nSignificant lipids per coefficient (FDR < 0.05):\n")
##
## Significant lipids per coefficient (FDR < 0.05):
print(colSums(abs(decideTests(ebfit))))
## (Intercept) Plot_Row
## 121 0
## injection_order leaf_tissue_c
## 42 8
## Treatment-P GenotypeINV4
## 40 0
## leaf_tissue_c:Treatment-P Treatment-P:GenotypeINV4
## 2 0
## leaf_tissue_c:GenotypeINV4
## 0
Purpose: Extract differential abundance statistics for biological predictors of interest.
#' Extract differential expression results for specified predictors
#'
#' @param ebfit An eBayes fitted model object from limma
#' @param predictor_map Named vector mapping coefficient names to display names
#'
#' @return Data frame with DE results for all specified predictors
extract_predictor_effects <- function(ebfit, predictor_map) {
coef_names <- colnames(coef(ebfit))
coef_indices <- match(names(predictor_map), coef_names)
if (any(is.na(coef_indices))) {
missing <- names(predictor_map)[is.na(coef_indices)]
stop("Coefficients not found: ", paste(missing, collapse = ", "),
"\nAvailable: ", paste(coef_names, collapse = ", "))
}
result_list <- lapply(seq_along(coef_indices), function(i) {
idx <- coef_indices[i]
tt <- topTable(ebfit, coef = idx, sort.by = "none", n = Inf)
crit_value <- qt(0.975, ebfit$df.residual + ebfit$df.prior)
std_errors <- ebfit$stdev.unscaled[, idx] * sqrt(ebfit$s2.post)
tt$upper <- tt$logFC + crit_value * std_errors
tt$lower <- tt$logFC - crit_value * std_errors
tt$predictor <- predictor_map[i]
tt$response <- rownames(tt)
tt$std_err <- std_errors
tt
})
results <- do.call(rbind, result_list)
rownames(results) <- NULL
results %>%
select(predictor, response, everything()) %>%
arrange(adj.P.Val)
}
predictor_map <- c(
"leaf_tissue_c" = "Leaf",
"Treatment-P" = "-P",
"leaf_tissue_c:Treatment-P" = "Leaf:-P"
)
cat("Available coefficients:\n")
## Available coefficients:
print(colnames(coef(ebfit)))
## [1] "(Intercept)" "Plot_Row"
## [3] "injection_order" "leaf_tissue_c"
## [5] "Treatment-P" "GenotypeINV4"
## [7] "leaf_tissue_c:Treatment-P" "Treatment-P:GenotypeINV4"
## [9] "leaf_tissue_c:GenotypeINV4"
effects_df <- extract_predictor_effects(ebfit, predictor_map)
cat("\nTotal tests:", nrow(effects_df), "\n")
##
## Total tests: 396
cat("Tests per predictor:\n")
## Tests per predictor:
print(table(effects_df$predictor))
##
## -P Leaf Leaf:-P
## 132 132 132
cat("\nSignificant per predictor (FDR < 0.05):\n")
##
## Significant per predictor (FDR < 0.05):
print(table(effects_df$predictor[effects_df$adj.P.Val < 0.05]))
##
## -P Leaf Leaf:-P
## 40 8 2
Purpose: Apply two-tier classification system for differential lipids.
Approach: - Tier 1: Significant (FDR < 0.05) - Tier 2: High-confidence (significant + effect size threshold)
Effect size thresholds: - Leaf predictors: |logFC| > 0.5 - Categorical predictors: |logFC| > 1.5
#' Classify differential lipids with predictor-specific thresholds
classify_differential_lipids <- function(effects_df) {
leaf_predictors <- c("Leaf", "Leaf:-P")
effects_df %>%
mutate(
is_DL = adj.P.Val < 0.05,
is_hiconf_DL = case_when(
predictor %in% leaf_predictors & is_DL & abs(logFC) > 0.5 ~ TRUE,
predictor == "-P" & is_DL & abs(logFC) > 1.5 ~ TRUE,
.default = FALSE
),
regulation = case_when(
!is_hiconf_DL ~ "Not significant",
logFC > 0 ~ "Upregulated",
logFC < 0 ~ "Downregulated",
TRUE ~ "Not significant"
),
neglogP = -log10(adj.P.Val),
label = if_else(is_DL, response, "")
)
}
effect_order <- c("Leaf", "-P", "Leaf:-P")
effects_df <- effects_df %>%
mutate(predictor = factor(predictor, levels = effect_order)) %>%
classify_differential_lipids()
effects_df$label <- sub("_", "", effects_df$label, perl = TRUE)
effects_df$label <- sub("_", ":", effects_df$label, perl = TRUE)
cat("=== Classification Summary ===\n")
## === Classification Summary ===
cat("Total tests:", nrow(effects_df), "\n\n")
## Total tests: 396
cat("Tier 1 - Significant DLs (FDR < 0.05):\n")
## Tier 1 - Significant DLs (FDR < 0.05):
print(table(effects_df$predictor, effects_df$is_DL))
##
## FALSE TRUE
## Leaf 124 8
## -P 92 40
## Leaf:-P 130 2
cat("\nTier 2 - High-confidence DLs:\n")
##
## Tier 2 - High-confidence DLs:
print(table(effects_df$predictor, effects_df$is_hiconf_DL))
##
## FALSE TRUE
## Leaf 124 8
## -P 105 27
## Leaf:-P 130 2
cat("\nDirection of significant effects:\n")
##
## Direction of significant effects:
print(with(effects_df %>% filter(is_DL),
table(predictor, regulation)))
## regulation
## predictor Downregulated Not significant Upregulated
## Leaf 3 0 5
## -P 12 13 15
## Leaf:-P 1 0 1
write.csv(
effects_df,
file = "~/Desktop/lipid_differential_abundance_TIC_normalized.csv",
row.names = FALSE
)
write.csv(
molar_ion_fractions,
file = "~/Desktop/lipid_molar_ion_fractions.csv",
row.names = TRUE
)
write.csv(
voomR$E,
file = "~/Desktop/voom_log2_TIC_normalized_lipids.csv",
row.names = TRUE
)
cat("=== Analysis Complete ===\n")
cat("Results exported to Desktop\n")
Purpose: Create custom legend key displaying line segments instead of points.
#' Custom key glyph for legend
#'
#' @param data Aesthetic data for the legend key
#' @param params Additional parameters
#' @param size Size of the key
#'
#' @return A grid grob object
draw_key_custom <- function(data, params, size) {
colour <- data$colour
data <- data[data$colour == colour, ]
grid::segmentsGrob(
0, 1 / 2, 1, 1 / 2,
gp = grid::gpar(
col = data$colour,
lwd = data$linewidth * 4
)
)
}
Purpose: Join differential abundance results with lipid class annotations.
plot_data <- effects_df %>%
droplevels() %>%
inner_join(sp, by = c(response = "colname"))
label_data <- plot_data %>%
filter(is_hiconf_DL)
force <- 1
label_size <- 6
base_size <- 30
Purpose: Visualize differential abundance across all three predictors with faceted layout.
Approach: Create volcano plots for Leaf, -P, and Leaf:-P predictors with staged label placement to avoid overlaps. Downregulated lipids labeled on left, upregulated on right.
Expected outcome: Publication-quality three-panel figure showing effect sizes and significance levels with lipid class color coding.
force <- 3
predictor_labels <- c( "Leaf", "-P", "Leaf × P")
names(predictor_labels) <- levels(plot_data$predictor)
lipid_volcano_3 <- plot_data %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Not significant", "Upregulated")
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf", regulation == "Downregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
segment.color = "grey",
min.segment.length = 0,
xlim = c(NA, -0.5),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf", regulation == "Upregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
segment.color = "grey",
min.segment.length = 0,
xlim = c(0.5, NA),
#max.overlaps = 20,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "-P", regulation == "Downregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
direction = "y",
segment.color = "grey",
xlim = c(NA, -5),
#max.overlaps = 0,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "-P", regulation == "Upregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
direction = "y",
segment.color = "grey",
xlim = c(3, NA),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf:-P", !is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
segment.color = "grey",
min.segment.length = 0,
max.overlaps = 2,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("darkblue", "orange2", "darkred"),
labels = c("Glycoglycerolipid", "Neutral lipid", "Glycerophospholipid")
) +
facet_wrap(. ~ predictor,
labeller = labeller(predictor = predictor_labels),
scales = "free_x", ncol = 3) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5)),
fill = "none"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
scale_x_continuous(expand = expansion(mult = c(0.5, 0.3))) +
theme_classic2(base_size = base_size) +
theme(
plot.title = element_blank(),
strip.text = element_markdown(size = 35),
strip.background = element_blank(),
legend.position = c(0.85, 0.95),
legend.background = element_rect(fill = "transparent"),
axis.title.x = element_blank(),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 20),
axis.title.y = element_text(margin = margin(r = 5,"pt")),
# plot.margin = margin(5.5, 5.5, 0, 5.5, "pt"),
)
ggsave(
filename = "~/Desktop/lipid_volcano_3_TIC.png",
plot = lipid_volcano_3,
height = 6,
width = 12,
units = "in",
dpi = 150
)
saveRDS(lipid_volcano_3, "../results/lipid_volcano_3_TIC.rds")
print(lipid_volcano_3)
Purpose: Focused visualization of Leaf and -P effects without interaction term.
Approach: Subset to primary biological predictors and create two-panel layout with optimized label placement.
plot_data_2 <- effects_df %>%
filter(predictor != "Leaf:-P") %>%
droplevels() %>%
inner_join(sp, by = c(response = "colname"))
volcano_2 <- plot_data_2 %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Bellow threshold", "Upregulated")
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf", regulation == "Downregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
segment.color = "grey",
min.segment.length = 0,
xlim = c(NA, -0.5),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf", regulation == "Upregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
segment.color = "grey",
min.segment.length = 0,
xlim = c(0.5, NA),
#max.overlaps = 20,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "-P", regulation == "Downregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
direction = "y",
segment.color = "grey",
xlim = c(NA, -5),
#max.overlaps = 0,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = label_data %>%
filter(predictor == "-P", regulation == "Upregulated",
!is.na(label), label != ""),
force = force,
fontface = "bold",
size = label_size,
direction = "y",
segment.color = "grey",
xlim = c(3, NA),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("darkblue", "orange2", "darkred"),
labels = c("Glycoglycerolipid", "Neutral lipid", "Glycerophospholipid")
) +
facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5),
reverse = TRUE),
fill = "none"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.3))) +
scale_x_continuous(expand = expansion(mult = c(0.3, 0.02))) +
theme_classic2(base_size = base_size) +
theme(
plot.title = element_blank(),
strip.text = element_markdown(size = 35),
strip.background = element_blank(),
axis.title = element_text(size = 25),
legend.position = c(0.19, 0.99),
legend.background = element_rect(fill = "transparent"),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 20),
)
print(volcano_2)
saveRDS(volcano_2, "../results/lipid_volcano_2_TIC.rds")
ggsave(
filename = "~/Desktop/lipid_volcano_2_TIC.png",
plot = volcano_2,
height = 6,
width = 10,
units = "in",
dpi = 150
)
Purpose: Highlight lipids showing differential response to phosphorus treatment across leaf developmental stages.
Approach: Isolate Leaf:-P interaction term and create single volcano plot without x-axis label restrictions.
plot_data_leafP <- effects_df %>%
filter(predictor == "Leaf:-P") %>%
droplevels() %>%
inner_join(sp, by = c(response = "colname"))
volcano_leafP <- plot_data_leafP %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
ylim(0, 5) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Bellow threshold", "Upregulated")
) +
geom_text_repel(
data = plot_data_leafP %>%
filter(regulation == "Downregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size = 7,
segment.color = "grey",
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
geom_text_repel(
data = plot_data_leafP %>%
filter(regulation == "Upregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size = 7,
segment.color = "grey",
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("orange2", "darkred", "darkblue"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5),
reverse = TRUE),
fill = "none"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.3))) +
theme_classic2(base_size = base_size) +
theme(
plot.title = element_blank(),
strip.text = element_markdown(size = 35),
legend.position = "none",
legend.background = element_rect(fill = "transparent"),
axis.title = element_text(size = 25),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 20),
axis.title.y = element_text(margin = margin(r = -5)),
strip.background = element_blank()
)
print(volcano_leafP)
ggsave(
filename = "~/Desktop/lipid_volcano_leafP_TIC.png",
plot = volcano_leafP,
height = 7,
width = 7,
units = "in",
dpi = 150
)
write.csv(
label_data,
file = "~/Desktop/high_confidence_DLs_TIC.csv",
row.names = FALSE
)
cat("Volcano plots saved:\n")
## Volcano plots saved:
cat(" - lipid_volcano_3_TIC.png (3-panel, 12×6 in, 300 dpi)\n")
## - lipid_volcano_3_TIC.png (3-panel, 12×6 in, 300 dpi)
cat(" - lipid_volcano_2_TIC.png (2-panel, 5×3 in, 150 dpi)\n")
## - lipid_volcano_2_TIC.png (2-panel, 5×3 in, 150 dpi)
cat(" - lipid_volcano_leafP_TIC.png (1-panel, 7×7 in, 150 dpi)\n")
## - lipid_volcano_leafP_TIC.png (1-panel, 7×7 in, 150 dpi)
cat(" - high_confidence_DLs_TIC.csv\n")
## - high_confidence_DLs_TIC.csv
Purpose: Extract high-confidence lipids showing significant interaction between leaf developmental stage and phosphorus treatment.
interaction_lipids <- effects_df %>%
filter(predictor == "Leaf:-P", is_hiconf_DL) %>%
arrange(adj.P.Val)
cat("High-confidence Leaf:-P interaction lipids:",
nrow(interaction_lipids), "\n")
## High-confidence Leaf:-P interaction lipids: 2
print(interaction_lipids %>%
select(response, logFC, adj.P.Val, regulation))
## response logFC adj.P.Val regulation
## 1 PC_34_2 -0.5753961 0.008235879 Downregulated
## 2 TG_54_9 1.2513465 0.008235879 Upregulated
interaction_lipid_ids <- interaction_lipids$response
Purpose: Convert analysis-ready data back to biologically interpretable units for visualization.
Approach: The statistical analysis uses voom-normalized log2(CPM) values. For visualization, we convert these back to total ion current (TIC) fractions, which represent each lipid’s proportion of the total detected lipid pool.
Three possible input formats: 1. Raw counts: Peak areas directly from MS-DIAL 2. TIC fractions: Proportions summing to 1.0 per sample (molar ion fractions) 3. log2(CPM): Voom-normalized values used in statistical models
cat("=== Available Data Formats ===\n")
## === Available Data Formats ===
cat("1. Raw counts (y$counts): Peak areas from MS detector\n")
## 1. Raw counts (y$counts): Peak areas from MS detector
cat("2. TIC fractions: Proportions of total signal per sample\n")
## 2. TIC fractions: Proportions of total signal per sample
cat("3. log2(CPM) (voomR$E): Voom-normalized for statistics\n\n")
## 3. log2(CPM) (voomR$E): Voom-normalized for statistics
cat("Statistical analysis used:", class(voomR$E), "\n")
## Statistical analysis used: matrix array
cat("For visualization, we convert to TIC fractions\n")
## For visualization, we convert to TIC fractions
Purpose: Provide explicit functions to convert between data formats.
#' Convert raw counts to TIC fractions
#'
#' @param count_matrix Matrix with raw peak areas (lipids × samples)
#'
#' @return Matrix of TIC fractions (0-1 scale)
counts_to_TIC_fraction <- function(count_matrix) {
TIC_fractions <- sweep(count_matrix, 2, colSums(count_matrix), FUN = "/")
sample_sums <- colSums(TIC_fractions)
if (!all(abs(sample_sums - 1) < 1e-10)) {
warning("TIC fractions don't sum to 1. Range: ",
paste(range(sample_sums), collapse = " - "))
}
TIC_fractions
}
#' Convert log2(CPM) to TIC fractions
#'
#' @param log2_cpm_matrix Matrix with log2(CPM) values from voom
#' @param pseudo_count Pseudo-count added before log transformation
#'
#' @return Matrix of TIC fractions (0-1 scale)
log2cpm_to_TIC_fraction <- function(log2_cpm_matrix, pseudo_count = 1e-9) {
# Back-transform from log2(CPM)
cpm <- 2^log2_cpm_matrix - pseudo_count
# CPM to counts (proportional, scaling doesn't matter for fractions)
# Since CPM = (counts/library_size) * 1e6
# We can work directly with CPM as pseudo-counts
counts_proxy <- cpm
# Convert to TIC fractions
TIC_fractions <- sweep(counts_proxy, 2, colSums(counts_proxy), FUN = "/")
TIC_fractions
}
#' Convert TIC fractions to log10 for visualization
#'
#' @param TIC_fraction_matrix Matrix of TIC fractions (0-1 scale)
#' @param offset Small value to avoid log(0)
#'
#' @return Matrix of log10(TIC fraction)
TIC_fraction_to_log10 <- function(TIC_fraction_matrix, offset = 1e-9) {
log10(TIC_fraction_matrix + offset)
}
Purpose: Transform voom-normalized data to TIC fractions for interpretable visualization.
Approach: 1. Start with voomR$E (log2 CPM from statistical analysis) 2. Back-transform to pseudo-counts 3. Convert to TIC fractions (proportions) 4. Apply log10 transformation for plotting
cat("=== Data Transformation Pipeline ===\n")
## === Data Transformation Pipeline ===
cat("Input: voomR$E (log2 CPM)\n")
## Input: voomR$E (log2 CPM)
# Step 1: Convert log2(CPM) to TIC fractions
TIC_fraction <- log2cpm_to_TIC_fraction(
log2_cpm_matrix = voomR$E,
pseudo_count = 0
)
cat("Intermediate: TIC fractions\n")
## Intermediate: TIC fractions
cat(" Range:", range(TIC_fraction, na.rm = TRUE), "\n")
## Range: 5e-10 0.3637006
cat(" Sample sums:",
range(colSums(TIC_fraction, na.rm = TRUE)), "\n")
## Sample sums: 1 1
# Step 2: Log10 transform for visualization
log10_TIC <- TIC_fraction_to_log10(
TIC_fraction_matrix = TIC_fraction,
offset = 1e-9
)
cat("Output: log10(TIC fraction)\n")
## Output: log10(TIC fraction)
cat(" Range:", range(log10_TIC, na.rm = TRUE), "\n")
## Range: -8.823909 -0.4392559
Purpose: Visualize lipid abundance trajectories across leaf stages, separated by phosphorus treatment.
Input format: The function accepts any of: - log10(TIC fraction) - recommended for visualization - log2(CPM) - directly from voom - TIC fraction - will apply log10 internally
#' Plot lipid trajectories by treatment
#'
#' @param lipid_data Matrix or data frame with lipid abundances
#' Can be: log10(TIC fraction), log2(CPM), or TIC fraction
#' @param lipid_ids Character vector of lipid IDs to plot
#' @param metadata Data frame with sample information (tube, leaf_tissue,
#' Treatment columns required)
#' @param data_type Character: "log10_TIC", "log2_CPM", or "TIC_fraction"
#'
#' @return ggplot object with treatment-colored trajectories
plot_lipid_by_treatment <- function(lipid_data,
lipid_ids,
metadata,
data_type = "log10_TIC") {
# Validate data type
stopifnot(
data_type %in% c("log10_TIC", "log2_CPM", "TIC_fraction")
)
# Extract and format data
plot_data <- lipid_data %>%
as.data.frame() %>%
tibble::rownames_to_column("lipid") %>%
filter(lipid %in% lipid_ids) %>%
pivot_longer(
cols = -lipid,
names_to = "tube",
values_to = "abundance"
) %>%
left_join(
metadata %>% select(tube, leaf_tissue, Treatment),
by = "tube"
) %>%
filter(!is.na(Treatment)) %>%
mutate(
Treatment = factor(
Treatment,
levels = c("High_P", "Low_P"),
labels = c("+P", "-P")
)
)
# Format lipid labels: PC_34_2 -> PC 34:2
plot_data$lipid_label <- sub("_", "", plot_data$lipid, perl = TRUE)
plot_data$lipid_label <- sub("_", ":", plot_data$lipid_label, perl = TRUE)
# Maintain input order
lipid_levels <- sub("_", "", lipid_ids, perl = TRUE)
lipid_levels <- sub("_", ":", lipid_levels, perl = TRUE)
plot_data$lipid_label <- factor(
plot_data$lipid_label,
levels = lipid_levels
)
# Calculate summary statistics
summary_data <- plot_data %>%
group_by(lipid, lipid_label, leaf_tissue, Treatment) %>%
summarise(
mean = mean(abundance, na.rm = TRUE),
se = sd(abundance, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
# Set y-axis label based on input data type
y_label <- switch(
data_type,
"log10_TIC" = expression(log[10]("TIC fraction")),
"log2_CPM" = expression(log[2]("CPM")),
"TIC_fraction" = "TIC fraction"
)
# Plot parameters
base_size <- 35
pd <- position_dodge(width = 0.2)
# Create plot
summary_data %>%
ggplot(aes(
x = leaf_tissue,
y = mean,
color = Treatment,
fill = Treatment,
linetype = Treatment,
shape = Treatment,
group = Treatment
)) +
geom_line(linewidth = 1.5) +
geom_point(position = pd, size = 5) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
position = pd,
width = 0.2,
linewidth = 1,
linetype = "solid"
) +
scale_color_manual(
values = c("+P" = "gold", "-P" = "purple4")
) +
scale_fill_manual(
values = c("+P" = "gold", "-P" = "purple4")
) +
scale_linetype_manual(
values = c("+P" = "solid", "-P" = "dashed")
) +
scale_shape_manual(
values = c("+P" = 19, "-P" = 25)
) +
facet_wrap(~ lipid_label, scales = "free_y", ncol = 2) +
labs(
x = "Leaf",
y = y_label
) +
scale_y_continuous(
expand = expansion(add = c(0.1, 0.1))
) +
guides(
fill = "none",
linetype = "none",
shape = guide_legend(
override.aes = list(
fill = c("gold","purple4"),
color= c("gold","purple4"),
linetype = "solid",
size = 5)
),
) +
theme_classic(base_size = base_size) +
theme(
strip.background = element_blank(),
strip.text = element_text(face = "bold", size = 25),
# legend.position = c(0.1, 0.95),
legend.title = element_blank(),
legend.background = element_rect(fill = "transparent")
)
}
Purpose: Visualize high-confidence lipids showing significant interaction between leaf stage and phosphorus treatment.
Approach: Plot log10(TIC fraction) to show proportions on interpretable scale. Divergent trajectories between treatments indicate interaction.
Expected outcome: Treatment-specific responses across leaf developmental gradient, revealing context-dependent regulation.
#' Create horizontal 6-panel lipid trajectory plot with lipid class colors
#'
#' @param lipid_data Matrix with lipid abundances (log10 TIC fraction)
#' @param effects_df Data frame with differential abundance results
#' @param metadata Sample metadata with tube, leaf_tissue, Treatment
#' @param sp Data frame with lipid class annotations
#' @param data_type Character indicating input data format
#'
#' @return ggplot object with 6-panel horizontal layout
create_trajectory_6panel <- function(lipid_data,
effects_df,
metadata,
sp,
data_type = "log10_TIC",
nrow=1,
ncol=6) {
# Select top lipid per predictor×regulation combination
selected_lipids <- effects_df %>%
filter(is_hiconf_DL) %>%
group_by(predictor, regulation) %>%
slice_min(adj.P.Val, n = 1, with_ties = FALSE) %>%
ungroup() %>%
mutate(
panel_label = case_when(
predictor == "Leaf" & regulation == "Downregulated" ~ "Leaf Down",
predictor == "Leaf" & regulation == "Upregulated" ~ "Leaf Up",
predictor == "-P" & regulation == "Downregulated" ~ "-P Down",
predictor == "-P" & regulation == "Upregulated" ~ "-P Up",
predictor == "Leaf:-P" & regulation == "Downregulated" ~
"Negative\nLeaf × -P",
predictor == "Leaf:-P" & regulation == "Upregulated" ~
"Positive\nLeaf × -P"
)
) %>%
arrange(predictor, desc(regulation)) %>%
select(response, predictor, regulation, panel_label, logFC, adj.P.Val)
# Join with lipid class annotations
selected_lipids <- selected_lipids %>%
left_join(sp, by = c("response" = "colname"))
# Format lipid labels for titles
selected_lipids$lipid_label <- sub("_", "", selected_lipids$response,
perl = TRUE)
selected_lipids$lipid_label <- sub("_", ":", selected_lipids$lipid_label,
perl = TRUE)
# Combine panel label with lipid name
selected_lipids$full_label <- paste0(selected_lipids$panel_label, "\n",
selected_lipids$lipid_label)
# Extract abundances and join with metadata
plot_data <- lipid_data %>%
as.data.frame() %>%
tibble::rownames_to_column("lipid") %>%
filter(lipid %in% selected_lipids$response) %>%
pivot_longer(
cols = -lipid,
names_to = "tube",
values_to = "abundance"
) %>%
left_join(
metadata %>% select(tube, leaf_tissue, Treatment),
by = "tube"
) %>%
filter(!is.na(Treatment)) %>%
mutate(
Treatment = factor(
Treatment,
levels = c("High_P", "Low_P"),
labels = c("+P", "-P")
)
) %>%
left_join(
selected_lipids %>% select(response, full_label, head_group),
by = c("lipid" = "response")
)
# Set panel order
panel_order <- c(
paste0("Leaf Down\n", unique(selected_lipids$lipid_label[
selected_lipids$panel_label == "Leaf Down"])),
paste0("Leaf Up\n", unique(selected_lipids$lipid_label[
selected_lipids$panel_label == "Leaf Up"])),
paste0("-P Down\n", unique(selected_lipids$lipid_label[
selected_lipids$panel_label == "-P Down"])),
paste0("-P Up\n", unique(selected_lipids$lipid_label[
selected_lipids$panel_label == "-P Up"])),
paste0("Negative\nLeaf × -P\n", unique(selected_lipids$lipid_label[
selected_lipids$panel_label == "Negative\nLeaf × -P"])),
paste0("Positive\nLeaf × -P\n", unique(selected_lipids$lipid_label[
selected_lipids$panel_label == "Positive\nLeaf × -P"]))
)
plot_data$full_label <- factor(
plot_data$full_label,
levels = panel_order
)
# Calculate summary statistics
summary_data <- plot_data %>%
group_by(lipid, full_label, head_group, leaf_tissue, Treatment) %>%
summarise(
mean = mean(abundance, na.rm = TRUE),
se = sd(abundance, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
# Y-axis label
y_label <- switch(
data_type,
"log10_TIC" = expression(log[10]("fraction")),
"log2_CPM" = expression(log[2]("CPM")),
"TIC_fraction" = "TIC fraction"
)
# Plot
base_size <- 30
pd <- position_dodge(width = 0.2)
summary_data %>%
ggplot(aes(
x = leaf_tissue,
y = mean,
color = head_group,
fill = head_group,
linetype = Treatment,
shape = Treatment,
group = Treatment
)) +
geom_line(linewidth = 1.2) +
geom_point(position = pd, size = 4) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
position = pd,
width = 0.2,
linewidth = 0.8,
linetype = "solid"
) +
scale_color_manual(
name = "",
values = c("glycolipid" = "darkblue", "neutral" = "orange2",
"phospholipid" = "darkred"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
scale_fill_manual(
name = "",
values = c("glycolipid" = "darkblue", "neutral" = "orange2",
"phospholipid" = "darkred"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
scale_linetype_manual(
values = c("+P" = "solid", "-P" = "dashed")
) +
scale_shape_manual(
values = c("+P" = 19, "-P" = 25)
) +
facet_wrap(
~ full_label,
scales = "free_y",
nrow = nrow,
ncol = ncol
) +
labs(
x = "Leaf",
y = y_label
) +
scale_y_continuous(
expand = expansion(mult = c(0.1, 0.1))
) +
guides(
color = "none",
fill = "none",
linetype = guide_legend(
title = element_blank(),
override.aes = list(color = "black", linewidth = 1.5)
),
shape = guide_legend(
title = element_blank(),
override.aes = list(fill = "black", color = "black", size = 5)
)
) +
theme_classic(base_size = base_size) +
theme(
strip.background = element_blank(),
strip.text = element_text( size = 25),
legend.position = "top",
legend.title = element_blank(),
legend.background = element_rect(fill = "transparent"),
panel.spacing = unit(0.5, "lines")
)
}
p_trajectory_6 <- create_trajectory_6panel(
lipid_data = log10_TIC,
effects_df = effects_df,
metadata = sampleInfo,
sp = sp,
data_type = "log10_TIC"
)
ggsave(
filename = "~/Desktop/lipid_trajectory_6panel.png",
plot = p_trajectory_6,
width = 18,
height = 6,
dpi = 300
)
saveRDS(p_trajectory_6, "../results/lipid_trajectory_6panel.rds")
# Print Selected Lipids
selected_for_plot <- effects_df %>%
filter(is_hiconf_DL) %>%
group_by(predictor, regulation) %>%
slice_min(adj.P.Val, n = 1, with_ties = FALSE) %>%
ungroup() %>%
left_join(sp, by = c("response" = "colname")) %>%
arrange(predictor, desc(regulation)) %>%
select(predictor, regulation, response, class, head_group, logFC, adj.P.Val)
cat("\n=== Lipids Selected for 6-Panel Plot ===\n")
##
## === Lipids Selected for 6-Panel Plot ===
print(selected_for_plot)
## # A tibble: 6 × 7
## predictor regulation response class head_group logFC adj.P.Val
## <fct> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Leaf Upregulated LPC_18_3 LPC phospholipid 1.47 0.00200
## 2 Leaf Downregulated DG_36_4 DG neutral -0.560 0.00164
## 3 -P Upregulated DGGA_36_4 DGGA glycolipid 2.03 0.00139
## 4 -P Downregulated LPC_18_2 LPC phospholipid -3.77 0.000567
## 5 Leaf:-P Upregulated TG_54_9 TG neutral 1.25 0.00824
## 6 Leaf:-P Downregulated PC_34_2 PC phospholipid -0.575 0.00824
create_trajectory_6panel(
lipid_data = log10_TIC,
effects_df = effects_df,
metadata = sampleInfo,
nrow=2,
ncol=3,
sp = sp,
data_type = "log10_TIC"
)
# Session Info
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggtext_0.1.2 ggpubr_0.6.2 ggrepel_0.9.6 forcats_1.0.1 tidyr_1.3.1
## [6] dplyr_1.1.4 ggplot2_4.0.0 edgeR_4.7.6 limma_3.65.7
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.6 sass_0.4.10 generics_0.1.4 rstatix_0.7.3
## [5] xml2_1.4.0 stringi_1.8.7 lattice_0.22-7 digest_0.6.37
## [9] magrittr_2.0.4 evaluate_1.0.5 grid_4.5.1 RColorBrewer_1.1-3
## [13] fastmap_1.2.0 jsonlite_2.0.0 backports_1.5.0 Formula_1.2-5
## [17] purrr_1.1.0 viridisLite_0.4.2 scales_1.4.0 textshaping_1.0.4
## [21] jquerylib_0.1.4 abind_1.4-8 cli_3.6.5 rlang_1.1.6
## [25] litedown_0.7 commonmark_2.0.0 cowplot_1.2.0 withr_3.0.2
## [29] cachem_1.1.0 yaml_2.3.10 tools_4.5.1 ggsignif_0.6.4
## [33] locfit_1.5-9.12 broom_1.0.10 vctrs_0.6.5 R6_2.6.1
## [37] lifecycle_1.0.4 stringr_1.5.2 car_3.1-3 ragg_1.5.0
## [41] pkgconfig_2.0.3 pillar_1.11.1 bslib_0.9.0 gtable_0.3.6
## [45] glue_1.8.0 Rcpp_1.1.0 systemfonts_1.3.1 statmod_1.5.1
## [49] xfun_0.53 tibble_3.3.0 tidyselect_1.2.1 rstudioapi_0.17.1
## [53] knitr_1.50 farver_2.1.2 htmltools_0.5.8.1 labeling_0.4.3
## [57] rmarkdown_2.30 carData_3.0-5 compiler_4.5.1 S7_0.2.0
## [61] markdown_2.0 gridtext_0.1.5