Overview

This analysis visualizes the results from LION (Lipid Ontology) enrichment analysis comparing lipid profiles between Low P and High P treatments. The enrichment identifies biological categories of lipids that are over-represented among differentially expressed lipids.

LION enrichment categories include:

Lipid classes: Structural classifications (e.g., triacylglycerols, glycerophospholipids)
Cellular location: Where lipids are localized (e.g., lipid droplet, mitochondrion)
Biophysical properties: Membrane characteristics (e.g., lateral diffusion, bilayer thickness)
Chemical properties: Structural features (e.g., headgroup charge, fatty acid saturation)

Libraries

library(ggplot2)
library(dplyr)
library(janitor)
library(ggrepel)
library(ggpubr)

Data Loading

# Load LION enrichment results
enrichment <- read.csv("LION-enrichment_LowPVSHighP.csv")

cat("Loaded", nrow(enrichment), "enrichment terms\n")

## Loaded 52 enrichment terms

cat("\nOriginal column names:\n")

## 
## Original column names:

print(colnames(enrichment))

## [1] "Term.ID"     "Discription" "Annotated"   "p.value"     "FDR.q.value"
## [6] "ES"          "Regulated"

Data Preprocessing

Clean Column Names

Use janitor to standardize column names to snake_case and remove special characters.

# Clean column names using janitor
enrichment <- enrichment %>%
  clean_names()

cat("\nCleaned column names:\n")

## 
## Cleaned column names:

print(colnames(enrichment))

## [1] "term_id"     "discription" "annotated"   "p_value"     "fdr_q_value"
## [6] "es"          "regulated"

# Display structure
cat("\nData structure:\n")

## 
## Data structure:

str(enrichment)

## 'data.frame':    52 obs. of  7 variables:
##  $ term_id    : chr  "LION:0000622" "LION:0012011" "LION:0012084" "LION:0012010" ...
##  $ discription: chr  "triacylglycerols [GL0301]" "lipid storage" "lipid droplet" "membrane component" ...
##  $ annotated  : int  26 26 26 78 54 50 47 43 11 4 ...
##  $ p_value    : num  6.1e-13 6.1e-13 6.1e-13 3.1e-12 1.4e-09 ...
##  $ fdr_q_value: num  1.06e-11 1.06e-11 1.06e-11 4.03e-11 1.46e-08 ...
##  $ es         : num  0.796 0.796 0.796 -0.762 -0.605 ...
##  $ regulated  : chr  "UP" "UP" "UP" "DOWN" ...

Calculate Log-Transformed Values

# Calculate -log10 transformations for visualization
enrichment <- enrichment %>%
  mutate(
    neg_log10_fdr = -log10(fdr_q_value),
    neg_log10_pvalue = -log10(p_value)
  )

# Summary statistics
cat("\nSummary of -log10(FDR):\n")

## 
## Summary of -log10(FDR):

print(summary(enrichment$neg_log10_fdr))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1278  0.7429  1.0805  2.1634  1.7144 10.9747

cat("\nSummary of -log10(p-value):\n")

## 
## Summary of -log10(p-value):

print(summary(enrichment$neg_log10_pvalue))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1276  0.8652  1.3733  2.5580  2.2844 12.2147

cat("\nSummary of ES (Enrichment Score):\n")

## 
## Summary of ES (Enrichment Score):

print(summary(enrichment$es))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.8911 -0.4900 -0.3856 -0.2952 -0.2989  0.7960

cat("\nSummary of Annotated (number of lipids):\n")

## 
## Summary of Annotated (number of lipids):

print(summary(enrichment$annotated))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00    7.00   12.00   17.23   23.00   78.00

Filter Significant Terms

# Define significance threshold
fdr_threshold <- 0.05

# Identify significant terms and create label column
enrichment <- enrichment %>%
  mutate(
    is_significant = fdr_q_value < fdr_threshold,
    label = ifelse(is_significant, discription, "")
  )

cat("\nSignificant terms (FDR < 0.05):", 
    sum(enrichment$is_significant), "/", nrow(enrichment), "\n")

## 
## Significant terms (FDR < 0.05): 21 / 52

# Count by regulation direction
cat("\nSignificant terms by regulation:\n")

## 
## Significant terms by regulation:

print(with(enrichment %>% filter(is_significant), table(regulated)))

## regulated
## DOWN   UP 
##   17    4

Enrichment Summary Tables

Top Upregulated Terms

top_up <- enrichment %>%
  filter(regulated == "UP") %>%
  arrange(fdr_q_value) %>%
  select(term_id, discription, annotated, p_value, fdr_q_value, es) %>%
  head(10)

knitr::kable(
  top_up,
  caption = "Top 10 Upregulated Lipid Categories",
  digits = 3
)

Top 10 Upregulated Lipid Categories
term_id	discription	annotated	p_value	fdr_q_value	es
LION:0000622	triacylglycerols [GL0301]	26	0.000	0.000	0.796
LION:0012011	lipid storage	26	0.000	0.000	0.796
LION:0012084	lipid droplet	26	0.000	0.000	0.796
LION:0000094	headgroup with neutral charge	50	0.000	0.000	0.589
LION:0002956	fatty acid with 17 carbons	4	0.212	0.263	0.495
LION:0000607	diacylglycerols [GL0201]	24	0.232	0.274	0.231
LION:0002968	saturated fatty acid	6	0.271	0.306	0.394
LION:0002966	fatty acid with less than 2 double bonds	11	0.582	0.600	0.232

Top Downregulated Terms

top_down <- enrichment %>%
  filter(regulated == "DOWN") %>%
  arrange(fdr_q_value) %>%
  select(term_id, discription, annotated, p_value, fdr_q_value, es) %>%
  head(10)

knitr::kable(
  top_down,
  caption = "Top 10 Downregulated Lipid Categories",
  digits = 3
)

Top 10 Downregulated Lipid Categories
term_id	discription	annotated	p_value	fdr_q_value	es
LION:0012010	membrane component	78	0.000	0.000	-0.762
LION:0000003	glycerophospholipids [GP]	54	0.000	0.000	-0.605
LION:0012080	endoplasmic reticulum (ER)	47	0.000	0.000	-0.468
LION:0000095	headgroup with positive charge / zwitter-ion	43	0.000	0.001	-0.421
LION:0000093	headgroup with negative charge	11	0.000	0.001	-0.649
LION:0002967	polyunsaturated fatty acid	4	0.001	0.003	-0.891
LION:0080978	average lateral diffusion	11	0.004	0.017	-0.537
LION:0012081	mitochondrion	23	0.005	0.019	-0.398
LION:0000010	glycerophosphocholines [GP01]	27	0.005	0.019	-0.373
LION:0000059	diacylglycerophosphoglycerols [GP0401]	7	0.005	0.019	-0.633

Enrichment Bubble Plot

All Terms with Significant Labels

# Create bubble plot with all terms, labels only for significant
p_all <- enrichment %>%
  ggplot(aes(
    x = es,
    y = neg_log10_fdr,
    size = annotated,
    color = neg_log10_pvalue,
    label = label
  )) +
  geom_point(alpha = 0.7, shape = 21, stroke = 0.5) +
  geom_text_repel(
    max.overlaps = 20,
    min.segment.length = 0,
    size = 3,
    box.padding = 0.5,
    point.padding = 0.3,
    force = 2
  ) +
  geom_hline(
    yintercept = -log10(0.05),
    linetype = "dashed",
    color = "grey30",
    linewidth = 0.8
  ) +
  scale_color_gradient(
    low = "dodgerblue",
    high = "tomato",
    name = expression(-log[10](italic(p) - value))
  ) +
  scale_size_continuous(
    name = "Annotated\nLipids",
    range = c(2, 12)
  ) +
  xlab("Enrichment Score (ES)") +
  ylab(expression(-log[10](FDR ~ italic(q) - value))) +
  ggtitle("LION Lipid Enrichment: Low P vs High P") +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 16),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.3)
  )

print(p_all)

Significant Terms Only (FDR < 0.05)

# Filter to significant terms only
sig_enrichment <- enrichment %>%
  filter(is_significant)

cat("Plotting", nrow(sig_enrichment), "significant terms\n")

## Plotting 21 significant terms

# Create bubble plot with significant terms
p_sig <- sig_enrichment %>%
  ggplot(aes(
    x = es,
    y = neg_log10_fdr,
    size = annotated,
    color = neg_log10_pvalue,
    label = label
  )) +
  geom_point(alpha = 0.8, shape = 21, stroke = 0.5) +
  geom_hline(
    yintercept = -log10(0.05),
    linetype = "dashed",
    color = "grey30",
    linewidth = 0.8
  ) +
  scale_color_gradient(
    low = "dodgerblue",
    high = "tomato",
    name = expression(-log[10](italic(p) - value))
  ) +
  scale_size_continuous(
    name = "Annotated\nLipids",
    range = c(3, 15)
  ) +
  xlab("Enrichment Score (ES)") +
  ylab(expression(-log[10](FDR ~ italic(q) - value))) +
  ggtitle("LION Lipid Enrichment: Low P vs High P (Significant Terms)") +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 16),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.3)
  )

print(p_sig)

Labeled Significant Terms (Enhanced)

This plot shows significant terms with optimized label placement using ggrepel.

# Create bubble plot with labels for significant terms
p_labeled <- sig_enrichment %>%
  # 1. Define only the core aesthetics (x, y, label) globally
  ggplot(aes(x = es, y = annotated, label = label)) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "grey30",
    # 2. Use 'size' instead of 'linewidth' for broader compatibility
    size = 0.8
  ) +
  # 3. Map size and color aesthetics only to the points they apply to
  geom_point(
    aes(size = neg_log10_fdr, color = neg_log10_fdr),
    alpha = 0.8,
    shape = 19
  ) +
  geom_text_repel(
    aes(color = neg_log10_fdr),
    max.overlaps = 10,
    min.segment.length = 2,
    size = 6, 
    box.padding = 0.2,
    point.padding = 0.3,
    force = 1
  ) +
  # 4. Make legend titles consistent to merge them into one
  scale_color_gradient(
    low = "dodgerblue",
    high = "tomato",
    name = expression(-log[10](FDR))
  ) +
  scale_size_continuous(
    name = expression(-log[10](FDR)),
    breaks = c(3, 6, 10)
  ) +
  xlab("Enrichment Score (ES)") +
  ylab("Count of Lipid Species") +
  xlim(-2, 2) +
  ylim(0, 80) +
  #ggtitle("LION Lipid Enrichment: -P vs +P") +
  ggpubr::theme_classic2(base_size = 25) +
  theme(
    plot.title = element_blank(),
    legend.position = c(0.9, 0.8),
    legend.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.key.size = unit(0.8, "lines"),
    legend.spacing.x = unit(0.5, "cm")
  )

print(p_labeled)

Faceted by Regulation Direction

# Create faceted plot by regulation direction
p_faceted <- sig_enrichment %>%
  ggplot(aes(
    x = es,
    y = neg_log10_fdr,
    size = annotated,
    color = neg_log10_pvalue
  )) +
  geom_point(alpha = 0.8, shape = 21, stroke = 0.5) +
  geom_vline(
    xintercept = 0,
    linetype = "dashed",
    color = "grey30",
    linewidth = 0.8
  ) +
  scale_color_gradient(
    low = "dodgerblue",
    high = "tomato",
    name = expression(-log[10](italic(p) - value))
  ) +
  scale_size_continuous(
    name = "Annotated\nLipids",
    range = c(3, 15)
  ) +
  xlab("Enrichment Score (ES)") +
  ylab(expression(-log[10](FDR ~ italic(q) - value))) +
  ggtitle("LION Lipid Enrichment by Regulation Direction") +
  facet_wrap(~ regulated, scales = "free_x") +
  theme_classic(base_size = 30) +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 16),
    #strip.background = element_rect(fill = "grey90"),
    strip.text = element_text(face = "bold", size = 12),
    #panel.grid.major = element_line(color = "grey90", linewidth = 0.3)
  )

print(p_faceted)

Enrichment Analysis Summary

Distribution of Enrichment Scores

# Histogram of enrichment scores
ggplot(enrichment, aes(x = es, fill = regulated)) +
  geom_histogram(bins = 30, alpha = 0.7, position = "identity") +
  scale_fill_manual(
    values = c("DOWN" = "dodgerblue", "UP" = "tomato"),
    name = "Regulation"
  ) +
  xlab("Enrichment Score (ES)") +
  ylab("Count") +
  ggtitle("Distribution of Enrichment Scores") +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold")
  )

P-value vs FDR Comparison

# Scatter plot comparing p-value and FDR
ggplot(enrichment, aes(
  x = neg_log10_pvalue,
  y = neg_log10_fdr,
  color = regulated,
  size = annotated
)) +
  geom_point(alpha = 0.7) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
  scale_color_manual(
    values = c("DOWN" = "dodgerblue", "UP" = "tomato"),
    name = "Regulation"
  ) +
  scale_size_continuous(name = "Annotated\nLipids") +
  xlab(expression(-log[10](italic(p) - value))) +
  ylab(expression(-log[10](FDR ~ italic(q) - value))) +
  ggtitle("P-value vs FDR: Multiple Testing Correction Effect") +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold")
  )

Category Size Distribution

# Boxplot of annotated lipids by regulation
ggplot(enrichment, aes(x = regulated, y = annotated, fill = regulated)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
  scale_fill_manual(
    values = c("DOWN" = "dodgerblue", "UP" = "tomato"),
    guide = "none"
  ) +
  xlab("Regulation Direction") +
  ylab("Number of Annotated Lipids") +
  ggtitle("Distribution of Category Sizes") +
  theme_classic(base_size = 14) +
  theme(plot.title = element_text(face = "bold"))

Export Results

# Export processed enrichment data
write.csv(
  enrichment,
  file = "LION_enrichment_processed.csv",
  row.names = FALSE
)

# Export significant terms only
write.csv(
  sig_enrichment,
  file = "LION_enrichment_significant.csv",
  row.names = FALSE
)

# Save plots
ggsave(
  p_labeled,
  file = "LION_enrichment_bubble_labeled.svg",
  width = 14,
  height = 12
)

ggsave(
  p_faceted,
  file = "LION_enrichment_bubble_faceted.svg",
  width = 14,
  height = 8
)

Biological Interpretation

Key Findings

cat("=== Summary of LION Enrichment Analysis ===\n\n")

## === Summary of LION Enrichment Analysis ===

cat("Total enrichment terms tested:", nrow(enrichment), "\n")

## Total enrichment terms tested: 52

cat("Significant terms (FDR < 0.05):", sum(enrichment$is_significant), "\n\n")

## Significant terms (FDR < 0.05): 21

cat("Upregulated categories:", 
    sum(enrichment$is_significant & enrichment$regulated == "UP"), "\n")

## Upregulated categories: 4

cat("Downregulated categories:", 
    sum(enrichment$is_significant & enrichment$regulated == "DOWN"), "\n\n")

## Downregulated categories: 17

# Most enriched upregulated category
top_up_term <- enrichment %>%
  filter(regulated == "UP") %>%
  arrange(fdr_q_value) %>%
  slice(1)

cat("Most enriched UP category:\n")

## Most enriched UP category:

cat("  ", top_up_term$discription, "\n")

##    triacylglycerols [GL0301]

cat("   FDR:", format(top_up_term$fdr_q_value, scientific = TRUE, digits = 3), "\n")

##    FDR: 1.06e-11

cat("   ES:", round(top_up_term$es, 3), "\n")

##    ES: 0.796

cat("   Lipids:", top_up_term$annotated, "\n\n")

##    Lipids: 26

# Most enriched downregulated category
top_down_term <- enrichment %>%
  filter(regulated == "DOWN") %>%
  arrange(fdr_q_value) %>%
  slice(1)

cat("Most enriched DOWN category:\n")

## Most enriched DOWN category:

cat("  ", top_down_term$discription, "\n")

##    membrane component

cat("   FDR:", format(top_down_term$fdr_q_value, scientific = TRUE, digits = 3), "\n")

##    FDR: 4.03e-11

cat("   ES:", round(top_down_term$es, 3), "\n")

##    ES: -0.762

cat("   Lipids:", top_down_term$annotated, "\n")

##    Lipids: 78

Upregulated Under Low P (Storage Lipids)

The most significantly enriched upregulated categories include:

Triacylglycerols: Neutral storage lipids accumulated in lipid droplets
Lipid storage and Lipid droplet: Cellular compartments for energy storage
These findings suggest increased lipid storage under phosphorus deficiency

Downregulated Under Low P (Membrane Lipids)

The most significantly enriched downregulated categories include:

Membrane components: Structural lipids in cellular membranes
Glycerophospholipids: Major membrane phospholipids
Phosphatidylcholines and Phosphatidylethanolamines: Key membrane components
Polyunsaturated fatty acids: Important for membrane fluidity

These patterns indicate membrane remodeling under P stress, with reduction in phosphorus-containing membrane lipids (phospholipids) and compensatory increase in phosphorus-free storage lipids (triacylglycerols).

Session Information

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggpubr_0.6.2  ggrepel_0.9.6 janitor_2.2.1 dplyr_1.1.4   ggplot2_4.0.0
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.10        generics_0.1.4     tidyr_1.3.1        rstatix_0.7.3     
##  [5] stringi_1.8.7      digest_0.6.37      magrittr_2.0.4     evaluate_1.0.5    
##  [9] grid_4.5.1         timechange_0.3.0   RColorBrewer_1.1-3 fastmap_1.2.0     
## [13] jsonlite_2.0.0     backports_1.5.0    Formula_1.2-5      purrr_1.1.0       
## [17] scales_1.4.0       jquerylib_0.1.4    abind_1.4-8        cli_3.6.5         
## [21] rlang_1.1.6        withr_3.0.2        cachem_1.1.0       yaml_2.3.10       
## [25] tools_4.5.1        ggsignif_0.6.4     broom_1.0.10       vctrs_0.6.5       
## [29] R6_2.6.1           lifecycle_1.0.4    lubridate_1.9.4    snakecase_0.11.1  
## [33] stringr_1.5.2      car_3.1-3          pkgconfig_2.0.3    pillar_1.11.1     
## [37] bslib_0.9.0        gtable_0.3.6       glue_1.8.0         Rcpp_1.1.0        
## [41] xfun_0.53          tibble_3.3.0       tidyselect_1.2.1   rstudioapi_0.17.1 
## [45] knitr_1.50         farver_2.1.2       htmltools_0.5.8.1  labeling_0.4.3    
## [49] rmarkdown_2.30     carData_3.0-5      compiler_4.5.1     S7_0.2.0

LION Lipid Enrichment Analysis: Low P vs High P

Maize Genetics Analysis

2025-10-27