This analysis visualizes the results from LION (Lipid Ontology) enrichment analysis comparing lipid profiles between Low P and High P treatments. The enrichment identifies biological categories of lipids that are over-represented among differentially expressed lipids.
LION enrichment categories include:
library(ggplot2)
library(dplyr)
library(janitor)
library(ggrepel)
library(ggpubr)
# Load LION enrichment results
enrichment <- read.csv("LION-enrichment_LowPVSHighP.csv")
cat("Loaded", nrow(enrichment), "enrichment terms\n")
## Loaded 52 enrichment terms
cat("\nOriginal column names:\n")
##
## Original column names:
print(colnames(enrichment))
## [1] "Term.ID" "Discription" "Annotated" "p.value" "FDR.q.value"
## [6] "ES" "Regulated"
Use janitor to standardize column names to snake_case and remove special characters.
# Clean column names using janitor
enrichment <- enrichment %>%
clean_names()
cat("\nCleaned column names:\n")
##
## Cleaned column names:
print(colnames(enrichment))
## [1] "term_id" "discription" "annotated" "p_value" "fdr_q_value"
## [6] "es" "regulated"
# Display structure
cat("\nData structure:\n")
##
## Data structure:
str(enrichment)
## 'data.frame': 52 obs. of 7 variables:
## $ term_id : chr "LION:0000622" "LION:0012011" "LION:0012084" "LION:0012010" ...
## $ discription: chr "triacylglycerols [GL0301]" "lipid storage" "lipid droplet" "membrane component" ...
## $ annotated : int 26 26 26 78 54 50 47 43 11 4 ...
## $ p_value : num 6.1e-13 6.1e-13 6.1e-13 3.1e-12 1.4e-09 ...
## $ fdr_q_value: num 1.06e-11 1.06e-11 1.06e-11 4.03e-11 1.46e-08 ...
## $ es : num 0.796 0.796 0.796 -0.762 -0.605 ...
## $ regulated : chr "UP" "UP" "UP" "DOWN" ...
# Calculate -log10 transformations for visualization
enrichment <- enrichment %>%
mutate(
neg_log10_fdr = -log10(fdr_q_value),
neg_log10_pvalue = -log10(p_value)
)
# Summary statistics
cat("\nSummary of -log10(FDR):\n")
##
## Summary of -log10(FDR):
print(summary(enrichment$neg_log10_fdr))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1278 0.7429 1.0805 2.1634 1.7144 10.9747
cat("\nSummary of -log10(p-value):\n")
##
## Summary of -log10(p-value):
print(summary(enrichment$neg_log10_pvalue))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.1276 0.8652 1.3733 2.5580 2.2844 12.2147
cat("\nSummary of ES (Enrichment Score):\n")
##
## Summary of ES (Enrichment Score):
print(summary(enrichment$es))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.8911 -0.4900 -0.3856 -0.2952 -0.2989 0.7960
cat("\nSummary of Annotated (number of lipids):\n")
##
## Summary of Annotated (number of lipids):
print(summary(enrichment$annotated))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 7.00 12.00 17.23 23.00 78.00
# Define significance threshold
fdr_threshold <- 0.05
# Identify significant terms and create label column
enrichment <- enrichment %>%
mutate(
is_significant = fdr_q_value < fdr_threshold,
label = ifelse(is_significant, discription, "")
)
cat("\nSignificant terms (FDR < 0.05):",
sum(enrichment$is_significant), "/", nrow(enrichment), "\n")
##
## Significant terms (FDR < 0.05): 21 / 52
# Count by regulation direction
cat("\nSignificant terms by regulation:\n")
##
## Significant terms by regulation:
print(with(enrichment %>% filter(is_significant), table(regulated)))
## regulated
## DOWN UP
## 17 4
top_up <- enrichment %>%
filter(regulated == "UP") %>%
arrange(fdr_q_value) %>%
select(term_id, discription, annotated, p_value, fdr_q_value, es) %>%
head(10)
knitr::kable(
top_up,
caption = "Top 10 Upregulated Lipid Categories",
digits = 3
)
| term_id | discription | annotated | p_value | fdr_q_value | es |
|---|---|---|---|---|---|
| LION:0000622 | triacylglycerols [GL0301] | 26 | 0.000 | 0.000 | 0.796 |
| LION:0012011 | lipid storage | 26 | 0.000 | 0.000 | 0.796 |
| LION:0012084 | lipid droplet | 26 | 0.000 | 0.000 | 0.796 |
| LION:0000094 | headgroup with neutral charge | 50 | 0.000 | 0.000 | 0.589 |
| LION:0002956 | fatty acid with 17 carbons | 4 | 0.212 | 0.263 | 0.495 |
| LION:0000607 | diacylglycerols [GL0201] | 24 | 0.232 | 0.274 | 0.231 |
| LION:0002968 | saturated fatty acid | 6 | 0.271 | 0.306 | 0.394 |
| LION:0002966 | fatty acid with less than 2 double bonds | 11 | 0.582 | 0.600 | 0.232 |
top_down <- enrichment %>%
filter(regulated == "DOWN") %>%
arrange(fdr_q_value) %>%
select(term_id, discription, annotated, p_value, fdr_q_value, es) %>%
head(10)
knitr::kable(
top_down,
caption = "Top 10 Downregulated Lipid Categories",
digits = 3
)
| term_id | discription | annotated | p_value | fdr_q_value | es |
|---|---|---|---|---|---|
| LION:0012010 | membrane component | 78 | 0.000 | 0.000 | -0.762 |
| LION:0000003 | glycerophospholipids [GP] | 54 | 0.000 | 0.000 | -0.605 |
| LION:0012080 | endoplasmic reticulum (ER) | 47 | 0.000 | 0.000 | -0.468 |
| LION:0000095 | headgroup with positive charge / zwitter-ion | 43 | 0.000 | 0.001 | -0.421 |
| LION:0000093 | headgroup with negative charge | 11 | 0.000 | 0.001 | -0.649 |
| LION:0002967 | polyunsaturated fatty acid | 4 | 0.001 | 0.003 | -0.891 |
| LION:0080978 | average lateral diffusion | 11 | 0.004 | 0.017 | -0.537 |
| LION:0012081 | mitochondrion | 23 | 0.005 | 0.019 | -0.398 |
| LION:0000010 | glycerophosphocholines [GP01] | 27 | 0.005 | 0.019 | -0.373 |
| LION:0000059 | diacylglycerophosphoglycerols [GP0401] | 7 | 0.005 | 0.019 | -0.633 |
# Create bubble plot with all terms, labels only for significant
p_all <- enrichment %>%
ggplot(aes(
x = es,
y = neg_log10_fdr,
size = annotated,
color = neg_log10_pvalue,
label = label
)) +
geom_point(alpha = 0.7, shape = 21, stroke = 0.5) +
geom_text_repel(
max.overlaps = 20,
min.segment.length = 0,
size = 3,
box.padding = 0.5,
point.padding = 0.3,
force = 2
) +
geom_hline(
yintercept = -log10(0.05),
linetype = "dashed",
color = "grey30",
linewidth = 0.8
) +
scale_color_gradient(
low = "dodgerblue",
high = "tomato",
name = expression(-log[10](italic(p) - value))
) +
scale_size_continuous(
name = "Annotated\nLipids",
range = c(2, 12)
) +
xlab("Enrichment Score (ES)") +
ylab(expression(-log[10](FDR ~ italic(q) - value))) +
ggtitle("LION Lipid Enrichment: Low P vs High P") +
theme_classic(base_size = 14) +
theme(
legend.position = "right",
plot.title = element_text(face = "bold", size = 16),
panel.grid.major = element_line(color = "grey90", linewidth = 0.3)
)
print(p_all)
# Filter to significant terms only
sig_enrichment <- enrichment %>%
filter(is_significant)
cat("Plotting", nrow(sig_enrichment), "significant terms\n")
## Plotting 21 significant terms
# Create bubble plot with significant terms
p_sig <- sig_enrichment %>%
ggplot(aes(
x = es,
y = neg_log10_fdr,
size = annotated,
color = neg_log10_pvalue,
label = label
)) +
geom_point(alpha = 0.8, shape = 21, stroke = 0.5) +
geom_hline(
yintercept = -log10(0.05),
linetype = "dashed",
color = "grey30",
linewidth = 0.8
) +
scale_color_gradient(
low = "dodgerblue",
high = "tomato",
name = expression(-log[10](italic(p) - value))
) +
scale_size_continuous(
name = "Annotated\nLipids",
range = c(3, 15)
) +
xlab("Enrichment Score (ES)") +
ylab(expression(-log[10](FDR ~ italic(q) - value))) +
ggtitle("LION Lipid Enrichment: Low P vs High P (Significant Terms)") +
theme_classic(base_size = 14) +
theme(
legend.position = "right",
plot.title = element_text(face = "bold", size = 16),
panel.grid.major = element_line(color = "grey90", linewidth = 0.3)
)
print(p_sig)
This plot shows significant terms with optimized label placement
using ggrepel.
# Create bubble plot with labels for significant terms
p_labeled <- sig_enrichment %>%
# 1. Define only the core aesthetics (x, y, label) globally
ggplot(aes(x = es, y = annotated, label = label)) +
geom_vline(
xintercept = 0,
linetype = "dashed",
color = "grey30",
# 2. Use 'size' instead of 'linewidth' for broader compatibility
size = 0.8
) +
# 3. Map size and color aesthetics only to the points they apply to
geom_point(
aes(size = neg_log10_fdr, color = neg_log10_fdr),
alpha = 0.8,
shape = 19
) +
geom_text_repel(
aes(color = neg_log10_fdr),
max.overlaps = 10,
min.segment.length = 2,
size = 6,
box.padding = 0.2,
point.padding = 0.3,
force = 1
) +
# 4. Make legend titles consistent to merge them into one
scale_color_gradient(
low = "dodgerblue",
high = "tomato",
name = expression(-log[10](FDR))
) +
scale_size_continuous(
name = expression(-log[10](FDR)),
breaks = c(3, 6, 10)
) +
xlab("Enrichment Score (ES)") +
ylab("Count of Lipid Species") +
xlim(-2, 2) +
ylim(0, 80) +
#ggtitle("LION Lipid Enrichment: -P vs +P") +
ggpubr::theme_classic2(base_size = 25) +
theme(
plot.title = element_blank(),
legend.position = c(0.9, 0.8),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
legend.key.size = unit(0.8, "lines"),
legend.spacing.x = unit(0.5, "cm")
)
print(p_labeled)
# Create faceted plot by regulation direction
p_faceted <- sig_enrichment %>%
ggplot(aes(
x = es,
y = neg_log10_fdr,
size = annotated,
color = neg_log10_pvalue
)) +
geom_point(alpha = 0.8, shape = 21, stroke = 0.5) +
geom_vline(
xintercept = 0,
linetype = "dashed",
color = "grey30",
linewidth = 0.8
) +
scale_color_gradient(
low = "dodgerblue",
high = "tomato",
name = expression(-log[10](italic(p) - value))
) +
scale_size_continuous(
name = "Annotated\nLipids",
range = c(3, 15)
) +
xlab("Enrichment Score (ES)") +
ylab(expression(-log[10](FDR ~ italic(q) - value))) +
ggtitle("LION Lipid Enrichment by Regulation Direction") +
facet_wrap(~ regulated, scales = "free_x") +
theme_classic(base_size = 30) +
theme(
legend.position = "right",
plot.title = element_text(face = "bold", size = 16),
#strip.background = element_rect(fill = "grey90"),
strip.text = element_text(face = "bold", size = 12),
#panel.grid.major = element_line(color = "grey90", linewidth = 0.3)
)
print(p_faceted)
# Histogram of enrichment scores
ggplot(enrichment, aes(x = es, fill = regulated)) +
geom_histogram(bins = 30, alpha = 0.7, position = "identity") +
scale_fill_manual(
values = c("DOWN" = "dodgerblue", "UP" = "tomato"),
name = "Regulation"
) +
xlab("Enrichment Score (ES)") +
ylab("Count") +
ggtitle("Distribution of Enrichment Scores") +
theme_classic(base_size = 14) +
theme(
legend.position = "top",
plot.title = element_text(face = "bold")
)
# Scatter plot comparing p-value and FDR
ggplot(enrichment, aes(
x = neg_log10_pvalue,
y = neg_log10_fdr,
color = regulated,
size = annotated
)) +
geom_point(alpha = 0.7) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "grey50") +
scale_color_manual(
values = c("DOWN" = "dodgerblue", "UP" = "tomato"),
name = "Regulation"
) +
scale_size_continuous(name = "Annotated\nLipids") +
xlab(expression(-log[10](italic(p) - value))) +
ylab(expression(-log[10](FDR ~ italic(q) - value))) +
ggtitle("P-value vs FDR: Multiple Testing Correction Effect") +
theme_classic(base_size = 14) +
theme(
legend.position = "right",
plot.title = element_text(face = "bold")
)
# Boxplot of annotated lipids by regulation
ggplot(enrichment, aes(x = regulated, y = annotated, fill = regulated)) +
geom_boxplot(alpha = 0.7, outlier.shape = NA) +
geom_jitter(width = 0.2, alpha = 0.5, size = 2) +
scale_fill_manual(
values = c("DOWN" = "dodgerblue", "UP" = "tomato"),
guide = "none"
) +
xlab("Regulation Direction") +
ylab("Number of Annotated Lipids") +
ggtitle("Distribution of Category Sizes") +
theme_classic(base_size = 14) +
theme(plot.title = element_text(face = "bold"))
# Export processed enrichment data
write.csv(
enrichment,
file = "LION_enrichment_processed.csv",
row.names = FALSE
)
# Export significant terms only
write.csv(
sig_enrichment,
file = "LION_enrichment_significant.csv",
row.names = FALSE
)
# Save plots
ggsave(
p_labeled,
file = "LION_enrichment_bubble_labeled.svg",
width = 14,
height = 12
)
ggsave(
p_faceted,
file = "LION_enrichment_bubble_faceted.svg",
width = 14,
height = 8
)
cat("=== Summary of LION Enrichment Analysis ===\n\n")
## === Summary of LION Enrichment Analysis ===
cat("Total enrichment terms tested:", nrow(enrichment), "\n")
## Total enrichment terms tested: 52
cat("Significant terms (FDR < 0.05):", sum(enrichment$is_significant), "\n\n")
## Significant terms (FDR < 0.05): 21
cat("Upregulated categories:",
sum(enrichment$is_significant & enrichment$regulated == "UP"), "\n")
## Upregulated categories: 4
cat("Downregulated categories:",
sum(enrichment$is_significant & enrichment$regulated == "DOWN"), "\n\n")
## Downregulated categories: 17
# Most enriched upregulated category
top_up_term <- enrichment %>%
filter(regulated == "UP") %>%
arrange(fdr_q_value) %>%
slice(1)
cat("Most enriched UP category:\n")
## Most enriched UP category:
cat(" ", top_up_term$discription, "\n")
## triacylglycerols [GL0301]
cat(" FDR:", format(top_up_term$fdr_q_value, scientific = TRUE, digits = 3), "\n")
## FDR: 1.06e-11
cat(" ES:", round(top_up_term$es, 3), "\n")
## ES: 0.796
cat(" Lipids:", top_up_term$annotated, "\n\n")
## Lipids: 26
# Most enriched downregulated category
top_down_term <- enrichment %>%
filter(regulated == "DOWN") %>%
arrange(fdr_q_value) %>%
slice(1)
cat("Most enriched DOWN category:\n")
## Most enriched DOWN category:
cat(" ", top_down_term$discription, "\n")
## membrane component
cat(" FDR:", format(top_down_term$fdr_q_value, scientific = TRUE, digits = 3), "\n")
## FDR: 4.03e-11
cat(" ES:", round(top_down_term$es, 3), "\n")
## ES: -0.762
cat(" Lipids:", top_down_term$annotated, "\n")
## Lipids: 78
The most significantly enriched upregulated categories include:
The most significantly enriched downregulated categories include:
These patterns indicate membrane remodeling under P stress, with reduction in phosphorus-containing membrane lipids (phospholipids) and compensatory increase in phosphorus-free storage lipids (triacylglycerols).
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.6.2 ggrepel_0.9.6 janitor_2.2.1 dplyr_1.1.4 ggplot2_4.0.0
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 tidyr_1.3.1 rstatix_0.7.3
## [5] stringi_1.8.7 digest_0.6.37 magrittr_2.0.4 evaluate_1.0.5
## [9] grid_4.5.1 timechange_0.3.0 RColorBrewer_1.1-3 fastmap_1.2.0
## [13] jsonlite_2.0.0 backports_1.5.0 Formula_1.2-5 purrr_1.1.0
## [17] scales_1.4.0 jquerylib_0.1.4 abind_1.4-8 cli_3.6.5
## [21] rlang_1.1.6 withr_3.0.2 cachem_1.1.0 yaml_2.3.10
## [25] tools_4.5.1 ggsignif_0.6.4 broom_1.0.10 vctrs_0.6.5
## [29] R6_2.6.1 lifecycle_1.0.4 lubridate_1.9.4 snakecase_0.11.1
## [33] stringr_1.5.2 car_3.1-3 pkgconfig_2.0.3 pillar_1.11.1
## [37] bslib_0.9.0 gtable_0.3.6 glue_1.8.0 Rcpp_1.1.0
## [41] xfun_0.53 tibble_3.3.0 tidyselect_1.2.1 rstudioapi_0.17.1
## [45] knitr_1.50 farver_2.1.2 htmltools_0.5.8.1 labeling_0.4.3
## [49] rmarkdown_2.30 carData_3.0-5 compiler_4.5.1 S7_0.2.0