Purpose: GO over-representation analysis for differential expression across Leaf development and Phosphorus deficiency.
Experimental predictors: - Leaf: Leaf development/senescence (FC threshold: ±0.7) - -P: Phosphorus deficiency (FC threshold: ±2) - Leaf:-P: Interaction term
library(dplyr)
library(ggplot2)
library(clusterProfiler)
library(tidyr)
library(forcats)
library(eulerr)
library(ggupset)
library(SuperExactTest)
library(patchwork)
library(ggplotify)
library(cowplot)
library(tibble)
# Load differential expression results
effects_df <- read.csv("~/Desktop/predictor_effects_leaf_interaction_model.csv")
# Load curated labels
curated <- read.csv("~/Desktop/selected_DEGs_curated_locus_label_2.csv") %>%
dplyr::select(gene, locus_label)
effects_df$locus_label[effects_df$locus_label == "A0A1D6NG77"] <- NA
# Merge curated labels
effects_df <- effects_df %>%
left_join(curated, by = "gene", suffix = c("", "_curated")) %>%
mutate(locus_label = coalesce(locus_label_curated, locus_label)) %>%
dplyr::select(-locus_label_curated)
# Curated updates: only gene and locus_label
curated <- tibble::tribble(
~gene, ~locus_label,
"Zm00001eb144680", "rns",
"Zm00001eb339870", "pld16",
"Zm00001eb289800", "pah1",
"Zm00001eb297970", "sqd2",
"Zm00001eb154650", "ppa1",
"Zm00001eb430590", "nrx3",
"Zm00001eb335670", "sqd3",
"Zm00001eb258130", "mgd3",
"Zm00001eb280120", "pfk1",
"Zm00001eb151650", "pap1",
"Zm00001eb130570", "sag21",
"Zm00001eb369590", "nrx1",
"Zm00001eb048730", "spx2",
"Zm00001eb238670", "pep2",
"Zm00001eb239700", "ppa2",
"Zm00001eb386270", "spx6",
"Zm00001eb370610", "rfk1",
"Zm00001eb247580", "ppck3",
"Zm00001eb277280", "gst19",
"Zm00001eb361620", "ppa2"
)
# Update only the locus_label using curated values
effects_df <- effects_df %>%
left_join(curated, by = "gene", suffix = c("", "_new")) %>%
mutate(locus_label = coalesce(locus_label_new, locus_label)) %>%
select(-locus_label_new)
# Load GO annotation data
TERM2NAME <- readRDS(
"/Users/fvrodriguez/Desktop/GOMAP_maize_B73_NAM5/TERM2NAME.rds"
)
TERM2GENE <- readRDS(
"/Users/fvrodriguez/Desktop/GOMAP_maize_B73_NAM5/TERM2GENE_Fattel_2024_full.rds"
)
# Gene universe
universe <- union(
effects_df$gene[effects_df$predictor == "Leaf"],
effects_df$gene[effects_df$predictor == "-P"]
)
cat("Universe size:", length(universe), "genes\n")
## Universe size: 24011 genes
# Extract gene lists by predictor and regulation direction
redundant_DEGs <- with(effects_df %>% filter(is_hiconf_DEG), {
list(
Leaf.Down = gene[predictor == "Leaf" & regulation == "Downregulated"],
Leaf.Up = gene[predictor == "Leaf" & regulation == "Upregulated"],
`-P.Down` = gene[predictor == "-P" & regulation == "Downregulated"],
`-P.Up` = gene[predictor == "-P" & regulation == "Upregulated"],
`Leaf:-P.Down` = gene[predictor == "Leaf:-P" & regulation == "Downregulated"],
`Leaf:-P.Up` = gene[predictor == "Leaf:-P" & regulation == "Upregulated"]
)
})
DEGs <- lapply(redundant_DEGs, unique)
n_genes <- sapply(DEGs, length)
cat("Number of DEGs per cluster:\n")
## Number of DEGs per cluster:
print(n_genes)
## Leaf.Down Leaf.Up -P.Down -P.Up Leaf:-P.Down Leaf:-P.Up
## 746 676 212 580 194 361
# --- 1. Prepare labels and DEG list ---
label_map <- c(
"Leaf.Down" = "Leaf Down",
"Leaf.Up" = "Leaf Up",
"-P.Down" = "-P Down",
"-P.Up" = "-P Up",
"Leaf:-P.Down" = "Negative Leaf × -P",
"Leaf:-P.Up" = "Positive Leaf × -P"
)
DEGs_renamed <- setNames(DEGs, label_map[names(DEGs)])
# --- 2. Generate Euler diagram and convert to ggplot ---
fit <- euler(DEGs_renamed)
v <- plot(
fit,
quantities = list(cex = 2),
labels = list(font = 2, cex = 2),
fills = list(
fill = c("#4393C3", "#D6604D", "#92C5DE", "#F4A582", "#2166AC", "#B2182B"),
alpha = 0.5
),
edges = list(col = "grey30", lwd = 1),
legend = FALSE
)
# Apply label repositioning logic
diagram <- v$children[[1]]$children[[1]]
tags <- diagram$children$tags$children
e <- fit$ellipses
center_h <- mean(e$h)
center_k <- mean(e$k)
scale_factor <- 0.92
for (i in seq_along(e$h)) {
curr_x <- e$h[i]; curr_y <- e$k[i]
dx <- curr_x - center_h; dy <- curr_y - center_k
dist <- sqrt(dx^2 + dy^2); if (dist == 0) dist <- 1
new_x <- curr_x + (dx / dist) * e$a[i] * scale_factor
new_y <- curr_y + (dy / dist) * e$b[i] * scale_factor
tags[[i]]$children[[1]]$x <- unit(new_x, "native")
tags[[i]]$children[[1]]$y <- unit(new_y, "native")
}
diagram$children$tags$children <- tags
v$children[[1]]$children[[1]] <- diagram
# --- 3. Generate filtered UpSet plot (significant intersections only) ---
N <- length(universe)
set_result <- supertest(DEGs, n = N)
supertest_df <- summary(set_result)$Table %>%
as.data.frame() %>%
mutate(
intersection = Reduce(
function(x, pattern) gsub(pattern, label_map[pattern], x, fixed = TRUE),
names(label_map),
init = Intersections
),
intersection = sapply(strsplit(intersection, " & "), function(x) paste(sort(x), collapse = ";")),
fold_enrichment = Observed.Overlap / pmax(Expected.Overlap, 0.1),
log2FE = log2(fold_enrichment)
) %>%
filter(P.value < 0.05)
# Membership only for significant intersections
deg_membership <- stack(DEGs) %>%
rename(gene = values, category = ind) %>%
mutate(category = label_map[as.character(category)]) %>%
group_by(gene) %>%
summarise(categories = list(as.character(category)), .groups = "drop") %>%
mutate(intersection = sapply(categories, function(x) paste(sort(x), collapse = ";"))) %>%
filter(intersection %in% supertest_df$intersection) %>%
left_join(supertest_df, by = "intersection")
set_order <- c(
"Leaf Down", "Leaf Up",
"-P Down", "-P Up",
"Negative Leaf × -P", "Positive Leaf × -P"
)
p_upset <- ggplot(deg_membership, aes(x = categories)) +
geom_bar(aes(fill = log2FE)) +
geom_text(stat = "count", aes(label = after_stat(count)),
vjust = -0.6, size = 8) +
scale_x_upset(
order_by = "freq",
sets = set_order
) +
scale_fill_gradient2(
low = "#2166AC", mid = "grey90", high = "#B2182B",
midpoint = 0, name = expression(log[2](Obs/Exp))
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
labs(x = NULL, y = "Intersection Size") +
theme_classic(base_size = 25) +
theme(
legend.position = c(0.75,0.8),
legend.text = element_text(size = 20),
legend.title = element_text(size = 20,hjust = 1),
axis.text.y = element_text(size = 25),
axis.title.y = element_text(size = 25),
plot.title = element_text(size = 28, face = "bold", hjust = 0.5)
) +
theme_combmatrix(
combmatrix.panel.point.size = 8,
combmatrix.panel.line.size = 2,
combmatrix.label.text = element_text(size = 25)
)
#' Perform GO Enrichment Analysis
#'
#' @param x Character vector of gene IDs
#' @param ontology GO ontology ("BP", "MF", or "CC")
#' @return enrichResult object
ego <- function(x, ontology = "BP") {
enricher(
gene = x,
pvalueCutoff = 0.05,
pAdjustMethod = "none",
universe = universe,
minGSSize = 10,
maxGSSize = 500,
qvalueCutoff = 0.2,
TERM2GENE = TERM2GENE[TERM2GENE$GO %in% TERM2NAME[[ontology]]$go_id, ],
TERM2NAME = TERM2NAME[[ontology]]
)
}
# Run comparative GO enrichment
compGO_BP <- compareCluster(geneCluster = DEGs, fun = ego)
# Set factor levels
cluster_order <- names(DEGs)
levels(compGO_BP@compareClusterResult$Cluster) <- cluster_order
cat("Enriched terms per cluster:\n")
## Enriched terms per cluster:
print(table(compGO_BP@compareClusterResult$Cluster))
##
## Leaf.Down Leaf.Up -P.Down -P.Up Leaf:-P.Down Leaf:-P.Up
## 152 87 0 135 12 86
# --- 1. Extract annotated genes per cluster ---
annotated_genes <- compGO_BP@compareClusterResult %>%
separate_longer_delim(geneID, delim = "/") %>%
rename(gene = geneID) %>%
dplyr::select(Cluster, gene) %>%
distinct()
# Create gene lists by cluster
annotated_DEGs <- annotated_genes %>%
group_by(Cluster) %>%
summarise(genes = list(unique(gene)), .groups = "drop")
annotated_DEGs <- setNames(annotated_DEGs$genes, annotated_DEGs$Cluster)
cat("Number of annotated genes per cluster:\n")
## Number of annotated genes per cluster:
print(sapply(annotated_DEGs, length))
## Leaf.Down Leaf.Up -P.Up Leaf:-P.Down Leaf:-P.Up
## 301 204 241 29 121
# Check annotation coverage
all_clusters <- names(DEGs)
coverage_stats <- data.frame(
Cluster = all_clusters,
Original = sapply(DEGs, length),
stringsAsFactors = FALSE
)
coverage_stats$Annotated <- sapply(all_clusters, function(cl) {
if (cl %in% names(annotated_DEGs)) {
length(annotated_DEGs[[cl]])
} else {
0
}
})
coverage_stats <- coverage_stats %>%
mutate(Coverage_pct = round(100 * Annotated / Original, 1))
cat("\nAnnotation coverage by cluster:\n")
##
## Annotation coverage by cluster:
print(coverage_stats)
## Cluster Original Annotated Coverage_pct
## Leaf.Down Leaf.Down 746 301 40.3
## Leaf.Up Leaf.Up 676 204 30.2
## -P.Down -P.Down 212 0 0.0
## -P.Up -P.Up 580 241 41.6
## Leaf:-P.Down Leaf:-P.Down 194 29 14.9
## Leaf:-P.Up Leaf:-P.Up 361 121 33.5
# --- Rename annotated DEG list using label_map ---
annotated_DEGs_renamed <- setNames(
annotated_DEGs,
label_map[names(annotated_DEGs)]
)
# ===================================================================================
# >>> CORRECT EULER DIAGRAM: Color ONLY the input sets (eulerr auto-blends overlaps)
# ===================================================================================
# Define colors for each biological set (must match renamed labels exactly)
set_colors <- c(
"Leaf Down" = "#4393C3",
"Leaf Up" = "#D6604D",
"-P Up" = "#F4A582",
"Negative Leaf × -P" = "#2166AC",
"Positive Leaf × -P" = "#B2182B"
)
# Keep only sets that actually have annotated genes AND have a defined color
present_sets <- names(annotated_DEGs_renamed)
present_sets <- present_sets[present_sets %in% names(set_colors)]
annotated_DEGs_renamed <- annotated_DEGs_renamed[present_sets]
# Assign colors in the exact order of the sets passed to euler()
fill_colors_for_sets <- set_colors[present_sets]
cat("\nSets used in Euler plot:\n")
##
## Sets used in Euler plot:
print(present_sets)
## [1] "Leaf Down" "Leaf Up" "-P Up"
## [4] "Negative Leaf × -P" "Positive Leaf × -P"
cat("Colors assigned to sets (in order):\n")
## Colors assigned to sets (in order):
print(fill_colors_for_sets)
## Leaf Down Leaf Up -P Up Negative Leaf × -P
## "#4393C3" "#D6604D" "#F4A582" "#2166AC"
## Positive Leaf × -P
## "#B2182B"
# Fit and plot Euler diagram — ONLY pass fill for the input sets (not regions)
fit_annotated <- euler(annotated_DEGs_renamed)
p_venn_annotated <- plot(
fit_annotated,
quantities = list(cex = 2),
labels = list(font = 2, cex = 2),
fills = list(fill = fill_colors_for_sets, alpha = 0.5), # ← length = number of sets (e.g., 5)
edges = list(col = "grey30", lwd = 1),
legend = FALSE
)
# ===================================================================================
# >>> Coverage bar plot
# ===================================================================================
total_original <- sum(sapply(DEGs, length))
total_annotated <- sum(sapply(annotated_DEGs, length))
coverage_overall <- round(100 * total_annotated / total_original, 1)
label_map_short <- c(
"Leaf.Down" = "Leaf Down",
"Leaf.Up" = "Leaf Up",
"-P.Down" = "-P Down",
"-P.Up" = "-P Up",
"Leaf:-P.Down" = "Neg L×-P",
"Leaf:-P.Up" = "Pos L×-P"
)
coverage_plot_data <- coverage_stats %>%
mutate(
Cluster_label = label_map_short[Cluster],
Annotated_in_enriched = Annotated,
Not_annotated = Original - Annotated
) %>%
tidyr::pivot_longer(
cols = c(Annotated_in_enriched, Not_annotated),
names_to = "Category",
values_to = "Count"
) %>%
mutate(
Category = factor(Category, levels = c("Not_annotated", "Annotated_in_enriched"))
)
annotated_colors <- c(
"Leaf Down" = "#2166AC",
"Leaf Up" = "#B2182B",
"-P Down" = "#4393C3",
"-P Up" = "#D6604D",
"Neg L×-P" = "#053061",
"Pos L×-P" = "#67001F"
)
not_annotated_colors <- c(
"Leaf Down" = "#92C5DE",
"Leaf Up" = "#F4A582",
"-P Down" = "#D1E5F0",
"-P Up" = "#FDDBC7",
"Neg L×-P" = "#4393C3",
"Pos L×-P" = "#D6604D"
)
set_order_short <- c("Leaf Down", "Leaf Up", "-P Down", "-P Up", "Neg L×-P", "Pos L×-P")
coverage_plot_data <- coverage_plot_data %>%
mutate(
fill_color = ifelse(
Category == "Annotated_in_enriched",
annotated_colors[Cluster_label],
not_annotated_colors[Cluster_label]
),
Cluster_label = factor(Cluster_label, levels = rev(set_order_short))
)
p_coverage <- ggplot(coverage_plot_data, aes(x = Cluster_label, y = Count, fill = fill_color)) +
geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
geom_text(
data = coverage_stats %>%
filter(Annotated > 0) %>%
mutate(Cluster_label = factor(label_map_short[Cluster], levels = rev(set_order_short))),
aes(x = Cluster_label, y = 10, label = Annotated),
inherit.aes = FALSE, color = "white", size = 7, fontface = "bold", hjust = 0
) +
geom_text(
data = coverage_stats %>%
mutate(Cluster_label = factor(label_map_short[Cluster], levels = rev(set_order_short))),
aes(x = Cluster_label, y = Original, label = Original),
inherit.aes = FALSE, color = "black", size = 7, fontface = "bold", hjust = -0.2
) +
scale_fill_identity() +
coord_flip(clip = "off") +
scale_y_continuous(expand = expansion(mult = c(0.02, 0.15))) +
labs(x = NULL, y = "Number of genes", title = "High confidence DEGs\nin enriched GO BP terms") +
theme_classic(base_size = 25) +
theme(
plot.title = element_text(size = 28, face = "bold", hjust = 0.5),
axis.text.y = element_text(size = 25),
axis.text.x = element_text(size = 25),
axis.title.x = element_text(size = 25),
plot.margin = margin(t = 40, r = 5, b = 5, l = 5, unit = "pt")
)
# ===================================================================================
# >>> Final plot assembly
# ===================================================================================
p_venn_annotated_gg <- as.ggplot(p_venn_annotated) +
labs(title = sprintf("High confidence DEGs\nin enriched GO BP terms",
total_annotated, total_original, coverage_overall)) +
theme(
plot.title = element_text(size = 28, face = "bold", hjust = 0.5),
plot.margin = margin(t = 40, r = 5, b = 5, l = 5, unit = "pt")
)
p_venn_gg <- as.ggplot(v) +
labs(title = "High confidence DEGs") +
theme(plot.title = element_text(size = 28, face = "bold", hjust = 0.5))
combined_top <- plot_grid(p_venn_gg, p_upset, ncol = 2, rel_widths = c(1.5, 1), labels = c("A", "B"), label_size = 35)
combined_bottom <- plot_grid(p_coverage, p_venn_annotated_gg, ncol = 2, rel_widths = c(1, 1.5), labels = c("C", "D"), label_size = 35)
final_combined <- plot_grid(combined_top, combined_bottom, ncol = 1, rel_heights = c(1, 1))
print(final_combined)
ggsave(
final_combined,
file = "~/Desktop/FigS_DEG_analysis.svg",
width = 14,
height = 16
)
# Load and apply GO slim filter
# slim <- read.table("~/Desktop/gobig/gobig_maize/GO_terms/BP_GO_terms.tsv", header= FALSE, sep ="\t")
# colnames(slim) <- c("ID","ontlogy")
slim <- read.csv("~/Desktop/slim_to_plot.csv")
slim_comp <- compGO_BP
slim_comp@compareClusterResult <- slim_comp@compareClusterResult %>%
filter(ID %in% slim$ID)
cat("Curated terms per cluster:\n")
## Curated terms per cluster:
print(table(slim_comp@compareClusterResult$Cluster))
##
## Leaf.Down Leaf.Up -P.Down -P.Up Leaf:-P.Down Leaf:-P.Up
## 18 8 0 14 3 8
# Filter to terms with annotated gene symbols
with_known_gene <- slim_comp@compareClusterResult %>%
separate_longer_delim(geneID, delim = "/") %>%
rename(gene = geneID) %>%
inner_join(
effects_df %>%
dplyr::select(gene, locus_label, desc_merged) %>%
distinct(),
relationship = "many-to-many"
) %>%
filter(!is.na(locus_label) & !is.na(desc_merged)) %>%
pull(ID) %>%
unique()
cat("GO terms with annotated genes:", length(with_known_gene), "\n")
## GO terms with annotated genes: 35
# Select top 6 terms by p-value per cluster
sorted_by_p <- slim_comp@compareClusterResult %>%
filter(ID %in% with_known_gene) %>%
mutate(neglogP = -log10(p.adjust)) %>%
mutate(Cluster = factor(Cluster, levels = cluster_order)) %>%
droplevels() %>%
group_by(Cluster) %>%
arrange(Cluster, p.adjust) %>%
slice(1:4) %>%
mutate(label_order = 1:n()) %>%
mutate(Description = fct_reorder(Description, label_order))
top_terms <- levels(sorted_by_p$Description)
top_terms
## [1] "flavonoid biosynthetic process"
## [2] "cell wall biogenesis"
## [3] "meristem maintenance"
## [4] "regulation of meristem growth"
## [5] "amino acid transmembrane transport"
## [6] "response to nitrate"
## [7] "glycerolipid catabolic process"
## [8] "aging"
## [9] "cellular response to phosphate starvation"
## [10] "galactolipid biosynthetic process"
## [11] "detoxification"
## [12] "photosynthesis, light harvesting in photosystem I"
## [13] "glycosinolate metabolic process"
## [14] "leaf morphogenesis"
## [15] "shoot axis formation"
## [16] "response to growth hormone"
# Prepare enrichment data
to_plot <- slim_comp@compareClusterResult %>%
filter(ID %in% with_known_gene) %>%
mutate(Cluster = factor(Cluster, levels = cluster_order)) %>%
filter(Description %in% top_terms) %>%
mutate(neglogP = -log10(p.adjust)) %>%
group_by(Cluster) %>%
arrange(Cluster, p.adjust) %>%
mutate(Description = factor(Description, levels = top_terms)) %>%
mutate(label_order = 1:n()) %>%
mutate(Description = fct_reorder(Description, label_order))
# Prepare gene metadata
effects_gene_label <- effects_df %>%
filter(is_hiconf_DEG) %>%
mutate(Cluster = case_when(
regulation == "Upregulated" ~ paste(predictor, "Up", sep = "."),
regulation == "Downregulated" ~ paste(predictor, "Down", sep = ".")
)) %>%
dplyr::select(
Cluster, gene, locus_label, locus_symbol,
desc_merged, logFC, P = adj.P.Val, mahalanobis
) %>%
filter(!is.na(locus_label) & !is.na(locus_symbol) & !is.na(Cluster)) %>%
mutate(neglogPG = -log10(P))
# Map top gene per GO term by p-value
term2gene_label <- to_plot %>%
filter(ID %in% with_known_gene) %>%
separate_longer_delim(geneID, delim = "/") %>%
rename(gene = geneID) %>%
inner_join(effects_gene_label, relationship = "many-to-many") %>%
dplyr::select(
Cluster, gene, locus_label, ID, Description,
p.adjust, Count, mahalanobis, logFC, P, neglogPG
) %>%
distinct() %>%
group_by(Cluster, ID) %>%
arrange(Cluster, ID, P) %>%
slice(1)
# Manual fix for specific gene
is_diox <- term2gene_label$gene == "Zm00001eb419580"
term2gene_label$locus_label[is_diox] <- "diox"
# Set factor levels
levels(to_plot$Cluster) <- cluster_order
levels(term2gene_label$Cluster) <- cluster_order
# Format cluster labels
labels <- paste0(
gsub(".", " ", cluster_order, fixed = TRUE),
"\n(", n_genes, ")"
) %>%
gsub("Leaf:-P.Down", "Negative\nLeaf × -P", .) %>%
gsub("Leaf:-P.Up", "Positive\nLeaf × -P", .)
names(labels) <- names(n_genes)
# Manual fix for specific gene
is_diox <- term2gene_label$gene == "Zm00001eb419580"
term2gene_label$locus_label[is_diox] <- "diox"
# Create plot
annotation_plot <- to_plot %>%
ggplot(aes(x = Cluster, y = Description, size = Count, fill = neglogP)) +
ggtitle("GO Biological Process") +
scale_fill_continuous(
low = "dodgerblue",
high = "tomato",
name = "-log10(FDR)",
guide = guide_colorbar()
) +
scale_y_discrete(limits = rev(levels(to_plot$Description))) +
scale_x_discrete(
labels = labels,
drop = FALSE,
expand = expansion(add = c(0.5, 1))
) +
geom_point(shape = 21) +
geom_text(
data = term2gene_label,
position = position_nudge(0.1),
hjust = 0,
vjust = 0.5,
fontface = "italic",
size = 7,
mapping = aes(x = Cluster, y = Description, label = locus_label),
inherit.aes = FALSE
) +
scale_size(range = c(2, 8)) +
ylab(NULL) +
xlab(NULL) +
DOSE::theme_dose(font.size = 20) +
theme(
plot.title = element_text( size = 25),
axis.text.x = element_text(hjust = 0.5, size =20, face = "bold")
)
print(annotation_plot)
# Save for figure assembly
saveRDS(
annotation_plot,
file = "~/Desktop/figure_panels/go_panel.rds"
)
ggsave(
annotation_plot,
filename = "~/Desktop/figure_panels/go_panel.pdf",
height = 7,
width = 16,
units = "in"
)
# Export the data used to generate the GO plot
go_plot_data <- to_plot %>%
left_join(
term2gene_label %>%
dplyr::select(Cluster, ID, locus_label),
by = c("Cluster", "ID")
) %>%
dplyr::select(
Cluster, ID, Description, GeneRatio, BgRatio,
pvalue, p.adjust, qvalue, geneID, Count,
neglogP, locus_label
)
write.csv(
go_plot_data,
file = "~/Desktop/figure_panels/go_plot_input_data.csv",
row.names = FALSE
)
cat("Exported", nrow(go_plot_data), "GO terms to go_plot_input_data.csv\n")
## Exported 28 GO terms to go_plot_input_data.csv
p_annotation <- go_plot_data %>% filter(Description=="cellular response to phosphate starvation") %>% select(Cluster,geneID) %>%
separate_longer_delim(
cols = geneID,
delim = "/"
)
leafxP_0016036 <- effects_df %>%
filter(is_hiconf_DEG,gene %in% p_annotation$geneID & predictor=="Leaf:-P") %>%
dplyr::select(predictor, regulation,gene,locus_label, logFC, neglogP, desc_merged)
write.csv(
leafxP_0016036,
file = "~/Desktop/leafxP_0016036_genes.csv"
)
# Load required library
library(dplyr)
# Read your gene CSV file
gene_data <- read.csv("~/Desktop/leafxP_0016036_genes.csv", row.names = NULL)
# Prepare gene table data
gene_table_data <- gene_data %>%
mutate(
logFC = round(logFC, digits = 2),
neglogP = round(neglogP, digits = 2),
predictor_display = case_when(
predictor == "-P" ~ "Phosphorus Deficiency (-P)",
predictor == "Leaf" ~ "Leaf Tissue Position (Leaf)",
predictor == "Leaf:-P" ~ "Leaf × Phosphorus Interaction (Leaf:-P)",
TRUE ~ as.character(predictor)
),
ID = gene, # Use 'gene' column as ID
Locus_label = ifelse(locus_label == "", "-", locus_label) # Replace empty with "-"
) %>%
arrange(predictor, regulation != "Upregulated", desc(neglogP)) # Up first, then Down
# Function to generate LaTeX table matching your format
create_gene_table <- function(df) {
lines <- c()
lines <- c(lines, "\\begin{table}[h!]")
lines <- c(lines, "\\centering")
lines <- c(lines, "\\footnotesize")
lines <- c(lines, "\\caption{Selected Differentially Expressed Genes for Leaf Stage effect.}")
lines <- c(lines, "\\label{table::leafDEGs}")
lines <- c(lines, "\\begin{tabular}{ccp{7.5cm}cc}")
lines <- c(lines, "\\hline")
lines <- c(lines, "\\textbf{ID} & \\textbf{Locus label} & \\multicolumn{1}{c}{\\textbf{Description}} & \\textbf{$-\\log_{10}{\\textit{FDR}}$} & \\textbf{$\\log_2{\\text{FC}}$}\\\\")
lines <- c(lines, "\\hline")
for (pred in unique(df$predictor)) {
pred_data <- df %>% filter(predictor == pred)
# Add predictor header if more than one predictor exists
if (length(unique(df$predictor)) > 1) {
pred_name <- pred_data$predictor_display[1]
lines <- c(lines, sprintf("\\multicolumn{5}{l}{\\textit{\\textbf{%s}}} \\\\", pred_name))
lines <- c(lines, "\\hline")
}
# Handle Upregulated
up_data <- pred_data %>% filter(regulation == "Upregulated")
if (nrow(up_data) > 0) {
lines <- c(lines, "\\multicolumn{5}{l}{\\textit{\\textbf{Upregulated Genes}}} \\\\")
lines <- c(lines, "\\hline")
for (i in 1:nrow(up_data)) {
row <- up_data[i, ]
desc <- gsub("_", "\\_", row$desc_merged) # Escape underscores for LaTeX
lines <- c(lines, sprintf(
"%s & %s & %s & %.2f & %.2f\\\\",
row$ID,
row$Locus_label,
desc,
row$neglogP,
row$logFC
))
}
}
# Handle Downregulated
down_data <- pred_data %>% filter(regulation == "Downregulated")
if (nrow(down_data) > 0) {
lines <- c(lines, "\\multicolumn{5}{l}{\\textit{\\textbf{Downregulated Genes}}} \\\\")
lines <- c(lines, "\\hline")
for (i in 1:nrow(down_data)) {
row <- down_data[i, ]
desc <- gsub("_", "\\_", row$desc_merged) # Escape underscores
lines <- c(lines, sprintf(
"%s & %s & %s & %.2f & %.2f\\\\",
row$ID,
row$Locus_label,
desc,
row$neglogP,
row$logFC
))
}
}
lines <- c(lines, "\\hline")
}
lines <- c(lines, "\\end{tabular}")
lines <- c(lines, "\\end{table}")
return(paste(lines, collapse = "\n"))
}
# Generate and save LaTeX table
latex_gene_table <- create_gene_table(gene_table_data)
writeLines(
latex_gene_table,
con = "~/Desktop/leaf_p_interaction_genes_table.tex"
)
cat("✅ Gene LaTeX table saved to: leaf_p_interaction_genes_table.tex\n")
## ✅ Gene LaTeX table saved to: leaf_p_interaction_genes_table.tex
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] tibble_3.3.0 cowplot_1.2.0 ggplotify_0.1.3
## [4] patchwork_1.3.2 SuperExactTest_1.1.0 ggupset_0.4.1
## [7] eulerr_7.0.4 forcats_1.0.1 tidyr_1.3.1
## [10] clusterProfiler_4.18.2 ggplot2_4.0.1 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 gson_0.1.0 rlang_1.1.6
## [4] magrittr_2.0.4 DOSE_4.4.0 compiler_4.5.1
## [7] RSQLite_2.4.4 polylabelr_0.3.0 png_0.1-8
## [10] systemfonts_1.3.1 vctrs_0.6.5 reshape2_1.4.5
## [13] stringr_1.6.0 pkgconfig_2.0.3 crayon_1.5.3
## [16] fastmap_1.2.0 XVector_0.50.0 labeling_0.4.3
## [19] rmarkdown_2.30 enrichplot_1.30.3 ragg_1.5.0
## [22] purrr_1.2.0 bit_4.6.0 xfun_0.54
## [25] cachem_1.1.0 aplot_0.2.9 jsonlite_2.0.0
## [28] blob_1.2.4 BiocParallel_1.44.0 parallel_4.5.1
## [31] R6_2.6.1 bslib_0.9.0 stringi_1.8.7
## [34] RColorBrewer_1.1-3 jquerylib_0.1.4 GOSemSim_2.36.0
## [37] Rcpp_1.1.0 Seqinfo_1.0.0 knitr_1.50
## [40] ggtangle_0.0.8 R.utils_2.13.0 IRanges_2.44.0
## [43] Matrix_1.7-4 splines_4.5.1 igraph_2.2.1
## [46] tidyselect_1.2.1 qvalue_2.42.0 rstudioapi_0.17.1
## [49] yaml_2.3.10 codetools_0.2-20 lattice_0.22-7
## [52] plyr_1.8.9 Biobase_2.70.0 treeio_1.34.0
## [55] withr_3.0.2 KEGGREST_1.50.0 S7_0.2.1
## [58] evaluate_1.0.5 gridGraphics_0.5-1 polyclip_1.10-7
## [61] Biostrings_2.78.0 pillar_1.11.1 ggtree_4.0.1
## [64] stats4_4.5.1 ggfun_0.2.0 generics_0.1.4
## [67] S4Vectors_0.48.0 scales_1.4.0 tidytree_0.4.6
## [70] glue_1.8.0 gdtools_0.4.4 lazyeval_0.2.2
## [73] tools_4.5.1 data.table_1.17.8 fgsea_1.36.0
## [76] ggiraph_0.9.2 fs_1.6.6 fastmatch_1.1-6
## [79] ape_5.8-1 AnnotationDbi_1.72.0 nlme_3.1-168
## [82] cli_3.6.5 rappdirs_0.3.3 textshaping_1.0.4
## [85] fontBitstreamVera_0.1.1 gtable_0.3.6 R.methodsS3_1.8.2
## [88] yulab.utils_0.2.1 sass_0.4.10 digest_0.6.39
## [91] fontquiver_0.2.1 BiocGenerics_0.56.0 ggrepel_0.9.6
## [94] htmlwidgets_1.6.4 farver_2.1.2 memoise_2.0.1
## [97] htmltools_0.5.8.1 R.oo_1.27.1 lifecycle_1.0.4
## [100] httr_1.4.7 GO.db_3.22.0 fontLiberation_0.1.0
## [103] bit64_4.6.0-1