Core Functions for Expression Index Analysis
Calculate Expression Indices
#' Calculate transcriptomic expression indices (xpindex) for gene sets
#'
#' @param expression_matrix Matrix with genes as rows, samples as columns
#' @param gene_sets Named list of gene ID vectors
#' @param method Aggregation method: "sum", "mean", or "median"
#' @return Data frame with sample-level xpindex values
calculate_xpindex <- function(expression_matrix,
gene_sets,
method = "mean") {
indices <- lapply(names(gene_sets), function(set_name) {
genes_in_set <- gene_sets[[set_name]]
genes_available <- intersect(genes_in_set, rownames(expression_matrix))
if (length(genes_available) == 0) {
warning(paste("No genes from", set_name, "found in expression matrix"))
return(rep(NA, ncol(expression_matrix)))
}
set_expression <- expression_matrix[genes_available, , drop = FALSE]
index <- switch(method,
"sum" = colSums(set_expression, na.rm = TRUE),
"mean" = colMeans(set_expression, na.rm = TRUE),
"median" = apply(set_expression, 2, median, na.rm = TRUE),
stop("method must be 'sum', 'mean', or 'median'")
)
cat(sprintf(
"%s: %d/%d genes found\n",
set_name, length(genes_available), length(genes_in_set)
))
index
})
indices_df <- as.data.frame(indices)
names(indices_df) <- names(gene_sets)
indices_df$sample <- colnames(expression_matrix)
indices_df %>%
select(sample, everything()) %>%
left_join(
extract_sample_metadata(colnames(expression_matrix)),
by = "sample"
)
}
Normalize Expression Indices
#' Min-max normalization for transcriptomic expression indices
#'
#' @param xpindex_data Data frame with xpindex columns
#' @param index_names Character vector of column names to normalize
#' @param expression_matrix Optional: original expression matrix
#' @param gene_sets Optional: named list of gene vectors
#' @param method Normalization scope: "per_set", "per_union", or "global"
#'
#' @return Data frame with normalized xpindex values (0-1 scale)
normalize_xpindex <- function(xpindex_data,
index_names,
expression_matrix = NULL,
gene_sets = NULL,
method = "per_set") {
stopifnot(
method %in% c("per_set", "per_union", "global"),
is.data.frame(xpindex_data),
all(index_names %in% names(xpindex_data))
)
if (method == "per_set") {
xpindex_data %>%
mutate(across(
all_of(index_names),
~ (. - min(.)) / (max(.) - min(.))
))
} else if (method == "per_union") {
stopifnot(
!is.null(expression_matrix),
!is.null(gene_sets)
)
union_genes <- unique(unlist(gene_sets))
union_genes <- union_genes[union_genes %in% rownames(expression_matrix)]
global_min <- min(expression_matrix[union_genes, ], na.rm = TRUE)
global_max <- max(expression_matrix[union_genes, ], na.rm = TRUE)
xpindex_data %>%
mutate(across(
all_of(index_names),
~ (. - global_min) / (global_max - global_min)
))
} else if (method == "global") {
stopifnot(!is.null(expression_matrix))
global_min <- min(expression_matrix, na.rm = TRUE)
global_max <- max(expression_matrix, na.rm = TRUE)
xpindex_data %>%
mutate(across(
all_of(index_names),
~ (. - global_min) / (global_max - global_min)
))
}
}
Summarize Expression Indices
#' Prepare summary statistics for transcriptomic expression indices
#'
#' @param xpindex_data Data frame with normalized xpindex values
#' @param index_names Character vector of xpindex column names
#' @param group_vars Character vector of grouping variables
#'
#' @return Data frame with mean and SE for each xpindex and group
summarize_xpindex <- function(xpindex_data,
index_names,
group_vars = "leaf_stage") {
summary_data <- xpindex_data %>%
group_by(across(all_of(group_vars))) %>%
summarise(
across(
all_of(index_names),
list(mean = mean, se = ~ sd(.) / sqrt(n())),
.names = "{.col}_{.fn}"
),
.groups = "drop"
)
summary_long <- summary_data %>%
pivot_longer(
cols = -all_of(group_vars),
names_to = c("xpindex", ".value"),
names_pattern = "(.+)_(mean|se)"
)
summary_long
}
Statistical Modeling Functions
#' Convert p-values to significance stars
#'
#' @param p Numeric vector of p-values
#' @return Character vector with significance stars
star_significance <- function(p) {
sapply(p, function(x) {
if (is.na(x)) return("ns")
if (x < 0.0001) return("****")
if (x < 0.001) return("***")
if (x < 0.01) return("**")
if (x < 0.05) return("*")
return("ns")
})
}
#' Fit linear models for expression indices
#'
#' @param xpindex_data Data frame with xpindex values
#' @param index_names Character vector of xpindex column names
#' @param formula_rhs Right-hand side of formula
#'
#' @return Named list of fitted lm objects
fit_xpindex_models <- function(xpindex_data,
index_names,
formula_rhs = "leaf_stage * treatment") {
models <- lapply(index_names, function(xpindex) {
formula_str <- paste(xpindex, "~", formula_rhs)
lm(as.formula(formula_str), data = xpindex_data)
})
names(models) <- index_names
models
}
#' Extract statistics from expression index linear models
#'
#' @param model_list Named list of lm objects
#' @return Data frame with model statistics for all predictors
extract_xpindex_stats <- function(model_list) {
results <- lapply(names(model_list), function(index_name) {
model <- model_list[[index_name]]
model_summary <- summary(model)
coef_summary <- coef(model_summary)
predictors <- rownames(coef_summary)[-1]
data.frame(
xpindex = tools::toTitleCase(index_name),
predictor = predictors,
effect = coef_summary[predictors, "Estimate"],
std_err = coef_summary[predictors, "Std. Error"],
p_value = coef_summary[predictors, "Pr(>|t|)"],
r2 = model_summary$r.squared,
row.names = NULL,
stringsAsFactors = FALSE
)
})
bind_rows(results) %>%
mutate(
p_adj = p.adjust(p_value, method = "fdr"),
p_signif = star_significance(p_adj)
) %>%
arrange(xpindex, predictor, p_value) %>%
as_tibble()
}
Phase 1: Exploration
Purpose: Explore different gene sets and xpindex
calculations to understand patterns before constructing the main
figure.
Pigment Biosynthesis Indices (CornCyc Pathways)
Purpose: Calculate xpindex for pigment biosynthesis
using CornCyc pathway annotations.
Approach: Extract gene sets from CornCyc, calculate
mean xpindex, normalize per_set, and visualize developmental
trajectories.
corncyc <- read.table(
"/Users/fvrodriguez/Desktop/corncyc_pathways.20251021",
sep = "\t",
header = TRUE,
quote = ""
)
pathway_ids <- list(
chlorophyll = c("CHLOROPHYLL-SYN", "PWY-5068"),
carotenoid = "CAROTENOID-PWY",
anthocyanin = "PWY-5125",
flavonoids = "PWY1F-FLAVSYN"
)
corncyc_gene_sets <- lapply(names(pathway_ids), function(pigment) {
corncyc_genes <- corncyc %>%
filter(Pathway.id %in% pathway_ids[[pigment]]) %>%
pull(Protein.id) %>%
gsub("ZM00001EB", "Zm00001eb", .) %>%
gsub("_P\\d+$", "", ., perl = TRUE)
missing <- corncyc_genes[!grepl("Zm00001eb", corncyc_genes)]
if (length(missing) > 0) {
cat(pigment, "- Protein.id missing:",
paste(missing, collapse = ", "), "\n")
}
corncyc_genes[grepl("Zm00001eb", corncyc_genes)] %>% unique()
})
## chlorophyll - Protein.id missing: GDQC-106314-MONOMER, GDQC-112611-MONOMER, MONOMERDQC-5752, GDQC-114541-MONOMER, GDQC-108880-MONOMER
## carotenoid - Protein.id missing: MONOMER-15665, GBWI-61910-MONOMER, GBWI-61910-MONOMER, MONOMER-15665, MONOMER-15661
names(corncyc_gene_sets) <- names(pathway_ids)
{
cat("\n=== Pigment Gene Set Sizes ===\n")
print(sapply(corncyc_gene_sets, length))
}
##
## === Pigment Gene Set Sizes ===
## chlorophyll carotenoid anthocyanin flavonoids
## 32 35 15 51
corncyc_xpindex <- calculate_xpindex(
expression_matrix = normalized_expression,
gene_sets = corncyc_gene_sets,
method = "mean"
)
## chlorophyll: 23/32 genes found
## carotenoid: 29/35 genes found
## anthocyanin: 10/15 genes found
## flavonoids: 32/51 genes found
corncyc_xpindex_norm <- normalize_xpindex(
xpindex_data = corncyc_xpindex,
index_names = names(pathway_ids),
expression_matrix = normalized_expression,
gene_sets = corncyc_gene_sets,
method = "per_set"
)
{
cat("\n=== Normalized Pigment Index Ranges ===\n")
corncyc_xpindex_norm %>%
summarise(across(
all_of(names(pathway_ids)),
list(min = min, max = max),
.names = "{.col}_{.fn}"
)) %>%
pivot_longer(
cols = everything(),
names_to = c("index", "stat"),
names_pattern = "(.+)_(min|max)"
) %>%
pivot_wider(
names_from = stat,
values_from = value
) %>%
print()
}
##
## === Normalized Pigment Index Ranges ===
## # A tibble: 4 × 3
## index min max
## <chr> <dbl> <dbl>
## 1 chlorophyll 0 1
## 2 carotenoid 0 1
## 3 anthocyanin 0 1
## 4 flavonoids 0 1
corncyc_summary <- summarize_xpindex(
xpindex_data = corncyc_xpindex_norm,
index_names = names(pathway_ids),
group_vars = "leaf_stage"
)
base_size <- 30
p_pigments <- corncyc_summary %>%
filter(xpindex != "flavonoids") %>%
mutate(xpindex = tools::toTitleCase(xpindex)) %>%
ggplot(aes(x = leaf_stage, y = mean, color = xpindex)) +
geom_line(linewidth = 1.5) +
geom_point(size = 5) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
width = 0.2,
linewidth = 0.8
) +
labs(
x = "Leaf",
y = "Normalized Index",
color = "Pigment"
) +
scale_color_manual(
values = c(
"Chlorophyll" = "darkgreen",
"Carotenoid" = "gold",
"Anthocyanin" = "purple4"
)
) +
theme_classic(base_size = base_size) +
theme(legend.position = c(0.9, 0.9))
print(p_pigments)

ggsave(
"~/Desktop/corncyc_pigment_biosynthesis_indices.png",
plot = p_pigments,
width = 12,
height = 8,
dpi = 300
)
Photosynthesis vs Senescence by Leaf Stage
Purpose: Visualize photosynthesis and senescence
xpindex trajectories with treatment effects.
Approach: Calculate GO-based xpindex, normalize
per_set, and plot developmental patterns by treatment.
v3_v5 <- read.table(
file = "/Users/fvrodriguez/Desktop/B73_gene_xref/B73v3_to_B73v5.tsv",
sep = "\t",
header = FALSE
) %>%
rename(v3 = "V1", v5 = "V2") %>%
separate_longer_delim(cols = v5, delim = ",")
v4_v5 <- read.table(
file = "/Users/fvrodriguez/Desktop/B73_gene_xref/B73v4_to_B73v5.tsv",
sep = "\t",
header = FALSE
) %>%
rename(v4 = "V1", v5 = "V2") %>%
separate_longer_delim(cols = v5, delim = ",")
sag_orthologs <- read.csv("~/Desktop/SAG_orthologs.csv") %>%
janitor::clean_names() %>%
inner_join(v3_v5, by = c(gene_id_maize = "v3"))
staygreen_network <- read.csv(
"~/Desktop/staygreen_network_sekhon2019.csv",
na.strings = ""
) %>%
janitor::clean_names() %>%
inner_join(v4_v5, by = c(gene = "v4"))
natural_senescence <- read.csv("~/Desktop/natural_senescence.csv") %>%
janitor::clean_names() %>%
inner_join(v3_v5, by = c(gene_id = "v3"))
photosynthesis_genes <- TERM2GENE %>%
filter(GO %in% c(
"GO:0015979",
"GO:0019684",
"GO:0009768"
)) %>%
pull(gene) %>%
unique()
leaf_senescence_genes <- TERM2GENE %>%
filter(GO %in% c("GO:0010150")) %>%
pull(gene) %>%
unique()
gene_set_list <- list(
photosynthesis = intersect(photosynthesis_genes, universe),
senescence = intersect(leaf_senescence_genes, universe)
)
{
cat("\n=== Gene Set Sizes ===\n")
print(sapply(gene_set_list, length))
}
##
## === Gene Set Sizes ===
## photosynthesis senescence
## 525 157
xpindex_data <- calculate_xpindex(
expression_matrix = normalized_expression,
gene_sets = gene_set_list,
method = "mean"
)
## photosynthesis: 525/525 genes found
## senescence: 157/157 genes found
xpindex_norm <- normalize_xpindex(
xpindex_data = xpindex_data,
index_names = names(gene_set_list),
method = "per_set"
)
xpindex_summary <- summarize_xpindex(
xpindex_data = xpindex_norm,
index_names = names(gene_set_list),
group_vars = c("leaf_stage", "treatment")
) %>%
mutate(
xpindex = factor(
tools::toTitleCase(xpindex),
levels = c("Photosynthesis", "Senescence"),
labels = c("Photosynthesis", "Leaf Senescence")
)
)
p_by_treatment <- ggplot(
xpindex_summary,
aes(x = leaf_stage, y = mean, color = xpindex, linetype = treatment)
) +
geom_line(linewidth = 1.5) +
geom_point(size = 4) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
width = 0.2,
linewidth = 1
) +
scale_color_manual(
values = c("Photosynthesis" = "darkgreen",
"Leaf Senescence" = "orange")
) +
scale_linetype_manual(
values = c("+P" = "solid", "-P" = "dashed")
) +
labs(
x = "Leaf",
y = "Normalized Expression Index",
color = NULL,
linetype = "Treatment"
) +
theme_classic(base_size = base_size) +
theme(legend.position = "right")
print(p_by_treatment)

Main Figure: Left Panel Components
Purpose: Create publication-quality composite figure
showing normalized xpindex, individual gene trajectories, and marker
gene validation.
Panel A: Normalized Gene Set Indices
xpindex_summary_pooled <- summarize_xpindex(
xpindex_data = xpindex_norm,
index_names = names(gene_set_list),
group_vars = "leaf_stage"
) %>%
mutate(
xpindex = factor(
tools::toTitleCase(xpindex),
levels = c("Photosynthesis", "Senescence"),
labels = c("Photosynthesis", "Leaf Senescence")
)
)
lm_photo <- lm(photosynthesis ~ leaf_stage, data = xpindex_norm)
lm_sen <- lm(senescence ~ leaf_stage, data = xpindex_norm)
photo_stats <- summary(lm_photo)
sen_stats <- summary(lm_sen)
photo_r2 <- photo_stats$r.squared
photo_p <- coef(photo_stats)["leaf_stage", "Pr(>|t|)"]
sen_r2 <- sen_stats$r.squared
sen_p <- coef(sen_stats)["leaf_stage", "Pr(>|t|)"]
format_pval <- function(p) {
if (p < 0.001) return("p < 0.001")
if (p < 0.01) return(sprintf("p = %.3f", p))
return(sprintf("p = %.2f", p))
}
stats_text <- sprintf(
"R² = %.3f\n %s\n\n\nR² = %.3f\n %s",
photo_r2, format_pval(photo_p),
sen_r2, format_pval(sen_p)
)
base_family <- "Helvetica"
up <- 58
p_normalized <- ggplot(
xpindex_summary_pooled,
aes(x = leaf_stage, y = mean, color = xpindex)
) +
geom_line(linewidth = 1.5) +
geom_point(size = 4) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
width = 0.2,
linewidth = 1
) +
scale_color_manual(
values = c("Photosynthesis" = "darkgreen",
"Leaf Senescence" = "orange")
) +
annotate(
"text",
x = 1.0,
y = 0.45,
label = stats_text,
hjust = 0,
vjust = 0,
size = 4
) +
labs(
x = "Leaf",
y = "Normalized Expression Index",
color = NULL
) +
ggpubr::theme_classic2(base_size = base_size) +
theme(
legend.position = c(0.7, 0.9),
legend.text = element_text(size = 20),
legend.background = element_rect(fill = "transparent"),
legend.margin = margin(r = 50, unit = "pt"),
plot.margin = margin(5.5, 5.5, up, 7, "pt")
)
print(p_normalized)

Panel B: Spaghetti Plot (Individual Gene Trajectories)
xp_photo_genes <- intersect(
gene_set_list$photosynthesis,
rownames(normalized_expression)
)
xp_photosynthesis <- normalized_expression[xp_photo_genes, ] %>%
as.data.frame() %>%
tibble::rownames_to_column("gene") %>%
pivot_longer(-gene, names_to = "sample", values_to = "expression") %>%
mutate(
leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample))
) %>%
group_by(gene) %>%
mutate(centered_expr = expression - mean(expression)) %>%
ungroup() %>%
group_by(gene, leaf_stage) %>%
summarise(mean_expr = mean(centered_expr), .groups = "drop") %>%
arrange(gene)
xp_senescence_genes <- intersect(
gene_set_list$senescence,
rownames(normalized_expression)
)
xp_senescence <- normalized_expression[xp_senescence_genes, ] %>%
as.data.frame() %>%
tibble::rownames_to_column("gene") %>%
pivot_longer(-gene, names_to = "sample", values_to = "expression") %>%
mutate(
leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample))
) %>%
group_by(gene) %>%
mutate(centered_expr = expression - mean(expression)) %>%
ungroup() %>%
group_by(gene, leaf_stage) %>%
summarise(mean_expr = mean(centered_expr), .groups = "drop") %>%
arrange(gene)
{
cat("\n=== Spaghetti Plot Gene Counts ===\n")
cat("Photosynthesis genes:", length(xp_photo_genes), "\n")
cat("Senescence genes:", length(xp_senescence_genes), "\n")
}
##
## === Spaghetti Plot Gene Counts ===
## Photosynthesis genes: 525
## Senescence genes: 157
p_spaghetti <- xp_photosynthesis %>%
ggplot(aes(x = leaf_stage, y = mean_expr, group = gene)) +
geom_line(alpha = 0.1, linewidth = 0.5, color = "darkgreen") +
geom_smooth(
data = xp_photosynthesis,
aes(x = leaf_stage, y = mean_expr, group = 1),
method = "lm",
se = FALSE,
linewidth = 2,
color = "darkgreen"
) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 2) +
geom_line(
data = xp_senescence,
aes(x = leaf_stage, y = mean_expr, group = gene),
alpha = 0.1,
linewidth = 0.5,
color = "orange"
) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 2) +
geom_smooth(
data = xp_senescence,
aes(x = leaf_stage, y = mean_expr, group = 1),
method = "lm",
se = FALSE,
linewidth = 2,
color = "orange"
) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 2) +
labs(
x = "Leaf",
y = expression("Centered " * log[10] * "(CPM)")
) +
coord_cartesian(ylim = c(-1, 1)) +
scale_y_continuous(position = "right") +
ggpubr::theme_classic2(base_size = base_size) +
theme(plot.margin = margin(5.5, 7, up, 5.5, "pt"))
print(p_spaghetti)

Combine Top Panels
p_top_panel <- ggarrange(
p_normalized + rremove("xlab"),
p_spaghetti + rremove("xlab"),
ncol = 2
) %>%
annotate_figure(
bottom = text_grob("Leaf", size = base_size, vjust = -2)
)
print(p_top_panel)

Panel C: Marker Gene Validation
marker_genes <- data.frame(
locus_label = c("pep1", "salt1", "ssu1", "mir3",
"nye2", "sgrl1", "cab1", "dapat3"),
gene_set = c("Photosynthesis", "Senescence",
"Photosynthesis", "Senescence",
"Senescence", "Senescence",
"Photosynthesis", "Senescence"),
pair = c("Pair1", "Pair1", "Pair2", "Pair2",
"Pair3", "Pair3", "Pair4", "Pair4"),
stringsAsFactors = FALSE
)
gene_ids <- effects_df %>%
filter(locus_label %in% marker_genes$locus_label, !is.na(locus_label)) %>%
select(gene, locus_label, desc_merged) %>%
distinct() %>%
group_by(locus_label) %>%
slice(1) %>%
ungroup()
marker_data <- lapply(seq_len(nrow(gene_ids)), function(i) {
g <- gene_ids$gene[i]
data.frame(
sample = colnames(normalized_expression),
expression = normalized_expression[g, ],
gene = g
)
}) %>%
bind_rows() %>%
left_join(gene_ids, by = "gene") %>%
left_join(marker_genes, by = "locus_label") %>%
mutate(
leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample))
)
marker_summary <- marker_data %>%
group_by(gene, locus_label, gene_set, desc_merged, pair, leaf_stage) %>%
summarise(
mean_expr = mean(expression),
se_expr = sd(expression) / sqrt(n()),
.groups = "drop"
)
pair_max_expr <- marker_summary %>%
group_by(pair) %>%
summarise(max_expr = max(mean_expr), .groups = "drop") %>%
arrange(desc(max_expr))
marker_summary_colored <- marker_summary %>%
select(pair, locus_label, desc_merged, gene_set) %>%
distinct() %>%
mutate(
wrapped_text = stringr::str_wrap(desc_merged, width = 25),
with_br = gsub("\n", "<br>", wrapped_text),
with_color = case_when(
gene_set == "Photosynthesis" ~
paste0("<span style='color:darkgreen'>", with_br, "</span>"),
gene_set == "Senescence" ~
paste0("<span style='color:orange'>", with_br, "</span>"),
TRUE ~ with_br
),
sort_order = ifelse(gene_set == "Photosynthesis", 1, 2)
)
pair_labels <- marker_summary_colored %>%
arrange(pair, sort_order) %>%
group_by(pair) %>%
summarise(
facet_label = paste(with_color, collapse = "<br><br>"),
.groups = "drop"
) %>%
left_join(pair_max_expr, by = "pair") %>%
arrange(desc(max_expr))
marker_summary <- marker_summary %>%
left_join(
pair_labels %>% select(pair, facet_label, max_expr),
by = "pair"
) %>%
mutate(
facet_label = factor(facet_label, levels = pair_labels$facet_label)
)
p_markers <- ggplot(
marker_summary,
aes(x = leaf_stage, y = mean_expr, color = gene_set, group = gene)
) +
geom_line(linewidth = 1.5) +
geom_point(size = 5) +
geom_errorbar(
aes(ymin = mean_expr - se_expr, ymax = mean_expr + se_expr),
width = 0.2,
linewidth = 0.8
) +
geom_text(
data = marker_summary %>% filter(leaf_stage == 1),
aes(x = leaf_stage,
y = mean_expr,
label = locus_label,
color = gene_set),
nudge_x = -0.5,
hjust = 1,
fontface = "bold",
size = 5
) +
scale_color_manual(
values = c("Photosynthesis" = "darkgreen", "Senescence" = "orange")
) +
scale_x_continuous(expand = expansion(add = c(1.5, 0.2)), breaks = 1:4) +
facet_wrap(
~ facet_label,
nrow = 2,
scales = "free_y"
) +
labs(
x = "Leaf",
y = expression(log[10] * "(CPM)"),
color = NULL
) +
theme_classic(base_size = base_size) +
theme(
strip.background = element_blank(),
strip.text = element_markdown(size = 15, hjust = 0, face = "bold"),
plot.margin = margin(-50, 90, 5.5, 5.5, "pt"),
axis.title.y = element_text(margin = margin(r = -10)),
panel.spacing.x = unit(0, "cm"),
panel.spacing.y = unit(0, "cm"),
legend.position = "none"
)
print(p_markers)

Combine All Panels: Left Panel
left_panel <- cowplot::plot_grid(
p_top_panel, p_markers,
ncol = 1,
align = 'h',
axis = 'lr'
)
ggsave(
"~/Desktop/left_panel.png",
plot = left_panel,
width = 9,
height = 15,
dpi = 150
)
print(left_panel)

Gene Set Enrichment Analysis
Custom Enrichment Function
#' Fisher's Exact Test for Custom Gene Set Enrichment
#'
#' @param deg_lists Named list of DEG vectors by cluster
#' @param annotation Named list of gene sets
#' @param universe Background gene set
#' @return Data frame with enrichment results
test_custom_enrichment <- function(deg_lists, annotation, universe) {
annotation_filtered <- lapply(annotation, function(x) intersect(x, universe))
annotation_filtered <- annotation_filtered[
sapply(annotation_filtered, length) > 0
]
results <- list()
for (cluster_name in names(deg_lists)) {
deg_genes <- deg_lists[[cluster_name]]
for (term_name in names(annotation_filtered)) {
term_genes <- annotation_filtered[[term_name]]
overlap <- intersect(deg_genes, term_genes)
in_both <- length(overlap)
in_deg_only <- length(setdiff(deg_genes, term_genes))
in_term_only <- length(setdiff(term_genes, deg_genes))
in_neither <- length(universe) - in_both - in_deg_only - in_term_only
contingency <- matrix(
c(in_both, in_deg_only, in_term_only, in_neither),
nrow = 2
)
fisher_result <- fisher.test(contingency, alternative = "greater")
results[[length(results) + 1]] <- data.frame(
Cluster = cluster_name,
ID = term_name,
Description = term_name,
GeneRatio = paste0(in_both, "/", length(deg_genes)),
BgRatio = paste0(length(term_genes), "/", length(universe)),
pvalue = fisher_result$p.value,
geneID = paste(overlap, collapse = "/"),
Count = in_both,
stringsAsFactors = FALSE
)
}
}
results_df <- do.call(rbind, results)
results_df$p.adjust <- p.adjust(results_df$pvalue, method = "BH")
results_df %>% arrange(p.adjust)
}
#' Extract DEG Stats for Custom Annotation
#'
#' @param annotation_id Term ID to filter
#' @param gene_term_df Data frame with gene-term mappings
#' @param effects_data DEG results
#' @param filter_hiconf Filter to high-confidence DEGs
#' @return Tibble with DEG statistics
get_annotation_degs <- function(annotation_id,
gene_term_df,
effects_data,
filter_hiconf = TRUE) {
genes_in_category <- gene_term_df %>%
filter(ID == annotation_id) %>%
pull(gene)
result <- effects_data %>%
filter(gene %in% genes_in_category)
if (filter_hiconf) {
result <- result %>% filter(is_hiconf_DEG)
}
result %>%
select(
predictor:gene, locus_label, desc_merged,
logFC, neglogP, mahalanobis
) %>%
group_by(predictor, regulation) %>%
arrange(predictor, regulation, -mahalanobis) %>%
tibble()
}
Build Custom Annotation
custom_annotation <- c(
list(
staygreen = staygreen_network$v5,
sag = sag_orthologs$v5,
photosynthesis = gene_set_list$photosynthesis,
leaf_senescence = gene_set_list$senescence
),
natural_senescence %>%
select(term = set, gene = v5) %>%
distinct() %>%
split(.$term) %>%
lapply(function(x) x$gene)
)
{
cat("\n=== Custom Annotation Terms ===\n")
cat("Number of terms:", length(custom_annotation), "\n")
print(sapply(custom_annotation, length))
}
##
## === Custom Annotation Terms ===
## Number of terms: 6
## staygreen sag photosynthesis leaf_senescence natural
## 771 48 525 157 3681
## natural_induced
## 1075
Run Enrichment Tests
DEGs <- with(effects_df %>% filter(is_hiconf_DEG), {
list(
Leaf.Down = unique(gene[
predictor == "Leaf" & regulation == "Downregulated"
]),
Leaf.Up = unique(gene[predictor == "Leaf" & regulation == "Upregulated"]),
`-P.Down` = unique(gene[
predictor == "-P" & regulation == "Downregulated"
]),
`-P.Up` = unique(gene[predictor == "-P" & regulation == "Upregulated"])
)
})
custom_enrichment_results <- test_custom_enrichment(
DEGs,
custom_annotation,
universe
) %>%
filter(p.adjust < 0.05)
{
cat("\n=== Enrichment Results (FDR < 0.05) ===\n")
custom_enrichment_results %>%
select(-geneID) %>%
print()
}
##
## === Enrichment Results (FDR < 0.05) ===
## Cluster ID Description GeneRatio BgRatio pvalue
## 1 Leaf.Up natural natural 175/634 3284/24011 7.312839e-21
## 2 Leaf.Up natural_induced natural_induced 83/634 991/24011 8.520661e-21
## 3 -P.Up natural_induced natural_induced 83/661 991/24011 1.205018e-19
## 4 -P.Up natural natural 161/661 3284/24011 6.002531e-14
## 5 Leaf.Up sag sag 8/634 40/24011 8.242430e-06
## 6 Leaf.Up staygreen staygreen 32/634 572/24011 6.252709e-05
## 7 -P.Up staygreen staygreen 27/661 572/24011 4.943556e-03
## 8 -P.Up sag sag 5/661 40/24011 4.620103e-03
## Count p.adjust
## 1 175 1.022479e-19
## 2 83 1.022479e-19
## 3 83 9.640141e-19
## 4 161 3.601519e-13
## 5 8 3.956366e-05
## 6 32 2.501084e-04
## 7 27 1.483067e-02
## 8 5 1.483067e-02
Gene-Level Statistics
genes_in_custom_terms <- custom_enrichment_results %>%
separate_longer_delim(geneID, delim = "/") %>%
rename(gene = "geneID") %>%
select(Cluster, ID, Description, gene) %>%
inner_join(
effects_df %>% select(gene, locus_label, desc_merged) %>% distinct(),
by = "gene"
)
{
cat("\n=== Natural Senescence DEGs (Leaf effect) ===\n")
get_annotation_degs(
annotation_id = "natural",
gene_term_df = genes_in_custom_terms,
effects_data = effects_df,
filter_hiconf = TRUE
) %>%
filter(predictor == "Leaf") %>%
print(n = 50)
}
##
## === Natural Senescence DEGs (Leaf effect) ===
## # A tibble: 176 × 8
## predictor regulation gene locus_label desc_merged logFC neglogP mahalanobis
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Leaf Downregul… Zm00… glp1 germin-lik… -0.787 4.28 30.0
## 2 Leaf Upregulat… Zm00… osm1 osmotin-li… 3.19 4.03 382.
## 3 Leaf Upregulat… Zm00… nactf122 NAC-transc… 2.12 6.07 177.
## 4 Leaf Upregulat… Zm00… ox1 Peroxidase 2.02 5.41 160.
## 5 Leaf Upregulat… Zm00… pht2 phosphate … 1.97 6.67 157.
## 6 Leaf Upregulat… Zm00… <NA> Indole-2-m… 1.91 7.53 152.
## 7 Leaf Upregulat… Zm00… <NA> <NA> 1.92 4.22 142.
## 8 Leaf Upregulat… Zm00… <NA> Ergosterol… 1.76 7.14 131.
## 9 Leaf Upregulat… Zm00… fad15 fatty acid… 1.76 6.39 127.
## 10 Leaf Upregulat… Zm00… <NA> Outer arm … 1.66 8.84 127.
## 11 Leaf Upregulat… Zm00… fad17 fatty acid… 1.75 6.07 126.
## 12 Leaf Upregulat… Zm00… fomt4 flavonoid … 1.78 4.57 124.
## 13 Leaf Upregulat… Zm00… <NA> Chemocyanin 1.75 4.20 119.
## 14 Leaf Upregulat… Zm00… prp11 pathogenes… 1.72 5.05 117.
## 15 Leaf Upregulat… Zm00… prt1 Putative c… 1.45 10.9 116.
## 16 Leaf Upregulat… Zm00… <NA> Tryptamine… 1.67 4.62 110.
## 17 Leaf Upregulat… Zm00… <NA> Germin-lik… 1.66 3.06 105.
## 18 Leaf Upregulat… Zm00… prp4 pathogenes… 1.54 6.77 103.
## 19 Leaf Upregulat… Zm00… <NA> Cytochrome… 1.61 4.56 102.
## 20 Leaf Upregulat… Zm00… <NA> Cellulose … 1.43 8.23 97.9
## 21 Leaf Upregulat… Zm00… <NA> Peptidase … 1.59 3.72 97.8
## 22 Leaf Upregulat… Zm00… chn33 chitinase33 1.52 5.35 95.0
## 23 Leaf Upregulat… Zm00… <NA> Putative a… 1.54 4.39 93.4
## 24 Leaf Upregulat… Zm00… <NA> Mono-/di-a… 1.37 8.52 92.8
## 25 Leaf Upregulat… Zm00… prp9 pathogenes… 1.51 5.10 92.7
## 26 Leaf Upregulat… Zm00… <NA> Carbohydra… 1.46 6.12 90.7
## 27 Leaf Upregulat… Zm00… <NA> Armadillo 1.41 7.25 90.5
## 28 Leaf Upregulat… Zm00… <NA> Alpha-gluc… 1.44 5.91 87.6
## 29 Leaf Upregulat… Zm00… pip2d plasma mem… 1.50 3.55 87.5
## 30 Leaf Upregulat… Zm00… <NA> Probable p… 1.32 8.12 85.2
## 31 Leaf Upregulat… Zm00… <NA> Benzyl alc… 1.40 6.27 84.7
## 32 Leaf Upregulat… Zm00… <NA> <NA> 1.40 6.25 84.7
## 33 Leaf Upregulat… Zm00… ks4 kaurene sy… 1.37 6.77 83.9
## 34 Leaf Upregulat… Zm00… lbd41 LBD-transc… 1.46 3.58 83.2
## 35 Leaf Upregulat… Zm00… <NA> sphinganin… 1.40 5.74 82.7
## 36 Leaf Upregulat… Zm00… <NA> Cysteine-r… 1.40 5.25 81.3
## 37 Leaf Upregulat… Zm00… <NA> Epimerase … 1.32 6.72 79.1
## 38 Leaf Upregulat… Zm00… nactf108 NAC-transc… 1.17 9.20 78.5
## 39 Leaf Upregulat… Zm00… <NA> <NA> 1.28 7.17 77.4
## 40 Leaf Upregulat… Zm00… <NA> Putative r… 1.40 3.49 76.7
## 41 Leaf Upregulat… Zm00… <NA> Receptor-l… 1.14 9.33 76.3
## 42 Leaf Upregulat… Zm00… <NA> Cysteine d… 1.08 10.00 76.0
## 43 Leaf Upregulat… Zm00… <NA> Bowman-Bir… 1.40 3.46 75.8
## 44 Leaf Upregulat… Zm00… <NA> Putrescine… 1.17 8.76 75.7
## 45 Leaf Upregulat… Zm00… <NA> Type IV in… 1.30 5.91 73.8
## 46 Leaf Upregulat… Zm00… red1 Tropinone … 0.960 10.8 72.5
## 47 Leaf Upregulat… Zm00… <NA> Protein ki… 1.20 7.74 72.3
## 48 Leaf Upregulat… Zm00… <NA> Fucosyltra… 1.33 3.90 70.7
## 49 Leaf Upregulat… Zm00… bhlh62 bHLH-trans… 1.18 7.71 70.4
## 50 Leaf Upregulat… Zm00… chn17 chitinase17 1.24 6.43 70.0
## # ℹ 126 more rows
Build custom chlorophyll gene sets —
Manually cuarte sets
# Load CornCyc again if not in environment (safe re-load)
corncyc <- read.table(
"/Users/fvrodriguez/Desktop/corncyc_pathways.20251021",
sep = "\t",
header = TRUE,
quote = ""
)
# Extract synthesis genes: CHLOROPHYLL-SYN + PWY-5068
chloro_synthesis_genes <- corncyc %>%
filter(Pathway.id %in% c("CHLOROPHYLL-SYN", "PWY-5068")) %>%
pull(Protein.id) %>%
gsub("ZM00001EB", "Zm00001eb", .) %>%
gsub("_P\\d+$", "", ., perl = TRUE) %>%
grep("Zm00001eb", ., value = TRUE) %>%
unique()
# Extract degradation genes from PWY-5098
chloro_degradation_base <- corncyc %>%
filter(Pathway.id == "PWY-5098") %>%
pull(Protein.id) %>%
gsub("ZM00001EB", "Zm00001eb", .) %>%
gsub("_P\\d+$", "", ., perl = TRUE) %>%
grep("Zm00001eb", ., value = TRUE) %>%
unique()
# Manually add your curated degradation genes (using maizegdb_id = rownames in expression matrix)
manual_degradation_genes <- c(
"Zm00001eb307720", # ZmCHL
"Zm00001eb047780", # ZmCHL2
"Zm00001eb231810", # ZmPPH
"Zm00001eb103480", # ZmSGR
"Zm00001eb076680", # ZmSGRL
"Zm00001eb140370" # ZmPAO
)
chloro_degradation_genes <- unique(c(chloro_degradation_base, manual_degradation_genes))
# Create named list
chloro_gene_sets <- list(
chlorophyll_synthesis = chloro_synthesis_genes,
chlorophyll_degradation = chloro_degradation_genes
)
Calculate and normalize xpindex
cat("=== Chlorophyll Gene Sets ===\n")
## === Chlorophyll Gene Sets ===
cat("Synthesis genes:", length(chloro_gene_sets$chlorophyll_synthesis), "\n")
## Synthesis genes: 32
cat("Degradation genes:", length(chloro_gene_sets$chlorophyll_degradation), "\n")
## Degradation genes: 10
chloro_xpindex <- calculate_xpindex(
expression_matrix = normalized_expression,
gene_sets = chloro_gene_sets,
method = "mean"
)
## chlorophyll_synthesis: 23/32 genes found
## chlorophyll_degradation: 9/10 genes found
chloro_xpindex_norm <- normalize_xpindex(
xpindex_data = chloro_xpindex,
index_names = names(chloro_gene_sets),
method = "per_set"
)
lm(data=chloro_xpindex, chlorophyll_degradation~leaf_stage*treatment )
##
## Call:
## lm(formula = chlorophyll_degradation ~ leaf_stage * treatment,
## data = chloro_xpindex)
##
## Coefficients:
## (Intercept) leaf_stage treatment-P
## 5.38762 0.06527 -0.17022
## leaf_stage:treatment-P
## 0.15017
Summarize for plotting
# Pooled (all leaves, ignore treatment)
summary_pooled <- summarize_xpindex(
xpindex_data = chloro_xpindex_norm,
index_names = names(chloro_gene_sets),
group_vars = "leaf_stage"
) %>%
mutate(
xpindex = case_when(
xpindex == "chlorophyll_synthesis" ~ "Synthesis",
xpindex == "chlorophyll_degradation" ~ "Degradation"
),
xpindex = factor(xpindex, levels = c("Synthesis", "Degradation"))
)
# By treatment
summary_by_treatment <- summarize_xpindex(
xpindex_data = chloro_xpindex_norm,
index_names = names(chloro_gene_sets),
group_vars = c("leaf_stage", "treatment")
) %>%
mutate(
xpindex = case_when(
xpindex == "chlorophyll_synthesis" ~ "Synthesis",
xpindex == "chlorophyll_degradation" ~ "Degradation"
),
xpindex = factor(xpindex, levels = c("Synthesis", "Degradation"))
)
Plotting 1
p_normalized <- ggplot(
xpindex_summary_pooled,
aes(x = leaf_stage, y = mean, color = xpindex)
) +
geom_line(linewidth = 1.5) +
geom_point(size = 5) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
width = 0.2,
linewidth = 1
) +
scale_color_manual(
values = c("Photosynthesis" = "darkgreen",
"Leaf Senescence" = "orange")
) +
# annotate(
# "text",
# x = 1.0,
# y = 0.45,
# label = stats_text,
# hjust = 0,
# vjust = 0,
# size = 4
# ) +
labs(
x = "Leaf",
y = "Normalized Expression",
color = NULL
) +
ggpubr::theme_classic2(base_size = base_size) +
theme(
legend.position = c(0.7, 0.9),
legend.text = element_text(size = 25),
legend.background = element_rect(fill = "transparent"),
legend.margin = margin(r = 50, unit = "pt"),
plot.margin = margin(5.5, 5.5, up, 7, "pt")
)
print(p_normalized)

base_size <- 30
# Plot 1: Pooled
p_chloro_pooled <- summary_pooled %>%
ggplot(aes(x = leaf_stage, y = mean, color = xpindex)) +
geom_line(linewidth = 1.5) +
geom_point(size = 5) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
width = 0.2,
linewidth = 0.8
) +
scale_color_manual(
values = c("Synthesis" = "darkgreen",
"Degradation" = "darkorange")
) +
labs(
y= "Normalized Expression",
x = "Leaf",
color = NULL
) +
theme_classic(base_size = base_size) +
theme(legend.position = "none")
print(p_chloro_pooled)

ggsave(
"~/Desktop/chlorophyll_synthesis_vs_degradation_pooled.png",
plot = p_chloro_pooled,
width = 10,
height = 6,
dpi = 300
)
Plotting 2 joins
pd <- position_dodge(width = 0.2)
# Plot 2: By treatment
p_chloro_treatment <- summary_by_treatment %>%
ggplot(aes(x = leaf_stage, y = mean,
color = xpindex,
fill = xpindex,
linetype = treatment,
shape=treatment)) +
geom_line(linewidth = 2) +
geom_point(position = pd,
size = 5) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
position = pd,
width = 0.2,
linewidth = 1,
linetype = "solid",
) +
scale_x_continuous(
expand=expansion(add = c(0.2,0.2))
) +
scale_y_continuous(
expand=expansion(add = c(0,0.2)),
breaks = 2*(0:5)/10
) +
scale_color_manual(
values = c("Synthesis" = "darkgreen",
"Degradation" = "darkorange"),
labels = c("Chlorophyll\nSynthesis","Degradation")
) +
scale_fill_manual(
values = c("Synthesis" = "darkgreen",
"Degradation" = "darkorange", guide= "none")
) +
scale_linetype_manual(
values = c("+P" = "solid", "-P" = "dashed", guide= "none")
) +
scale_shape_manual(
values = c("+P" = 19, "-P" = 25)
) +
labs(
x = "Leaf",
y = "Normalized Expression",
color = NULL,
shape = NULL,
) +
guides(
fill = "none",
linetype = "none",
shape = guide_legend(
override.aes = list(fill = "black"),
order = 2,
#label.position = "left",
#label.hjust = 1 ,
reverse = TRUE
),
color = guide_legend(
override.aes = list(shape = NA),
order = 1,
# label.position = "left",
# label.hjust = 1,
)
)+
theme_classic(base_size = base_size) +
theme(
legend.title = element_blank(),
legend.position = c(0.1, 1),
legend.justification = c(0, 1),
legend.text = element_text(
size = 25,
margin = margin(l =unit(2, "lines"))
),
legend.box = "horizontal",
legend.box.just = "bottom",
legend.margin = margin(0, 0, 0, 0),
legend.spacing.x = unit(1, "lines"),
legend.key.spacing.x = unit(2, "lines"),
legend.key.size = unit(2, "lines"),
legend.key = element_rect(fill = "transparent"),
legend.background = element_rect(fill = "transparent")
)
p_chloro <- ggpubr::ggarrange(p_chloro_pooled,
p_chloro_treatment,
align="hv")
p_chloro

ggsave(
"~/Desktop/chlorophyll_synthesis_vs_degradation_by_treatment.png",
plot = p_chloro,
width = 12,
height = 7,
dpi = 300
)
# --- Step 1: Prepare both datasets with consistent structure ---
p_photo_sene <- xpindex_summary %>%
ggplot(aes(x = leaf_stage, y = mean,
color = xpindex,
fill = xpindex,
linetype = treatment,
shape=treatment)) +
geom_line(linewidth = 1.5) +
geom_point(position = pd,
size = 5) +
geom_errorbar(
aes(ymin = mean - se, ymax = mean + se),
position = pd,
width = 0.2,
linewidth = 1,
linetype = "solid",
) +
scale_y_continuous(
expand=expansion(add = c(0,0.2))
) +
scale_color_manual(
values = c("Photosynthesis" = "darkgreen",
"Leaf Senescence" = "darkorange"),
) +
scale_fill_manual(
values = c("Photosynthesis" = "darkgreen",
"Leaf Senescence" = "darkorange"),
guide= "none"
) +
scale_linetype_manual(
values = c("+P" = "solid", "-P" = "dashed", guide= "none")
) +
scale_shape_manual(
values = c("+P" = 19, "-P" = 25),
guide="none"
) +
labs(
x = "Leaf",
y = "Normalized Expression",
color = NULL,
shape = NULL,
) +
guides(
fill = "none",
linetype = "none",
shape="none",
color = guide_legend(
override.aes = list(shape = NA),
order = 1,
#label.position = "left",
#label.hjust = 1,
)
)+
theme_classic(base_size = base_size) +
theme(
legend.title = element_blank(),
legend.position = c(0, 1),
legend.justification = c(0, 1),
legend.text = element_text(
size = 25,
margin = margin(l =unit(2, "lines")) ),
# legend.text.align = 1,
# legend.justification = "center",
# legend.box.just = "right",
# legend.margin = margin(0, 0, 0, 0),
# legend.spacing.y = unit(0, "lines"),
# legend.key.spacing.y = unit(0, "lines"),
legend.key.size = unit(2, "lines"),
legend.key = element_rect(fill = "transparent"),
legend.background = element_rect(fill = "transparent"),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank()
)
print(p_photo_sene)

p_combined_by_treatment <- ggpubr::ggarrange(
p_chloro_treatment + rremove("xlab"),
p_photo_sene + rremove("xlab"),
nrow=1,
widths= c(1.4,1)) %>%
annotate_figure(
bottom = text_grob("Leaf", size = base_size,
vjust = -0.25,
hjust = 0)
)
p_combined_by_treatment

ggsave(
"~/Desktop/combined_by_treatment_xpindex.png",
plot = p_combined_by_treatment,
width = 9.25,
height = 9,
dpi = 150
)
Pooled indices
# Common y-axis: 0 to 1, breaks every 0.2
common_y_scale <- scale_y_continuous(
limits = c(0, 1),
breaks = seq(0, 1, by = 0.2),
expand = expansion(add = c(0, 0.2))
)
# Left panel: Chlorophyll
p_chloro_pooled_legend <- summary_pooled %>%
ggplot(aes(x = leaf_stage, y = mean, color = xpindex)) +
geom_line(linewidth = 1.5) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, linewidth = 0.8) +
scale_color_manual(
values = c("Synthesis" = "darkgreen", "Degradation" = "darkorange"),
labels = c("Chlorophyll\nSynthesis", "Degradation")
) +
common_y_scale +
labs(x = "Leaf", y = "Normalized Expression") +
theme_classic(base_size = base_size) +
theme(
legend.position = c(0, 1),
legend.justification = c(0, 1),
legend.text = element_text(
size = 25,
margin = margin(l =unit(2, "lines"))
),
legend.title = element_blank(),
legend.key = element_rect(fill = "transparent"),
legend.background = element_rect(fill = "transparent")
)
# Right panel: Photosynthesis & Senescence
p_normalized_legend <- ggplot(xpindex_summary_pooled,
aes(x = leaf_stage, y = mean, color = xpindex)) +
geom_line(linewidth = 1.5) +
geom_point(size = 4) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, linewidth = 1) +
scale_color_manual(
values = c("Photosynthesis" = "darkgreen", "Leaf Senescence" = "orange"),
labels = c("Photosynthesis", "Leaf Senescence")
) +
common_y_scale +
labs(x = "Leaf", y = "Normalized Expression") +
ggpubr::theme_classic2(base_size = base_size) +
theme(
legend.position = c(0, 1),
legend.justification = c(0, 1),
legend.text = element_text(
size = 25,
margin = margin(l =unit(2, "lines"))
),
legend.title = element_blank(),
legend.key = element_rect(fill = "transparent"),
legend.background = element_rect(fill = "transparent"),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank()
)
# Combine
p_combined_pooled_separate_legends <- ggpubr::ggarrange(
p_chloro_pooled_legend + rremove("xlab"),
p_normalized_legend + rremove("xlab"),
nrow = 1,
widths = c(1.33, 1)
) %>%
annotate_figure(
bottom = text_grob("Leaf", size = base_size, vjust = -0.3, hjust = 0)
)
print(p_combined_pooled_separate_legends)

ggsave(
"~/Desktop/combined_pooled_xpindex_separate_legends.png",
plot = p_combined_pooled_separate_legends,
width = 8.2,
height = 7,
dpi = 150
)
Caluclate model stats
# --- Define combined gene sets ---
panel_sets <- c(chloro_gene_sets, gene_set_list)
# --- Calculate and normalize expression indices ---
panel_data <- calculate_xpindex(
expression_matrix = normalized_expression,
gene_sets = panel_sets,
method = "mean"
)
## chlorophyll_synthesis: 23/32 genes found
## chlorophyll_degradation: 9/10 genes found
## photosynthesis: 525/525 genes found
## senescence: 157/157 genes found
panel_norm <- normalize_xpindex(
xpindex_data = panel_data,
index_names = names(panel_sets),
method = "per_set"
)
# --- Model 1: Full model with interaction (leaf_stage * treatment) ---
cat("=== MODEL 1: Leaf Stage × Treatment Interaction ===\n")
## === MODEL 1: Leaf Stage × Treatment Interaction ===
panel_models_interaction <- fit_xpindex_models(
xpindex_data = panel_norm,
index_names = names(panel_sets),
formula_rhs = "leaf_stage * treatment"
)
stats_interaction <- extract_xpindex_stats(panel_models_interaction)
print(stats_interaction)
## # A tibble: 12 × 8
## xpindex predictor effect std_err p_value r2 p_adj p_signif
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Chlorophyll_degrada… leaf_sta… 0.0376 0.0203 7.13e-2 0.517 0.107 ns
## 2 Chlorophyll_degrada… leaf_sta… 0.0864 0.0309 7.96e-3 0.517 0.0159 *
## 3 Chlorophyll_degrada… treatmen… -0.0980 0.0832 2.46e-1 0.517 0.246 ns
## 4 Chlorophyll_synthes… leaf_sta… -0.0897 0.0275 2.29e-3 0.572 0.00688 **
## 5 Chlorophyll_synthes… leaf_sta… -0.109 0.0419 1.29e-2 0.572 0.0221 *
## 6 Chlorophyll_synthes… treatmen… 0.191 0.113 9.81e-2 0.572 0.118 ns
## 7 Photosynthesis leaf_sta… -0.0820 0.0210 3.55e-4 0.684 0.00393 **
## 8 Photosynthesis leaf_sta… -0.118 0.0320 6.56e-4 0.684 0.00393 **
## 9 Photosynthesis treatmen… 0.283 0.0860 2.15e-3 0.684 0.00688 **
## 10 Senescence leaf_sta… 0.0546 0.0316 9.14e-2 0.549 0.118 ns
## 11 Senescence leaf_sta… 0.151 0.0481 3.18e-3 0.549 0.00763 **
## 12 Senescence treatmen… -0.176 0.130 1.83e-1 0.549 0.200 ns
# --- Model 2: Additive model (leaf_stage + treatment) ---
cat("\n=== MODEL 2: Additive Effects (Leaf Stage + Treatment) ===\n")
##
## === MODEL 2: Additive Effects (Leaf Stage + Treatment) ===
panel_models_additive <- fit_xpindex_models(
xpindex_data = panel_norm,
index_names = names(panel_sets),
formula_rhs = "leaf_stage + treatment"
)
stats_additive <- extract_xpindex_stats(panel_models_additive)
print(stats_additive)
## # A tibble: 8 × 8
## xpindex predictor effect std_err p_value r2 p_adj p_signif
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Chlorophyll_degrada… leaf_sta… 0.0747 0.0166 5.47e-5 0.420 1.09e-4 ***
## 2 Chlorophyll_degrada… treatmen… 0.114 0.0369 3.58e-3 0.420 4.77e-3 **
## 3 Chlorophyll_synthes… leaf_sta… -0.137 0.0222 2.86e-7 0.498 1.14e-6 ****
## 4 Chlorophyll_synthes… treatmen… -0.0770 0.0495 1.28e-1 0.498 1.46e-1 ns
## 5 Photosynthesis leaf_sta… -0.133 0.0182 6.81e-9 0.572 5.45e-8 ****
## 6 Photosynthesis treatmen… -0.00796 0.0405 8.45e-1 0.572 8.45e-1 ns
## 7 Senescence leaf_sta… 0.120 0.0263 4.99e-5 0.435 1.09e-4 ***
## 8 Senescence treatmen… 0.196 0.0587 1.85e-3 0.435 2.95e-3 **
# --- Model 3: Leaf stage only (pooled across treatments) ---
cat("\n=== MODEL 3: Leaf Stage Only (Pooled) ===\n")
##
## === MODEL 3: Leaf Stage Only (Pooled) ===
panel_models_pooled <- fit_xpindex_models(
xpindex_data = panel_norm,
index_names = names(panel_sets),
formula_rhs = "leaf_stage"
)
stats_pooled <- extract_xpindex_stats(panel_models_pooled)
print(stats_pooled)
## # A tibble: 4 × 8
## xpindex predictor effect std_err p_value r2 p_adj p_signif
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Chlorophyll_degradat… leaf_sta… 0.0729 0.0182 2.52e-4 0.282 2.86e-4 ***
## 2 Chlorophyll_synthesis leaf_sta… -0.135 0.0226 4.30e-7 0.468 8.61e-7 ****
## 3 Photosynthesis leaf_sta… -0.133 0.0179 4.49e-9 0.572 1.79e-8 ****
## 4 Senescence leaf_sta… 0.117 0.0294 2.86e-4 0.277 2.86e-4 ***
# --- Optional: Compare models via AIC (for one example index) ---
# Example: Chlorophyll synthesis
example_index <- "chlorophyll_synthesis"
if (example_index %in% names(panel_sets)) {
m_int <- panel_models_interaction[[example_index]]
m_add <- panel_models_additive[[example_index]]
m_pooled <- panel_models_pooled[[example_index]]
cat("\n=== AIC Comparison (", example_index, ") ===\n", sep = "")
AIC_comparison <- AIC(m_int, m_add, m_pooled)
rownames(AIC_comparison) <- c("Interaction", "Additive", "Pooled")
print(AIC_comparison)
}
##
## === AIC Comparison (chlorophyll_synthesis) ===
## df AIC
## Interaction 5 -34.99953
## Additive 4 -30.09655
## Pooled 3 -29.57391
# --- Export results (optional) ---
# write.csv(stats_interaction, "~/Desktop/xpindex_interaction_stats.csv", row.names = FALSE)
# write.csv(stats_pooled, "~/Desktop/xpindex_pooled_stats.csv", row.names = FALSE)