Overview

Purpose: Construct transcriptomic expression indices (xpindex) for photosynthesis and senescence gene sets to proxy physiological trajectories across leaf developmental stages.

Approach: 1. Define gene sets from literature, GO annotations, and CornCyc pathways 2. Calculate aggregate xpindex values across developmental stages 3. Normalize indices using different strategies (per_set, per_union, global) 4. Validate patterns with individual marker genes 5. Create publication-quality visualizations

Transcriptomic Expression Index (xpindex): An aggregate metric summarizing the collective expression of all genes in a pathway or functional gene set. Calculated by averaging log-transformed expression values across all genes in the set, providing a single value per sample that represents pathway activity.

Setup

Libraries

library(dplyr)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(stringr)
library(clusterProfiler)
library(ggtext)
library(ggfx)
library(ggrepel)

Load Core Data

effects_df <- read.csv("~/Desktop/DEG_effects.csv")

curated <- read.csv("~/Desktop/selected_DEGs_curated_locus_label_2.csv") %>%
  select(gene, locus_label)

effects_df$locus_label[effects_df$locus_label == "A0A1D6NG77"] <- NA

effects_df <- effects_df %>%
  left_join(curated, by = "gene", suffix = c("", "_curated")) %>%
  mutate(locus_label = coalesce(locus_label_curated, locus_label)) %>%
  select(-locus_label_curated)

normalized_expression <- readRDS(
  "~/Desktop/normalized_expression_logCPM.rda"
)

universe <- effects_df %>%
  filter(predictor == "Leaf") %>%
  pull(gene) %>%
  unique()

TERM2GENE <- readRDS(
  "/Users/fvrodriguez/Desktop/GOMAP_maize_B73_NAM5/TERM2GENE_Fattel_2024_full.rds"
)

{
  cat("Expression matrix:", dim(normalized_expression), "\n")
  cat("Universe size:", length(universe), "genes\n")
}

## Expression matrix: 24249 43 
## Universe size: 24011 genes

Core Functions for Expression Index Analysis

Extract Sample Metadata

#' Extract leaf stage and treatment from sample names
#'
#' @param sample_names Character vector of sample identifiers
#' @return Data frame with sample, leaf_stage, and treatment columns
extract_sample_metadata <- function(sample_names) {
  data.frame(
    sample = sample_names,
    leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample_names)),
    treatment = factor(
      substr(sample_names, 1, 2),
      levels = c("+P", "-P")
    ),
    stringsAsFactors = FALSE
  )
}

Calculate Expression Indices

#' Calculate transcriptomic expression indices (xpindex) for gene sets
#'
#' @param expression_matrix Matrix with genes as rows, samples as columns
#' @param gene_sets Named list of gene ID vectors
#' @param method Aggregation method: "sum", "mean", or "median"
#' @return Data frame with sample-level xpindex values
calculate_xpindex <- function(expression_matrix,
                               gene_sets,
                               method = "mean") {
  indices <- lapply(names(gene_sets), function(set_name) {
    genes_in_set <- gene_sets[[set_name]]
    genes_available <- intersect(genes_in_set, rownames(expression_matrix))
    
    if (length(genes_available) == 0) {
      warning(paste("No genes from", set_name, "found in expression matrix"))
      return(rep(NA, ncol(expression_matrix)))
    }
    
    set_expression <- expression_matrix[genes_available, , drop = FALSE]
    
    index <- switch(method,
      "sum" = colSums(set_expression, na.rm = TRUE),
      "mean" = colMeans(set_expression, na.rm = TRUE),
      "median" = apply(set_expression, 2, median, na.rm = TRUE),
      stop("method must be 'sum', 'mean', or 'median'")
    )
    
    cat(sprintf(
      "%s: %d/%d genes found\n",
      set_name, length(genes_available), length(genes_in_set)
    ))
    
    index
  })
  
  indices_df <- as.data.frame(indices)
  names(indices_df) <- names(gene_sets)
  indices_df$sample <- colnames(expression_matrix)
  
  indices_df %>%
    select(sample, everything()) %>%
    left_join(
      extract_sample_metadata(colnames(expression_matrix)),
      by = "sample"
    )
}

Normalize Expression Indices

#' Min-max normalization for transcriptomic expression indices
#'
#' @param xpindex_data Data frame with xpindex columns
#' @param index_names Character vector of column names to normalize
#' @param expression_matrix Optional: original expression matrix
#' @param gene_sets Optional: named list of gene vectors
#' @param method Normalization scope: "per_set", "per_union", or "global"
#'
#' @return Data frame with normalized xpindex values (0-1 scale)
normalize_xpindex <- function(xpindex_data,
                               index_names,
                               expression_matrix = NULL,
                               gene_sets = NULL,
                               method = "per_set") {
  
  stopifnot(
    method %in% c("per_set", "per_union", "global"),
    is.data.frame(xpindex_data),
    all(index_names %in% names(xpindex_data))
  )
  
  if (method == "per_set") {
    xpindex_data %>%
      mutate(across(
        all_of(index_names),
        ~ (. - min(.)) / (max(.) - min(.))
      ))
    
  } else if (method == "per_union") {
    stopifnot(
      !is.null(expression_matrix),
      !is.null(gene_sets)
    )
    
    union_genes <- unique(unlist(gene_sets))
    union_genes <- union_genes[union_genes %in% rownames(expression_matrix)]
    
    global_min <- min(expression_matrix[union_genes, ], na.rm = TRUE)
    global_max <- max(expression_matrix[union_genes, ], na.rm = TRUE)
    
    xpindex_data %>%
      mutate(across(
        all_of(index_names),
        ~ (. - global_min) / (global_max - global_min)
      ))
    
  } else if (method == "global") {
    stopifnot(!is.null(expression_matrix))
    
    global_min <- min(expression_matrix, na.rm = TRUE)
    global_max <- max(expression_matrix, na.rm = TRUE)
    
    xpindex_data %>%
      mutate(across(
        all_of(index_names),
        ~ (. - global_min) / (global_max - global_min)
      ))
  }
}

Summarize Expression Indices

#' Prepare summary statistics for transcriptomic expression indices
#'
#' @param xpindex_data Data frame with normalized xpindex values
#' @param index_names Character vector of xpindex column names
#' @param group_vars Character vector of grouping variables
#'
#' @return Data frame with mean and SE for each xpindex and group
summarize_xpindex <- function(xpindex_data,
                               index_names,
                               group_vars = "leaf_stage") {
  
  summary_data <- xpindex_data %>%
    group_by(across(all_of(group_vars))) %>%
    summarise(
      across(
        all_of(index_names),
        list(mean = mean, se = ~ sd(.) / sqrt(n())),
        .names = "{.col}_{.fn}"
      ),
      .groups = "drop"
    )
  
  summary_long <- summary_data %>%
    pivot_longer(
      cols = -all_of(group_vars),
      names_to = c("xpindex", ".value"),
      names_pattern = "(.+)_(mean|se)"
    )
  
  summary_long
}

Statistical Modeling Functions

#' Convert p-values to significance stars
#'
#' @param p Numeric vector of p-values
#' @return Character vector with significance stars
star_significance <- function(p) {
  sapply(p, function(x) {
    if (is.na(x)) return("ns")
    if (x < 0.0001) return("****")
    if (x < 0.001) return("***")
    if (x < 0.01) return("**")
    if (x < 0.05) return("*")
    return("ns")
  })
}

#' Fit linear models for expression indices
#'
#' @param xpindex_data Data frame with xpindex values
#' @param index_names Character vector of xpindex column names
#' @param formula_rhs Right-hand side of formula
#'
#' @return Named list of fitted lm objects
fit_xpindex_models <- function(xpindex_data,
                                index_names,
                                formula_rhs = "leaf_stage * treatment") {
  
  models <- lapply(index_names, function(xpindex) {
    formula_str <- paste(xpindex, "~", formula_rhs)
    lm(as.formula(formula_str), data = xpindex_data)
  })
  
  names(models) <- index_names
  models
}

#' Extract statistics from expression index linear models
#'
#' @param model_list Named list of lm objects
#' @return Data frame with model statistics for all predictors
extract_xpindex_stats <- function(model_list) {
  
  results <- lapply(names(model_list), function(index_name) {
    model <- model_list[[index_name]]
      model_summary <- summary(model)
    coef_summary <- coef(model_summary)
    predictors <- rownames(coef_summary)[-1]
    
    data.frame(
      xpindex = tools::toTitleCase(index_name),
      predictor = predictors,
      effect = coef_summary[predictors, "Estimate"],
      std_err = coef_summary[predictors, "Std. Error"],
      p_value = coef_summary[predictors, "Pr(>|t|)"],
      r2 =  model_summary$r.squared,
      row.names = NULL,
      stringsAsFactors = FALSE
    )
  })
  
  bind_rows(results) %>%
    mutate(
      p_adj = p.adjust(p_value, method = "fdr"),
      p_signif = star_significance(p_adj)
    ) %>%
    arrange(xpindex, predictor, p_value) %>%
    as_tibble()
}

Phase 1: Exploration

Purpose: Explore different gene sets and xpindex calculations to understand patterns before constructing the main figure.

Pigment Biosynthesis Indices (CornCyc Pathways)

Purpose: Calculate xpindex for pigment biosynthesis using CornCyc pathway annotations.

Approach: Extract gene sets from CornCyc, calculate mean xpindex, normalize per_set, and visualize developmental trajectories.

corncyc <- read.table(
  "/Users/fvrodriguez/Desktop/corncyc_pathways.20251021",
  sep = "\t",
  header = TRUE,
  quote = ""
)

pathway_ids <- list(
  chlorophyll = c("CHLOROPHYLL-SYN", "PWY-5068"),
  carotenoid = "CAROTENOID-PWY",
  anthocyanin = "PWY-5125",
  flavonoids = "PWY1F-FLAVSYN"
)

corncyc_gene_sets <- lapply(names(pathway_ids), function(pigment) {
  corncyc_genes <- corncyc %>%
    filter(Pathway.id %in% pathway_ids[[pigment]]) %>%
    pull(Protein.id) %>%
    gsub("ZM00001EB", "Zm00001eb", .) %>%
    gsub("_P\\d+$", "", ., perl = TRUE)
  
  missing <- corncyc_genes[!grepl("Zm00001eb", corncyc_genes)]
  if (length(missing) > 0) {
    cat(pigment, "- Protein.id missing:", 
        paste(missing, collapse = ", "), "\n")
  }
  
  corncyc_genes[grepl("Zm00001eb", corncyc_genes)] %>% unique()
})

## chlorophyll - Protein.id missing: GDQC-106314-MONOMER, GDQC-112611-MONOMER, MONOMERDQC-5752, GDQC-114541-MONOMER, GDQC-108880-MONOMER 
## carotenoid - Protein.id missing: MONOMER-15665, GBWI-61910-MONOMER, GBWI-61910-MONOMER, MONOMER-15665, MONOMER-15661

names(corncyc_gene_sets) <- names(pathway_ids)

{
  cat("\n=== Pigment Gene Set Sizes ===\n")
  print(sapply(corncyc_gene_sets, length))
}

## 
## === Pigment Gene Set Sizes ===
## chlorophyll  carotenoid anthocyanin  flavonoids 
##          32          35          15          51

corncyc_xpindex <- calculate_xpindex(
  expression_matrix = normalized_expression,
  gene_sets = corncyc_gene_sets,
  method = "mean"
)

## chlorophyll: 23/32 genes found
## carotenoid: 29/35 genes found
## anthocyanin: 10/15 genes found
## flavonoids: 32/51 genes found

corncyc_xpindex_norm <- normalize_xpindex(
  xpindex_data = corncyc_xpindex,
  index_names = names(pathway_ids),
  expression_matrix = normalized_expression,
  gene_sets = corncyc_gene_sets,
  method = "per_set"
)

{
  cat("\n=== Normalized Pigment Index Ranges ===\n")
  corncyc_xpindex_norm %>%
    summarise(across(
      all_of(names(pathway_ids)),
      list(min = min, max = max),
      .names = "{.col}_{.fn}"
    )) %>%
    pivot_longer(
      cols = everything(),
      names_to = c("index", "stat"),
      names_pattern = "(.+)_(min|max)"
    ) %>%
    pivot_wider(
      names_from = stat,
      values_from = value
    ) %>%
    print()
}

## 
## === Normalized Pigment Index Ranges ===
## # A tibble: 4 × 3
##   index         min   max
##   <chr>       <dbl> <dbl>
## 1 chlorophyll     0     1
## 2 carotenoid      0     1
## 3 anthocyanin     0     1
## 4 flavonoids      0     1

corncyc_summary <- summarize_xpindex(
  xpindex_data = corncyc_xpindex_norm,
  index_names = names(pathway_ids),
  group_vars = "leaf_stage"
)

base_size <- 30

p_pigments <- corncyc_summary %>%
  filter(xpindex != "flavonoids") %>%
  mutate(xpindex = tools::toTitleCase(xpindex)) %>%
  ggplot(aes(x = leaf_stage, y = mean, color = xpindex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 5) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    linewidth = 0.8
  ) +
  labs(
    x = "Leaf",
    y = "Normalized Index",
    color = "Pigment"
  ) +
  scale_color_manual(
    values = c(
      "Chlorophyll" = "darkgreen",
      "Carotenoid" = "gold",
      "Anthocyanin" = "purple4"
    )
  ) +
  theme_classic(base_size = base_size) +
  theme(legend.position = c(0.9, 0.9))

print(p_pigments)

ggsave(
  "~/Desktop/corncyc_pigment_biosynthesis_indices.png",
  plot = p_pigments,
  width = 12,
  height = 8,
  dpi = 300
)

Photosynthesis vs Senescence by Leaf Stage

Purpose: Visualize photosynthesis and senescence xpindex trajectories with treatment effects.

Approach: Calculate GO-based xpindex, normalize per_set, and plot developmental patterns by treatment.

v3_v5 <- read.table(
  file = "/Users/fvrodriguez/Desktop/B73_gene_xref/B73v3_to_B73v5.tsv",
  sep = "\t",
  header = FALSE
) %>%
  rename(v3 = "V1", v5 = "V2") %>%
  separate_longer_delim(cols = v5, delim = ",")

v4_v5 <- read.table(
  file = "/Users/fvrodriguez/Desktop/B73_gene_xref/B73v4_to_B73v5.tsv",
  sep = "\t",
  header = FALSE
) %>%
  rename(v4 = "V1", v5 = "V2") %>%
  separate_longer_delim(cols = v5, delim = ",")

sag_orthologs <- read.csv("~/Desktop/SAG_orthologs.csv") %>%
  janitor::clean_names() %>%
  inner_join(v3_v5, by = c(gene_id_maize = "v3"))

staygreen_network <- read.csv(
  "~/Desktop/staygreen_network_sekhon2019.csv",
  na.strings = ""
) %>%
  janitor::clean_names() %>%
  inner_join(v4_v5, by = c(gene = "v4"))

natural_senescence <- read.csv("~/Desktop/natural_senescence.csv") %>%
  janitor::clean_names() %>%
  inner_join(v3_v5, by = c(gene_id = "v3"))

photosynthesis_genes <- TERM2GENE %>%
  filter(GO %in% c(
    "GO:0015979",
    "GO:0019684",
    "GO:0009768"
  )) %>%
  pull(gene) %>%
  unique()

leaf_senescence_genes <- TERM2GENE %>%
  filter(GO %in% c("GO:0010150")) %>%
  pull(gene) %>%
  unique()

gene_set_list <- list(
  photosynthesis = intersect(photosynthesis_genes, universe),
  senescence = intersect(leaf_senescence_genes, universe)
)

{
  cat("\n=== Gene Set Sizes ===\n")
  print(sapply(gene_set_list, length))
}

## 
## === Gene Set Sizes ===
## photosynthesis     senescence 
##            525            157

xpindex_data <- calculate_xpindex(
  expression_matrix = normalized_expression,
  gene_sets = gene_set_list,
  method = "mean"
)

## photosynthesis: 525/525 genes found
## senescence: 157/157 genes found

xpindex_norm <- normalize_xpindex(
  xpindex_data = xpindex_data,
  index_names = names(gene_set_list),
  method = "per_set"
)

xpindex_summary <- summarize_xpindex(
  xpindex_data = xpindex_norm,
  index_names = names(gene_set_list),
  group_vars = c("leaf_stage", "treatment")
) %>%
  mutate(
    xpindex = factor(
      tools::toTitleCase(xpindex),
      levels = c("Photosynthesis", "Senescence"),
      labels = c("Photosynthesis", "Leaf Senescence")
    )
  )

p_by_treatment <- ggplot(
  xpindex_summary,
  aes(x = leaf_stage, y = mean, color = xpindex, linetype = treatment)
) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    linewidth = 1
  ) +
  scale_color_manual(
    values = c("Photosynthesis" = "darkgreen", 
               "Leaf Senescence" = "orange")
  ) +
  scale_linetype_manual(
    values = c("+P" = "solid", "-P" = "dashed")
  ) +
  labs(
    x = "Leaf",
    y = "Normalized Expression Index",
    color = NULL,
    linetype = "Treatment"
  ) +
  theme_classic(base_size = base_size) +
  theme(legend.position = "right")

print(p_by_treatment)

Main Figure: Left Panel Components

Purpose: Create publication-quality composite figure showing normalized xpindex, individual gene trajectories, and marker gene validation.

Panel A: Normalized Gene Set Indices

xpindex_summary_pooled <- summarize_xpindex(
  xpindex_data = xpindex_norm,
  index_names = names(gene_set_list),
  group_vars = "leaf_stage"
) %>%
  mutate(
    xpindex = factor(
      tools::toTitleCase(xpindex),
      levels = c("Photosynthesis", "Senescence"),
      labels = c("Photosynthesis", "Leaf Senescence")
    )
  )

lm_photo <- lm(photosynthesis ~ leaf_stage, data = xpindex_norm)
lm_sen <- lm(senescence ~ leaf_stage, data = xpindex_norm)

photo_stats <- summary(lm_photo)
sen_stats <- summary(lm_sen)

photo_r2 <- photo_stats$r.squared
photo_p <- coef(photo_stats)["leaf_stage", "Pr(>|t|)"]

sen_r2 <- sen_stats$r.squared
sen_p <- coef(sen_stats)["leaf_stage", "Pr(>|t|)"]

format_pval <- function(p) {
  if (p < 0.001) return("p < 0.001")
  if (p < 0.01) return(sprintf("p = %.3f", p))
  return(sprintf("p = %.2f", p))
}

stats_text <- sprintf(
  "R² = %.3f\n %s\n\n\nR² = %.3f\n %s",
  photo_r2, format_pval(photo_p),
  sen_r2, format_pval(sen_p)
)

base_family <- "Helvetica"
up <- 58

p_normalized <- ggplot(
  xpindex_summary_pooled,
  aes(x = leaf_stage, y = mean, color = xpindex)
) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    linewidth = 1
  ) +
  scale_color_manual(
    values = c("Photosynthesis" = "darkgreen", 
               "Leaf Senescence" = "orange")
  ) +
  annotate(
    "text",
    x = 1.0,
    y = 0.45,
    label = stats_text,
    hjust = 0,
    vjust = 0,
    size = 4
  ) +
  labs(
    x = "Leaf",
    y = "Normalized Expression Index",
    color = NULL
  ) +
  ggpubr::theme_classic2(base_size = base_size) +
  theme(
    legend.position = c(0.7, 0.9),
    legend.text = element_text(size = 20),
    legend.background = element_rect(fill = "transparent"),
    legend.margin = margin(r = 50, unit = "pt"),
    plot.margin = margin(5.5, 5.5, up, 7, "pt")
  )

print(p_normalized)

Panel B: Spaghetti Plot (Individual Gene Trajectories)

xp_photo_genes <- intersect(
  gene_set_list$photosynthesis,
  rownames(normalized_expression)
)

xp_photosynthesis <- normalized_expression[xp_photo_genes, ] %>%
  as.data.frame() %>%
  tibble::rownames_to_column("gene") %>%
  pivot_longer(-gene, names_to = "sample", values_to = "expression") %>%
  mutate(
    leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample))
  ) %>%
  group_by(gene) %>%
  mutate(centered_expr = expression - mean(expression)) %>%
  ungroup() %>%
  group_by(gene, leaf_stage) %>%
  summarise(mean_expr = mean(centered_expr), .groups = "drop") %>%
  arrange(gene)

xp_senescence_genes <- intersect(
  gene_set_list$senescence,
  rownames(normalized_expression)
)

xp_senescence <- normalized_expression[xp_senescence_genes, ] %>%
  as.data.frame() %>%
  tibble::rownames_to_column("gene") %>%
  pivot_longer(-gene, names_to = "sample", values_to = "expression") %>%
  mutate(
    leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample))
  ) %>%
  group_by(gene) %>%
  mutate(centered_expr = expression - mean(expression)) %>%
  ungroup() %>%
  group_by(gene, leaf_stage) %>%
  summarise(mean_expr = mean(centered_expr), .groups = "drop") %>%
  arrange(gene)

{
  cat("\n=== Spaghetti Plot Gene Counts ===\n")
  cat("Photosynthesis genes:", length(xp_photo_genes), "\n")
  cat("Senescence genes:", length(xp_senescence_genes), "\n")
}

## 
## === Spaghetti Plot Gene Counts ===
## Photosynthesis genes: 525 
## Senescence genes: 157

p_spaghetti <- xp_photosynthesis %>%
  ggplot(aes(x = leaf_stage, y = mean_expr, group = gene)) +
  geom_line(alpha = 0.1, linewidth = 0.5, color = "darkgreen") +
  geom_smooth(
    data = xp_photosynthesis,
    aes(x = leaf_stage, y = mean_expr, group = 1),
    method = "lm",
    se = FALSE,
    linewidth = 2,
    color = "darkgreen"
  ) %>%
  with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 2) +
  geom_line(
    data = xp_senescence,
    aes(x = leaf_stage, y = mean_expr, group = gene),
    alpha = 0.1,
    linewidth = 0.5,
    color = "orange"
  ) %>%
  with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 2) +
  geom_smooth(
    data = xp_senescence,
    aes(x = leaf_stage, y = mean_expr, group = 1),
    method = "lm",
    se = FALSE,
    linewidth = 2,
    color = "orange"
  ) %>%
  with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 2) +
  labs(
    x = "Leaf",
    y = expression("Centered " * log[10] * "(CPM)")
  ) +
  coord_cartesian(ylim = c(-1, 1)) +
  scale_y_continuous(position = "right") +
  ggpubr::theme_classic2(base_size = base_size) +
  theme(plot.margin = margin(5.5, 7, up, 5.5, "pt"))

print(p_spaghetti)

Combine Top Panels

p_top_panel <- ggarrange(
  p_normalized + rremove("xlab"),
  p_spaghetti + rremove("xlab"),
  ncol = 2
) %>%
  annotate_figure(
    bottom = text_grob("Leaf", size = base_size, vjust = -2)
  )

print(p_top_panel)

Panel C: Marker Gene Validation

marker_genes <- data.frame(
  locus_label = c("pep1", "salt1", "ssu1", "mir3",
                  "nye2", "sgrl1", "cab1", "dapat3"),
  gene_set = c("Photosynthesis", "Senescence",
               "Photosynthesis", "Senescence",
               "Senescence", "Senescence",
               "Photosynthesis", "Senescence"),
  pair = c("Pair1", "Pair1", "Pair2", "Pair2",
           "Pair3", "Pair3", "Pair4", "Pair4"),
  stringsAsFactors = FALSE
)

gene_ids <- effects_df %>%
  filter(locus_label %in% marker_genes$locus_label, !is.na(locus_label)) %>%
  select(gene, locus_label, desc_merged) %>%
  distinct() %>%
  group_by(locus_label) %>%
  slice(1) %>%
  ungroup()

marker_data <- lapply(seq_len(nrow(gene_ids)), function(i) {
  g <- gene_ids$gene[i]
  data.frame(
    sample = colnames(normalized_expression),
    expression = normalized_expression[g, ],
    gene = g
  )
}) %>%
  bind_rows() %>%
  left_join(gene_ids, by = "gene") %>%
  left_join(marker_genes, by = "locus_label") %>%
  mutate(
    leaf_stage = as.numeric(sub(".*L([0-9]+)[LR]$", "\\1", sample))
  )

marker_summary <- marker_data %>%
  group_by(gene, locus_label, gene_set, desc_merged, pair, leaf_stage) %>%
  summarise(
    mean_expr = mean(expression),
    se_expr = sd(expression) / sqrt(n()),
    .groups = "drop"
  )

pair_max_expr <- marker_summary %>%
  group_by(pair) %>%
  summarise(max_expr = max(mean_expr), .groups = "drop") %>%
  arrange(desc(max_expr))

marker_summary_colored <- marker_summary %>%
  select(pair, locus_label, desc_merged, gene_set) %>%
  distinct() %>%
  mutate(
    wrapped_text = stringr::str_wrap(desc_merged, width = 25),
    with_br = gsub("\n", "<br>", wrapped_text),
    with_color = case_when(
      gene_set == "Photosynthesis" ~
        paste0("<span style='color:darkgreen'>", with_br, "</span>"),
      gene_set == "Senescence" ~
        paste0("<span style='color:orange'>", with_br, "</span>"),
      TRUE ~ with_br
    ),
    sort_order = ifelse(gene_set == "Photosynthesis", 1, 2)
  )

pair_labels <- marker_summary_colored %>%
  arrange(pair, sort_order) %>%
  group_by(pair) %>%
  summarise(
    facet_label = paste(with_color, collapse = "<br><br>"),
    .groups = "drop"
  ) %>%
  left_join(pair_max_expr, by = "pair") %>%
  arrange(desc(max_expr))

marker_summary <- marker_summary %>%
  left_join(
    pair_labels %>% select(pair, facet_label, max_expr),
    by = "pair"
  ) %>%
  mutate(
    facet_label = factor(facet_label, levels = pair_labels$facet_label)
  )

p_markers <- ggplot(
  marker_summary,
  aes(x = leaf_stage, y = mean_expr, color = gene_set, group = gene)
) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 5) +
  geom_errorbar(
    aes(ymin = mean_expr - se_expr, ymax = mean_expr + se_expr),
    width = 0.2,
    linewidth = 0.8
  ) +
  geom_text(
    data = marker_summary %>% filter(leaf_stage == 1),
    aes(x = leaf_stage,
        y = mean_expr,
        label = locus_label,
        color = gene_set),
    nudge_x = -0.5,
    hjust = 1,
    fontface = "bold",
    size = 5
  ) +
  scale_color_manual(
    values = c("Photosynthesis" = "darkgreen", "Senescence" = "orange")
  ) +
  scale_x_continuous(expand = expansion(add = c(1.5, 0.2)), breaks = 1:4) +
  facet_wrap(
    ~ facet_label,
    nrow = 2,
    scales = "free_y"
  ) +
  labs(
    x = "Leaf",
    y = expression(log[10] * "(CPM)"),
    color = NULL
  ) +
  theme_classic(base_size = base_size) +
  theme(
    strip.background = element_blank(),
    strip.text = element_markdown(size = 15, hjust = 0, face = "bold"),
    plot.margin = margin(-50, 90, 5.5, 5.5, "pt"),
    axis.title.y = element_text(margin = margin(r = -10)),
    panel.spacing.x = unit(0, "cm"),
    panel.spacing.y = unit(0, "cm"),
    legend.position = "none"
  )

print(p_markers)

Combine All Panels: Left Panel

left_panel <- cowplot::plot_grid(
  p_top_panel, p_markers,
  ncol = 1,
  align = 'h',
  axis = 'lr'
)

ggsave(
  "~/Desktop/left_panel.png",
  plot = left_panel,
  width = 9,
  height = 15,
  dpi = 150
)

print(left_panel)

Phase 3: Statistical Analysis and Enrichment

Purpose: Detailed statistical models and gene set enrichment analysis.

Statistical Models for Main Figure

cat("\n=== Photosynthesis xpindex (Normalized) ===\n")

## 
## === Photosynthesis xpindex (Normalized) ===

summary(lm_photo) %>% print()

## 
## Call:
## lm(formula = photosynthesis ~ leaf_stage, data = xpindex_norm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44587 -0.07017  0.00795  0.10089  0.19383 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.97702    0.04848  20.154  < 2e-16 ***
## leaf_stage  -0.13279    0.01794  -7.402 4.49e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1302 on 41 degrees of freedom
## Multiple R-squared:  0.572,  Adjusted R-squared:  0.5615 
## F-statistic: 54.79 on 1 and 41 DF,  p-value: 4.486e-09

cat("\n=== Senescence xpindex (Normalized) ===\n")

## 
## === Senescence xpindex (Normalized) ===

summary(lm_sen) %>% print()

## 
## Call:
## lm(formula = senescence ~ leaf_stage, data = xpindex_norm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45703 -0.15322  0.00116  0.11296  0.49468 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.15554    0.07944   1.958 0.057056 .  
## leaf_stage   0.11659    0.02940   3.966 0.000286 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2134 on 41 degrees of freedom
## Multiple R-squared:  0.2773, Adjusted R-squared:  0.2597 
## F-statistic: 15.73 on 1 and 41 DF,  p-value: 0.0002864

xpindex_models <- fit_xpindex_models(
  xpindex_data = xpindex_norm,
  index_names = names(gene_set_list),
  formula_rhs = "leaf_stage * treatment"
)

xpindex_stats <- extract_xpindex_stats(xpindex_models)

{
  cat("\n=== Expression Index Model Statistics ===\n")
  print(xpindex_stats, n = Inf)
}

## 
## === Expression Index Model Statistics ===
## # A tibble: 6 × 8
##   xpindex        predictor         effect std_err p_value    r2   p_adj p_signif
##   <chr>          <chr>              <dbl>   <dbl>   <dbl> <dbl>   <dbl> <chr>   
## 1 Photosynthesis leaf_stage       -0.0820  0.0210 3.55e-4 0.684 0.00197 **      
## 2 Photosynthesis leaf_stage:trea… -0.118   0.0320 6.56e-4 0.684 0.00197 **      
## 3 Photosynthesis treatment-P       0.283   0.0860 2.15e-3 0.684 0.00431 **      
## 4 Senescence     leaf_stage        0.0546  0.0316 9.14e-2 0.549 0.110   ns      
## 5 Senescence     leaf_stage:trea…  0.151   0.0481 3.18e-3 0.549 0.00477 **      
## 6 Senescence     treatment-P      -0.176   0.130  1.83e-1 0.549 0.183   ns

Supplementary Visualizations

Purpose: Additional exploratory plots to examine treatment effects and relationships between indices.

Individual Index Plots by Treatment

p1 <- xpindex_data %>%
  ggplot(aes(
    y = photosynthesis,
    x = leaf_stage,
    fill = factor(leaf_stage),
    shape = treatment
  )) +
  geom_point(size = 3, color = "black") +
  scale_shape_manual(values = c(21, 25)) +
  scale_fill_viridis_d() +
  labs(
    x = "Leaf",
    y = "Photosynthesis xpindex",
    fill = "Leaf",
    shape = "Treatment"
  ) +
  theme_classic(base_size = 14)

p2 <- xpindex_data %>%
  ggplot(aes(
    y = senescence,
    x = leaf_stage,
    fill = factor(leaf_stage),
    shape = treatment
  )) +
  geom_point(size = 3, color = "black") +
  scale_shape_manual(values = c(21, 25)) +
  scale_fill_viridis_d() +
  labs(
    x = "Leaf",
    y = "Senescence xpindex",
    fill = "Leaf",
    shape = "Treatment"
  ) +
  theme_classic(base_size = 14)

p_individual <- ggarrange(p1, p2, ncol = 2,
                          common.legend = TRUE, legend = "right")
print(p_individual)

Bivariate Relationship

p_bivariate <- xpindex_data %>%
  ggplot(aes(
    x = photosynthesis,
    y = senescence,
    fill = factor(leaf_stage),
    shape = factor(treatment)
  )) +
  geom_point(size = 3, color = "black") +
  scale_fill_viridis_d() +
  scale_shape_manual(values = c(21, 25)) +
  guides(fill = guide_legend(override.aes = list(shape = 21))) +
  labs(
    x = "Photosynthesis xpindex",
    y = "Senescence xpindex",
    fill = "Leaf",
    shape = "Treatment"
  ) +
  theme_classic(base_size = 14)

print(p_bivariate)

Gene Set Enrichment Analysis

Custom Enrichment Function

#' Fisher's Exact Test for Custom Gene Set Enrichment
#'
#' @param deg_lists Named list of DEG vectors by cluster
#' @param annotation Named list of gene sets
#' @param universe Background gene set
#' @return Data frame with enrichment results
test_custom_enrichment <- function(deg_lists, annotation, universe) {
  annotation_filtered <- lapply(annotation, function(x) intersect(x, universe))
  annotation_filtered <- annotation_filtered[
    sapply(annotation_filtered, length) > 0
  ]
  
  results <- list()
  
  for (cluster_name in names(deg_lists)) {
    deg_genes <- deg_lists[[cluster_name]]
    
    for (term_name in names(annotation_filtered)) {
      term_genes <- annotation_filtered[[term_name]]
      overlap <- intersect(deg_genes, term_genes)
      
      in_both <- length(overlap)
      in_deg_only <- length(setdiff(deg_genes, term_genes))
      in_term_only <- length(setdiff(term_genes, deg_genes))
      in_neither <- length(universe) - in_both - in_deg_only - in_term_only
      
      contingency <- matrix(
        c(in_both, in_deg_only, in_term_only, in_neither),
        nrow = 2
      )
      fisher_result <- fisher.test(contingency, alternative = "greater")
      
      results[[length(results) + 1]] <- data.frame(
        Cluster = cluster_name,
        ID = term_name,
        Description = term_name,
        GeneRatio = paste0(in_both, "/", length(deg_genes)),
        BgRatio = paste0(length(term_genes), "/", length(universe)),
        pvalue = fisher_result$p.value,
        geneID = paste(overlap, collapse = "/"),
        Count = in_both,
        stringsAsFactors = FALSE
      )
    }
  }
  
  results_df <- do.call(rbind, results)
  results_df$p.adjust <- p.adjust(results_df$pvalue, method = "BH")
  
  results_df %>% arrange(p.adjust)
}

#' Extract DEG Stats for Custom Annotation
#'
#' @param annotation_id Term ID to filter
#' @param gene_term_df Data frame with gene-term mappings
#' @param effects_data DEG results
#' @param filter_hiconf Filter to high-confidence DEGs
#' @return Tibble with DEG statistics
get_annotation_degs <- function(annotation_id,
                                gene_term_df,
                                effects_data,
                                filter_hiconf = TRUE) {
  genes_in_category <- gene_term_df %>%
    filter(ID == annotation_id) %>%
    pull(gene)
  
  result <- effects_data %>%
    filter(gene %in% genes_in_category)
  
  if (filter_hiconf) {
    result <- result %>% filter(is_hiconf_DEG)
  }
  
  result %>%
    select(
      predictor:gene, locus_label, desc_merged,
      logFC, neglogP, mahalanobis
    ) %>%
    group_by(predictor, regulation) %>%
    arrange(predictor, regulation, -mahalanobis) %>%
    tibble()
}

Build Custom Annotation

custom_annotation <- c(
  list(
    staygreen = staygreen_network$v5,
    sag = sag_orthologs$v5,
    photosynthesis = gene_set_list$photosynthesis,
    leaf_senescence = gene_set_list$senescence
  ),
  natural_senescence %>%
    select(term = set, gene = v5) %>%
    distinct() %>%
    split(.$term) %>%
    lapply(function(x) x$gene)
)

{
  cat("\n=== Custom Annotation Terms ===\n")
  cat("Number of terms:", length(custom_annotation), "\n")
  print(sapply(custom_annotation, length))
}

## 
## === Custom Annotation Terms ===
## Number of terms: 6 
##       staygreen             sag  photosynthesis leaf_senescence         natural 
##             771              48             525             157            3681 
## natural_induced 
##            1075

Run Enrichment Tests

DEGs <- with(effects_df %>% filter(is_hiconf_DEG), {
  list(
    Leaf.Down = unique(gene[
      predictor == "Leaf" & regulation == "Downregulated"
    ]),
    Leaf.Up = unique(gene[predictor == "Leaf" & regulation == "Upregulated"]),
    `-P.Down` = unique(gene[
      predictor == "-P" & regulation == "Downregulated"
    ]),
    `-P.Up` = unique(gene[predictor == "-P" & regulation == "Upregulated"])
  )
})

custom_enrichment_results <- test_custom_enrichment(
  DEGs,
  custom_annotation,
  universe
) %>%
  filter(p.adjust < 0.05)

{
  cat("\n=== Enrichment Results (FDR < 0.05) ===\n")
  custom_enrichment_results %>%
    select(-geneID) %>%
    print()
}

## 
## === Enrichment Results (FDR < 0.05) ===
##   Cluster              ID     Description GeneRatio    BgRatio       pvalue
## 1 Leaf.Up         natural         natural   175/634 3284/24011 7.312839e-21
## 2 Leaf.Up natural_induced natural_induced    83/634  991/24011 8.520661e-21
## 3   -P.Up natural_induced natural_induced    83/661  991/24011 1.205018e-19
## 4   -P.Up         natural         natural   161/661 3284/24011 6.002531e-14
## 5 Leaf.Up             sag             sag     8/634   40/24011 8.242430e-06
## 6 Leaf.Up       staygreen       staygreen    32/634  572/24011 6.252709e-05
## 7   -P.Up       staygreen       staygreen    27/661  572/24011 4.943556e-03
## 8   -P.Up             sag             sag     5/661   40/24011 4.620103e-03
##   Count     p.adjust
## 1   175 1.022479e-19
## 2    83 1.022479e-19
## 3    83 9.640141e-19
## 4   161 3.601519e-13
## 5     8 3.956366e-05
## 6    32 2.501084e-04
## 7    27 1.483067e-02
## 8     5 1.483067e-02

Gene-Level Statistics

genes_in_custom_terms <- custom_enrichment_results %>%
  separate_longer_delim(geneID, delim = "/") %>%
  rename(gene = "geneID") %>%
  select(Cluster, ID, Description, gene) %>%
  inner_join(
    effects_df %>% select(gene, locus_label, desc_merged) %>% distinct(),
    by = "gene"
  )

{
  cat("\n=== Natural Senescence DEGs (Leaf effect) ===\n")
  get_annotation_degs(
    annotation_id = "natural",
    gene_term_df = genes_in_custom_terms,
    effects_data = effects_df,
    filter_hiconf = TRUE
  ) %>%
    filter(predictor == "Leaf") %>%
    print(n = 50)
}

## 
## === Natural Senescence DEGs (Leaf effect) ===
## # A tibble: 176 × 8
##    predictor regulation gene  locus_label desc_merged  logFC neglogP mahalanobis
##    <chr>     <chr>      <chr> <chr>       <chr>        <dbl>   <dbl>       <dbl>
##  1 Leaf      Downregul… Zm00… glp1        germin-lik… -0.787    4.28        30.0
##  2 Leaf      Upregulat… Zm00… osm1        osmotin-li…  3.19     4.03       382. 
##  3 Leaf      Upregulat… Zm00… nactf122    NAC-transc…  2.12     6.07       177. 
##  4 Leaf      Upregulat… Zm00… ox1         Peroxidase   2.02     5.41       160. 
##  5 Leaf      Upregulat… Zm00… pht2        phosphate …  1.97     6.67       157. 
##  6 Leaf      Upregulat… Zm00… <NA>        Indole-2-m…  1.91     7.53       152. 
##  7 Leaf      Upregulat… Zm00… <NA>        <NA>         1.92     4.22       142. 
##  8 Leaf      Upregulat… Zm00… <NA>        Ergosterol…  1.76     7.14       131. 
##  9 Leaf      Upregulat… Zm00… fad15       fatty acid…  1.76     6.39       127. 
## 10 Leaf      Upregulat… Zm00… <NA>        Outer arm …  1.66     8.84       127. 
## 11 Leaf      Upregulat… Zm00… fad17       fatty acid…  1.75     6.07       126. 
## 12 Leaf      Upregulat… Zm00… fomt4       flavonoid …  1.78     4.57       124. 
## 13 Leaf      Upregulat… Zm00… <NA>        Chemocyanin  1.75     4.20       119. 
## 14 Leaf      Upregulat… Zm00… prp11       pathogenes…  1.72     5.05       117. 
## 15 Leaf      Upregulat… Zm00… prt1        Putative c…  1.45    10.9        116. 
## 16 Leaf      Upregulat… Zm00… <NA>        Tryptamine…  1.67     4.62       110. 
## 17 Leaf      Upregulat… Zm00… <NA>        Germin-lik…  1.66     3.06       105. 
## 18 Leaf      Upregulat… Zm00… prp4        pathogenes…  1.54     6.77       103. 
## 19 Leaf      Upregulat… Zm00… <NA>        Cytochrome…  1.61     4.56       102. 
## 20 Leaf      Upregulat… Zm00… <NA>        Cellulose …  1.43     8.23        97.9
## 21 Leaf      Upregulat… Zm00… <NA>        Peptidase …  1.59     3.72        97.8
## 22 Leaf      Upregulat… Zm00… chn33       chitinase33  1.52     5.35        95.0
## 23 Leaf      Upregulat… Zm00… <NA>        Putative a…  1.54     4.39        93.4
## 24 Leaf      Upregulat… Zm00… <NA>        Mono-/di-a…  1.37     8.52        92.8
## 25 Leaf      Upregulat… Zm00… prp9        pathogenes…  1.51     5.10        92.7
## 26 Leaf      Upregulat… Zm00… <NA>        Carbohydra…  1.46     6.12        90.7
## 27 Leaf      Upregulat… Zm00… <NA>        Armadillo    1.41     7.25        90.5
## 28 Leaf      Upregulat… Zm00… <NA>        Alpha-gluc…  1.44     5.91        87.6
## 29 Leaf      Upregulat… Zm00… pip2d       plasma mem…  1.50     3.55        87.5
## 30 Leaf      Upregulat… Zm00… <NA>        Probable p…  1.32     8.12        85.2
## 31 Leaf      Upregulat… Zm00… <NA>        Benzyl alc…  1.40     6.27        84.7
## 32 Leaf      Upregulat… Zm00… <NA>        <NA>         1.40     6.25        84.7
## 33 Leaf      Upregulat… Zm00… ks4         kaurene sy…  1.37     6.77        83.9
## 34 Leaf      Upregulat… Zm00… lbd41       LBD-transc…  1.46     3.58        83.2
## 35 Leaf      Upregulat… Zm00… <NA>        sphinganin…  1.40     5.74        82.7
## 36 Leaf      Upregulat… Zm00… <NA>        Cysteine-r…  1.40     5.25        81.3
## 37 Leaf      Upregulat… Zm00… <NA>        Epimerase …  1.32     6.72        79.1
## 38 Leaf      Upregulat… Zm00… nactf108    NAC-transc…  1.17     9.20        78.5
## 39 Leaf      Upregulat… Zm00… <NA>        <NA>         1.28     7.17        77.4
## 40 Leaf      Upregulat… Zm00… <NA>        Putative r…  1.40     3.49        76.7
## 41 Leaf      Upregulat… Zm00… <NA>        Receptor-l…  1.14     9.33        76.3
## 42 Leaf      Upregulat… Zm00… <NA>        Cysteine d…  1.08    10.00        76.0
## 43 Leaf      Upregulat… Zm00… <NA>        Bowman-Bir…  1.40     3.46        75.8
## 44 Leaf      Upregulat… Zm00… <NA>        Putrescine…  1.17     8.76        75.7
## 45 Leaf      Upregulat… Zm00… <NA>        Type IV in…  1.30     5.91        73.8
## 46 Leaf      Upregulat… Zm00… red1        Tropinone …  0.960   10.8         72.5
## 47 Leaf      Upregulat… Zm00… <NA>        Protein ki…  1.20     7.74        72.3
## 48 Leaf      Upregulat… Zm00… <NA>        Fucosyltra…  1.33     3.90        70.7
## 49 Leaf      Upregulat… Zm00… bhlh62      bHLH-trans…  1.18     7.71        70.4
## 50 Leaf      Upregulat… Zm00… chn17       chitinase17  1.24     6.43        70.0
## # ℹ 126 more rows

Export Results

write.csv(
  xpindex_data,
  file = "~/Desktop/expression_indices.csv",
  row.names = FALSE
)

write.csv(
  custom_enrichment_results,
  file = "~/Desktop/custom_enrichment_results.csv",
  row.names = FALSE
)

write.csv(
  xpindex_stats,
  file = "~/Desktop/xpindex_model_statistics.csv",
  row.names = FALSE
)

Build custom chlorophyll gene sets —

Manually cuarte sets

# Load CornCyc again if not in environment (safe re-load)
corncyc <- read.table(
  "/Users/fvrodriguez/Desktop/corncyc_pathways.20251021",
  sep = "\t",
  header = TRUE,
  quote = ""
)

# Extract synthesis genes: CHLOROPHYLL-SYN + PWY-5068
chloro_synthesis_genes <- corncyc %>%
  filter(Pathway.id %in% c("CHLOROPHYLL-SYN", "PWY-5068")) %>%
  pull(Protein.id) %>%
  gsub("ZM00001EB", "Zm00001eb", .) %>%
  gsub("_P\\d+$", "", ., perl = TRUE) %>%
  grep("Zm00001eb", ., value = TRUE) %>%
  unique()

# Extract degradation genes from PWY-5098
chloro_degradation_base <- corncyc %>%
  filter(Pathway.id == "PWY-5098") %>%
  pull(Protein.id) %>%
  gsub("ZM00001EB", "Zm00001eb", .) %>%
  gsub("_P\\d+$", "", ., perl = TRUE) %>%
  grep("Zm00001eb", ., value = TRUE) %>%
  unique()

# Manually add your curated degradation genes (using maizegdb_id = rownames in expression matrix)
manual_degradation_genes <- c(
  "Zm00001eb307720",  # ZmCHL
  "Zm00001eb047780",  # ZmCHL2
  "Zm00001eb231810",  # ZmPPH
  "Zm00001eb103480",  # ZmSGR
  "Zm00001eb076680",  # ZmSGRL
  "Zm00001eb140370"   # ZmPAO
)

chloro_degradation_genes <- unique(c(chloro_degradation_base, manual_degradation_genes))

# Create named list
chloro_gene_sets <- list(
  chlorophyll_synthesis = chloro_synthesis_genes,
  chlorophyll_degradation = chloro_degradation_genes
)

Calculate and normalize xpindex

cat("=== Chlorophyll Gene Sets ===\n")

## === Chlorophyll Gene Sets ===

cat("Synthesis genes:", length(chloro_gene_sets$chlorophyll_synthesis), "\n")

## Synthesis genes: 32

cat("Degradation genes:", length(chloro_gene_sets$chlorophyll_degradation), "\n")

## Degradation genes: 10

chloro_xpindex <- calculate_xpindex(
  expression_matrix = normalized_expression,
  gene_sets = chloro_gene_sets,
  method = "mean"
)

## chlorophyll_synthesis: 23/32 genes found
## chlorophyll_degradation: 9/10 genes found

chloro_xpindex_norm <- normalize_xpindex(
  xpindex_data = chloro_xpindex,
  index_names = names(chloro_gene_sets),
  method = "per_set"
)

lm(data=chloro_xpindex, chlorophyll_degradation~leaf_stage*treatment )

## 
## Call:
## lm(formula = chlorophyll_degradation ~ leaf_stage * treatment, 
##     data = chloro_xpindex)
## 
## Coefficients:
##            (Intercept)              leaf_stage             treatment-P  
##                5.38762                 0.06527                -0.17022  
## leaf_stage:treatment-P  
##                0.15017

Summarize for plotting

# Pooled (all leaves, ignore treatment)
summary_pooled <- summarize_xpindex(
  xpindex_data = chloro_xpindex_norm,
  index_names = names(chloro_gene_sets),
  group_vars = "leaf_stage"
) %>%
  mutate(
    xpindex = case_when(
      xpindex == "chlorophyll_synthesis" ~ "Synthesis",
      xpindex == "chlorophyll_degradation" ~ "Degradation"
    ),
    xpindex = factor(xpindex, levels = c("Synthesis", "Degradation"))
  )

# By treatment
summary_by_treatment <- summarize_xpindex(
  xpindex_data = chloro_xpindex_norm,
  index_names = names(chloro_gene_sets),
  group_vars = c("leaf_stage", "treatment")
) %>%
  mutate(
    xpindex = case_when(
      xpindex == "chlorophyll_synthesis" ~ "Synthesis",
      xpindex == "chlorophyll_degradation" ~ "Degradation"
    ),
    xpindex = factor(xpindex, levels = c("Synthesis", "Degradation"))
  )

Plotting 1

p_normalized <- ggplot(
  xpindex_summary_pooled,
  aes(x = leaf_stage, y = mean, color = xpindex)
) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 5) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    linewidth = 1
  ) +
  scale_color_manual(
    values = c("Photosynthesis" = "darkgreen", 
               "Leaf Senescence" = "orange")
  ) +
  # annotate(
  #   "text",
  #   x = 1.0,
  #   y = 0.45,
  #   label = stats_text,
  #   hjust = 0,
  #   vjust = 0,
  #   size = 4
  # ) +
  labs(
    x = "Leaf",
    y = "Normalized Expression",
    color = NULL
  ) +
  ggpubr::theme_classic2(base_size = base_size) +
  theme(
    legend.position = c(0.7, 0.9),
    legend.text = element_text(size = 25),
    legend.background = element_rect(fill = "transparent"),
    legend.margin = margin(r = 50, unit = "pt"),
    plot.margin = margin(5.5, 5.5, up, 7, "pt")
  )

print(p_normalized)

base_size <- 30

# Plot 1: Pooled
p_chloro_pooled <- summary_pooled %>%
  ggplot(aes(x = leaf_stage, y = mean, color = xpindex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 5) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    width = 0.2,
    linewidth = 0.8
  ) +
  scale_color_manual(
    values = c("Synthesis" = "darkgreen",
               "Degradation" = "darkorange")
  ) +
  labs(
    y= "Normalized Expression",
    x = "Leaf",
    color = NULL
  ) +
  theme_classic(base_size = base_size) +
  theme(legend.position = "none")

print(p_chloro_pooled)

ggsave(
  "~/Desktop/chlorophyll_synthesis_vs_degradation_pooled.png",
  plot = p_chloro_pooled,
  width = 10,
  height = 6,
  dpi = 300
)

Plotting 2 joins

pd <- position_dodge(width = 0.2)
# Plot 2: By treatment
p_chloro_treatment <- summary_by_treatment %>%
  ggplot(aes(x = leaf_stage, y = mean, 
             color = xpindex,
             fill = xpindex,
             linetype = treatment,
             shape=treatment)) +
  geom_line(linewidth = 2) +
  geom_point(position = pd,
             size = 5) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se), 
    position = pd,
    width = 0.2,
    linewidth = 1,
    linetype = "solid",
  ) +
  scale_x_continuous(
    expand=expansion(add = c(0.2,0.2))
  ) +
  scale_y_continuous(
    expand=expansion(add = c(0,0.2)),
    breaks = 2*(0:5)/10
  ) +
  scale_color_manual(
    values = c("Synthesis" = "darkgreen",
               "Degradation" = "darkorange"),
    labels = c("Chlorophyll\nSynthesis","Degradation")
  ) +
    scale_fill_manual(
    values = c("Synthesis" = "darkgreen",
               "Degradation" = "darkorange", guide= "none")
  ) +
  scale_linetype_manual(
    values = c("+P" = "solid", "-P" = "dashed", guide= "none")
  ) +
    scale_shape_manual(
    values = c("+P" = 19, "-P" = 25)
  ) +
  labs(
    x = "Leaf",
    y = "Normalized Expression",
    color = NULL,
    shape = NULL,
  ) +
  guides(
    fill = "none",
    linetype = "none",
    shape = guide_legend(
      override.aes = list(fill = "black"),
      order = 2,
      #label.position = "left",
      #label.hjust = 1 ,
      reverse = TRUE
      ),
    color = guide_legend(
      override.aes = list(shape = NA),
      order = 1,
      # label.position = "left",
      # label.hjust = 1, 
    )
  )+
  theme_classic(base_size = base_size) +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.1, 1),
    legend.justification = c(0, 1),
    legend.text = element_text(
      size = 25, 
      margin = margin(l =unit(2, "lines"))
      ),
    legend.box = "horizontal",
    legend.box.just = "bottom",
    legend.margin = margin(0, 0, 0, 0),
    legend.spacing.x = unit(1, "lines"),
    legend.key.spacing.x = unit(2, "lines"),
    legend.key.size = unit(2, "lines"),  
    legend.key = element_rect(fill = "transparent"),
    legend.background = element_rect(fill = "transparent")
  )

 p_chloro <- ggpubr::ggarrange(p_chloro_pooled,
                              p_chloro_treatment,
                              align="hv")
p_chloro

ggsave(
  "~/Desktop/chlorophyll_synthesis_vs_degradation_by_treatment.png",
  plot = p_chloro,
  width = 12,
  height = 7,
  dpi = 300
)

# --- Step 1: Prepare both datasets with consistent structure ---


p_photo_sene <- xpindex_summary %>%
  ggplot(aes(x = leaf_stage, y = mean, 
             color = xpindex,
             fill = xpindex,
             linetype = treatment,
             shape=treatment)) +
  geom_line(linewidth = 1.5) +
  geom_point(position = pd,
             size = 5) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se), 
    position = pd,
    width = 0.2,
    linewidth = 1,
    linetype = "solid",
  ) +
  scale_y_continuous(
    expand=expansion(add = c(0,0.2))
  ) +
  scale_color_manual(
    values = c("Photosynthesis" = "darkgreen",
               "Leaf Senescence" = "darkorange"),
  ) +
    scale_fill_manual(
     values = c("Photosynthesis" = "darkgreen",
               "Leaf Senescence" = "darkorange"),
     guide= "none"
  ) +
  scale_linetype_manual(
    values = c("+P" = "solid", "-P" = "dashed", guide= "none")
  ) +
    scale_shape_manual(
    values = c("+P" = 19, "-P" = 25),
    guide="none"
  ) +
  labs(
    x = "Leaf",
    y = "Normalized Expression",
    color = NULL,
    shape = NULL,
  ) +
  guides(
    fill = "none",
    linetype = "none",
    shape="none",
    color = guide_legend(
      override.aes = list(shape = NA),
      order = 1,
      #label.position = "left",
      #label.hjust = 1, 
    )
    
  )+
  theme_classic(base_size = base_size) +
  theme(
   legend.title = element_blank(),
    legend.position = c(0, 1),
    legend.justification = c(0, 1),
    legend.text = element_text(
      size = 25,
      margin = margin(l =unit(2, "lines"))                ),
      # legend.text.align = 1,           
      # legend.justification = "center",
      # legend.box.just = "right",       
      # legend.margin = margin(0, 0, 0, 0),
      # legend.spacing.y = unit(0, "lines"),
      # legend.key.spacing.y = unit(0, "lines"),
    legend.key.size = unit(2, "lines"),  
    legend.key = element_rect(fill = "transparent"),
      legend.background = element_rect(fill = "transparent"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  ) 

print(p_photo_sene)

p_combined_by_treatment <- ggpubr::ggarrange(
  p_chloro_treatment + rremove("xlab"), 
 p_photo_sene + rremove("xlab"),
 nrow=1,
 widths= c(1.4,1)) %>%
    annotate_figure(
    bottom = text_grob("Leaf", size = base_size, 
                       vjust = -0.25,
                       hjust = 0)
  )

p_combined_by_treatment

ggsave(
  "~/Desktop/combined_by_treatment_xpindex.png",
  plot = p_combined_by_treatment,
  width = 9.25,
  height = 9,
  dpi = 150
)

Pooled indices

# Common y-axis: 0 to 1, breaks every 0.2
common_y_scale <- scale_y_continuous(
  limits = c(0, 1),
  breaks = seq(0, 1, by = 0.2),
  expand = expansion(add = c(0, 0.2))
)

# Left panel: Chlorophyll
p_chloro_pooled_legend <- summary_pooled %>%
  ggplot(aes(x = leaf_stage, y = mean, color = xpindex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, linewidth = 0.8) +
  scale_color_manual(
    values = c("Synthesis" = "darkgreen", "Degradation" = "darkorange"),
    labels = c("Chlorophyll\nSynthesis", "Degradation")
  ) +
  common_y_scale +
  labs(x = "Leaf", y = "Normalized Expression") +
  theme_classic(base_size = base_size) +
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1),
    legend.text = element_text(
    size = 25,
    margin = margin(l =unit(2, "lines"))
    ),
    legend.title = element_blank(),
    legend.key = element_rect(fill = "transparent"),
   legend.background = element_rect(fill = "transparent")
  )

# Right panel: Photosynthesis & Senescence
p_normalized_legend <- ggplot(xpindex_summary_pooled, 
                              aes(x = leaf_stage, y = mean, color = xpindex)) +
  geom_line(linewidth = 1.5) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2, linewidth = 1) +
  scale_color_manual(
    values = c("Photosynthesis" = "darkgreen", "Leaf Senescence" = "orange"),
    labels = c("Photosynthesis", "Leaf Senescence")
  ) +
  common_y_scale +
  labs(x = "Leaf", y = "Normalized Expression") +
  ggpubr::theme_classic2(base_size = base_size) +
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1),
    legend.text = element_text(
      size = 25,
      margin = margin(l =unit(2, "lines"))
      ),
    legend.title = element_blank(),
    legend.key = element_rect(fill = "transparent"),
    legend.background = element_rect(fill = "transparent"),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank()
  )

# Combine
p_combined_pooled_separate_legends <- ggpubr::ggarrange(
  p_chloro_pooled_legend + rremove("xlab"),
  p_normalized_legend + rremove("xlab"),
  nrow = 1,
  widths = c(1.33, 1)
) %>%
  annotate_figure(
    bottom = text_grob("Leaf", size = base_size, vjust = -0.3, hjust = 0)
  )

print(p_combined_pooled_separate_legends)

ggsave(
  "~/Desktop/combined_pooled_xpindex_separate_legends.png",
  plot = p_combined_pooled_separate_legends,
  width = 8.2,
  height = 7,
  dpi = 150
)

Caluclate model stats

# --- Define combined gene sets ---
panel_sets <- c(chloro_gene_sets, gene_set_list)

# --- Calculate and normalize expression indices ---
panel_data <- calculate_xpindex(
  expression_matrix = normalized_expression,
  gene_sets = panel_sets,
  method = "mean"
)

## chlorophyll_synthesis: 23/32 genes found
## chlorophyll_degradation: 9/10 genes found
## photosynthesis: 525/525 genes found
## senescence: 157/157 genes found

panel_norm <- normalize_xpindex(
  xpindex_data = panel_data,
  index_names = names(panel_sets),
  method = "per_set"
)

# --- Model 1: Full model with interaction (leaf_stage * treatment) ---
cat("=== MODEL 1: Leaf Stage × Treatment Interaction ===\n")

## === MODEL 1: Leaf Stage × Treatment Interaction ===

panel_models_interaction <- fit_xpindex_models(
  xpindex_data = panel_norm,
  index_names = names(panel_sets),
  formula_rhs = "leaf_stage * treatment"
)

stats_interaction <- extract_xpindex_stats(panel_models_interaction)
print(stats_interaction)

## # A tibble: 12 × 8
##    xpindex              predictor  effect std_err p_value    r2   p_adj p_signif
##    <chr>                <chr>       <dbl>   <dbl>   <dbl> <dbl>   <dbl> <chr>   
##  1 Chlorophyll_degrada… leaf_sta…  0.0376  0.0203 7.13e-2 0.517 0.107   ns      
##  2 Chlorophyll_degrada… leaf_sta…  0.0864  0.0309 7.96e-3 0.517 0.0159  *       
##  3 Chlorophyll_degrada… treatmen… -0.0980  0.0832 2.46e-1 0.517 0.246   ns      
##  4 Chlorophyll_synthes… leaf_sta… -0.0897  0.0275 2.29e-3 0.572 0.00688 **      
##  5 Chlorophyll_synthes… leaf_sta… -0.109   0.0419 1.29e-2 0.572 0.0221  *       
##  6 Chlorophyll_synthes… treatmen…  0.191   0.113  9.81e-2 0.572 0.118   ns      
##  7 Photosynthesis       leaf_sta… -0.0820  0.0210 3.55e-4 0.684 0.00393 **      
##  8 Photosynthesis       leaf_sta… -0.118   0.0320 6.56e-4 0.684 0.00393 **      
##  9 Photosynthesis       treatmen…  0.283   0.0860 2.15e-3 0.684 0.00688 **      
## 10 Senescence           leaf_sta…  0.0546  0.0316 9.14e-2 0.549 0.118   ns      
## 11 Senescence           leaf_sta…  0.151   0.0481 3.18e-3 0.549 0.00763 **      
## 12 Senescence           treatmen… -0.176   0.130  1.83e-1 0.549 0.200   ns

# --- Model 2: Additive model (leaf_stage + treatment) ---
cat("\n=== MODEL 2: Additive Effects (Leaf Stage + Treatment) ===\n")

## 
## === MODEL 2: Additive Effects (Leaf Stage + Treatment) ===

panel_models_additive <- fit_xpindex_models(
  xpindex_data = panel_norm,
  index_names = names(panel_sets),
  formula_rhs = "leaf_stage + treatment"
)

stats_additive <- extract_xpindex_stats(panel_models_additive)
print(stats_additive)

## # A tibble: 8 × 8
##   xpindex              predictor   effect std_err p_value    r2   p_adj p_signif
##   <chr>                <chr>        <dbl>   <dbl>   <dbl> <dbl>   <dbl> <chr>   
## 1 Chlorophyll_degrada… leaf_sta…  0.0747   0.0166 5.47e-5 0.420 1.09e-4 ***     
## 2 Chlorophyll_degrada… treatmen…  0.114    0.0369 3.58e-3 0.420 4.77e-3 **      
## 3 Chlorophyll_synthes… leaf_sta… -0.137    0.0222 2.86e-7 0.498 1.14e-6 ****    
## 4 Chlorophyll_synthes… treatmen… -0.0770   0.0495 1.28e-1 0.498 1.46e-1 ns      
## 5 Photosynthesis       leaf_sta… -0.133    0.0182 6.81e-9 0.572 5.45e-8 ****    
## 6 Photosynthesis       treatmen… -0.00796  0.0405 8.45e-1 0.572 8.45e-1 ns      
## 7 Senescence           leaf_sta…  0.120    0.0263 4.99e-5 0.435 1.09e-4 ***     
## 8 Senescence           treatmen…  0.196    0.0587 1.85e-3 0.435 2.95e-3 **

# --- Model 3: Leaf stage only (pooled across treatments) ---
cat("\n=== MODEL 3: Leaf Stage Only (Pooled) ===\n")

## 
## === MODEL 3: Leaf Stage Only (Pooled) ===

panel_models_pooled <- fit_xpindex_models(
  xpindex_data = panel_norm,
  index_names = names(panel_sets),
  formula_rhs = "leaf_stage"
)

stats_pooled <- extract_xpindex_stats(panel_models_pooled)
print(stats_pooled)

## # A tibble: 4 × 8
##   xpindex               predictor  effect std_err p_value    r2   p_adj p_signif
##   <chr>                 <chr>       <dbl>   <dbl>   <dbl> <dbl>   <dbl> <chr>   
## 1 Chlorophyll_degradat… leaf_sta…  0.0729  0.0182 2.52e-4 0.282 2.86e-4 ***     
## 2 Chlorophyll_synthesis leaf_sta… -0.135   0.0226 4.30e-7 0.468 8.61e-7 ****    
## 3 Photosynthesis        leaf_sta… -0.133   0.0179 4.49e-9 0.572 1.79e-8 ****    
## 4 Senescence            leaf_sta…  0.117   0.0294 2.86e-4 0.277 2.86e-4 ***

# --- Optional: Compare models via AIC (for one example index) ---
# Example: Chlorophyll synthesis
example_index <- "chlorophyll_synthesis"
if (example_index %in% names(panel_sets)) {
  m_int <- panel_models_interaction[[example_index]]
  m_add <- panel_models_additive[[example_index]]
  m_pooled <- panel_models_pooled[[example_index]]
  
  cat("\n=== AIC Comparison (", example_index, ") ===\n", sep = "")
  AIC_comparison <- AIC(m_int, m_add, m_pooled)
  rownames(AIC_comparison) <- c("Interaction", "Additive", "Pooled")
  print(AIC_comparison)
}

## 
## === AIC Comparison (chlorophyll_synthesis) ===
##             df       AIC
## Interaction  5 -34.99953
## Additive     4 -30.09655
## Pooled       3 -29.57391

# --- Export results (optional) ---
# write.csv(stats_interaction, "~/Desktop/xpindex_interaction_stats.csv", row.names = FALSE)
# write.csv(stats_pooled, "~/Desktop/xpindex_pooled_stats.csv", row.names = FALSE)

Session Info

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggrepel_0.9.6          ggfx_1.0.3             ggtext_0.1.2          
## [4] clusterProfiler_4.17.0 stringr_1.5.2          tidyr_1.3.1           
## [7] ggpubr_0.6.2           ggplot2_4.0.0          dplyr_1.1.4           
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.17.1       jsonlite_2.0.0         
##   [4] magrittr_2.0.4          ggtangle_0.0.7          magick_2.9.0           
##   [7] farver_2.1.2            rmarkdown_2.30          fs_1.6.6               
##  [10] ragg_1.5.0              vctrs_0.6.5             memoise_2.0.1          
##  [13] ggtree_3.99.2           rstatix_0.7.3           janitor_2.2.1          
##  [16] htmltools_0.5.8.1       broom_1.0.10            Formula_1.2-5          
##  [19] gridGraphics_0.5-1      sass_0.4.10             bslib_0.9.0            
##  [22] htmlwidgets_1.6.4       plyr_1.8.9              lubridate_1.9.4        
##  [25] cachem_1.1.0            commonmark_2.0.0        igraph_2.2.0           
##  [28] lifecycle_1.0.4         pkgconfig_2.0.3         Matrix_1.7-4           
##  [31] R6_2.6.1                fastmap_1.2.0           gson_0.1.0             
##  [34] snakecase_0.11.1        digest_0.6.37           aplot_0.2.9            
##  [37] enrichplot_1.29.3       patchwork_1.3.2         AnnotationDbi_1.71.2   
##  [40] S4Vectors_0.47.4        textshaping_1.0.4       RSQLite_2.4.3          
##  [43] labeling_0.4.3          timechange_0.3.0        httr_1.4.7             
##  [46] abind_1.4-8             mgcv_1.9-3              compiler_4.5.1         
##  [49] bit64_4.6.0-1           fontquiver_0.2.1        withr_3.0.2            
##  [52] S7_0.2.0                backports_1.5.0         BiocParallel_1.43.4    
##  [55] carData_3.0-5           DBI_1.2.3               R.utils_2.13.0         
##  [58] ggsignif_0.6.4          rappdirs_0.3.3          tools_4.5.1            
##  [61] ape_5.8-1               R.oo_1.27.1             glue_1.8.0             
##  [64] nlme_3.1-168            GOSemSim_2.35.2         gridtext_0.1.5         
##  [67] grid_4.5.1              reshape2_1.4.4          fgsea_1.35.8           
##  [70] generics_0.1.4          gtable_0.3.6            R.methodsS3_1.8.2      
##  [73] data.table_1.17.8       xml2_1.4.0              car_3.1-3              
##  [76] utf8_1.2.6              XVector_0.49.1          BiocGenerics_0.55.4    
##  [79] pillar_1.11.1           markdown_2.0            yulab.utils_0.2.1      
##  [82] splines_4.5.1           treeio_1.33.0           lattice_0.22-7         
##  [85] bit_4.6.0               tidyselect_1.2.1        fontLiberation_0.1.0   
##  [88] GO.db_3.22.0            Biostrings_2.77.2       knitr_1.50             
##  [91] fontBitstreamVera_0.1.1 gridExtra_2.3           litedown_0.7           
##  [94] IRanges_2.43.5          Seqinfo_0.99.2          stats4_4.5.1           
##  [97] xfun_0.53               Biobase_2.69.1          stringi_1.8.7          
## [100] lazyeval_0.2.2          ggfun_0.2.0             yaml_2.3.10            
## [103] evaluate_1.0.5          codetools_0.2-20        gdtools_0.4.4          
## [106] tibble_3.3.0            qvalue_2.41.0           ggplotify_0.1.3        
## [109] cli_3.6.5               systemfonts_1.3.1       jquerylib_0.1.4        
## [112] Rcpp_1.1.0              png_0.1-8               parallel_4.5.1         
## [115] blob_1.2.4              DOSE_4.3.0              viridisLite_0.4.2      
## [118] tidytree_0.4.6          ggiraph_0.9.2           scales_1.4.0           
## [121] purrr_1.1.0             crayon_1.5.3            rlang_1.1.6            
## [124] cowplot_1.2.0           fastmatch_1.1-6         KEGGREST_1.49.2

Gene Set Transcription Indices

Maize Genetics Analysis

2025-11-06