PSU 2022 Inv4m Phenotype Analysis: Treatment and Genotype Effects

Overview

This analysis uses spatially corrected phenotype data from the SpATS analysis to examine treatment effects (phosphorus limitation) and genotype × treatment interactions for the Inv4m inversion in maize.

Analysis workflow:

Load spatially corrected data
Fit linear models: Phenotype ~ Genotype × Treatment
Identify significant treatment and interaction effects (FDR < 0.05)
Visualize treatment effects: Pairwise comparisons of +P vs -P within each genotype
Export statistical results and summaries

Focus: This analysis emphasizes treatment comparisons (phosphorus stress response) and how the Inv4m genotype modifies this response (genotype × treatment interactions). All visualizations show treatment effects faceted by genotype.

1. Setup and Data Loading

Load libraries

library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggtext)
library(ggfx)
library(ggpubr)
library(ggbeeswarm)
library(rstatix)
library(knitr)

Define aesthetic constants

# Color palette: gold for CTRL, purple for Inv4m
pal <- c("gold", "purple4")

# Base theme for plots
pheno_theme <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(
      face = "bold", 
      color = "black", 
      size = 20, 
      angle = 45, 
      hjust = 1
    ),
    strip.background = element_blank(),
    strip.text = element_text(size = 25),
    legend.position = "none"
  )

# Theme for treatment comparison plots
pheno_theme2 <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 25, face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 20),
    legend.position = "none"
  )

# Theme for time-series plots
pheno_theme3 <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(color = "black", size = 20),
    strip.background = element_blank(),
    strip.text = element_text(size = 20),
    legend.position = "none"
  )

Load spatially corrected data

# Load the spatially corrected data from the SpATS analysis
sp_corrected_file <- "PSU2022_spatially_corrected_both_treatments.csv"

sp_corrected_combined <- read.csv(sp_corrected_file) %>%
  mutate(
    Treatment = factor(Treatment, levels = c("+P", "-P")),
    genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
  )

cat("Spatially corrected data dimensions:", dim(sp_corrected_combined), "\n")

## Spatially corrected data dimensions: 127 39

cat("Treatment levels:", levels(sp_corrected_combined$Treatment), "\n")

## Treatment levels: +P -P

cat("Genotype levels:", levels(sp_corrected_combined$genotype), "\n")

## Genotype levels: CTRL Inv4m

cat("\nTreatment × Genotype combinations:\n")

## 
## Treatment × Genotype combinations:

print(table(sp_corrected_combined$Treatment, sp_corrected_combined$genotype))

##     
##      CTRL Inv4m
##   +P   16    16
##   -P   16    16

Prepare plotting data

# Filter to NIL genotypes only (CTRL and Inv4m)
pheno <- sp_corrected_combined %>%
  filter(genotype %in% c("CTRL", "Inv4m")) %>%
  mutate(
    genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
  ) %>%
  droplevels()

# Rename genotype column for consistency
pheno <- pheno %>%
  rename(Genotype = genotype)

# Create formatted genotype labels with CTRL as reference
pheno <- pheno %>%
  mutate(
    Genotype_label = case_when(
      Genotype == "CTRL" ~ "CTRL",
      Genotype == "Inv4m" ~ "<i>Inv4m</i>",
      TRUE ~ as.character(Genotype)
    ),
    Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))
  )

cat("Analysis data dimensions:", dim(pheno), "\n")

## Analysis data dimensions: 64 40

cat("Genotype levels:", levels(pheno$Genotype), "\n")

## Genotype levels: CTRL Inv4m

cat("Genotype_label levels:", levels(pheno$Genotype_label), "\n")

## Genotype_label levels: CTRL <i>Inv4m</i>

2. Statistical Analysis

Identify phenotype variables

# Define phenotype groups
base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")

# Check which phenotypes are available in the spatially corrected data
available_phenos <- intersect(
  c(base_phenotypes, ear_phenotypes),
  colnames(pheno)
)

cat("Available phenotypes for analysis:\n")

## Available phenotypes for analysis:

cat(paste(available_phenos, collapse = ", "), "\n")

## PH, DTA, DTS, STW40, STW50, STW60, STWHV, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

vars <- available_phenos

Fit linear models for all phenotypes

Since the data is already spatially corrected, we can use simple linear models without spatial correlation terms.

# Fit models and extract effects
effects <- lapply(vars, FUN = function(x) {
  # Check if there's enough data
  n_obs <- sum(!is.na(pheno[[x]]))
  
  if (n_obs < 20) {
    message("Skipping ", x, " - insufficient data (n=", n_obs, ")")
    return(NULL)
  }
  
  # Fit linear model: phenotype ~ Genotype * Treatment
  form <- paste(x, "~ Genotype * Treatment")
  
  model <- tryCatch(
    lm(as.formula(form), data = pheno),
    error = function(e) {
      message("Model failed for ", x, ": ", e$message)
      return(NULL)
    }
  )
  
  if (is.null(model)) return(NULL)
  
  # Extract coefficients
  out <- coef(summary(model)) %>% 
    as.data.frame() %>% 
    tibble::rownames_to_column("predictor")
  
  colnames(out)[3] <- "Std.Error"
  colnames(out)[5] <- "p.value"
  out$response <- x
  out %>% select(response, everything())
}) %>% 
  bind_rows() %>%
  mutate(p.adjust = p.adjust(p.value, method = "fdr")) %>% 
  arrange(p.adjust)


cat("\n=== Model Summary ===\n")

## 
## === Model Summary ===

cat("Total models fitted:", length(unique(effects$response)), "\n")

## Total models fitted: 16

cat("Total effects tested:", nrow(effects), "\n")

## Total effects tested: 64

Identify significant effects

# Filter for significant effects (FDR < 0.05)
significant <- effects %>%
  filter(
    p.adjust < 0.05, 
    !grepl("Intercept", predictor)
  ) %>%
  arrange(p.adjust)

# Recode predictor names for plotting
significant <- significant %>%
  mutate(
    predictor = case_when(
      predictor == "Treatment-P" ~ "-P",
      predictor == "GenotypeInv4m" ~ "Inv4m",
      predictor == "GenotypeInv4m:Treatment-P" ~ "Inv4m:-P",
      TRUE ~ predictor
    ),
    predictor = factor(predictor, levels = c("-P", "Inv4m", "Inv4m:-P"))
  )

# Calculate reference means for each response
reference_means <- effects %>%
  filter(grepl("Intercept|Treatment-P|GenotypeInv4m", predictor)) %>%
  select(response, predictor, Estimate) %>%
  pivot_wider(
    names_from = predictor,
    values_from = Estimate
  ) %>%
  rename(
    intercept = `(Intercept)`,
    treatment_effect = `Treatment-P`,
    genotype_effect = GenotypeInv4m,
    interaction_effect = `GenotypeInv4m:Treatment-P`
  ) %>%
  mutate(
    # +P marginal mean = (CTRL+P + Inv4m+P) / 2
    plus_p_marginal_mean = intercept + (genotype_effect / 2),
    
    # CTRL marginal mean = (CTRL+P + CTRL-P) / 2
    ctrl_marginal_mean = intercept + (treatment_effect / 2),
    
    # CTRL-P mean (not marginal, just this cell)
    ctrl_minus_p_mean = intercept + treatment_effect
  ) %>%
  select(
    response, 
    plus_p_marginal_mean, 
    ctrl_marginal_mean, 
    ctrl_minus_p_mean
  )

# Join and calculate percentages with appropriate denominator
significant <- significant %>%
  left_join(reference_means, by = "response") %>%
  mutate(
    reference_mean = case_when(
      predictor == "-P" ~ plus_p_marginal_mean,
      predictor == "Inv4m" ~ ctrl_marginal_mean,
      predictor == "Inv4m:-P" ~ ctrl_minus_p_mean,
      TRUE ~ NA_real_
    ),
    Estimate_pct = (Estimate / reference_mean) * 100
  ) %>%
  select(
    response, 
    predictor, 
    reference_mean,
    Estimate, 
    Estimate_pct,
    p.value,
    p.adjust,
) %>%
  arrange(predictor, response)



cat("\n=== Significant Effects (FDR < 0.05) ===\n")

## 
## === Significant Effects (FDR < 0.05) ===

print(significant)

##    response predictor reference_mean    Estimate Estimate_pct      p.value
## 1       DTA        -P     71.3810741   3.4328993     4.809257 2.668769e-13
## 2       DTS        -P     73.0122598   3.0621067     4.193962 1.383332e-13
## 3      KW50        -P     10.5360290  -1.8013514   -17.097062 1.521219e-03
## 4     STW40        -P     10.6064535  -6.5170078   -61.443797 2.137127e-08
## 5     STW50        -P     33.9278296 -18.7588852   -55.290555 1.598759e-16
## 6     STW60        -P     75.5501994 -33.3434615   -44.134181 1.741324e-08
## 7     STWHV        -P    101.1614418 -22.4074887   -22.150227 1.299584e-05
## 8       DTA     Inv4m     73.8364156  -1.4777836    -2.001429 1.648877e-04
## 9       DTS     Inv4m     75.1854065  -1.2841867    -1.708027 1.833645e-04
## 10       HI     Inv4m      0.7263306   0.1718492    23.659923 1.168579e-02
## 11       PH     Inv4m    162.9516087   5.5697807     3.418058 3.853788e-04
## 12    STW40     Inv4m      8.8152061  -2.9345129   -33.289215 5.056342e-03
## 13    STW50     Inv4m     26.6048092  -4.1128444   -15.459026 1.515412e-02
## 14    STWHV     Inv4m     95.7851134 -11.6548319   -12.167686 1.609593e-02
## 15       CD  Inv4m:-P     25.9716819  -2.6176674   -10.078929 1.000036e-02
##        p.adjust
## 1  8.989538e-13
## 2  4.918515e-13
## 3  3.744540e-03
## 4  6.513149e-08
## 5  6.018857e-16
## 6  5.572237e-08
## 7  3.780609e-05
## 8  4.588180e-04
## 9  4.889721e-04
## 10 2.578932e-02
## 11 9.865697e-04
## 12 1.198540e-02
## 13 3.232880e-02
## 14 3.323031e-02
## 15 2.285796e-02

# Count by effect type
cat("\n=== Summary by Effect Type ===\n")

## 
## === Summary by Effect Type ===

print(table(significant$predictor))

## 
##       -P    Inv4m Inv4m:-P 
##        7        7        1

3. Pairwise Treatment Comparisons

Prepare t-test results

We focus on comparing phosphorus treatments (+P vs -P) within each genotype. This captures both main treatment effects and genotype × treatment interactions.

# Prepare long format data for t-tests
to_t_test <- pheno %>%
  select(plotId, Genotype, Treatment, all_of(vars)) %>%
  pivot_longer(
    cols = all_of(vars), 
    names_to = "trait", 
    values_to = "value"
  ) %>%
  filter(trait %in% significant$response)

# Identify traits with treatment effects OR interactions
treatment_and_interaction <- significant %>%
  filter(predictor %in% c("-P", "Inv4m:-P")) %>%
  pull(response) %>%
  unique()

cat("Traits with treatment effects or interactions:", 
    length(treatment_and_interaction), "\n")

## Traits with treatment effects or interactions: 8

cat(paste(treatment_and_interaction, collapse = ", "), "\n\n")

## DTA, DTS, KW50, STW40, STW50, STW60, STWHV, CD

# Perform pairwise t-tests: compare treatments within each genotype
if (length(treatment_and_interaction) > 0) {
  treatment_pairwise <- to_t_test %>%
    filter(trait %in% treatment_and_interaction) %>%
    group_by(Genotype, trait) %>%
    t_test(value ~ Treatment, ref.group = "+P") %>%
    add_significance() %>%
    add_x_position(x = "Treatment") %>%
    add_y_position(scales = "free") %>%
    mutate(
      Genotype_label = factor(
        case_when(
          Genotype == "CTRL" ~ "CTRL",
          Genotype == "Inv4m" ~ "<i>Inv4m</i>",
          TRUE ~ as.character(Genotype)
        ),
        levels = c("CTRL", "<i>Inv4m</i>")
      )
    )
  
  cat("Pairwise t-tests completed for", 
      length(treatment_and_interaction), "traits\n")
  
  # Show summary
  print(treatment_pairwise %>% 
        select(trait, Genotype, statistic, p, p.signif) %>%
        head(10))
} else {
  treatment_pairwise <- NULL
  cat("No significant treatment or interaction effects found\n")
}

## Pairwise t-tests completed for 8 traits
## # A tibble: 10 × 5
##    trait Genotype statistic        p p.signif
##    <chr> <fct>        <dbl>    <dbl> <chr>   
##  1 CD    CTRL         0.187 8.55e- 1 ns      
##  2 DTA   CTRL        -9.54  1.4 e-10 ****    
##  3 DTS   CTRL       -10.4   3.12e-11 ****    
##  4 KW50  CTRL         2.55  2.06e- 2 *       
##  5 STW40 CTRL         5.93  3.90e- 6 ****    
##  6 STW50 CTRL        10.2   3.24e- 8 ****    
##  7 STW60 CTRL         7.18  9.61e- 7 ****    
##  8 STWHV CTRL         4.73  9.95e- 5 ****    
##  9 CD    Inv4m        6.92  3.45e- 7 ****    
## 10 DTA   Inv4m      -10.0   4.18e-11 ****

4. Phenotype Visualization

Create plotting function

# Function to create treatment comparison plots
plot_treatment_effect <- function(trait_name, 
                                   trait_title, 
                                   trait_ylab,
                                   y_limits = NULL,
                                   y_breaks = NULL,
                                   bracket_nudge = 0.05) {  # Fraction of y-range (0-1)
  
  # Get t-test results for this trait
  t_test_data <- NULL
  if (!is.null(treatment_pairwise)) {
    t_test_data <- treatment_pairwise %>%
      filter(trait == trait_name)
    
    # Adjust bracket position RELATIVE to visible plotting area
    # This solves the issue of brackets being in absolute units
    # while coord_cartesian() shows only a portion of the range
    if (!is.null(t_test_data) && nrow(t_test_data) > 0) {
      if (!is.null(y_limits)) {
        # Use specified y-axis limits
        y_range <- diff(y_limits)
        y_max <- y_limits[2]
        
        # Position brackets as: top - (nudge_fraction × visible_range)
        t_test_data <- t_test_data %>%
          mutate(y.position = y_max - (bracket_nudge * y_range))
      } else {
        # If no limits specified, use actual data range
        data_range <- pheno %>%
          filter(!is.na(.data[[trait_name]])) %>%
          pull(.data[[trait_name]]) %>%
          range()
        
        y_range <- diff(data_range)
        y_max <- max(data_range)
        
        # Position brackets above data
        t_test_data <- t_test_data %>%
          mutate(y.position = y_max + (bracket_nudge * y_range))
      }
    }
  }
  
  # Create base plot
  p <- pheno %>%
    ggplot(aes(x = Treatment, y = .data[[trait_name]], col = Treatment)) +
    ggtitle(trait_title) +
    ylab(trait_ylab) +
    geom_boxplot(width = 0.25, linewidth = 1, alpha = 0) %>% 
      with_shadow(
        colour = "black",
        x_offset = 0,
        y_offset = 0,
        sigma = 1
      ) +
    geom_quasirandom(size = 2) %>% 
      with_shadow(
        colour = "black",
        x_offset = 0,
        y_offset = 0,
        sigma = 1
      ) +
    scale_color_manual(values = pal) +
    facet_wrap(. ~ Genotype_label) +
    pheno_theme2 +
    theme(strip.text = element_markdown())
  
  # Add significance brackets if available
  if (!is.null(t_test_data) && nrow(t_test_data) > 0) {
    p <- p + stat_pvalue_manual(
      t_test_data,
      size = 10,
      bracket.size = 0.8,
      hide.ns = FALSE
    )
  }
  
  # Add y-axis limits if specified
  if (!is.null(y_limits)) {
    p <- p + coord_cartesian(ylim = y_limits)
  }
  
  # Add y-axis breaks if specified
  if (!is.null(y_breaks)) {
    p <- p + scale_y_continuous(breaks = y_breaks)
  }
  
  return(p)
}

Days to Anthesis

if ("DTA" %in% treatment_and_interaction) {
  dta_plot <- plot_treatment_effect(
    trait_name = "DTA",
    trait_title = "Anthesis",
    trait_ylab = "Days",
    y_limits = c(68, 80),
    y_breaks = 68:80,
    bracket_nudge = 0.08
  )
  print(dta_plot)
}

Days to Silking

if ("DTS" %in% treatment_and_interaction) {
  dts_plot <- plot_treatment_effect(
    trait_name = "DTS",
    trait_title = "Silking",
    trait_ylab = "Days",
    y_limits = c(68, 80),
    y_breaks = 68:80,
    bracket_nudge = 0
  )
  print(dts_plot)
}

Plant Height

if ("PH" %in% treatment_and_interaction) {
  ph_plot <- plot_treatment_effect(
    trait_name = "PH",
    trait_title = "Plant Height",
    trait_ylab = "cm",
    y_limits = c(140, 190),
    y_breaks = seq(140, 190, by = 10),
    bracket_nudge = 0.08
  )
  print(ph_plot)
}

Cob Diameter

if ("CD" %in% treatment_and_interaction) {
  cd_plot <- plot_treatment_effect(
    trait_name = "CD",
    trait_title = "Cob Diameter",
    trait_ylab = "cm",
    y_limits = c(21, 31),
    y_breaks = 21:31,
    bracket_nudge = 0.08
  )
  print(cd_plot)
}

50 Kernel Weight

if ("KW50" %in% treatment_and_interaction) {
  kw50_plot <- plot_treatment_effect(
    trait_name = "KW50",
    trait_title = "50 Kernel Weight",
    trait_ylab = "g",
    y_limits = c(5, 17),
    y_breaks = 5:17,
    bracket_nudge = 0.08
  )
  print(kw50_plot)
}

Stover Weight at 40 DAP

if ("STW40" %in% treatment_and_interaction) {
  stw40_plot <- plot_treatment_effect(
    trait_name = "STW40",
    trait_title = "Stover Weight <br> at 40 DAP",
    trait_ylab = "g",
    bracket_nudge = 0.08
  )
  print(stw40_plot)
}

Stover Weight at 50 DAP

if ("STW50" %in% treatment_and_interaction) {
  stw50_plot <- plot_treatment_effect(
    trait_name = "STW50",
    trait_title = "Stover Weight <br> at 50 DAP",
    trait_ylab = "g",
    bracket_nudge = 0.08
  )
  print(stw50_plot)
}

Stover Weight at 60 DAP

if ("STW60" %in% treatment_and_interaction) {
  stw60_plot <- plot_treatment_effect(
    trait_name = "STW60",
    trait_title = "Stover Weight <br> at 60 DAP",
    trait_ylab = "g",
    bracket_nudge = 0.08
  )
  print(stw60_plot)
}

Stover Weight at Harvest

if ("STWHV" %in% treatment_and_interaction) {
  stwhv_plot <- plot_treatment_effect(
    trait_name = "STWHV",
    trait_title = "Stover Dry Weight <br> at Harvest",
    trait_ylab = "g",
    y_limits = c(50, 175),
    bracket_nudge = 0.08
  )
  print(stwhv_plot)
}

5. Stover Dry Weight Time Course

Prepare time-series data

# Create long-format data with all time points
dw_raw <- pheno %>%
  mutate(STW121 = STWHV) %>%
  select(
    plotId, 
    Genotype, 
    Genotype_label,
    Treatment, 
    starts_with("STW")
  ) %>%
  pivot_longer(
    cols = c("STW40", "STW50", "STW60", "STW121"),
    names_to = "DAP_string",
    values_to = "STW"
  ) %>%
  mutate(
    DAP = as.integer(gsub("STW", "", DAP_string)),
    # Ensure factor levels are maintained
    Genotype = factor(Genotype, levels = c("CTRL", "Inv4m")),
    Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))
  ) %>%
  filter(!is.na(STW)) %>%
  droplevels()

cat("Time-series data dimensions:", dim(dw_raw), "\n")

## Time-series data dimensions: 236 8

cat("Time points:", sort(unique(dw_raw$DAP)), "\n")

## Time points: 40 50 60 121

cat("Genotype_label levels:", levels(dw_raw$Genotype_label), "\n")

## Genotype_label levels: CTRL <i>Inv4m</i>

Time-point specific t-tests

# Perform t-tests at each time point within each genotype
treatment_timeseries_test <- dw_raw %>%
  ungroup() %>%
  filter(DAP > 1) %>%
  mutate(DAP = as.factor(DAP)) %>%
  group_by(Genotype, DAP) %>%
  t_test(STW ~ Treatment, ref.group = "+P") %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_y_position(scales = "free_y") %>%
  add_x_position(x = "DAP", group = NULL, dodge = 1) %>%
  mutate(
    Genotype_label = factor(
      case_when(
        Genotype == "CTRL" ~ "CTRL",
        Genotype == "Inv4m" ~ "<i>Inv4m</i>",
        TRUE ~ as.character(Genotype)
      ),
      levels = c("CTRL", "<i>Inv4m</i>")
    )
  )

# Adjust bracket positions relative to data range (not absolute units)
data_range <- range(dw_raw$STW[dw_raw$DAP > 1], na.rm = TRUE)
y_range <- diff(data_range)

treatment_timeseries_test <- treatment_timeseries_test %>%
  mutate(y.position = y.position + 0.02 * y_range)  # 8% of data range

cat("\n=== Time-series t-test summary ===\n")

## 
## === Time-series t-test summary ===

print(treatment_timeseries_test %>% 
      select(Genotype, DAP, statistic, p, p.adj, p.adj.signif))

## # A tibble: 8 × 6
##   Genotype DAP   statistic            p       p.adj p.adj.signif
##   <fct>    <fct>     <dbl>        <dbl>       <dbl> <chr>       
## 1 CTRL     40         5.93 0.0000039    0.0000078   ****        
## 2 CTRL     50        10.2  0.0000000324 0.000000130 ****        
## 3 CTRL     60         7.18 0.000000961  0.00000256  ****        
## 4 CTRL     121        4.73 0.0000995    0.000159    ***         
## 5 Inv4m    40         4.01 0.000445     0.000593    ***         
## 6 Inv4m    50         9.49 0.0000000271 0.000000130 ****        
## 7 Inv4m    60         3.97 0.00141      0.00161     **          
## 8 Inv4m    121        3.25 0.00369      0.00369     **

Visualize stover weight trajectory

pd <- position_dodge(1)

p_dw <- dw_raw %>%
  ungroup() %>%
  filter(DAP > 1) %>%
  ggplot(aes(x = as.factor(DAP), y = STW, col = Treatment)) +
  ggtitle("Stover Dry Weight") +
  ylab("g") +
  xlab("Days After Planting") +
  geom_boxplot(
    width = 0.25, 
    linewidth = 1, 
    alpha = 0, 
    position = pd
  ) %>% 
    with_shadow(
      colour = "black",
      x_offset = 0,
      y_offset = 0,
      sigma = 1
    ) +
  geom_quasirandom(
    size = 2, 
    width = 0.25, 
    dodge.width = 1
  ) %>% 
    with_shadow(
      colour = "black",
      x_offset = 0,
      y_offset = 0,
      sigma = 1
    ) +
  facet_wrap(. ~ Genotype_label) +
  stat_pvalue_manual(
    treatment_timeseries_test,
    size = 10,
    bracket.size = 0.8,
    tip.length = 0.01,
    hide.ns = TRUE
  ) +
  scale_color_manual(values = pal) +
  scale_y_continuous(breaks = seq(0,150,25),
                    expand = expansion(mult = c(0.05, 0.1))) +
  pheno_theme3 +
  theme(
    legend.position = c(0.12, 0.7),
    strip.text = element_markdown()
  )

print(p_dw)

# Extract Inv4m genotype effects for all stover weight traits with CIs
# Extract both Inv4m genotype effects and -P treatment effects for stover weights
stover_effects <- effects %>%
  filter(
    predictor %in% c("GenotypeInv4m", "Treatment-P"),
    grepl("^STW", response)
  ) %>%
  # Calculate reference means
  left_join(
    effects %>%
      filter(grepl("Intercept|Treatment-P|GenotypeInv4m", predictor)) %>%
      select(response, predictor, Estimate) %>%
      pivot_wider(names_from = predictor, values_from = Estimate) %>%
      rename(
        intercept = `(Intercept)`,
        treatment_effect = `Treatment-P`,
        genotype_effect = GenotypeInv4m
      ) %>%
      mutate(
        # +P marginal mean for treatment effects
        plus_p_marginal_mean = intercept + (genotype_effect / 2),
        # CTRL marginal mean for genotype effects
        ctrl_marginal_mean = intercept + (treatment_effect / 2)
      ) %>%
      select(response, plus_p_marginal_mean, ctrl_marginal_mean),
    by = "response"
  ) %>%
  mutate(
    # Use appropriate denominator based on effect type
    reference_mean = if_else(
      predictor == "Treatment-P",
      plus_p_marginal_mean,
      ctrl_marginal_mean
    ),
    
    # Calculate 95% CI
    ci_lower = Estimate - 1.96 * Std.Error,
    ci_upper = Estimate + 1.96 * Std.Error,
    
    # Calculate percentage and its CI
    estimate_pct = (Estimate / reference_mean) * 100,
    pct_ci_lower = (ci_lower / reference_mean) * 100,
    pct_ci_upper = (ci_upper / reference_mean) * 100,
    
    # Extract time point
    DAP = case_when(
      response == "STW40" ~ "40",
      response == "STW50" ~ "50",
      response == "STW60" ~ "60",
      response == "STWHV" ~ "121",
      TRUE ~ NA_character_
    ),
    DAP = factor(DAP, levels = c("40", "50", "60", "121")),
    
    # Clean predictor names
    effect_type = case_when(
      predictor == "GenotypeInv4m" ~ "Inv4m",
      predictor == "Treatment-P" ~ "-P",
      TRUE ~ predictor
    ),
    effect_type = factor(effect_type, levels = c("Inv4m", "-P")),
    
    significant = p.adjust < 0.05
  ) %>%
  arrange(DAP, effect_type)

# Fit regression for -P data
minus_p_data <- stover_effects %>%
  filter(effect_type == "-P") %>%
  mutate(DAP_numeric = as.numeric(as.character(DAP)))

lm_minus_p <- lm(estimate_pct ~ DAP_numeric, data = minus_p_data)
r_squared_minus_p <- summary(lm_minus_p)$r.squared
p_value_minus_p <- summary(lm_minus_p)$coefficients[2, 4]

# Fit regression for Inv4m data
inv4m_data <- stover_effects %>%
  filter(effect_type == "Inv4m") %>%
  mutate(DAP_numeric = as.numeric(as.character(DAP)))

lm_inv4m <- lm(estimate_pct ~ DAP_numeric, data = inv4m_data)
r_squared_inv4m <- summary(lm_inv4m)$r.squared
p_value_inv4m <- summary(lm_inv4m)$coefficients[2, 4]

# Create annotation text for both
regression_label_minus_p <- sprintf(
  "italic(R)^2 == %.3f~~italic(p) == %.3f",
  r_squared_minus_p,
  p_value_minus_p
)

regression_label_inv4m <- sprintf(
  "italic(R)^2 == %.3f~~italic(p) == %.3f",
  r_squared_inv4m,
  p_value_inv4m
)

# Create plot comparing both effects
p_biomass_comparison <- stover_effects %>%
  mutate(DAP_numeric = as.numeric(as.character(DAP))) %>%
  ggplot(aes(x = DAP_numeric, y = -estimate_pct, group = effect_type)) +
  # Add regression line for -P
  geom_smooth(
    data = minus_p_data,
    method = "lm",
    se = FALSE,
    linewidth = 1,
    linetype = "dashed",
    color = "black"
  ) +
  # Add regression line for Inv4m
  geom_smooth(
    data = inv4m_data,
    method = "lm",
    se = FALSE,
    linewidth = 1,
    linetype = "dotted",
    color = "black"
  ) +
  geom_pointrange(
    aes(ymin = -pct_ci_lower, ymax = -pct_ci_upper, 
        shape = effect_type),
    color = "black",
    size = 1,
    linewidth = 1,
    fatten = 5,
    position = position_dodge(width = 3)
  ) +
  # Add R² and p-value annotation for -P
  annotate(
    "text",
    x = 80,
    y = 45,
    label = regression_label_minus_p,
    parse = TRUE,
    size = 6,
    hjust = 0
  ) +
  # Add R² and p-value annotation for Inv4m
  annotate(
    "text",
    x = 80,
    y = 5,
    label = regression_label_inv4m,
    parse = TRUE,
    size = 6,
    hjust = 0
  ) +
  scale_x_continuous(
    breaks = c(40, 50, 60, 121),
    labels = c("40", "50", "60", "121")
  ) +
  scale_shape_manual(
    values = c(18, 16),
    labels = c("<i>Inv4m</i>", "-P")
  ) + 
  guides(shape = guide_legend(reverse = TRUE)) +
  labs(
    title = "Stover Biomass Reduction",
    x = "Days After Planting",
    y = "% reference mean",
    shape = "Predictor"
  ) +
  theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(color = "black", size = 18),
    legend.position = c(0.8, 0.8),
    legend.text = element_markdown(size = 20)
  )

print(p_biomass_comparison)

p_stw <- ggarrange(
  p_dw,
  p_biomass_comparison,
  nrow = 1,
  ncol = 2,
  labels = c("E", "F"),
  font.label = list(size = 30)
)

p_stw

6. Combined Panels

Main phenotype panel

# Create list of key phenotype plots
plot_list <- list()

# Add plots if they exist
if (exists("dta_plot")) plot_list$dta <- dta_plot
if (exists("dts_plot")) plot_list$dts <- dts_plot
if (exists("kw50_plot")) plot_list$kw50 <- kw50_plot
if (exists("cd_plot")) plot_list$cd <- cd_plot

# Create combined panel if we have plots
if (length(plot_list) >= 4) {
  # Select first 8 plots for main panel
  main_plots <- plot_list[1:min(8, length(plot_list))]
  
  combined_plot <- ggarrange(
    plotlist = main_plots,
    ncol = 4,
    nrow = 1,
    labels = LETTERS[1:length(main_plots)],
    font.label = list(size = 30)
  )
  
  print(combined_plot)
} else if (length(plot_list) > 0) {
  # If fewer than 4 plots, arrange in rows
  combined_plot <- ggarrange(
    plotlist = plot_list,
    ncol = 2,
    nrow = ceiling(length(plot_list) / 2),
    labels = LETTERS[1:length(plot_list)],
    font.label = list(size = 30)
  )
  
  print(combined_plot)
}

Combined figure with time series

# Create comprehensive figure including time series
if (exists("combined_plot") && exists("p_stw")) {
  full_figure <- ggarrange(
    combined_plot,
    p_stw,
    ncol = 1,
    nrow = 2,
    # heights = c(1, 1.2),
    labels = c("", "E"),
    font.label = list(size = 30)
  )
  
  print(full_figure)
}

Session Info

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.50       rstatix_0.7.2    ggbeeswarm_0.7.2 ggpubr_0.6.1    
##  [5] ggfx_1.0.2       ggtext_0.1.2     lubridate_1.9.4  forcats_1.0.0   
##  [9] stringr_1.5.1    dplyr_1.1.4      purrr_1.1.0      readr_2.1.5     
## [13] tidyr_1.3.1      tibble_3.3.0     ggplot2_3.5.2    tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       beeswarm_0.4.0     xfun_0.53          bslib_0.9.0       
##  [5] lattice_0.22-7     tzdb_0.5.0         vctrs_0.6.5        tools_4.5.1       
##  [9] generics_0.1.4     pkgconfig_2.0.3    Matrix_1.7-3       RColorBrewer_1.1-3
## [13] lifecycle_1.0.4    compiler_4.5.1     farver_2.1.2       textshaping_1.0.1 
## [17] carData_3.0-5      vipor_0.4.7        litedown_0.7       htmltools_0.5.8.1 
## [21] sass_0.4.10        yaml_2.3.10        Formula_1.2-5      pillar_1.11.0     
## [25] car_3.1-3          jquerylib_0.1.4    cachem_1.1.0       magick_2.8.7      
## [29] abind_1.4-8        nlme_3.1-168       commonmark_2.0.0   tidyselect_1.2.1  
## [33] digest_0.6.37      stringi_1.8.7      splines_4.5.1      labeling_0.4.3    
## [37] cowplot_1.2.0      fastmap_1.2.0      grid_4.5.1         cli_3.6.5         
## [41] magrittr_2.0.3     utf8_1.2.6         broom_1.0.9        withr_3.0.2       
## [45] scales_1.4.0       backports_1.5.0    timechange_0.3.0   rmarkdown_2.29    
## [49] ggsignif_0.6.4     ragg_1.4.0         hms_1.1.3          evaluate_1.0.4    
## [53] mgcv_1.9-3         markdown_2.0       rlang_1.1.6        gridtext_0.1.5    
## [57] Rcpp_1.1.0         glue_1.8.0         xml2_1.4.0         rstudioapi_0.17.1 
## [61] jsonlite_2.0.0     R6_2.6.1           systemfonts_1.2.3