PSU 2022 Inv4m Phenotype Analysis: Marginal Effects Approach

Overview

This analysis uses a two-stage approach: 1. Test all model coefficients (Genotype, Treatment, Interaction) with single FDR correction 2. For traits without significant interactions: report marginal contrasts (averaged effects) For traits with significant interactions: report conditional contrasts (within-level effects)

1. Setup and Data Loading

Load libraries

library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggtext)
library(ggfx)
library(ggpubr)
library(ggbeeswarm)
library(rstatix)
library(emmeans)
library(knitr)

Define aesthetic constants

pal <- c("gold", "purple4")

pheno_theme <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.x = element_blank(),
    axis.text.x = element_text(
      face = "bold", 
      color = "black", 
      size = 20, 
      angle = 45, 
      hjust = 1
    ),
    strip.background = element_blank(),
    strip.text = element_text(size = 25),
    legend.position = "none"
  )

pheno_theme2 <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(size = 25, face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 20),
    legend.position = "none"
  )

pheno_theme3 <- theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(color = "black", size = 20),
    strip.background = element_blank(),
    strip.text = element_text(size = 20),
    legend.position = "none"
  )

Load spatially corrected data

sp_corrected_file <- "PSU2022_spatially_corrected_both_treatments.csv"

sp_corrected_combined <- read.csv(sp_corrected_file) %>%
  mutate(
    Treatment = factor(Treatment, levels = c("+P", "-P")),
    genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
  )

cat("Spatially corrected data dimensions:", dim(sp_corrected_combined), "\n")

## Spatially corrected data dimensions: 127 39

cat("Treatment levels:", levels(sp_corrected_combined$Treatment), "\n")

## Treatment levels: +P -P

cat("Genotype levels:", levels(sp_corrected_combined$genotype), "\n")

## Genotype levels: CTRL Inv4m

Prepare plotting data

pheno <- sp_corrected_combined %>%
  filter(genotype %in% c("CTRL", "Inv4m")) %>%
  mutate(genotype = factor(genotype, levels = c("CTRL", "Inv4m"))) %>%
  droplevels() %>%
  rename(Genotype = genotype) %>%
  mutate(
    Genotype_label = case_when(
      Genotype == "CTRL" ~ "CTRL",
      Genotype == "Inv4m" ~ "<i>Inv4m</i>",
      TRUE ~ as.character(Genotype)
    ),
    Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))
  )

cat("Analysis data dimensions:", dim(pheno), "\n")

## Analysis data dimensions: 64 40

2. Statistical Analysis

Identify phenotype variables

base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")

available_phenos <- intersect(
  c(base_phenotypes, ear_phenotypes),
  colnames(pheno)
)

cat("Available phenotypes for analysis:\n")

## Available phenotypes for analysis:

cat(paste(available_phenos, collapse = ", "), "\n")

## PH, DTA, DTS, STW40, STW50, STW60, STWHV, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

vars <- available_phenos

Stage 1: Fit models and extract all effects with single FDR

# Fit models and extract ALL non-intercept coefficients
all_effects <- lapply(vars, function(x) {
  n_obs <- sum(!is.na(pheno[[x]]))
  
  if (n_obs < 20) {
    message("Skipping ", x, " - insufficient data")
    return(NULL)
  }
  
  model <- tryCatch(
    lm(as.formula(paste(x, "~ Genotype * Treatment")), data = pheno),
    error = function(e) {
      message("Model failed for ", x)
      return(NULL)
    }
  )
  
  if (is.null(model)) return(NULL)
  
  # Extract coefficients, excluding intercept
  coef_summary <- coef(summary(model))
  coef_summary <- coef_summary[!grepl("Intercept", rownames(coef_summary)), , drop = FALSE]
  
  if (nrow(coef_summary) == 0) return(NULL)
  
  out <- coef_summary %>%
    as.data.frame() %>%
    tibble::rownames_to_column("predictor")
  
  colnames(out)[3] <- "Std.Error"
  colnames(out)[5] <- "p.value"
  out$response <- x
  out %>% select(response, predictor, Estimate, Std.Error, p.value)
}) %>%
  bind_rows()

# Apply single FDR correction to all effects
all_effects <- all_effects %>%
  mutate(p.adjust = p.adjust(p.value, method = "fdr")) %>%
  arrange(p.adjust)

cat("\n=== All Model Effects (non-intercept) ===\n")

## 
## === All Model Effects (non-intercept) ===

cat("Total effects tested:", nrow(all_effects), "\n")

## Total effects tested: 48

print(all_effects %>% head(20))

##    response                 predictor    Estimate  Std.Error      p.value
## 1     STW50               Treatment-P -18.7588852 1.58289202 1.598759e-16
## 2       DTS               Treatment-P   3.0621067 0.32199366 1.383332e-13
## 3       DTA               Treatment-P   3.4328993 0.36759849 2.668769e-13
## 4     STW40               Treatment-P  -6.5170078 1.00758227 2.137127e-08
## 5     STW60               Treatment-P -33.3434615 4.97340523 1.741324e-08
## 6     STWHV               Treatment-P -22.4074887 4.70104209 1.299584e-05
## 7       DTA             GenotypeInv4m  -1.4777836 0.36759849 1.648877e-04
## 8       DTS             GenotypeInv4m  -1.2841867 0.32199366 1.833645e-04
## 9        PH             GenotypeInv4m   5.5697807 1.48063541 3.853788e-04
## 10     KW50               Treatment-P  -1.8013514 0.53628018 1.521219e-03
## 11    STW40             GenotypeInv4m  -2.9345129 1.00758227 5.056342e-03
## 12       CD GenotypeInv4m:Treatment-P  -2.6176674 0.97676410 1.000036e-02
## 13       HI             GenotypeInv4m   0.1718492 0.06560131 1.168579e-02
## 14    STW50             GenotypeInv4m  -4.1128444 1.63844890 1.515412e-02
## 15    STWHV             GenotypeInv4m -11.6548319 4.70104209 1.609593e-02
## 16       EL             GenotypeInv4m   0.8936462 0.38817860 2.561592e-02
## 17      TKW             GenotypeInv4m  11.8709854 5.37554617 3.193065e-02
## 18       EL               Treatment-P   0.8272244 0.38244294 3.544784e-02
## 19    STW50 GenotypeInv4m:Treatment-P   4.6198626 2.22224856 4.248080e-02
## 20    STW40 GenotypeInv4m:Treatment-P   2.8619497 1.41339838 4.741356e-02
##        p.adjust
## 1  7.674042e-15
## 2  3.319998e-12
## 3  4.270031e-12
## 4  2.051642e-07
## 5  2.051642e-07
## 6  1.039667e-04
## 7  1.100187e-03
## 8  1.100187e-03
## 9  2.055354e-03
## 10 7.301852e-03
## 11 2.206404e-02
## 12 4.000144e-02
## 13 4.314752e-02
## 14 5.150698e-02
## 15 5.150698e-02
## 16 7.684777e-02
## 17 9.015714e-02
## 18 9.452758e-02
## 19 1.073199e-01
## 20 1.089860e-01

Identify significant effects and traits with interactions

# Significant effects at FDR < 0.05
significant_effects <- all_effects %>%
  filter(p.adjust < 0.05)

cat("\n=== Significant Effects (FDR < 0.05) ===\n")

## 
## === Significant Effects (FDR < 0.05) ===

print(significant_effects)

##    response                 predictor    Estimate  Std.Error      p.value
## 1     STW50               Treatment-P -18.7588852 1.58289202 1.598759e-16
## 2       DTS               Treatment-P   3.0621067 0.32199366 1.383332e-13
## 3       DTA               Treatment-P   3.4328993 0.36759849 2.668769e-13
## 4     STW40               Treatment-P  -6.5170078 1.00758227 2.137127e-08
## 5     STW60               Treatment-P -33.3434615 4.97340523 1.741324e-08
## 6     STWHV               Treatment-P -22.4074887 4.70104209 1.299584e-05
## 7       DTA             GenotypeInv4m  -1.4777836 0.36759849 1.648877e-04
## 8       DTS             GenotypeInv4m  -1.2841867 0.32199366 1.833645e-04
## 9        PH             GenotypeInv4m   5.5697807 1.48063541 3.853788e-04
## 10     KW50               Treatment-P  -1.8013514 0.53628018 1.521219e-03
## 11    STW40             GenotypeInv4m  -2.9345129 1.00758227 5.056342e-03
## 12       CD GenotypeInv4m:Treatment-P  -2.6176674 0.97676410 1.000036e-02
## 13       HI             GenotypeInv4m   0.1718492 0.06560131 1.168579e-02
##        p.adjust
## 1  7.674042e-15
## 2  3.319998e-12
## 3  4.270031e-12
## 4  2.051642e-07
## 5  2.051642e-07
## 6  1.039667e-04
## 7  1.100187e-03
## 8  1.100187e-03
## 9  2.055354e-03
## 10 7.301852e-03
## 11 2.206404e-02
## 12 4.000144e-02
## 13 4.314752e-02

# Identify traits with significant interactions
traits_with_interaction <- significant_effects %>%
  filter(grepl(":", predictor)) %>%
  pull(response) %>%
  unique()

cat("\n=== Traits with Significant Interactions ===\n")

## 
## === Traits with Significant Interactions ===

if (length(traits_with_interaction) > 0) {
  cat(paste(traits_with_interaction, collapse = ", "), "\n")
} else {
  cat("None\n")
}

## CD

# Get all traits with any significant effect
traits_with_effects <- significant_effects %>%
  pull(response) %>%
  unique()

cat("\n=== All Traits with Significant Effects ===\n")

## 
## === All Traits with Significant Effects ===

cat(paste(traits_with_effects, collapse = ", "), "\n")

## STW50, DTS, DTA, STW40, STW60, STWHV, PH, KW50, CD, HI

cat("Total:", length(traits_with_effects), "traits\n")

## Total: 10 traits

Stage 2: Extract marginal or conditional contrasts for reporting

# Extract contrasts for significant traits only
contrasts_for_reporting <- lapply(traits_with_effects, function(x) {
  model <- lm(as.formula(paste(x, "~ Genotype + Treatment")), data = pheno)
  
  has_interaction <- x %in% traits_with_interaction
  
  if (has_interaction) {
    # Conditional contrasts for interaction traits
    treatment_cond <- summary(pairs(emmeans(model, ~ Treatment | Genotype)))
    genotype_cond <- summary(pairs(emmeans(model, ~ Genotype | Treatment)))
    
    bind_rows(
      treatment_cond %>%
        mutate(
          response = x,
          predictor = paste0("Treatment|", Genotype),
          contrast_type = "conditional"
        ) %>%
        select(response, predictor, contrast_type, estimate, SE, p.value),
      genotype_cond %>%
        mutate(
          response = x,
          predictor = paste0("Genotype|", Treatment),
          contrast_type = "conditional"
        ) %>%
        mutate(estimate = -estimate) %>%  # invert sign for -P vs +P I couldn't invert the contrast levels
        select(response, predictor, contrast_type, estimate, SE, p.value)
    )
  } else {
    # Marginal contrasts for non-interaction traits
    treatment_marg <- summary(pairs(emmeans(model, ~ Treatment)))
    genotype_marg <- summary(pairs(emmeans(model, ~ Genotype)))
    
    bind_rows(
      data.frame(
        response = x,
        predictor = "Treatment",
        contrast_type = "marginal",
        estimate = -treatment_marg$estimate, # invert sign for -P vs +P I couldn't invert the contrast levels
        SE = treatment_marg$SE,
        p.value = treatment_marg$p.value
      ),
      data.frame(
        response = x,
        predictor = "Genotype",
        contrast_type = "marginal",
        estimate = -genotype_marg$estimate, # invert sign for -P vs +P I couldn't invert the contrast levels
        SE = genotype_marg$SE,
        p.value = genotype_marg$p.value
      )
    )
  }
}) %>%
  bind_rows()

cat("\n=== Contrasts for Reporting ===\n")

## 
## === Contrasts for Reporting ===

print(contrasts_for_reporting)

##    response       predictor contrast_type     estimate         SE      p.value
## 1     STW50       Treatment      marginal -16.41494589 1.14466632 4.662928e-20
## 2     STW50        Genotype      marginal  -1.60148084 1.14042941 1.659616e-01
## 3       DTS       Treatment      marginal   3.41646064 0.23032280 6.674410e-22
## 4       DTS        Genotype      marginal  -0.92983272 0.23032280 1.534890e-04
## 5       DTA       Treatment      marginal   3.60161149 0.25869545 1.339050e-20
## 6       DTA        Genotype      marginal  -1.30907138 0.25869545 4.113753e-06
## 7     STW40       Treatment      marginal  -5.06257438 0.72462888 2.638380e-09
## 8     STW40        Genotype      marginal  -1.48007949 0.72462888 4.549988e-02
## 9     STW60       Treatment      marginal -27.79553850 3.51749524 2.067418e-10
## 10    STW60        Genotype      marginal   2.87358466 3.51024017 4.168069e-01
## 11    STWHV       Treatment      marginal -18.56909001 3.27431036 4.511543e-07
## 12    STWHV        Genotype      marginal  -7.81643322 3.27431036 2.020047e-02
## 13       PH       Treatment      marginal   3.42625576 1.04388584 1.706870e-03
## 14       PH        Genotype      marginal   6.40830361 1.04388584 6.819607e-08
## 15     KW50       Treatment      marginal  -1.86372143 0.36979252 6.475186e-06
## 16     KW50        Genotype      marginal   0.13864020 0.36926537 7.089149e-01
## 17       CD  Treatment|CTRL   conditional   1.53892221 0.51745913 4.515382e-03
## 18       CD Treatment|Inv4m   conditional   1.53892221 0.51745913 4.515382e-03
## 19       CD     Genotype|+P   conditional  -1.23073512 0.51672148 2.107391e-02
## 20       CD     Genotype|-P   conditional  -1.23073512 0.51672148 2.107391e-02
## 21       HI       Treatment      marginal   0.07999539 0.04505377 8.189278e-02
## 22       HI        Genotype      marginal   0.12166268 0.04498954 9.332652e-03

Calculate percentage effects

# Calculate reference means for each trait
reference_means_list <- lapply(traits_with_effects, function(x) {
  group_means <- pheno %>%
    filter(!is.na(.data[[x]])) %>%
    group_by(Genotype, Treatment) %>%
    summarise(mean_val = mean(.data[[x]], na.rm = TRUE), .groups = "drop")
  
  plus_p_marginal <- group_means %>%
    filter(Treatment == "+P") %>%
    summarise(mean = mean(mean_val)) %>%
    pull(mean)
  
  ctrl_marginal <- group_means %>%
    filter(Genotype == "CTRL") %>%
    summarise(mean = mean(mean_val)) %>%
    pull(mean)
  
  ctrl_minus_p <- group_means %>%
    filter(Genotype == "CTRL", Treatment == "-P") %>%
    pull(mean_val)
  
  if (length(ctrl_minus_p) == 0) ctrl_minus_p <- NA_real_
  
  data.frame(
    response = x,
    plus_p_marginal_mean = plus_p_marginal,
    ctrl_marginal_mean = ctrl_marginal,
    ctrl_minus_p_mean = ctrl_minus_p
  )
}) %>%
  bind_rows()

# Add percentages to contrasts
contrasts_with_pct <- contrasts_for_reporting %>%
  left_join(reference_means_list, by = "response") %>%
  mutate(
    reference_mean = case_when(
      predictor == "Treatment" ~ plus_p_marginal_mean,
      predictor == "Genotype" ~ ctrl_marginal_mean,
      grepl("Treatment\\|CTRL", predictor) ~ plus_p_marginal_mean,
      grepl("Treatment\\|Inv4m", predictor) ~ plus_p_marginal_mean,
      grepl("Genotype\\|\\+P", predictor) ~ ctrl_marginal_mean,
      grepl("Genotype\\|-P", predictor) ~ ctrl_minus_p_mean,
      TRUE ~ NA_real_
    ),
    estimate_pct = (estimate / reference_mean) * 100
  ) %>% select(-starts_with("ctrl_"), -starts_with("plus_"))

cat("\n=== Effects with Percentages ===\n")

## 
## === Effects with Percentages ===

print(contrasts_with_pct %>%
  select(response, predictor, contrast_type, estimate, estimate_pct, SE, p.value))

##    response       predictor contrast_type     estimate estimate_pct         SE
## 1     STW50       Treatment      marginal -16.41494589   -48.381951 1.14466632
## 2     STW50        Genotype      marginal  -1.60148084    -6.019516 1.14042941
## 3       DTS       Treatment      marginal   3.41646064     4.679297 0.23032280
## 4       DTS        Genotype      marginal  -0.92983272    -1.236720 0.23032280
## 5       DTA       Treatment      marginal   3.60161149     5.045611 0.25869545
## 6       DTA        Genotype      marginal  -1.30907138    -1.772935 0.25869545
## 7     STW40       Treatment      marginal  -5.06257438   -47.731076 0.72462888
## 8     STW40        Genotype      marginal  -1.48007949   -16.790072 0.72462888
## 9     STW60       Treatment      marginal -27.79553850   -36.790821 3.51749524
## 10    STW60        Genotype      marginal   2.87358466     4.763014 3.51024017
## 11    STWHV       Treatment      marginal -18.56909001   -18.355897 3.27431036
## 12    STWHV        Genotype      marginal  -7.81643322    -8.160384 3.27431036
## 13       PH       Treatment      marginal   3.42625576     2.083557 1.04388584
## 14       PH        Genotype      marginal   6.40830361     3.932642 1.04388584
## 15     KW50       Treatment      marginal  -1.86372143   -17.689031 0.36979252
## 16     KW50        Genotype      marginal   0.13864020     1.454181 0.36926537
## 17       CD  Treatment|CTRL   conditional   1.53892221     5.864469 0.51745913
## 18       CD Treatment|Inv4m   conditional   1.53892221     5.864469 0.51745913
## 19       CD     Genotype|+P   conditional  -1.23073512    -4.721431 0.51672148
## 20       CD     Genotype|-P   conditional  -1.23073512    -4.738758 0.51672148
## 21       HI       Treatment      marginal   0.07999539    10.695997 0.04505377
## 22       HI        Genotype      marginal   0.12166268    16.750318 0.04498954
##         p.value
## 1  4.662928e-20
## 2  1.659616e-01
## 3  6.674410e-22
## 4  1.534890e-04
## 5  1.339050e-20
## 6  4.113753e-06
## 7  2.638380e-09
## 8  4.549988e-02
## 9  2.067418e-10
## 10 4.168069e-01
## 11 4.511543e-07
## 12 2.020047e-02
## 13 1.706870e-03
## 14 6.819607e-08
## 15 6.475186e-06
## 16 7.089149e-01
## 17 4.515382e-03
## 18 4.515382e-03
## 19 2.107391e-02
## 20 2.107391e-02
## 21 8.189278e-02
## 22 9.332652e-03

Identify traits for visualization

# Traits with treatment effects (marginal or conditional)
treatment_traits <- contrasts_for_reporting %>%
  filter(grepl("Treatment", predictor)) %>%
  pull(response) %>%
  unique()

cat("\n=== Traits for Visualization ===\n")

## 
## === Traits for Visualization ===

cat(paste(treatment_traits, collapse = ", "), "\n")

## STW50, DTS, DTA, STW40, STW60, STWHV, PH, KW50, CD, HI

cat("Total:", length(treatment_traits), "traits\n")

## Total: 10 traits

3. Pairwise Comparisons for Visualization

to_t_test <- pheno %>%
  select(plotId, Genotype, Treatment, all_of(vars)) %>%
  pivot_longer(
    cols = all_of(vars),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(trait %in% treatment_traits)

if (length(treatment_traits) > 0) {
  treatment_pairwise <- to_t_test %>%
    group_by(Genotype, trait) %>%
    t_test(value ~ Treatment, ref.group = "+P") %>%
    adjust_pvalue(method = "fdr") %>%
    add_significance() %>%
    add_x_position(x = "Treatment") %>%
    add_y_position(scales = "free") %>%
    mutate(
      Genotype_label = factor(
        case_when(
          Genotype == "CTRL" ~ "CTRL",
          Genotype == "Inv4m" ~ "<i>Inv4m</i>",
          TRUE ~ as.character(Genotype)
        ),
        levels = c("CTRL", "<i>Inv4m</i>")
      )
    )
  
  cat("\n=== Pairwise t-test summary ===\n")
  print(treatment_pairwise %>% select(trait, Genotype, statistic, p.adj,p.adj.signif), n=20)
} else {
  treatment_pairwise <- NULL
  cat("No traits with treatment effects for visualization\n")
}

## 
## === Pairwise t-test summary ===
## # A tibble: 20 × 5
##    trait Genotype statistic    p.adj p.adj.signif
##    <chr> <fct>        <dbl>    <dbl> <chr>       
##  1 CD    CTRL         0.187 8.55e- 1 ns          
##  2 DTA   CTRL        -9.54  7   e-10 ****        
##  3 DTS   CTRL       -10.4   2.79e-10 ****        
##  4 HI    CTRL        -1.70  1.24e- 1 ns          
##  5 KW50  CTRL         2.55  2.58e- 2 *           
##  6 PH    CTRL        -2.06  5.66e- 2 ns          
##  7 STW40 CTRL         5.93  8.67e- 6 ****        
##  8 STW50 CTRL        10.2   1.08e- 7 ****        
##  9 STW60 CTRL         7.18  2.40e- 6 ****        
## 10 STWHV CTRL         4.73  1.81e- 4 ***         
## 11 CD    Inv4m        6.92  9.86e- 7 ****        
## 12 DTA   Inv4m      -10.0   2.79e-10 ****        
## 13 DTS   Inv4m      -10.9   1.72e-10 ****        
## 14 HI    Inv4m       -0.585 5.95e- 1 ns          
## 15 KW50  Inv4m        5.56  1.75e- 5 ****        
## 16 PH    Inv4m       -2.54  2.19e- 2 *           
## 17 STW40 Inv4m        4.01  7.42e- 4 ***         
## 18 STW50 Inv4m        9.49  1.08e- 7 ****        
## 19 STW60 Inv4m        3.97  2.17e- 3 **          
## 20 STWHV Inv4m        3.25  5.27e- 3 **

4. Plotting Function

plot_treatment_effect <- function(trait_name, 
                                   trait_title, 
                                   trait_ylab,
                                   y_limits = NULL,
                                   y_breaks = NULL,
                                   bracket_nudge = 0.05) {
  
  t_test_data <- NULL
  if (!is.null(treatment_pairwise)) {
    t_test_data <- treatment_pairwise %>%
      filter(trait == trait_name)
    
    if (!is.null(t_test_data) && nrow(t_test_data) > 0) {
      if (!is.null(y_limits)) {
        y_range <- diff(y_limits)
        y_max <- y_limits[2]
        t_test_data <- t_test_data %>%
          mutate(y.position = y_max - (bracket_nudge * y_range))
      } else {
        data_range <- pheno %>%
          filter(!is.na(.data[[trait_name]])) %>%
          pull(.data[[trait_name]]) %>%
          range()
        y_range <- diff(data_range)
        y_max <- max(data_range)
        t_test_data <- t_test_data %>%
          mutate(y.position = y_max + (bracket_nudge * y_range))
      }
    }
  }
  
  p <- pheno %>%
    ggplot(aes(x = Treatment, y = .data[[trait_name]], col = Treatment)) +
    ggtitle(trait_title) +
    ylab(trait_ylab) +
    geom_boxplot(width = 0.25, linewidth = 1, alpha = 0) %>% 
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
    geom_quasirandom(size = 2) %>% 
      with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
    scale_color_manual(values = pal) +
    facet_wrap(. ~ Genotype_label) +
    pheno_theme2 +
    theme(strip.text = element_markdown())
  
  if (!is.null(t_test_data) && nrow(t_test_data) > 0) {
    p <- p + stat_pvalue_manual(
      t_test_data,
      size = 10,
      label = "p.adj.signif"  ,
      bracket.size = 0.8,
      hide.ns = FALSE
    )
  }
  
  if (!is.null(y_limits)) {
    p <- p + coord_cartesian(ylim = y_limits)
  }
  
  if (!is.null(y_breaks)) {
    p <- p + scale_y_continuous(breaks = y_breaks)
  }
  
  return(p)
}

5. Individual Trait Plots

Days to Anthesis

if ("DTA" %in% treatment_traits) {
  dta_plot <- plot_treatment_effect(
    trait_name = "DTA",
    trait_title = "Anthesis",
    trait_ylab = "Days",
    y_limits = c(68, 80),
    y_breaks = 68:80,
    bracket_nudge = 0.08
  )
  print(dta_plot)
}

Days to Silking

if ("DTS" %in% treatment_traits) {
  dts_plot <- plot_treatment_effect(
    trait_name = "DTS",
    trait_title = "Silking",
    trait_ylab = "Days",
    y_limits = c(68, 80),
    y_breaks = 68:80,
    bracket_nudge = 0
  )
  print(dts_plot)
}

Plant Height

if ("PH" %in% treatment_traits) {
  ph_plot <- plot_treatment_effect(
    trait_name = "PH",
    trait_title = "Plant Height",
    trait_ylab = "cm",
    y_limits = c(140, 180),
    y_breaks = seq(140, 180, by = 10),
    bracket_nudge = 0.08
  )
  print(ph_plot)
}

50 Kernel Weight

if ("KW50" %in% treatment_traits) {
  kw50_plot <- plot_treatment_effect(
    trait_name = "KW50",
    trait_title = "50 Kernel Weight",
    trait_ylab = "g",
    y_limits = c(5, 17),
    y_breaks = 5:17,
    bracket_nudge = 0.08
  )
  print(kw50_plot)
}

Cob Diameter

if ("CD" %in% treatment_traits) {
  cd_plot <- plot_treatment_effect(
    trait_name = "CD",
    trait_title = "Cob Diameter",
    trait_ylab = "cm",
    y_limits = c(21, 31),
    y_breaks = 21:31,
    bracket_nudge = 0.08
  )
  print(cd_plot)
}

Stover Weight at 40 DAP

if ("STW40" %in% treatment_traits) {
  stw40_plot <- plot_treatment_effect(
    trait_name = "STW40",
    trait_title = "Stover Weight <br> at 40 DAP",
    trait_ylab = "g",
    bracket_nudge = 0.08
  )
  print(stw40_plot)
}

Stover Weight at 50 DAP

if ("STW50" %in% treatment_traits) {
  stw50_plot <- plot_treatment_effect(
    trait_name = "STW50",
    trait_title = "Stover Weight <br> at 50 DAP",
    trait_ylab = "g",
    bracket_nudge = 0.08
  )
  print(stw50_plot)
}

Stover Weight at 60 DAP

if ("STW60" %in% treatment_traits) {
  stw60_plot <- plot_treatment_effect(
    trait_name = "STW60",
    trait_title = "Stover Weight <br> at 60 DAP",
    trait_ylab = "g",
    bracket_nudge = 0.08
  )
  print(stw60_plot)
}

Stover Weight at Harvest

if ("STWHV" %in% treatment_traits) {
  stwhv_plot <- plot_treatment_effect(
    trait_name = "STWHV",
    trait_title = "Stover Dry Weight <br> at Harvest",
    trait_ylab = "g",
    y_limits = c(50, 175),
    bracket_nudge = 0.08
  )
  print(stwhv_plot)
}

6. Stover Dry Weight Time Course

Prepare time-series data

dw_raw <- pheno %>%
  mutate(STW121 = STWHV) %>%
  select(
    plotId, 
    Genotype, 
    Genotype_label,
    Treatment, 
    starts_with("STW")
  ) %>%
  pivot_longer(
    cols = c("STW40", "STW50", "STW60", "STW121"),
    names_to = "DAP_string",
    values_to = "STW"
  ) %>%
  mutate(
    DAP = as.integer(gsub("STW", "", DAP_string)),
    Genotype = factor(Genotype, levels = c("CTRL", "Inv4m")),
    Genotype_label = factor(Genotype_label, levels = c("CTRL", "<i>Inv4m</i>"))
  ) %>%
  filter(!is.na(STW)) %>%
  droplevels()

Time-point specific t-tests

treatment_timeseries_test <- dw_raw %>%
  ungroup() %>%
  filter(DAP > 1) %>%
  mutate(DAP = as.factor(DAP)) %>%
  group_by(Genotype, DAP) %>%
  t_test(STW ~ Treatment, ref.group = "+P") %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance() %>%
  add_y_position(scales = "free_y") %>%
  add_x_position(x = "DAP", group = NULL, dodge = 1) %>%
  mutate(
    Genotype_label = factor(
      case_when(
        Genotype == "CTRL" ~ "CTRL",
        Genotype == "Inv4m" ~ "<i>Inv4m</i>",
        TRUE ~ as.character(Genotype)
      ),
      levels = c("CTRL", "<i>Inv4m</i>")
    )
  )

data_range <- range(dw_raw$STW[dw_raw$DAP > 1], na.rm = TRUE)
y_range <- diff(data_range)

treatment_timeseries_test <- treatment_timeseries_test %>%
  mutate(y.position = y.position + (0.08 * y_range))

cat("\n=== Time-series t-test summary ===\n")

## 
## === Time-series t-test summary ===

print(treatment_timeseries_test %>% 
      select(Genotype, DAP, statistic, p, p.adj, p.adj.signif))

## # A tibble: 8 × 6
##   Genotype DAP   statistic            p       p.adj p.adj.signif
##   <fct>    <fct>     <dbl>        <dbl>       <dbl> <chr>       
## 1 CTRL     40         5.93 0.0000039    0.0000078   ****        
## 2 CTRL     50        10.2  0.0000000324 0.000000130 ****        
## 3 CTRL     60         7.18 0.000000961  0.00000256  ****        
## 4 CTRL     121        4.73 0.0000995    0.000159    ***         
## 5 Inv4m    40         4.01 0.000445     0.000593    ***         
## 6 Inv4m    50         9.49 0.0000000271 0.000000130 ****        
## 7 Inv4m    60         3.97 0.00141      0.00161     **          
## 8 Inv4m    121        3.25 0.00369      0.00369     **

Visualize stover weight trajectory

pd <- position_dodge(1)

p_dw <- dw_raw %>%
  ungroup() %>%
  filter(DAP > 1) %>%
  ggplot(aes(x = as.factor(DAP), y = STW, col = Treatment)) +
  ggtitle("Stover Dry Weight") +
  ylab("g") +
  xlab("Days After Planting") +
  geom_boxplot(
    width = 0.25, 
    linewidth = 1, 
    alpha = 0, 
    position = pd
  ) %>% 
    with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
  geom_quasirandom(
    size = 2, 
    width = 0.25, 
    dodge.width = 1
  ) %>% 
    with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
  facet_wrap(. ~ Genotype_label) +
  stat_pvalue_manual(
    treatment_timeseries_test,
    size = 10,
    bracket.size = 0.8,
    tip.length = 0.01,
    hide.ns = TRUE
  ) +
  scale_color_manual(values = pal) +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1))) +
  pheno_theme3 +
  theme(
    legend.position = c(0.12, 0.8),
    strip.text = element_markdown()
  )

print(p_dw)

7. Biomass Trajectory Analysis

Extract stover weight effects

# Get marginal contrasts for stover weights
stover_contrasts <- contrasts_for_reporting %>%
  filter(
    grepl("^STW", response),
    predictor %in% c("Genotype", "Treatment")
  )

# Join with reference means and calculate percentages
stover_effects <- stover_contrasts %>%
  left_join(reference_means_list, by = "response") %>%
  mutate(
    reference_mean = if_else(
      predictor == "Treatment",
      plus_p_marginal_mean,
      ctrl_marginal_mean
    ),
    ci_lower = estimate - 1.96 * SE,
    ci_upper = estimate + 1.96 * SE,
    estimate_pct = (estimate / reference_mean) * 100,
    pct_ci_lower = (ci_lower / reference_mean) * 100,
    pct_ci_upper = (ci_upper / reference_mean) * 100,
    DAP = case_when(
      response == "STW40" ~ "40",
      response == "STW50" ~ "50",
      response == "STW60" ~ "60",
      response == "STWHV" ~ "121",
      TRUE ~ NA_character_
    ),
    DAP = factor(DAP, levels = c("40", "50", "60", "121")),
    effect_type = factor(predictor, levels = c("Genotype", "Treatment"))
  ) %>%
  mutate(DAP_numeric = as.numeric(as.character(DAP))) %>%
  arrange(DAP, effect_type)

cat("\n=== Stover Effects Summary ===\n")

## 
## === Stover Effects Summary ===

print(stover_effects %>%
  select(response, DAP, predictor, estimate_pct, p.value))

##   response DAP predictor estimate_pct      p.value
## 1    STW40  40  Genotype   -16.790072 4.549988e-02
## 2    STW40  40 Treatment   -47.731076 2.638380e-09
## 3    STW50  50  Genotype    -6.019516 1.659616e-01
## 4    STW50  50 Treatment   -48.381951 4.662928e-20
## 5    STW60  60  Genotype     4.763014 4.168069e-01
## 6    STW60  60 Treatment   -36.790821 2.067418e-10
## 7    STWHV 121  Genotype    -8.160384 2.020047e-02
## 8    STWHV 121 Treatment   -18.355897 4.511543e-07

Fit regressions

lm(estimate_pct ~ predictor*DAP_numeric , data = stover_effects %>%
       filter(predictor %in% c("Genotype","Treatment")) 
) %>% summary()

## 
## Call:
## lm(formula = estimate_pct ~ predictor * DAP_numeric, data = stover_effects %>% 
##     filter(predictor %in% c("Genotype", "Treatment")))
## 
## Residuals:
##       1       2       3       4       5       6       7       8 
## -9.5320  0.4777  0.9840 -3.9187 11.5120  3.9269 -2.9640 -0.4858 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     -8.27610    9.62994  -0.859   0.4386  
## predictorTreatment             -54.91468   13.61879  -4.032   0.0157 *
## DAP_numeric                      0.02545    0.12886   0.198   0.8531  
## predictorTreatment:DAP_numeric   0.34910    0.18223   1.916   0.1279  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.13 on 4 degrees of freedom
## Multiple R-squared:  0.9049, Adjusted R-squared:  0.8336 
## F-statistic: 12.69 on 3 and 4 DF,  p-value: 0.01641

minus_p_data <- stover_effects %>%
  filter(predictor == "Treatment") 

lm_minus_p <- lm(estimate_pct ~ DAP_numeric, data = minus_p_data)
r_squared_minus_p <- summary(lm_minus_p)$r.squared
p_value_minus_p <- summary(lm_minus_p)$coefficients[2, 4]

inv4m_data <- stover_effects %>%
  filter(predictor == "Genotype") %>%
  mutate(DAP_numeric = as.numeric(as.character(DAP)))

lm_inv4m <- lm(estimate_pct ~ DAP_numeric, data = inv4m_data)
r_squared_inv4m <- summary(lm_inv4m)$r.squared
p_value_inv4m <- summary(lm_inv4m)$coefficients[2, 4]

regression_label_minus_p <- sprintf(
  "italic(R)^2 == %.3f~~italic(p) == %.3f",
  r_squared_minus_p,
  p_value_minus_p
)

regression_label_inv4m <- sprintf(
  "italic(R)^2 == %.3f~~italic(p) == %.3f",
  r_squared_inv4m,
  p_value_inv4m
)

cat("\n=== Regression Statistics ===\n")

## 
## === Regression Statistics ===

cat("Treatment effect:\n")

## Treatment effect:

cat("  R² =", round(r_squared_minus_p, 4), "\n")

##   R² = 0.947

cat("  p-value =", format.pval(p_value_minus_p, digits = 3), "\n")

##   p-value = 0.0268

cat("  Slope =", round(coef(lm_minus_p)[2], 4), "% per day\n\n")

##   Slope = 0.3746 % per day

cat("Genotype effect:\n")

## Genotype effect:

cat("  R² =", round(r_squared_inv4m, 4), "\n")

##   R² = 0.0109

cat("  p-value =", format.pval(p_value_inv4m, digits = 3), "\n")

##   p-value = 0.895

cat("  Slope =", round(coef(lm_inv4m)[2], 4), "% per day\n")

##   Slope = 0.0255 % per day

Plot biomass trajectory

p_biomass_comparison <- stover_effects %>%
  mutate(DAP_numeric = as.numeric(as.character(DAP))) %>%
  ggplot(aes(x = DAP_numeric, y = -estimate_pct, group = effect_type)) +
  geom_hline(yintercept = 0, linetype = "solid", color = "gray50", linewidth = 0.8) +
  geom_smooth(
    data = minus_p_data,
    method = "lm",
    se = FALSE,
    linewidth = 1,
    linetype = "dashed",
    color = "black"
  ) +
  geom_smooth(
    data = inv4m_data,
    method = "lm",
    se = FALSE,
    linewidth = 1,
    linetype = "dotted",
    color = "black"
  ) +
  geom_pointrange(
    aes(ymin = -pct_ci_lower, ymax = -pct_ci_upper, shape = effect_type),
    color = "black",
    size = 1,
    linewidth = 1,
    fatten = 5,
    position = position_dodge(width = 3)
  ) +
  annotate(
    "text",
    x = 80,
    y = 40,
    label = regression_label_minus_p,
    parse = TRUE,
    size = 6,
    hjust = 0
  ) +
  annotate(
    "text",
    x = 80,
    y = 10,
    label = regression_label_inv4m,
    parse = TRUE,
    size = 6,
    hjust = 0
  ) +
  scale_x_continuous(
    breaks = c(40, 50, 60, 121),
    labels = c("40", "50", "60", "121")
  ) +
  scale_shape_manual(
    values = c(16, 18),
    labels = c("<i>Inv4m</i>", "-P")
  ) +
  guides(shape = guide_legend(reverse = TRUE)) +
  labs(
    title = "Stover Biomass Reduction ",
    x = "Days After Planting",
    y = "% reference mean",
    shape = "Predictor"
  ) +
  theme_classic2(base_size = 20) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.text.x = element_text(color = "black", size = 18),
    legend.position = c(0.8, 0.9),
    legend.text = element_markdown(size = 20)
  )

#the odd value at DAP60 is real
summary(pairs(emmeans(lm(data=pheno,  STW60 ~ Genotype * Treatment), ~ Genotype), reverse = TRUE))

##  contrast     estimate   SE df t.ratio p.value
##  Inv4m - CTRL      2.5 3.47 50   0.720  0.4750
## 
## Results are averaged over the levels of: Treatment

print(p_biomass_comparison)

8. Combined Panels

Main phenotype panel

plot_list <- list()

if (exists("dta_plot")) plot_list$dta <- dta_plot
if (exists("dts_plot")) plot_list$dts <- dts_plot
# if (exists("ph_plot")) plot_list$ph <- ph_plot
if (exists("kw50_plot")) plot_list$kw50 <- kw50_plot
if (exists("cd_plot")) plot_list$cd <- cd_plot

if (length(plot_list) >= 4) {
  main_plots <- plot_list[1:min(8, length(plot_list))]
  
  combined_plot <- ggarrange(
    plotlist = main_plots,
    ncol = 4,
    nrow = 1,
    labels = LETTERS[1:length(main_plots)],
    font.label = list(size = 30)
  )
  
  print(combined_plot)
} else if (length(plot_list) > 0) {
  combined_plot <- ggarrange(
    plotlist = plot_list,
    ncol = 2,
    nrow = ceiling(length(plot_list) / 2),
    labels = LETTERS[1:length(plot_list)],
    font.label = list(size = 30)
  )
  
  print(combined_plot)
}

Combined stover weight panel

p_stw <- ggarrange(
  p_dw,
  p_biomass_comparison,
  nrow = 1,
  ncol = 2,
  labels = c("E", "F"),
  font.label = list(size = 30)
)

print(p_stw)

### Combined figure with time series

# Create comprehensive figure including time series
if (exists("combined_plot") && exists("p_stw")) {
  full_figure <- ggarrange(
    combined_plot,
    p_stw,
    ncol = 1,
    nrow = 2,
    # heights = c(1, 1.2),
    #labels = c("", "E"),
    font.label = list(size = 30)
  )
  
  print(full_figure)
}

Session Info

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.50       emmeans_1.11.2   rstatix_0.7.2    ggbeeswarm_0.7.2
##  [5] ggpubr_0.6.1     ggfx_1.0.2       ggtext_0.1.2     lubridate_1.9.4 
##  [9] forcats_1.0.0    stringr_1.5.1    dplyr_1.1.4      purrr_1.1.0     
## [13] readr_2.1.5      tidyr_1.3.1      tibble_3.3.0     ggplot2_3.5.2   
## [17] tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       beeswarm_0.4.0     xfun_0.53          bslib_0.9.0       
##  [5] lattice_0.22-7     tzdb_0.5.0         vctrs_0.6.5        tools_4.5.1       
##  [9] generics_0.1.4     pkgconfig_2.0.3    Matrix_1.7-3       RColorBrewer_1.1-3
## [13] lifecycle_1.0.4    compiler_4.5.1     farver_2.1.2       textshaping_1.0.1 
## [17] carData_3.0-5      litedown_0.7       vipor_0.4.7        htmltools_0.5.8.1 
## [21] sass_0.4.10        yaml_2.3.10        Formula_1.2-5      pillar_1.11.0     
## [25] car_3.1-3          jquerylib_0.1.4    cachem_1.1.0       magick_2.8.7      
## [29] abind_1.4-8        nlme_3.1-168       commonmark_2.0.0   tidyselect_1.2.1  
## [33] digest_0.6.37      mvtnorm_1.3-3      stringi_1.8.7      splines_4.5.1     
## [37] labeling_0.4.3     cowplot_1.2.0      fastmap_1.2.0      grid_4.5.1        
## [41] cli_3.6.5          magrittr_2.0.3     utf8_1.2.6         broom_1.0.9       
## [45] withr_3.0.2        scales_1.4.0       backports_1.5.0    timechange_0.3.0  
## [49] estimability_1.5.1 rmarkdown_2.29     ggsignif_0.6.4     ragg_1.4.0        
## [53] hms_1.1.3          evaluate_1.0.4     mgcv_1.9-3         markdown_2.0      
## [57] rlang_1.1.6        gridtext_0.1.5     Rcpp_1.1.0         xtable_1.8-4      
## [61] glue_1.8.0         xml2_1.4.0         rstudioapi_0.17.1  jsonlite_2.0.0    
## [65] R6_2.6.1           systemfonts_1.2.3