Overview

This analysis examines Genotype × Environment (GxE) interactions for the inv4m introgression across three environments:

PSU2022: Pennsylvania environment (2022 season)
PSU2025: Pennsylvania environment (2025 season)
CLY2025: North Carolina environment (southern, 2025 season)

Research Questions: 1. Does inv4m show consistent effects across environments? 2. Which traits (PH, DTA, DTS, GDDA, GDDS) exhibit significant GxE interactions? 3. How do environmental conditions modify inv4m effects?

1. Load Libraries and Data

library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggtext)
library(ggbeeswarm)
library(emmeans)
library(effectsize)
library(broom)
library(patchwork)
library(viridis)

psu2022 <- read_csv("PSU2022_spatially_corrected_phenotypes.csv")
psu2022$donor <- "MI21"

psu2025 <- read_csv("PSU2025_spatially_corrected_phenotypes.csv")
cly2025 <- read_csv("CLY2025_spatially_corrected_phenotypes.csv")

cat("PSU2022 dimensions:", dim(psu2022), "\n")

## PSU2022 dimensions: 127 27

cat("PSU2025 dimensions:", dim(psu2025), "\n")

## PSU2025 dimensions: 64 13

cat("CLY2025 dimensions:", dim(cly2025), "\n")

## CLY2025 dimensions: 64 19

cat("PSU2022 genotypes:", paste(unique(psu2022$genotype), collapse = ", "), "\n")

## PSU2022 genotypes: Inv4m, CTRL, B73, NUE

cat("PSU2025 genotypes:", paste(unique(psu2025$genotype), collapse = ", "), "\n")

## PSU2025 genotypes: Inv4m, CTRL

cat("CLY2025 genotypes:", paste(unique(cly2025$genotype), collapse = ", "), "\n")

## CLY2025 genotypes: INV4M, CTRL

cat("PSU2022 donors:", paste(unique(psu2022$donor), collapse = ", "), "\n")

## PSU2022 donors: MI21

cat("PSU2025 donors:", paste(unique(psu2025$donor), collapse = ", "), "\n")

## PSU2025 donors: TMEX, MI21

cat("CLY2025 donors:", paste(unique(cly2025$donor), collapse = ", "), "\n")

## CLY2025 donors: TMEX, MI21

psu2022 <- psu2022 %>%
  mutate(
    genotype = case_when(
      genotype == "Inv4m" ~ "INV4M",
      genotype == "CTRL" ~ "CTRL",
      TRUE ~ genotype
    )
  )

psu2025 <- psu2025 %>%
  mutate(
    genotype = case_when(
      genotype == "Inv4m" ~ "INV4M",
      genotype == "CTRL" ~ "CTRL",
      TRUE ~ genotype
    )
  )

cat("\nAfter standardization:\n")

## 
## After standardization:

cat("PSU2022 genotypes:", paste(unique(psu2022$genotype), collapse = ", "), "\n")

## PSU2022 genotypes: INV4M, CTRL, B73, NUE

cat("PSU2025 genotypes:", paste(unique(psu2025$genotype), collapse = ", "), "\n")

## PSU2025 genotypes: INV4M, CTRL

cat("CLY2025 genotypes:", paste(unique(cly2025$genotype), collapse = ", "), "\n")

## CLY2025 genotypes: INV4M, CTRL

shared_phenotypes <- c("PH", "DTA", "DTS", "GDDA", "GDDS")
common_donors <- "MI21"

2. Data Integration for GxE Analysis

gxe_data_list <- list()

for (donor in "MI21") {
  cat("\n=== Preparing GxE data for", donor, "===\n")
  
  psu2022_donor <- psu2022 %>%
    filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(environment = "PSU2022", donor = donor) %>%
    filter(complete.cases(.))
  
  psu2025_donor <- psu2025 %>%
    filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(environment = "PSU2025", donor = donor) %>%
    filter(complete.cases(.))
  
  cly2025_donor <- cly2025 %>%
    filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(environment = "CLY2025", donor = donor) %>%
    filter(complete.cases(.))
  
  donor_gxe <- bind_rows(psu2022_donor, psu2025_donor, cly2025_donor) %>%
    mutate(
      genotype = factor(genotype, levels = c("CTRL", "INV4M")),
      environment = factor(environment, levels = c("PSU2022", "PSU2025", "CLY2025"))
    )
  
  gxe_data_list[[donor]] <- donor_gxe
  
  cat("Sample sizes by environment and genotype:\n")
  print(table(donor_gxe$environment, donor_gxe$genotype))
}

## 
## === Preparing GxE data for MI21 ===
## Sample sizes by environment and genotype:
##          
##           CTRL INV4M
##   PSU2022   32    32
##   PSU2025   16    16
##   CLY2025   16    16

gxe_all_data <- bind_rows(gxe_data_list, .id = "donor")

cat("\nFinal combined dataset dimensions:", dim(gxe_all_data), "\n")

## 
## Final combined dataset dimensions: 128 9

cat("Traits available:", paste(shared_phenotypes, collapse = ", "), "\n")

## Traits available: PH, DTA, DTS, GDDA, GDDS

3. Phase 1: Within-Environment Analysis

3.1 PSU2022 Analysis

psu2022_results_list <- list()

for (current_donor in "MI21") {
  cat("\n=== PSU2022", current_donor, "Analysis ===\n")
  
  psu2022_donor <- gxe_all_data %>%
    filter(environment == "PSU2022") %>%
    filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
    filter(complete.cases(.))
  
  if (nrow(psu2022_donor) < 6) {
    cat("Insufficient data for", current_donor, "(n =", nrow(psu2022_donor), ")\n")
    next
  }
  
  cat("Sample size:", nrow(psu2022_donor), "plots\n")
  print(table(psu2022_donor$genotype))
  
  psu2022_results <- psu2022_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    t_test(value ~ genotype) %>%
    adjust_pvalue(method = "fdr") %>%
    add_significance("p.adj")
  
  psu2022_cohens_d <- psu2022_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", 
                      ci = TRUE, var.equal = FALSE)
  
  psu2022_summary <- psu2022_results %>%
    left_join(psu2022_cohens_d %>% select(trait, effsize, magnitude), 
              by = "trait") %>%
    mutate(donor = current_donor, environment = "PSU2022")
  
  psu2022_results_list[[current_donor]] <- psu2022_summary
  
  cat("Results for", current_donor, ":\n")
  print(psu2022_summary)
}

## 
## === PSU2022 MI21 Analysis ===
## Sample size: 64 plots
## 
##  CTRL INV4M 
##    32    32 
## Results for MI21 :
## # A tibble: 5 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df           p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     32    32      4.89  60.4 0.00000793  0.0000198 
## 2 DTS   value CTRL   INV4M     32    32      4.73  58.9 0.0000143   0.0000205 
## 3 GDDA  value CTRL   INV4M     32    32      4.69  60.1 0.0000164   0.0000205 
## 4 GDDS  value CTRL   INV4M     32    32      4.60  55.7 0.0000247   0.0000247 
## 5 PH    value CTRL   INV4M     32    32     -5.82  61.3 0.000000232 0.00000116
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

psu2022_all_results <- bind_rows(psu2022_results_list)

3.2 PSU2025 Analysis

psu2025_results_list <- list()

for (current_donor in "MI21") {
  cat("\n=== PSU2025", current_donor, "Analysis ===\n")
  
  psu2025_donor <- gxe_all_data %>%
    filter(environment == "PSU2025") %>%
    filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
    filter(complete.cases(.))
  
  if (nrow(psu2025_donor) < 6) {
    cat("Insufficient data for", current_donor, "(n =", nrow(psu2025_donor), ")\n")
    next
  }
  
  cat("Sample size:", nrow(psu2025_donor), "plots\n")
  print(table(psu2025_donor$genotype))
  
  psu_results <- psu2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    t_test(value ~ genotype) %>%
    adjust_pvalue(method = "fdr") %>%
    add_significance("p.adj")
  
  psu_cohens_d <- psu2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", 
                      ci = TRUE, var.equal = FALSE) 
  
  psu_summary <- psu_results %>%
    left_join(psu_cohens_d %>% select(trait, effsize, magnitude), 
              by = "trait") %>%
    mutate(donor = current_donor, environment = "PSU2025")
  
  psu2025_results_list[[current_donor]] <- psu_summary
  
  cat("Results for", current_donor, ":\n")
  print(psu_summary)
}

## 
## === PSU2025 MI21 Analysis ===
## Sample size: 32 plots
## 
##  CTRL INV4M 
##    16    16 
## Results for MI21 :
## # A tibble: 5 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df           p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     16    16    -1.92   24.7 0.0663      0.110     
## 2 DTS   value CTRL   INV4M     16    16    -0.584  25.8 0.564       0.609     
## 3 GDDA  value CTRL   INV4M     16    16    -1.96   24.8 0.0619      0.110     
## 4 GDDS  value CTRL   INV4M     16    16    -0.517  26.1 0.609       0.609     
## 5 PH    value CTRL   INV4M     16    16    -6.30   27.9 0.000000847 0.00000424
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

psu2025_all_results <- bind_rows(psu2025_results_list)

3.3 CLY2025 Analysis

cly2025_results_list <- list()

for (current_donor in "MI21") {
  cat("\n=== CLY2025", current_donor, "Analysis ===\n")
  
  cly2025_donor <- gxe_all_data %>%
    filter(environment == "CLY2025") %>%
    filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
    filter(complete.cases(.))
  
  if (nrow(cly2025_donor) < 6) {
    cat("Insufficient data for", current_donor, "(n =", nrow(cly2025_donor), ")\n")
    next
  }
  
  cat("Sample size:", nrow(cly2025_donor), "plots\n")
  print(table(cly2025_donor$genotype))
  
  cly_results <- cly2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    t_test(value ~ genotype) %>%
    adjust_pvalue(method = "fdr") %>%
    add_significance("p.adj")
  
  cly_cohens_d <- cly2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", 
                      ci = TRUE, var.equal = FALSE)
  
  cly_summary <- cly_results %>%
    left_join(cly_cohens_d %>% select(trait, effsize, magnitude), 
              by = "trait") %>%
    mutate(donor = current_donor, environment = "CLY2025")
  
  cly2025_results_list[[current_donor]] <- cly_summary
  
  cat("Results for", current_donor, ":\n")
  print(cly_summary)
}

## 
## === CLY2025 MI21 Analysis ===
## Sample size: 32 plots
## 
##  CTRL INV4M 
##    16    16 
## Results for MI21 :
## # A tibble: 5 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df       p   p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 DTA   value CTRL   INV4M     16    16      2.19  29.7 0.0362  0.0384 
## 2 DTS   value CTRL   INV4M     16    16      3.16  30.0 0.00357 0.00655
## 3 GDDA  value CTRL   INV4M     16    16      2.17  29.6 0.0384  0.0384 
## 4 GDDS  value CTRL   INV4M     16    16      3.13  30.0 0.00393 0.00655
## 5 PH    value CTRL   INV4M     16    16      3.62  29.7 0.00109 0.00545
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

cly2025_all_results <- bind_rows(cly2025_results_list)

4. Phase 2: GxE Statistical Analysis

4.1 GxE ANOVA by Donor

gxe_results_by_donor <- list()

for (donor in common_donors) {
  cat("\n=== GxE Analysis for", donor, "===\n")
  
  donor_data <- gxe_all_data
  gxe_anova_results <- list()
  gxe_effect_sizes <- list()
  gxe_emmeans_results <- list()
  
  for (trait in shared_phenotypes) {
    cat("\n--- Analyzing", trait, "for", donor, "---\n")
    
    trait_data <- donor_data %>%
      select(plotId, genotype, environment, value = all_of(trait)) %>%
      filter(!is.na(value))
    
    if (nrow(trait_data) < 8) {
      cat("Insufficient data for", trait, "in", donor, "\n")
      next
    }
    
    anova_model <- aov(value ~ genotype * environment, data = trait_data)
    anova_summary <- summary(anova_model)
    
    effect_sizes <- eta_squared(anova_model, partial = TRUE)
    
    gxe_anova_results[[trait]] <- anova_summary
    gxe_effect_sizes[[trait]] <- effect_sizes
    
    cat("ANOVA Results for", trait, "in", donor, ":\n")
    print(anova_summary)
    
    if (length(unique(trait_data$environment)) > 1 && 
        length(unique(trait_data$genotype)) > 1) {
      emmeans_result <- emmeans(anova_model, ~ genotype | environment)
      emmeans_contrast <- pairs(emmeans_result)
      
      gxe_emmeans_results[[trait]] <- list(
        means = emmeans_result,
        contrasts = emmeans_contrast
      )
      
      cat("Estimated Marginal Means:\n")
      print(emmeans_result)
    }
  }
  
  gxe_results_by_donor[[donor]] <- list(
    anova = gxe_anova_results,
    effect_sizes = gxe_effect_sizes,
    emmeans = gxe_emmeans_results
  )
}

## 
## === GxE Analysis for MI21 ===
## 
## --- Analyzing PH for MI21 ---
## ANOVA Results for PH in MI21 :
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype               1    509     509   31.08 1.51e-07 ***
## environment            2 105049   52524 3209.80  < 2e-16 ***
## genotype:environment   2    814     407   24.88 8.65e-10 ***
## Residuals            122   1996      16                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = PSU2022:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL        163 0.715 122      161      164
##  INV4M       169 0.715 122      168      171
## 
## environment = PSU2025:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL        232 1.010 122      230      234
##  INV4M       240 1.010 122      238      242
## 
## environment = CLY2025:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL        187 1.010 122      185      189
##  INV4M       183 1.010 122      181      185
## 
## Confidence level used: 0.95 
## 
## --- Analyzing DTA for MI21 ---
## ANOVA Results for DTA in MI21 :
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype               1   12.3    12.3   14.67 0.000204 ***
## environment            2  671.5   335.7  400.71  < 2e-16 ***
## genotype:environment   2   19.1     9.6   11.40 2.89e-05 ***
## Residuals            122  102.2     0.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = PSU2022:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      73.85 0.162 122    73.53    74.17
##  INV4M     72.55 0.162 122    72.23    72.87
## 
## environment = PSU2025:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      77.76 0.229 122    77.30    78.21
##  INV4M     78.33 0.229 122    77.88    78.79
## 
## environment = CLY2025:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      77.71 0.229 122    77.26    78.16
##  INV4M     77.26 0.229 122    76.80    77.71
## 
## Confidence level used: 0.95 
## 
## --- Analyzing DTS for MI21 ---
## ANOVA Results for DTS in MI21 :
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype               1   13.7   13.72  17.986 4.36e-05 ***
## environment            2  385.7  192.86 252.812  < 2e-16 ***
## genotype:environment   2    8.6    4.28   5.616  0.00464 ** 
## Residuals            122   93.1    0.76                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = PSU2022:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      75.21 0.154 122    74.90    75.52
##  INV4M     74.16 0.154 122    73.85    74.46
## 
## environment = PSU2025:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      78.20 0.218 122    77.76    78.63
##  INV4M     78.41 0.218 122    77.98    78.84
## 
## environment = CLY2025:
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      78.36 0.218 122    77.93    78.79
##  INV4M     77.63 0.218 122    77.20    78.07
## 
## Confidence level used: 0.95 
## 
## --- Analyzing GDDA for MI21 ---
## ANOVA Results for GDDA in MI21 :
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype               1   3013  3013.3  15.918 0.000113 ***
## environment            2    255   127.4   0.673 0.512038    
## genotype:environment   2   3917  1958.3  10.345 7.08e-05 ***
## Residuals            122  23095   189.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = PSU2022:
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL        886 2.43 122      881      891
##  INV4M       867 2.43 122      862      872
## 
## environment = PSU2025:
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL        870 3.44 122      863      877
##  INV4M       877 3.44 122      871      884
## 
## environment = CLY2025:
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL        877 3.44 122      870      884
##  INV4M       870 3.44 122      863      877
## 
## Confidence level used: 0.95 
## 
## --- Analyzing GDDS for MI21 ---
## ANOVA Results for GDDS in MI21 :
##                       Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype               1   3010    3010  19.833 1.88e-05 ***
## environment            2  11474    5737  37.796 1.68e-13 ***
## genotype:environment   2   1616     808   5.324  0.00607 ** 
## Residuals            122  18518     152                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = PSU2022:
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL      905.4 2.18 122    901.1    909.7
##  INV4M     890.5 2.18 122    886.2    894.8
## 
## environment = PSU2025:
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL      875.6 3.08 122    869.5    881.7
##  INV4M     878.0 3.08 122    871.9    884.1
## 
## environment = CLY2025:
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL      887.6 3.08 122    881.5    893.7
##  INV4M     876.3 3.08 122    870.2    882.4
## 
## Confidence level used: 0.95

4.2 GxE Interaction Summary

gxe_interaction_summary_list <- list()

get_anova_df <- function(anobj) {
  if (inherits(anobj, "aov")) {
    s <- summary(anobj)
    if (!is.null(s[[1]])) return(as.data.frame(s[[1]]))
  }
  if (inherits(anobj, "summary.aov") || is.list(anobj)) {
    first <- anobj[[1]]
    if (is.data.frame(first) || inherits(first, "matrix")) 
      return(as.data.frame(first))
  }
  safe <- tryCatch(as.data.frame(anobj), error = function(e) NULL)
  return(safe)
}

find_row_index <- function(df, term_patterns) {
  if (is.null(df)) return(NA_integer_)
  rn <- rownames(df)
  for (pat in term_patterns) {
    idx <- which(rn == pat)
    if (length(idx) == 1) return(idx[1])
  }
  for (pat in term_patterns) {
    words <- unlist(strsplit(gsub("[^[:alnum:]]", " ", pat), "\\s+"))
    words <- words[nzchar(words)]
    if (length(words) == 0) next
    logicals <- sapply(rn, function(x) 
      all(sapply(words, function(w) grepl(w, x, ignore.case = TRUE))))
    if (any(logicals)) return(which(logicals)[1])
  }
  idx_colon <- which(grepl(":", rn))
  if (length(idx_colon) > 0) {
    for (pat in term_patterns) {
      words <- unlist(strsplit(gsub("[^[:alnum:]]", " ", pat), "\\s+"))
      words <- words[nzchar(words)]
      cand <- idx_colon[sapply(idx_colon, function(i) 
        all(sapply(words, function(w) grepl(w, rn[i], ignore.case = TRUE))))]
      if (length(cand) > 0) return(cand[1])
    }
    return(idx_colon[1])
  }
  return(NA_integer_)
}

for (donor in common_donors) {
  if (!donor %in% names(gxe_results_by_donor)) next
  donor_results <- gxe_results_by_donor[[donor]]

  interaction_summary <- tibble(
    donor = donor,
    trait = shared_phenotypes,
    interaction_p = NA_real_,
    interaction_eta2 = NA_real_,
    genotype_p = NA_real_,
    environment_p = NA_real_
  )

  for (i in seq_along(shared_phenotypes)) {
    trait <- shared_phenotypes[i]
    if (!trait %in% names(donor_results$anova)) next

    anobj <- donor_results$anova[[trait]]
    an_df <- get_anova_df(anobj)
    if (is.null(an_df)) next

    pcol_idx <- grep("^Pr\\(>F\\)$", colnames(an_df))
    if (length(pcol_idx) == 0) pcol_idx <- ncol(an_df)

    idx_int <- find_row_index(an_df, c("genotype:environment", 
                                       "genotype: environment", 
                                       "environment:genotype", 
                                       "genotype x environment"))
    idx_gen <- find_row_index(an_df, c("genotype"))
    idx_env <- find_row_index(an_df, c("environment"))

    if (!is.na(idx_int) && idx_int <= nrow(an_df)) {
      interaction_summary$interaction_p[i] <- as.numeric(an_df[idx_int, pcol_idx])
    }
    if (!is.na(idx_gen) && idx_gen <= nrow(an_df)) {
      interaction_summary$genotype_p[i] <- as.numeric(an_df[idx_gen, pcol_idx])
    }
    if (!is.na(idx_env) && idx_env <= nrow(an_df)) {
      interaction_summary$environment_p[i] <- as.numeric(an_df[idx_env, pcol_idx])
    }

if (trait %in% names(donor_results$effect_sizes)) {
  es_df <- donor_results$effect_sizes[[trait]]
  
  # Check for either Eta2 or Eta2_partial column
  eta_col <- if ("Eta2_partial" %in% colnames(es_df)) {
    "Eta2_partial"
  } else if ("Eta2" %in% colnames(es_df)) {
    "Eta2"
  } else {
    NULL
  }
  
  if (!is.null(eta_col) && "Parameter" %in% colnames(es_df)) {
    int_row <- which(es_df$Parameter == "genotype:environment")
    if (length(int_row) == 0) {
      int_row <- grep("genotype.*environment", es_df$Parameter, 
                     ignore.case = TRUE)
    }
    if (length(int_row) >= 1) {
      interaction_summary$interaction_eta2[i] <- 
        as.numeric(es_df[[eta_col]][int_row[1]])
    }
  }
}

  }

  interaction_summary <- interaction_summary %>%
    mutate(
      interaction_sig = case_when(
        !is.na(interaction_p) & interaction_p < 0.001 ~ "***",
        !is.na(interaction_p) & interaction_p < 0.01  ~ "**",
        !is.na(interaction_p) & interaction_p < 0.05  ~ "*",
        !is.na(interaction_p)                         ~ "ns",
        TRUE                                          ~ NA_character_
      ),
      genotype_sig = case_when(
        !is.na(genotype_p) & genotype_p < 0.001 ~ "***",
        !is.na(genotype_p) & genotype_p < 0.01  ~ "**",
        !is.na(genotype_p) & genotype_p < 0.05  ~ "*",
        !is.na(genotype_p)                      ~ "ns",
        TRUE                                    ~ NA_character_
      ),
      environment_sig = case_when(
        !is.na(environment_p) & environment_p < 0.001 ~ "***",
        !is.na(environment_p) & environment_p < 0.01  ~ "**",
        !is.na(environment_p) & environment_p < 0.05  ~ "*",
        !is.na(environment_p)                         ~ "ns",
        TRUE                                          ~ NA_character_
      )
    )

  gxe_interaction_summary_list[[donor]] <- interaction_summary

  cat("\nGxE Interaction Summary for", donor, ":\n")
  print(interaction_summary)
}

## 
## GxE Interaction Summary for MI21 :
## # A tibble: 5 × 9
##   donor trait interaction_p interaction_eta2  genotype_p environment_p
##   <chr> <chr>         <dbl>            <dbl>       <dbl>         <dbl>
## 1 MI21  PH         8.65e-10           0.290  0.000000151     3.24e-106
## 2 MI21  DTA        2.89e- 5           0.157  0.000204        2.39e- 54
## 3 MI21  DTS        4.64e- 3           0.0843 0.0000436       4.06e- 44
## 4 MI21  GDDA       7.08e- 5           0.145  0.000113        5.12e-  1
## 5 MI21  GDDS       6.07e- 3           0.0803 0.0000188       1.68e- 13
## # ℹ 3 more variables: interaction_sig <chr>, genotype_sig <chr>,
## #   environment_sig <chr>

gxe_all_interactions <- bind_rows(gxe_interaction_summary_list)

4.3 Overall Genotype Main Effects (Marginal Means)

overall_genotype_effects <- list()

for (donor in common_donors) {
  cat("\n=== Overall Genotype Main Effects for", donor, "===\n")
  
  donor_data <- gxe_all_data
  
  for (trait in shared_phenotypes) {
    cat("\n--- Analyzing", trait, "---\n")
    
    trait_data <- donor_data %>%
      select(plotId, genotype, environment, value = all_of(trait)) %>%
      filter(!is.na(value))
    
    if (nrow(trait_data) < 8) next
    
    anova_model <- aov(value ~ genotype * environment, data = trait_data)
    
    # Marginal means across all environments
    genotype_marginal <- emmeans(anova_model, ~ genotype)
    genotype_contrast <- pairs(genotype_marginal, reverse = TRUE)
    
    overall_genotype_effects[[trait]] <- list(
      marginal_means = genotype_marginal,
      contrast = genotype_contrast
    )
    
    cat("\nMarginal Means (averaged across environments):\n")
    print(genotype_marginal)
    cat("\nPairwise Contrast:\n")
    print(genotype_contrast)
  }
}

## 
## === Overall Genotype Main Effects for MI21 ===
## 
## --- Analyzing PH ---

## 
## Marginal Means (averaged across environments):
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL        194 0.533 122      193      195
##  INV4M       197 0.533 122      196      198
## 
## Results are averaged over the levels of: environment 
## Confidence level used: 0.95 
## 
## Pairwise Contrast:
##  contrast     estimate    SE  df t.ratio p.value
##  INV4M - CTRL     3.15 0.754 122   4.185  0.0001
## 
## Results are averaged over the levels of: environment 
## 
## --- Analyzing DTA ---

## 
## Marginal Means (averaged across environments):
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      76.44 0.121 122    76.20    76.68
##  INV4M     76.05 0.121 122    75.81    76.28
## 
## Results are averaged over the levels of: environment 
## Confidence level used: 0.95 
## 
## Pairwise Contrast:
##  contrast     estimate    SE  df t.ratio p.value
##  INV4M - CTRL   -0.393 0.171 122  -2.302  0.0231
## 
## Results are averaged over the levels of: environment 
## 
## --- Analyzing DTS ---

## 
## Marginal Means (averaged across environments):
##  genotype emmean    SE  df lower.CL upper.CL
##  CTRL      77.26 0.115 122    77.03    77.48
##  INV4M     76.73 0.115 122    76.51    76.96
## 
## Results are averaged over the levels of: environment 
## Confidence level used: 0.95 
## 
## Pairwise Contrast:
##  contrast     estimate    SE  df t.ratio p.value
##  INV4M - CTRL   -0.522 0.163 122  -3.209  0.0017
## 
## Results are averaged over the levels of: environment 
## 
## --- Analyzing GDDA ---

## 
## Marginal Means (averaged across environments):
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL      877.8 1.81 122    874.2    881.4
##  INV4M     871.4 1.81 122    867.8    875.0
## 
## Results are averaged over the levels of: environment 
## Confidence level used: 0.95 
## 
## Pairwise Contrast:
##  contrast     estimate   SE  df t.ratio p.value
##  INV4M - CTRL    -6.42 2.56 122  -2.506  0.0135
## 
## Results are averaged over the levels of: environment 
## 
## --- Analyzing GDDS ---

## 
## Marginal Means (averaged across environments):
##  genotype emmean   SE  df lower.CL upper.CL
##  CTRL      889.5 1.62 122    886.3    892.8
##  INV4M     881.6 1.62 122    878.4    884.8
## 
## Results are averaged over the levels of: environment 
## Confidence level used: 0.95 
## 
## Pairwise Contrast:
##  contrast     estimate  SE  df t.ratio p.value
##  INV4M - CTRL    -7.97 2.3 122  -3.470  0.0007
## 
## Results are averaged over the levels of: environment

# Extract into a summary table
overall_summary <- tibble(
  trait = shared_phenotypes,
  CTRL = NA_real_,
  INV4M = NA_real_,
  effect = NA_real_,
  SE = NA_real_,
  df = NA_real_,
  t_ratio = NA_real_,
  p_value = NA_real_
)

for (i in seq_along(shared_phenotypes)) {
  trait <- shared_phenotypes[i]
  if (!trait %in% names(overall_genotype_effects)) next
  
  means_df <- as.data.frame(overall_genotype_effects[[trait]]$marginal_means)
  contrast_df <- as.data.frame(overall_genotype_effects[[trait]]$contrast)
  
  overall_summary$CTRL[i] <- means_df$emmean[means_df$genotype == "CTRL"]
  overall_summary$INV4M[i] <- means_df$emmean[means_df$genotype == "INV4M"]
  overall_summary$effect[i] <- contrast_df$estimate[1]
  overall_summary$SE[i] <- contrast_df$SE[1]
  overall_summary$df[i] <- contrast_df$df[1]
  overall_summary$t_ratio[i] <- contrast_df$t.ratio[1]
  overall_summary$p_value[i] <- contrast_df$p.value[1]
}

overall_summary <- overall_summary %>%
  mutate(
    CI_lower = effect - qt(0.975, df) * SE,
    CI_upper = effect + qt(0.975, df) * SE,
    significance = case_when(
      p_value < 0.001 ~ "***",
      p_value < 0.01 ~ "**",
      p_value < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  )

cat("\n=== OVERALL GENOTYPE MAIN EFFECTS SUMMARY (emmeans) ===\n")

## 
## === OVERALL GENOTYPE MAIN EFFECTS SUMMARY (emmeans) ===

print(overall_summary)

## # A tibble: 5 × 11
##   trait  CTRL INV4M effect    SE    df t_ratio   p_value CI_lower CI_upper
##   <chr> <dbl> <dbl>  <dbl> <dbl> <dbl>   <dbl>     <dbl>    <dbl>    <dbl>
## 1 PH    194.  197.   3.15  0.754   122    4.18 0.0000541    1.66    4.65  
## 2 DTA    76.4  76.0 -0.393 0.171   122   -2.30 0.0231      -0.730  -0.0549
## 3 DTS    77.3  76.7 -0.522 0.163   122   -3.21 0.00170     -0.844  -0.200 
## 4 GDDA  878.  871.  -6.42  2.56    122   -2.51 0.0135     -11.5    -1.35  
## 5 GDDS  890.  882.  -7.97  2.30    122   -3.47 0.000721   -12.5    -3.42  
## # ℹ 1 more variable: significance <chr>

write_csv(overall_summary, "overall_genotype_main_effects.csv")

5. Phase 3: Comprehensive Visualization

5.1 Interaction Plots by Donor

pheno_theme <- theme_classic2(base_size = 16) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 14),
    legend.position = "none"
  )

for (donor in common_donors) {
  cat("\n=== Creating interaction plots for", donor, "===\n")
  
  interaction_stats <- gxe_all_data %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait, genotype, environment) %>%
    summarise(
      mean_value = mean(value, na.rm = TRUE),
      se_value = sd(value, na.rm = TRUE) / sqrt(n()),
      n = n(),
      .groups = "drop"
    ) %>%
    mutate(
      trait = factor(trait, levels = shared_phenotypes),
      environment = factor(environment, levels = c("PSU2022", "PSU2025", "CLY2025"))
    )
  
  interaction_plot <- ggplot(interaction_stats, 
                            aes(x = environment, y = mean_value, 
                                color = genotype, group = genotype)) +
    geom_line(size = 1.2, alpha = 0.8) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = mean_value - se_value, 
                     ymax = mean_value + se_value),
                  width = 0.1, size = 1) +
    scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
    facet_wrap(~ factor(trait, 
                        levels= c("DTA", "DTS","PH", "GDDA", "GDDS",  "BL", "BW", "EBA")),
               scales = "free_y", ncol = 3) +
    labs(
      title = paste("**Genotype × Environment Interaction Effects:**", 
                   donor, "**donor**"),
      subtitle = "Genotype means ± SE across environments",
      x = "Environment",
      y = "Trait Value",
      color = "Genotype"
    ) +
    pheno_theme +
    theme(legend.position = "top")
  
  print(interaction_plot)
}

## 
## === Creating interaction plots for MI21 ===

5.2 Effect Size Comparisons

all_effect_sizes <- bind_rows(
  psu2022_all_results %>% select(trait, donor, environment, magnitude, effsize),
  psu2025_all_results %>% select(trait, donor, environment, magnitude, effsize),
  cly2025_all_results %>% select(trait, donor, environment, magnitude, effsize)
) %>%
  filter(donor %in% common_donors) %>%
  mutate(
    trait = factor(trait, levels = shared_phenotypes),
    environment = factor(environment, levels = c("PSU2022", "PSU2025", "CLY2025"))
  )

forest_plot <- ggplot(all_effect_sizes, 
                     aes(x = effsize, y = interaction(trait, donor), 
                         color = environment)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
  geom_point(size = 3, position = position_dodge(width = 0.4)) +
  geom_text(aes(label = paste0("(", magnitude, ")")),
            position = position_dodge(width = 0.4),
            hjust = -0.2, size = 3) +
  scale_color_manual(values = c("PSU2022" = "forestgreen", 
                                "PSU2025" = "royalblue", 
                                "CLY2025" = "tomato")) +
  labs(
    title = "**Effect Sizes (Cohen's d) of inv4m Across Environments**",
    subtitle = "Positive values indicate INV4M > CTRL",
    x = "Cohen's d (Effect Size)",
    y = "Trait × Donor",
    color = "Environment"
  ) +
  pheno_theme +
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 10)
  )

print(forest_plot)

6. Summary and Export Results

6.1 Summary Tables

cat("=== FINAL SUMMARY ===\n\n")

## === FINAL SUMMARY ===

cat("1. PSU2022 Results:\n")

## 1. PSU2022 Results:

if (nrow(psu2022_all_results) > 0) {
  print(psu2022_all_results)
} else {
  cat("No PSU2022 results available\n")
}

## # A tibble: 5 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df           p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     32    32      4.89  60.4 0.00000793  0.0000198 
## 2 DTS   value CTRL   INV4M     32    32      4.73  58.9 0.0000143   0.0000205 
## 3 GDDA  value CTRL   INV4M     32    32      4.69  60.1 0.0000164   0.0000205 
## 4 GDDS  value CTRL   INV4M     32    32      4.60  55.7 0.0000247   0.0000247 
## 5 PH    value CTRL   INV4M     32    32     -5.82  61.3 0.000000232 0.00000116
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

cat("\n2. PSU2025 Results:\n")

## 
## 2. PSU2025 Results:

if (nrow(psu2025_all_results) > 0) {
  print(psu2025_all_results)
} else {
  cat("No PSU2025 results available\n")
}

## # A tibble: 5 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df           p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     16    16    -1.92   24.7 0.0663      0.110     
## 2 DTS   value CTRL   INV4M     16    16    -0.584  25.8 0.564       0.609     
## 3 GDDA  value CTRL   INV4M     16    16    -1.96   24.8 0.0619      0.110     
## 4 GDDS  value CTRL   INV4M     16    16    -0.517  26.1 0.609       0.609     
## 5 PH    value CTRL   INV4M     16    16    -6.30   27.9 0.000000847 0.00000424
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

cat("\n3. CLY2025 Results:\n")

## 
## 3. CLY2025 Results:

if (nrow(cly2025_all_results) > 0) {
  print(cly2025_all_results)
} else {
  cat("No CLY2025 results available\n")
}

## # A tibble: 5 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df       p   p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 DTA   value CTRL   INV4M     16    16      2.19  29.7 0.0362  0.0384 
## 2 DTS   value CTRL   INV4M     16    16      3.16  30.0 0.00357 0.00655
## 3 GDDA  value CTRL   INV4M     16    16      2.17  29.6 0.0384  0.0384 
## 4 GDDS  value CTRL   INV4M     16    16      3.13  30.0 0.00393 0.00655
## 5 PH    value CTRL   INV4M     16    16      3.62  29.7 0.00109 0.00545
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

cat("\n4. GxE Interaction Effects:\n")

## 
## 4. GxE Interaction Effects:

if (nrow(gxe_all_interactions) > 0) {
  print(gxe_all_interactions)
} else {
  cat("No GxE interaction results available\n")
}

## # A tibble: 5 × 9
##   donor trait interaction_p interaction_eta2  genotype_p environment_p
##   <chr> <chr>         <dbl>            <dbl>       <dbl>         <dbl>
## 1 MI21  PH         8.65e-10           0.290  0.000000151     3.24e-106
## 2 MI21  DTA        2.89e- 5           0.157  0.000204        2.39e- 54
## 3 MI21  DTS        4.64e- 3           0.0843 0.0000436       4.06e- 44
## 4 MI21  GDDA       7.08e- 5           0.145  0.000113        5.12e-  1
## 5 MI21  GDDS       6.07e- 3           0.0803 0.0000188       1.68e- 13
## # ℹ 3 more variables: interaction_sig <chr>, genotype_sig <chr>,
## #   environment_sig <chr>

6.2 Export Results

if (nrow(psu2022_all_results) > 0) {
  write_csv(psu2022_all_results, "PSU2022_inv4m_results.csv")
}

if (nrow(psu2025_all_results) > 0) {
  write_csv(psu2025_all_results, "PSU2025_inv4m_results.csv")
}

if (nrow(cly2025_all_results) > 0) {
  write_csv(cly2025_all_results, "CLY2025_inv4m_results.csv")
}

if (nrow(gxe_all_interactions) > 0) {
  write_csv(gxe_all_interactions, "three_env_GxE_interaction_results.csv")
}

if (nrow(all_effect_sizes) > 0) {
  write_csv(all_effect_sizes, "three_env_effect_sizes_by_environment.csv")
}

if (nrow(gxe_all_data) > 0) {
  write_csv(gxe_all_data, "three_env_combined_gxe_dataset.csv")
}

cat("Results exported to CSV files\n")

## Results exported to CSV files

7. Key Findings

cat("=== KEY FINDINGS ===\n\n")

## === KEY FINDINGS ===

if (nrow(gxe_all_interactions) > 0) {
  sig_interactions <- gxe_all_interactions %>%
    filter(interaction_p < 0.05, !is.na(interaction_p))
  
  if (nrow(sig_interactions) > 0) {
    cat("Significant GxE interactions (p < 0.05):\n")
    print(sig_interactions %>% 
            select(donor, trait, interaction_p, interaction_sig, interaction_eta2))
  } else {
    cat("No significant GxE interactions detected (p < 0.05)\n")
  }
  
  cat("\nSummary by donor:\n")
  donor_summary <- gxe_all_interactions %>%
    group_by(donor) %>%
    summarise(
      traits_tested = sum(!is.na(interaction_p)),
      significant_interactions = sum(interaction_p < 0.05, na.rm = TRUE),
      .groups = "drop"
    )
  print(donor_summary)
  
} else {
  cat("No GxE analysis results available\n")
}

## Significant GxE interactions (p < 0.05):
## # A tibble: 5 × 5
##   donor trait interaction_p interaction_sig interaction_eta2
##   <chr> <chr>         <dbl> <chr>                      <dbl>
## 1 MI21  PH         8.65e-10 ***                       0.290 
## 2 MI21  DTA        2.89e- 5 ***                       0.157 
## 3 MI21  DTS        4.64e- 3 **                        0.0843
## 4 MI21  GDDA       7.08e- 5 ***                       0.145 
## 5 MI21  GDDS       6.07e- 3 **                        0.0803
## 
## Summary by donor:
## # A tibble: 1 × 3
##   donor traits_tested significant_interactions
##   <chr>         <int>                    <int>
## 1 MI21              5                        5

Calculate Raw Effect Sizes (Mean Differences)

calculate_raw_effects <- function(data, traits) {
  data %>%
    pivot_longer(cols = all_of(traits), 
                 names_to = "trait", 
                 values_to = "value") %>%
    group_by(trait, genotype) %>%
    summarise(
      mean_value = mean(value, na.rm = TRUE),
      sd_value = sd(value, na.rm = TRUE),
      n = n(),
      se = sd_value / sqrt(n),
      .groups = "drop"
    ) %>%
    pivot_wider(
      names_from = genotype,
      values_from = c(mean_value, sd_value, n, se)
    ) %>%
    mutate(
      raw_effect = mean_value_INV4M - mean_value_CTRL,
      se_diff = sqrt(se_INV4M^2 + se_CTRL^2)
    )
}

psu2022_raw <- gxe_all_data %>%
  filter(environment == "PSU2022") %>%
  calculate_raw_effects(shared_phenotypes) %>%
  mutate(environment = "PSU2022")

psu2025_raw <- gxe_all_data %>%
  filter(environment == "PSU2025") %>%
  calculate_raw_effects(shared_phenotypes) %>%
  mutate(environment = "PSU2025")

cly2025_raw <- gxe_all_data %>%
  filter(environment == "CLY2025") %>%
  calculate_raw_effects(shared_phenotypes) %>%
  mutate(environment = "CLY2025")

all_raw_effects <- bind_rows(psu2022_raw, psu2025_raw, cly2025_raw)

cat("=== RAW EFFECT SIZES (INV4M - CTRL) ===\n\n")

## === RAW EFFECT SIZES (INV4M - CTRL) ===

print(all_raw_effects %>%
  select(environment, trait, mean_value_CTRL, mean_value_INV4M, 
         raw_effect, se_diff))

## # A tibble: 15 × 6
##    environment trait mean_value_CTRL mean_value_INV4M raw_effect se_diff
##    <chr>       <chr>           <dbl>            <dbl>      <dbl>   <dbl>
##  1 PSU2022     DTA              73.9             72.5     -1.30    0.266
##  2 PSU2022     DTS              75.2             74.2     -1.05    0.222
##  3 PSU2022     GDDA            886.             867.     -19.5     4.17 
##  4 PSU2022     GDDS            905.             891.     -14.9     3.24 
##  5 PSU2022     PH              163.             169.       6.48    1.11 
##  6 PSU2025     DTA              77.8             78.3      0.577   0.300
##  7 PSU2025     DTS              78.2             78.4      0.211   0.362
##  8 PSU2025     GDDA            870.             877.       7.29    3.73 
##  9 PSU2025     GDDS            876.             878.       2.35    4.54 
## 10 PSU2025     PH              232.             240.       7.69    1.22 
## 11 CLY2025     DTA              77.7             77.3     -0.453   0.207
## 12 CLY2025     DTS              78.4             77.6     -0.725   0.229
## 13 CLY2025     GDDA            877.             870.      -7.01    3.24 
## 14 CLY2025     GDDS            888.             876.     -11.3     3.63 
## 15 CLY2025     PH              187.             183.      -4.71    1.30

across_env_raw <- gxe_all_data %>%
  calculate_raw_effects(shared_phenotypes)

cat("\n=== MAIN EFFECT ACROSS ALL ENVIRONMENTS ===\n")

## 
## === MAIN EFFECT ACROSS ALL ENVIRONMENTS ===

print(across_env_raw %>%
  select(trait, mean_value_CTRL, mean_value_INV4M, raw_effect, se_diff))

## # A tibble: 5 × 5
##   trait mean_value_CTRL mean_value_INV4M raw_effect se_diff
##   <chr>           <dbl>            <dbl>      <dbl>   <dbl>
## 1 DTA              75.8             75.2     -0.620   0.443
## 2 DTS              76.7             76.1     -0.655   0.348
## 3 GDDA            880.             870.      -9.70    2.60 
## 4 GDDS            894.             884.      -9.70    2.80 
## 5 PH              186.             190.       3.99    5.17

write_csv(all_raw_effects, "three_env_raw_effect_sizes.csv")
write_csv(across_env_raw, "main_effect_raw_sizes.csv")

Temperature Sensitivity Analysis

hourly_temp <- read_csv("hourly_temp_100_DAP.csv")

# Calculate temperature metrics per environment
temp_metrics <- hourly_temp %>%
  group_by(environment) %>%
  summarise(
    mean_temp = mean(T2M, na.rm = TRUE),
    min_temp = min(T2M, na.rm = TRUE),
    max_temp = max(T2M, na.rm = TRUE),
    temp_range = max_temp - min_temp,
    gdd_total = sum(pmax(T2M - 10, 0) / 24, na.rm = TRUE),  # base 10°C
    .groups = "drop"
  )

print(temp_metrics)

## # A tibble: 3 × 6
##   environment mean_temp min_temp max_temp temp_range gdd_total
##   <chr>           <dbl>    <dbl>    <dbl>      <dbl>     <dbl>
## 1 CLY2025          22.7     2.44     37.1       34.7     1290.
## 2 PSU2022          22.2     7.35     35.9       28.5     1235.
## 3 PSU2025          21.6     3.9      35.8       31.9     1177.

Join temperature data with phenotypes

pheno_temp <- gxe_all_data %>%
  left_join(temp_metrics, by = "environment")

# Test if genotype modifies temperature sensitivity
# Key hypothesis: interaction term tests if inv4m changes the slope
model_dta_temp <- lm(DTA ~ mean_temp * genotype, data = pheno_temp)
model_dts_temp <- lm(DTS ~ mean_temp * genotype, data = pheno_temp)
model_ph_temp <- lm(PH ~ mean_temp * genotype, data = pheno_temp)

cat("=== TEMPERATURE SENSITIVITY TESTS ===\n\n")

## === TEMPERATURE SENSITIVITY TESTS ===

cat("Days to Anthesis:\n")

## Days to Anthesis:

print(summary(model_dta_temp))

## 
## Call:
## lm(formula = DTA ~ mean_temp * genotype, data = pheno_temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2046 -2.3450  0.4311  2.1701  4.0561 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              90.8741    17.1426   5.301 5.09e-07 ***
## mean_temp                -0.6805     0.7733  -0.880    0.381    
## genotypeINV4M            24.4039    24.2433   1.007    0.316    
## mean_temp:genotypeINV4M  -1.1290     1.0936  -1.032    0.304    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.467 on 124 degrees of freedom
## Multiple R-squared:  0.06252,    Adjusted R-squared:  0.03983 
## F-statistic: 2.756 on 3 and 124 DF,  p-value: 0.04526

cat("\nInterpretation: genotype:mean_temp interaction tests if inv4m changes temperature response\n")

## 
## Interpretation: genotype:mean_temp interaction tests if inv4m changes temperature response

cat("\n\nDays to Silking:\n")

## 
## 
## Days to Silking:

print(summary(model_dts_temp))

## 
## Call:
## lm(formula = DTS ~ mean_temp * genotype, data = pheno_temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4475 -1.5751 -0.1006  1.7461  3.7035 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              84.7594    13.5049   6.276 5.31e-09 ***
## mean_temp                -0.3616     0.6092  -0.594    0.554    
## genotypeINV4M            20.4850    19.0989   1.073    0.286    
## mean_temp:genotypeINV4M  -0.9538     0.8615  -1.107    0.270    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.944 on 124 degrees of freedom
## Multiple R-squared:  0.06519,    Adjusted R-squared:  0.04257 
## F-statistic: 2.882 on 3 and 124 DF,  p-value: 0.03857

cat("\n\nPlant Height:\n")

## 
## 
## Plant Height:

print(summary(model_ph_temp))

## 
## Call:
## lm(formula = PH ~ mean_temp * genotype, data = pheno_temp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.125 -19.008  -0.172  19.160  32.144 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1234.657    141.718   8.712 1.59e-14 ***
## mean_temp                -47.298      6.393  -7.399 1.81e-11 ***
## genotypeINV4M            227.301    200.420   1.134    0.259    
## mean_temp:genotypeINV4M  -10.075      9.041  -1.114    0.267    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 20.4 on 124 degrees of freedom
## Multiple R-squared:  0.524,  Adjusted R-squared:  0.5125 
## F-statistic:  45.5 on 3 and 124 DF,  p-value: < 2.2e-16

# Extract interaction effects
temp_sens_summary <- tibble(
  trait = c("DTA", "DTS", "PH"),
  interaction_coef = c(
    coef(model_dta_temp)["mean_temp:genotypeINV4M"],
    coef(model_dts_temp)["mean_temp:genotypeINV4M"],
    coef(model_ph_temp)["mean_temp:genotypeINV4M"]
  ),
  interaction_se = c(
    summary(model_dta_temp)$coefficients["mean_temp:genotypeINV4M", "Std. Error"],
    summary(model_dts_temp)$coefficients["mean_temp:genotypeINV4M", "Std. Error"],
    summary(model_ph_temp)$coefficients["mean_temp:genotypeINV4M", "Std. Error"]
  ),
  interaction_p = c(
    summary(model_dta_temp)$coefficients["mean_temp:genotypeINV4M", "Pr(>|t|)"],
    summary(model_dts_temp)$coefficients["mean_temp:genotypeINV4M", "Pr(>|t|)"],
    summary(model_ph_temp)$coefficients["mean_temp:genotypeINV4M", "Pr(>|t|)"]
  )
)

print(temp_sens_summary)

## # A tibble: 3 × 4
##   trait interaction_coef interaction_se interaction_p
##   <chr>            <dbl>          <dbl>         <dbl>
## 1 DTA             -1.13           1.09          0.304
## 2 DTS             -0.954          0.862         0.270
## 3 PH             -10.1            9.04          0.267

write_csv(temp_sens_summary, "temperature_sensitivity_results.csv")

Visualize temperature sensitivity (reaction norms)

reaction_norm_data <- pheno_temp %>%
  pivot_longer(cols = c(DTA, DTS, PH), 
               names_to = "trait", values_to = "value") %>%
  group_by(environment, genotype, trait, mean_temp) %>%
  summarise(mean_value = mean(value, na.rm = TRUE),
            se_value = sd(value, na.rm = TRUE) / sqrt(n()),
            .groups = "drop")

ggplot(reaction_norm_data, 
       aes(x = mean_temp, y = mean_value, color = genotype, group = genotype)) +
  geom_point(size = 3) +
  geom_line(size = 1.2) +
  geom_errorbar(aes(ymin = mean_value - se_value, 
                    ymax = mean_value + se_value), width = 0.2) +
  scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
  facet_wrap(~ trait, scales = "free_y") +
  labs(
    title = "Temperature Sensitivity: Genotype Reaction Norms",
    x = "Mean Temperature (°C)",
    y = "Trait Value",
    color = "Genotype"
  ) +
  theme_classic(base_size = 14)

8. Session Information

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.5     viridisLite_0.4.2 patchwork_1.3.2   broom_1.0.9      
##  [5] effectsize_1.0.1  emmeans_1.11.2    ggbeeswarm_0.7.2  ggtext_0.1.2     
##  [9] ggpubr_0.6.1      rstatix_0.7.2     lubridate_1.9.4   forcats_1.0.0    
## [13] stringr_1.5.1     dplyr_1.1.4       purrr_1.1.0       readr_2.1.5      
## [17] tidyr_1.3.1       tibble_3.3.0      ggplot2_3.5.2     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       beeswarm_0.4.0     xfun_0.53          bslib_0.9.0       
##  [5] bayestestR_0.16.1  insight_1.4.0      tzdb_0.5.0         vctrs_0.6.5       
##  [9] tools_4.5.1        generics_0.1.4     parallel_4.5.1     datawizard_1.2.0  
## [13] pkgconfig_2.0.3    RColorBrewer_1.1-3 lifecycle_1.0.4    compiler_4.5.1    
## [17] farver_2.1.2       carData_3.0-5      litedown_0.7       vipor_0.4.7       
## [21] htmltools_0.5.8.1  sass_0.4.10        yaml_2.3.10        Formula_1.2-5     
## [25] crayon_1.5.3       pillar_1.11.0      car_3.1-3          jquerylib_0.1.4   
## [29] cachem_1.1.0       boot_1.3-31        abind_1.4-8        commonmark_2.0.0  
## [33] tidyselect_1.2.1   digest_0.6.37      mvtnorm_1.3-3      stringi_1.8.7     
## [37] labeling_0.4.3     fastmap_1.2.0      grid_4.5.1         cli_3.6.5         
## [41] magrittr_2.0.3     utf8_1.2.6         withr_3.0.2        scales_1.4.0      
## [45] backports_1.5.0    bit64_4.6.0-1      timechange_0.3.0   estimability_1.5.1
## [49] rmarkdown_2.29     bit_4.6.0          gridExtra_2.3      ggsignif_0.6.4    
## [53] hms_1.1.3          evaluate_1.0.4     knitr_1.50         parameters_0.28.0 
## [57] markdown_2.0       rlang_1.1.6        gridtext_0.1.5     Rcpp_1.1.0        
## [61] xtable_1.8-4       glue_1.8.0         xml2_1.4.0         vroom_1.6.5       
## [65] rstudioapi_0.17.1  jsonlite_2.0.0     R6_2.6.1

Genotype × Environment Analysis of inv4m: PSU2022, PSU2025, CLY2025

Analysis of inv4m GxE Interactions

October 02, 2025