Genotype × Environment Analysis of inv4m Introgression Effects: CLY25 vs PSU25

Overview

This analysis examines Genotype × Environment (GxE) interactions for the inv4m introgression across two environments:

PSU2025: Pennsylvania environment (northern)
CLY2025: North Carolina environment (southern)

Research Questions: 1. Does inv4m show consistent effects across environments? 2. Which traits (PH, DTA, DTS) exhibit significant GxE interactions? 3. How does genetic background (donor) modify inv4m effects?

1. Load Libraries and Data

library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggtext)
library(ggbeeswarm)
library(emmeans)
library(effectsize)
library(broom)
library(patchwork)
library(viridis)

# Load spatially corrected datasets
psu2025 <- read.csv("PSU2025_spatially_corrected_phenotypes.csv", stringsAsFactors = FALSE)
cly2025 <- read.csv("CLY2025_spatially_corrected_phenotypes.csv", stringsAsFactors = FALSE)

# Check data structure
cat("PSU2025 dimensions:", dim(psu2025), "\n")

## PSU2025 dimensions: 64 8

cat("CLY2025 dimensions:", dim(cly2025), "\n")

## CLY2025 dimensions: 64 17

cat("PSU2025 genotypes:", paste(unique(psu2025$genotype), collapse = ", "), "\n")

## PSU2025 genotypes: Inv4m, CTRL

cat("CLY2025 genotypes:", paste(unique(cly2025$genotype), collapse = ", "), "\n")

## CLY2025 genotypes: INV4M, CTRL

cat("PSU2025 donors:", paste(unique(psu2025$donor), collapse = ", "), "\n")

## PSU2025 donors: TMEX, MI21

cat("CLY2025 donors:", paste(unique(cly2025$donor), collapse = ", "), "\n")

## CLY2025 donors: TMEX, MI21

# Standardize donor names if needed
# Check if we need to standardize donor names
psu2025_donors_orig <- unique(psu2025$donor)
cly2025_donors_orig <- unique(cly2025$donor)

cat("\nChecking for donor name standardization needs...\n")

## 
## Checking for donor name standardization needs...

cat("PSU2025 original donors:", paste(psu2025_donors_orig, collapse = ", "), "\n")

## PSU2025 original donors: TMEX, MI21

cat("CLY2025 original donors:", paste(cly2025_donors_orig, collapse = ", "), "\n")

## CLY2025 original donors: TMEX, MI21

# Apply any necessary donor standardization
# Donors are already standardized: TMEX, MI21 in both datasets
# No donor name changes needed

# Standardize genotype names - PSU2025 uses "Inv4m", CLY2025 uses "INV4M"
# Let's standardize both to "INV4M"
psu2025 <- psu2025 %>%
  mutate(
    genotype = case_when(
      genotype == "Inv4m" ~ "INV4M",
      genotype == "CTRL" ~ "CTRL",
      TRUE ~ genotype
    )
  )

# CLY2025 genotypes are already correct (INV4M, CTRL)

# Report final names after standardization
cat("\nAfter standardization:\n")

## 
## After standardization:

cat("PSU2025 genotypes:", paste(unique(psu2025$genotype), collapse = ", "), "\n")

## PSU2025 genotypes: INV4M, CTRL

cat("CLY2025 genotypes:", paste(unique(cly2025$genotype), collapse = ", "), "\n")

## CLY2025 genotypes: INV4M, CTRL

cat("PSU2025 donors:", paste(unique(psu2025$donor), collapse = ", "), "\n")

## PSU2025 donors: TMEX, MI21

cat("CLY2025 donors:", paste(unique(cly2025$donor), collapse = ", "), "\n")

## CLY2025 donors: TMEX, MI21

# Define shared phenotypes
shared_phenotypes <- c("PH", "DTA", "DTS")

2. Phase 1: Within-Environment Analysis

2.1 PSU2025 Analysis by Donor

# PSU2025 analysis for each donor separately
psu2025_results_list <- list()

# Process each donor
for (current_donor in c("TMEX", "MI21")) {
  cat("\n=== PSU2025", current_donor, "Analysis ===\n")
  
  # Filter PSU2025 to current donor and relevant genotypes
  psu2025_donor <- psu2025 %>%
    filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
    filter(complete.cases(.))
  
  if (nrow(psu2025_donor) < 6) {
    cat("Insufficient data for", current_donor, "(n =", nrow(psu2025_donor), ")\n")
    next
  }
  
  cat("Sample size:", nrow(psu2025_donor), "plots\n")
  print(table(psu2025_donor$genotype))
  
  # Statistical analysis
  psu_results <- psu2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    t_test(value ~ genotype) %>%
    adjust_pvalue(method = "fdr") %>%
    add_significance("p.adj")
  
  # Calculate effect sizes (Cohen's d) - using your original approach
  psu_cohens_d <- psu2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", ci = TRUE)
  
  # Combine results
  psu_summary <- psu_results %>%
    left_join(psu_cohens_d %>% select(trait, effsize, magnitude), by = "trait") %>%
    mutate(donor = current_donor, environment = "PSU2025")
  
  psu2025_results_list[[current_donor]] <- psu_summary
  
  cat("Results for", current_donor, ":\n")
  print(psu_summary)
}

## 
## === PSU2025 TMEX Analysis ===
## Sample size: 32 plots
## 
##  CTRL INV4M 
##    16    16 
## Results for TMEX :
## # A tibble: 3 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df        p    p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>    <dbl>    <dbl>
## 1 DTA   value CTRL   INV4M     16    16      2.08  30.0 4.6 e- 2 6.9 e- 2
## 2 DTS   value CTRL   INV4M     16    16      1.52  30.0 1.39e- 1 1.39e- 1
## 3 PH    value CTRL   INV4M     16    16    -17.7   28.7 5.28e-17 1.58e-16
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>
## 
## === PSU2025 MI21 Analysis ===
## Sample size: 32 plots
## 
##  CTRL INV4M 
##    16    16 
## Results for MI21 :
## # A tibble: 3 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df           p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>       <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     16    16    -1.92   24.7 0.0663      0.0994    
## 2 DTS   value CTRL   INV4M     16    16    -0.584  25.8 0.564       0.564     
## 3 PH    value CTRL   INV4M     16    16    -6.30   27.9 0.000000847 0.00000254
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

# Combine all PSU2025 results
psu2025_all_results <- bind_rows(psu2025_results_list)

2.2 CLY2025 Analysis by Donor

# CLY2025 analysis for each donor separately  
cly2025_results_list <- list()

# Process each donor
for (current_donor in c("TMEX", "MI21")) {
  cat("\n=== CLY2025", current_donor, "Analysis ===\n")
  
  # Filter CLY2025 to current donor and relevant genotypes
  cly2025_donor <- cly2025 %>%
    filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
    filter(complete.cases(.))
  
  if (nrow(cly2025_donor) < 6) {
    cat("Insufficient data for", current_donor, "(n =", nrow(cly2025_donor), ")\n")
    next
  }
  
  cat("Sample size:", nrow(cly2025_donor), "plots\n")
  print(table(cly2025_donor$genotype))
  
  # Statistical analysis
  cly_results <- cly2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    t_test(value ~ genotype) %>%
    adjust_pvalue(method = "fdr") %>%
    add_significance("p.adj")
  
  # Calculate effect sizes (Cohen's d)
  cly_cohens_d <- cly2025_donor %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait) %>%
    rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", ci = TRUE)
  
  # Combine results
  cly_summary <- cly_results %>%
    left_join(cly_cohens_d %>% select(trait, effsize, magnitude), by = "trait") %>%
    mutate(donor = current_donor, environment = "CLY2025")
  
  cly2025_results_list[[current_donor]] <- cly_summary
  
  cat("Results for", current_donor, ":\n")
  print(cly_summary)
}

## 
## === CLY2025 TMEX Analysis ===
## Sample size: 32 plots
## 
##  CTRL INV4M 
##    16    16 
## Results for TMEX :
## # A tibble: 3 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df             p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>         <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     16    16     -8.05  29.3 0.00000000661    1.98e-8
## 2 DTS   value CTRL   INV4M     16    16     -7.86  26.2 0.000000023      3.45e-8
## 3 PH    value CTRL   INV4M     16    16     -4.70  25.5 0.0000766        7.66e-5
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>
## 
## === CLY2025 MI21 Analysis ===
## Sample size: 32 plots
## 
##  CTRL INV4M 
##    16    16 
## Results for MI21 :
## # A tibble: 3 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df       p   p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 DTA   value CTRL   INV4M     16    16      2.19  29.7 0.0362  0.0362 
## 2 DTS   value CTRL   INV4M     16    16      3.16  30.0 0.00357 0.00536
## 3 PH    value CTRL   INV4M     16    16      3.62  29.7 0.00109 0.00327
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

# Combine all CLY2025 results
cly2025_all_results <- bind_rows(cly2025_results_list)

3. Phase 2: Cross-Environment GxE Analysis

3.1 Identify Common Donors for GxE Analysis

# Find donors present in both environments
common_donors <- intersect(names(psu2025_results_list), names(cly2025_results_list))

cat("Common donors for GxE analysis:", paste(common_donors, collapse = ", "), "\n")

## Common donors for GxE analysis: TMEX, MI21

if (length(common_donors) == 0) {
  stop("No common donors found between PSU2025 and CLY2025")
}

3.2 Data Integration for GxE Analysis

# Combine datasets for GxE analysis
gxe_data_list <- list()

for (donor in common_donors) {
  cat("\n=== Preparing GxE data for", donor, "===\n")
  
  # PSU2025 data
  psu_donor <- psu2025 %>%
    filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(environment = "PSU2025", donor = donor) %>%
    filter(complete.cases(.))
  
  # CLY2025 data  
  cly_donor <- cly2025 %>%
    filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
    select(plotId, genotype, all_of(shared_phenotypes)) %>%
    mutate(environment = "CLY2025", donor = donor) %>%
    filter(complete.cases(.))
  
  # Combine
  donor_gxe <- bind_rows(psu_donor, cly_donor) %>%
    mutate(
      genotype = factor(genotype, levels = c("CTRL", "INV4M")),
      environment = factor(environment, levels = c("CLY2025", "PSU2025"))
    )
  
  gxe_data_list[[donor]] <- donor_gxe
  
  cat("Sample sizes by environment and genotype:\n")
  print(table(donor_gxe$environment, donor_gxe$genotype))
}

## 
## === Preparing GxE data for TMEX ===
## Sample sizes by environment and genotype:
##          
##           CTRL INV4M
##   CLY2025   16    16
##   PSU2025   16    16
## 
## === Preparing GxE data for MI21 ===
## Sample sizes by environment and genotype:
##          
##           CTRL INV4M
##   CLY2025   16    16
##   PSU2025   16    16

# Combine all donors for overall analysis
gxe_all_data <- bind_rows(gxe_data_list, .id = "donor")

3.3 GxE Statistical Analysis by Donor

gxe_results_by_donor <- list()

for (donor in common_donors) {
  cat("\n=== GxE Analysis for", donor, "===\n")
  
  donor_data <- gxe_data_list[[donor]]
  gxe_anova_results <- list()
  gxe_effect_sizes <- list()
  gxe_emmeans_results <- list()
  
  for (trait in shared_phenotypes) {
    cat("\n--- Analyzing", trait, "for", donor, "---\n")
    
    # Prepare data for this trait
    trait_data <- donor_data %>%
      select(plotId, genotype, environment, value = all_of(trait)) %>%
      filter(!is.na(value))
    
    if (nrow(trait_data) < 8) {
      cat("Insufficient data for", trait, "in", donor, "\n")
      next
    }
    
    # Two-way ANOVA: Genotype × Environment
    anova_model <- aov(value ~ genotype * environment, data = trait_data)
    anova_summary <- summary(anova_model)
    
    # Effect sizes (eta-squared)
    effect_sizes <- eta_squared(anova_model)
    
    # Store results
    gxe_anova_results[[trait]] <- anova_summary
    gxe_effect_sizes[[trait]] <- effect_sizes
    
    # Print ANOVA results
    cat("ANOVA Results for", trait, "in", donor, ":\n")
    print(anova_summary)
    
    # Post-hoc analysis with emmeans
    if (length(unique(trait_data$environment)) > 1 && length(unique(trait_data$genotype)) > 1) {
      emmeans_result <- emmeans(anova_model, ~ genotype | environment)
      emmeans_contrast <- pairs(emmeans_result)
      
      gxe_emmeans_results[[trait]] <- list(
        means = emmeans_result,
        contrasts = emmeans_contrast
      )
      
      cat("Estimated Marginal Means:\n")
      print(emmeans_result)
    }
  }
  
  gxe_results_by_donor[[donor]] <- list(
    anova = gxe_anova_results,
    effect_sizes = gxe_effect_sizes,
    emmeans = gxe_emmeans_results
  )
}

## 
## === GxE Analysis for TMEX ===
## 
## --- Analyzing PH for TMEX ---
## ANOVA Results for PH in TMEX :
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype              1   4943    4943  182.12  < 2e-16 ***
## environment           1  31579   31579 1163.50  < 2e-16 ***
## genotype:environment  1    840     840   30.94 6.53e-07 ***
## Residuals            60   1628      27                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
##  genotype emmean  SE df lower.CL upper.CL
##  CTRL        194 1.3 60      191      196
##  INV4M       204 1.3 60      202      207
## 
## environment = PSU2025:
##  genotype emmean  SE df lower.CL upper.CL
##  CTRL        231 1.3 60      228      234
##  INV4M       256 1.3 60      253      258
## 
## Confidence level used: 0.95 
## 
## --- Analyzing DTA for TMEX ---
## ANOVA Results for DTA in TMEX :
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype              1   3.63   3.633   5.355 0.024099 *  
## environment           1   9.74   9.742  14.360 0.000352 ***
## genotype:environment  1  23.59  23.586  34.764 1.83e-07 ***
## Residuals            60  40.71   0.678                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       77.7 0.206 60     77.2     78.1
##  INV4M      79.3 0.206 60     78.9     79.8
## 
## environment = PSU2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       79.7 0.206 60     79.2     80.1
##  INV4M      78.9 0.206 60     78.5     79.3
## 
## Confidence level used: 0.95 
## 
## --- Analyzing DTS for TMEX ---
## ANOVA Results for DTS in TMEX :
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype              1   4.35   4.353   3.846   0.0545 .  
## environment           1   7.90   7.901   6.980   0.0105 *  
## genotype:environment  1  25.15  25.155  22.224 1.49e-05 ***
## Residuals            60  67.91   1.132                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       78.8 0.266 60     78.3     79.3
##  INV4M      80.6 0.266 60     80.0     81.1
## 
## environment = PSU2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       80.8 0.266 60     80.2     81.3
##  INV4M      80.0 0.266 60     79.5     80.6
## 
## Confidence level used: 0.95 
## 
## === GxE Analysis for MI21 ===
## 
## --- Analyzing PH for MI21 ---
## ANOVA Results for PH in MI21 :
##                      Df Sum Sq Mean Sq  F value   Pr(>F)    
## genotype              1     36      36    2.785      0.1    
## environment           1  42021   42021 3296.024  < 2e-16 ***
## genotype:environment  1    615     615   48.223 3.11e-09 ***
## Residuals            60    765      13                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL        187 0.893 60      185      189
##  INV4M       183 0.893 60      181      184
## 
## environment = PSU2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL        232 0.893 60      230      234
##  INV4M       240 0.893 60      238      242
## 
## Confidence level used: 0.95 
## 
## --- Analyzing DTA for MI21 ---
## ANOVA Results for DTA in MI21 :
##                      Df Sum Sq Mean Sq F value  Pr(>F)   
## genotype              1   0.06   0.061   0.115 0.73557   
## environment           1   5.07   5.073   9.556 0.00302 **
## genotype:environment  1   4.24   4.241   7.989 0.00639 **
## Residuals            60  31.85   0.531                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL      77.71 0.182 60    77.34    78.07
##  INV4M     77.26 0.182 60    76.89    77.62
## 
## environment = PSU2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL      77.76 0.182 60    77.39    78.12
##  INV4M     78.33 0.182 60    77.97    78.70
## 
## Confidence level used: 0.95 
## 
## --- Analyzing DTS for MI21 ---
## ANOVA Results for DTS in MI21 :
##                      Df Sum Sq Mean Sq F value Pr(>F)  
## genotype              1   1.06   1.058   1.443 0.2344  
## environment           1   1.50   1.498   2.043 0.1581  
## genotype:environment  1   3.51   3.508   4.783 0.0326 *
## Residuals            60  44.00   0.733                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       78.4 0.214 60     77.9     78.8
##  INV4M      77.6 0.214 60     77.2     78.1
## 
## environment = PSU2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       78.2 0.214 60     77.8     78.6
##  INV4M      78.4 0.214 60     78.0     78.8
## 
## Confidence level used: 0.95

3.4 GxE Interaction Summary

# Extract interaction effects for each donor
gxe_interaction_summary_list <- list()

for (donor in common_donors) {
  if (!donor %in% names(gxe_results_by_donor)) next
  
  donor_results <- gxe_results_by_donor[[donor]]
  
  interaction_summary <- data.frame(
    donor = donor,
    trait = shared_phenotypes,
    interaction_p = NA,
    interaction_eta2 = NA,
    genotype_p = NA,
    environment_p = NA
  )
  
  for (i in seq_along(shared_phenotypes)) {
    trait <- shared_phenotypes[i]
    
    if (trait %in% names(donor_results$anova)) {
      # Extract p-values from ANOVA
      anova_df <- as.data.frame(donor_results$anova[[trait]][[1]])
      
      if ("genotype:environment" %in% rownames(anova_df)) {
        interaction_summary$interaction_p[i] <- anova_df["genotype:environment", "Pr(>F)"]
      }
      if ("genotype" %in% rownames(anova_df)) {
        interaction_summary$genotype_p[i] <- anova_df["genotype", "Pr(>F)"]
      }
      if ("environment" %in% rownames(anova_df)) {
        interaction_summary$environment_p[i] <- anova_df["environment", "Pr(>F)"]
      }
      
      # Extract eta-squared for interaction
      if (trait %in% names(donor_results$effect_sizes)) {
        interaction_row <- donor_results$effect_sizes[[trait]] %>%
          filter(Parameter == "genotype:environment")
        interaction_summary$interaction_eta2[i] <- 
          if(nrow(interaction_row) > 0) interaction_row$Eta2[1] else NA
      }
    }
  }
  
  # Add significance indicators
  interaction_summary <- interaction_summary %>%
    mutate(
      interaction_sig = case_when(
        interaction_p < 0.001 ~ "***",
        interaction_p < 0.01 ~ "**", 
        interaction_p < 0.05 ~ "*",
        TRUE ~ "ns"
      ),
      genotype_sig = case_when(
        genotype_p < 0.001 ~ "***",
        genotype_p < 0.01 ~ "**",
        genotype_p < 0.05 ~ "*", 
        TRUE ~ "ns"
      ),
      environment_sig = case_when(
        environment_p < 0.001 ~ "***",
        environment_p < 0.01 ~ "**",
        environment_p < 0.05 ~ "*",
        TRUE ~ "ns"
      )
    )
  
  gxe_interaction_summary_list[[donor]] <- interaction_summary
  
  cat("\nGxE Interaction Summary for", donor, ":\n")
  print(interaction_summary)
}

## 
## GxE Interaction Summary for TMEX :
##   donor trait interaction_p interaction_eta2 genotype_p environment_p
## 1  TMEX    PH  6.530813e-07        0.3401953         NA            NA
## 2  TMEX   DTA  1.830926e-07        0.3668486         NA            NA
## 3  TMEX   DTS  1.487395e-05        0.2702889         NA            NA
##   interaction_sig genotype_sig environment_sig
## 1             ***           ns              ns
## 2             ***           ns              ns
## 3             ***           ns              ns
## 
## GxE Interaction Summary for MI21 :
##   donor trait interaction_p interaction_eta2 genotype_p environment_p
## 1  MI21    PH  3.112875e-09        0.4455912         NA            NA
## 2  MI21   DTA  6.385088e-03        0.1175011         NA            NA
## 3  MI21   DTS  3.264420e-02        0.0738368         NA            NA
##   interaction_sig genotype_sig environment_sig
## 1             ***           ns              ns
## 2              **           ns              ns
## 3               *           ns              ns

# Combine all donor summaries
gxe_all_interactions <- bind_rows(gxe_interaction_summary_list)

4. Phase 3: Comprehensive Visualization

4.1 Interaction Plots by Donor

# Plot theme
pheno_theme <- theme_classic2(base_size = 16) +
  theme(
    plot.title = element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text(face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = element_markdown(size = 14),
    legend.position = "none"
  )

for (donor in common_donors) {
  cat("\n=== Creating interaction plots for", donor, "===\n")
  
  if (!donor %in% names(gxe_data_list)) next
  
  # Calculate means and SE for interaction plots
  interaction_stats <- gxe_data_list[[donor]] %>%
    pivot_longer(cols = all_of(shared_phenotypes), 
                 names_to = "trait", values_to = "value") %>%
    group_by(trait, genotype, environment) %>%
    summarise(
      mean_value = mean(value, na.rm = TRUE),
      se_value = sd(value, na.rm = TRUE) / sqrt(n()),
      n = n(),
      .groups = "drop"
    ) %>%
    mutate(
      trait = factor(trait, levels = shared_phenotypes),
      environment = factor(environment, levels = c("CLY2025", "PSU2025"))
    )
  
  # Create interaction plot
  interaction_plot <- ggplot(interaction_stats, 
                            aes(x = environment, y = mean_value, 
                                color = genotype, group = genotype)) +
    geom_line(size = 1.2, alpha = 0.8) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = mean_value - se_value, ymax = mean_value + se_value),
                  width = 0.1, size = 1) +
    scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
    facet_wrap(~ trait, scales = "free_y", ncol = 3) +
    labs(
      title = paste("**Genotype × Environment Interaction Effects:**", donor, "**donor**"),
      subtitle = "Genotype means ± SE across environments",
      x = "Environment",
      y = "Trait Value",
      color = "Genotype"
    ) +
    pheno_theme +
    theme(legend.position = "top")
  
  print(interaction_plot)
}

## 
## === Creating interaction plots for TMEX ===

## 
## === Creating interaction plots for MI21 ===

4.2 Effect Size Comparisons

# Combine environment-specific effect sizes from both analyses
all_effect_sizes <- bind_rows(
  psu2025_all_results %>% select(trait, donor, environment, magnitude, effsize),
  cly2025_all_results %>% select(trait, donor, environment, magnitude, effsize)
) %>%
  filter(donor %in% common_donors) %>%
  mutate(
    trait = factor(trait, levels = shared_phenotypes),
    environment = factor(environment, levels = c("CLY2025", "PSU2025"))
  )

# Forest plot of effect sizes
forest_plot <- ggplot(all_effect_sizes, 
                     aes(x = effsize, y = interaction(trait, donor), 
                         color = environment)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
  geom_point(size = 3, position = position_dodge(width = 0.4)) +
  geom_text(aes(label = paste0("(", magnitude, ")")),
            position = position_dodge(width = 0.4),
            hjust = -0.2, size = 3) +
  scale_color_manual(values = c("CLY2025" = "tomato", "PSU2025" = "royalblue")) +
  labs(
    title = "**Effect Sizes (Cohen's d) of inv4m Across Environments**",
    subtitle = "Positive values indicate Inv4m > CTRL",
    x = "Cohen's d (Effect Size)",
    y = "Trait × Donor",
    color = "Environment"
  ) +
  pheno_theme +
  theme(
    legend.position = "top",
    axis.text.y = element_text(size = 10)
  )

print(forest_plot)

4.3 Combined Interaction Heatmap

# Create heatmap of interaction p-values
if (nrow(gxe_all_interactions) > 0) {
  heatmap_data <- gxe_all_interactions %>%
    select(donor, trait, interaction_p) %>%
    mutate(
      log_p = -log10(interaction_p),
      significance = case_when(
        interaction_p < 0.001 ~ "p < 0.001",
        interaction_p < 0.01 ~ "p < 0.01",
        interaction_p < 0.05 ~ "p < 0.05",
        TRUE ~ "ns"
      )
    )
  
  heatmap_plot <- ggplot(heatmap_data, 
                        aes(x = trait, y = donor, fill = log_p)) +
    geom_tile(color = "white", size = 0.5) +
    geom_text(aes(label = significance), size = 3, fontface = "bold") +
    scale_fill_viridis_c(name = "-log10(p)", option = "plasma") +
    labs(
      title = "**GxE Interaction Significance Heatmap**",
      subtitle = "Darker colors indicate stronger evidence for GxE interactions",
      x = "Trait",
      y = "Donor"
    ) +
    pheno_theme +
    theme(
      axis.text.x = element_text(angle = 0),
      legend.position = "right"
    )
  
  print(heatmap_plot)
}

5. Summary and Export Results

5.1 Summary Tables

cat("=== FINAL SUMMARY ===\n\n")

## === FINAL SUMMARY ===

cat("1. PSU2025 Results by Donor:\n")

## 1. PSU2025 Results by Donor:

if (nrow(psu2025_all_results) > 0) {
  print(psu2025_all_results)
} else {
  cat("No PSU2025 results available\n")
}

## # A tibble: 6 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df        p    p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>    <dbl>    <dbl>
## 1 DTA   value CTRL   INV4M     16    16     2.08   30.0 4.6 e- 2 6.9 e- 2
## 2 DTS   value CTRL   INV4M     16    16     1.52   30.0 1.39e- 1 1.39e- 1
## 3 PH    value CTRL   INV4M     16    16   -17.7    28.7 5.28e-17 1.58e-16
## 4 DTA   value CTRL   INV4M     16    16    -1.92   24.7 6.63e- 2 9.94e- 2
## 5 DTS   value CTRL   INV4M     16    16    -0.584  25.8 5.64e- 1 5.64e- 1
## 6 PH    value CTRL   INV4M     16    16    -6.30   27.9 8.47e- 7 2.54e- 6
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

cat("\n2. CLY2025 Results by Donor:\n")

## 
## 2. CLY2025 Results by Donor:

if (nrow(cly2025_all_results) > 0) {
  print(cly2025_all_results)
} else {
  cat("No CLY2025 results available\n")
}

## # A tibble: 6 × 15
##   trait .y.   group1 group2    n1    n2 statistic    df             p      p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>         <dbl>      <dbl>
## 1 DTA   value CTRL   INV4M     16    16     -8.05  29.3 0.00000000661    1.98e-8
## 2 DTS   value CTRL   INV4M     16    16     -7.86  26.2 0.000000023      3.45e-8
## 3 PH    value CTRL   INV4M     16    16     -4.70  25.5 0.0000766        7.66e-5
## 4 DTA   value CTRL   INV4M     16    16      2.19  29.7 0.0362           3.62e-2
## 5 DTS   value CTRL   INV4M     16    16      3.16  30.0 0.00357          5.36e-3
## 6 PH    value CTRL   INV4M     16    16      3.62  29.7 0.00109          3.27e-3
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## #   donor <chr>, environment <chr>

cat("\n3. GxE Interaction Effects:\n")

## 
## 3. GxE Interaction Effects:

if (nrow(gxe_all_interactions) > 0) {
  print(gxe_all_interactions)
} else {
  cat("No GxE interaction results available\n")
}

##   donor trait interaction_p interaction_eta2 genotype_p environment_p
## 1  TMEX    PH  6.530813e-07        0.3401953         NA            NA
## 2  TMEX   DTA  1.830926e-07        0.3668486         NA            NA
## 3  TMEX   DTS  1.487395e-05        0.2702889         NA            NA
## 4  MI21    PH  3.112875e-09        0.4455912         NA            NA
## 5  MI21   DTA  6.385088e-03        0.1175011         NA            NA
## 6  MI21   DTS  3.264420e-02        0.0738368         NA            NA
##   interaction_sig genotype_sig environment_sig
## 1             ***           ns              ns
## 2             ***           ns              ns
## 3             ***           ns              ns
## 4             ***           ns              ns
## 5              **           ns              ns
## 6               *           ns              ns

5.2 Export Results

# Export summary tables
if (nrow(psu2025_all_results) > 0) {
  write.csv(psu2025_all_results, "PSU2025_inv4m_results_by_donor.csv", row.names = FALSE)
}

if (nrow(cly2025_all_results) > 0) {
  write.csv(cly2025_all_results, "CLY2025_inv4m_results_by_donor.csv", row.names = FALSE)
}

if (nrow(gxe_all_interactions) > 0) {
  write.csv(gxe_all_interactions, "CLY25_PSU25_GxE_interaction_results.csv", row.names = FALSE)
}

if (nrow(all_effect_sizes) > 0) {
  write.csv(all_effect_sizes, "CLY25_PSU25_effect_sizes_by_environment.csv", row.names = FALSE)
}

# Export combined dataset
if (nrow(gxe_all_data) > 0) {
  write.csv(gxe_all_data, "CLY25_PSU25_combined_gxe_dataset.csv", row.names = FALSE)
}

cat("Results exported to CSV files\n")

## Results exported to CSV files

6. Key Findings

cat("=== KEY FINDINGS ===\n\n")

## === KEY FINDINGS ===

if (nrow(gxe_all_interactions) > 0) {
  # Identify significant GxE interactions
  sig_interactions <- gxe_all_interactions %>%
    filter(interaction_p < 0.05, !is.na(interaction_p))
  
  if (nrow(sig_interactions) > 0) {
    cat("Significant GxE interactions (p < 0.05):\n")
    print(sig_interactions %>% select(donor, trait, interaction_p, interaction_sig))
  } else {
    cat("No significant GxE interactions detected (p < 0.05)\n")
  }
  
  # Summary by donor
  cat("\nSummary by donor:\n")
  donor_summary <- gxe_all_interactions %>%
    group_by(donor) %>%
    summarise(
      traits_tested = sum(!is.na(interaction_p)),
      significant_interactions = sum(interaction_p < 0.05, na.rm = TRUE),
      .groups = "drop"
    )
  print(donor_summary)
  
} else {
  cat("No GxE analysis results available\n")
}

## Significant GxE interactions (p < 0.05):
##   donor trait interaction_p interaction_sig
## 1  TMEX    PH  6.530813e-07             ***
## 2  TMEX   DTA  1.830926e-07             ***
## 3  TMEX   DTS  1.487395e-05             ***
## 4  MI21    PH  3.112875e-09             ***
## 5  MI21   DTA  6.385088e-03              **
## 6  MI21   DTS  3.264420e-02               *
## 
## Summary by donor:
## # A tibble: 2 × 3
##   donor traits_tested significant_interactions
##   <chr>         <int>                    <int>
## 1 MI21              3                        3
## 2 TMEX              3                        3

7. Session Information

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Bogota
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.5     viridisLite_0.4.2 patchwork_1.3.2   broom_1.0.9      
##  [5] effectsize_1.0.1  emmeans_1.11.2    ggbeeswarm_0.7.2  ggtext_0.1.2     
##  [9] ggpubr_0.6.1      rstatix_0.7.2     lubridate_1.9.4   forcats_1.0.0    
## [13] stringr_1.5.1     dplyr_1.1.4       purrr_1.1.0       readr_2.1.5      
## [17] tidyr_1.3.1       tibble_3.3.0      ggplot2_3.5.2     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       beeswarm_0.4.0     xfun_0.53          bslib_0.9.0       
##  [5] bayestestR_0.16.1  insight_1.4.0      tzdb_0.5.0         vctrs_0.6.5       
##  [9] tools_4.5.1        generics_0.1.4     datawizard_1.2.0   pkgconfig_2.0.3   
## [13] RColorBrewer_1.1-3 lifecycle_1.0.4    compiler_4.5.1     farver_2.1.2      
## [17] carData_3.0-5      litedown_0.7       vipor_0.4.7        htmltools_0.5.8.1 
## [21] sass_0.4.10        yaml_2.3.10        Formula_1.2-5      pillar_1.11.0     
## [25] car_3.1-3          jquerylib_0.1.4    cachem_1.1.0       boot_1.3-31       
## [29] abind_1.4-8        commonmark_2.0.0   tidyselect_1.2.1   digest_0.6.37     
## [33] mvtnorm_1.3-3      stringi_1.8.7      labeling_0.4.3     fastmap_1.2.0     
## [37] grid_4.5.1         cli_3.6.5          magrittr_2.0.3     utf8_1.2.6        
## [41] withr_3.0.2        scales_1.4.0       backports_1.5.0    timechange_0.3.0  
## [45] estimability_1.5.1 rmarkdown_2.29     gridExtra_2.3      ggsignif_0.6.4    
## [49] hms_1.1.3          evaluate_1.0.4     knitr_1.50         parameters_0.28.0 
## [53] markdown_2.0       rlang_1.1.6        gridtext_0.1.5     Rcpp_1.1.0        
## [57] xtable_1.8-4       glue_1.8.0         xml2_1.4.0         rstudioapi_0.17.1 
## [61] jsonlite_2.0.0     R6_2.6.1