Genotype × Environment Analysis of inv4m Introgression Effects

Overview

This analysis examines Genotype × Environment (GxE) interactions for the inv4m introgression across two environments:

PSU2022: Pennsylvania environment (+P treatment, colder/northern)
CLY2025: North Carolina environment (warmer/southern)

Research Questions: 1. Does inv4m show consistent effects across environments? 2. Which traits (PH, DTA, DTS) exhibit significant GxE interactions? 3. How does genetic background (donor) modify inv4m effects?

1. Load Libraries and Data

library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggtext)
library(emmeans)
library(effectsize)
library(broom)
library(patchwork)
library(viridis)

# Load spatially corrected datasets
psu2022 <- read.csv("PSU2022_spatially_corrected_phenotypes.csv", stringsAsFactors = FALSE)
cly2025 <- read.csv("CLY2025_spatially_corrected_phenotypes.csv", stringsAsFactors = FALSE)

# Check data structure
cat("PSU2022 dimensions:", dim(psu2022), "\n")

## PSU2022 dimensions: 127 24

cat("CLY2025 dimensions:", dim(cly2025), "\n")

## CLY2025 dimensions: 64 17

cat("PSU2022 genotypes:", unique(psu2022$genotype), "\n")

## PSU2022 genotypes: Inv4m CTRL B73 NUE

cat("CLY2025 genotypes:", unique(cly2025$genotype), "\n")

## CLY2025 genotypes: INV4M CTRL

cat("PSU2022 treatments:", unique(psu2022$Treatment), "\n")

## PSU2022 treatments: -P +P

cat("CLY2025 donors:", unique(cly2025$donor), "\n")

## CLY2025 donors: TMEX MI21

# Define shared phenotypes
shared_phenotypes <- c("PH", "DTA", "DTS")

2. Phase 1: Within-Environment Analysis

2.1 PSU2022 High-P Environment Analysis

# Filter PSU2022 to +P treatment and relevant genotypes
psu2022_highp <- psu2022 %>%
  filter(Treatment == "+P", genotype %in% c("CTRL", "Inv4m")) %>%
  select(plotId, genotype, repId, all_of(shared_phenotypes)) %>%
  mutate(
    genotype = factor(genotype, levels = c("CTRL", "Inv4m")),
    environment = "PSU2022_HighP"
  ) %>%
  filter(complete.cases(.))

cat("PSU2022 +P data - Genotype counts:\n")

## PSU2022 +P data - Genotype counts:

print(table(psu2022_highp$genotype))

## 
##  CTRL Inv4m 
##    16    16

cat("Sample size per genotype:", nrow(psu2022_highp), "total plots\n")

## Sample size per genotype: 32 total plots

# Statistical analysis for PSU2022 +P
psu2022_results <- psu2022_highp %>%
  pivot_longer(cols = all_of(shared_phenotypes), 
               names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  t_test(value ~ genotype) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance("p.adj")

# Calculate effect sizes (Cohen's d)
psu2022_cohens_d <- psu2022_highp %>%
  pivot_longer(cols = all_of(shared_phenotypes), 
               names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
 rstatix::cohens_d(value ~ genotype, ref.group = "Inv4m",   ci = TRUE)

# Combine results
psu2022_summary <- psu2022_results %>%
  left_join(psu2022_cohens_d %>% select(trait, effsize, magnitude), by = "trait")

print("PSU2022 High-P Results:")

## [1] "PSU2022 High-P Results:"

print(psu2022_summary)

## # A tibble: 3 × 13
##   trait .y.   group1 group2    n1    n2 statistic    df         p    p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>     <dbl>    <dbl>
## 1 DTA   value CTRL   Inv4m     16    16      3.56  28.9 0.00129   0.00129 
## 2 DTS   value CTRL   Inv4m     16    16      4.83  29.1 0.0000408 0.000122
## 3 PH    value CTRL   Inv4m     16    16     -3.57  29.7 0.00123   0.00129 
## # ℹ 3 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>

2.2 CLY2025 MI21 Donor Analysis

# Filter CLY2025 to MI21 donor and standardize genotype names
cly2025_mi21 <- cly2025 %>%
  filter(donor == "MI21") %>%
  select(plotId, genotype, repId, donor, all_of(shared_phenotypes)) %>%
  mutate(
    genotype = case_when(
      genotype == "INV4M" ~ "Inv4m",
      genotype == "CTRL" ~ "CTRL",
      TRUE ~ genotype
    ),
    genotype = factor(genotype, levels = c("CTRL", "Inv4m")),
    environment = "CLY2025"
  ) %>%
  filter(genotype %in% c("CTRL", "Inv4m")) %>%
  filter(complete.cases(.))

cat("CLY2025 MI21 data - Genotype counts:\n")

## CLY2025 MI21 data - Genotype counts:

print(table(cly2025_mi21$genotype))

## 
##  CTRL Inv4m 
##    16    16

cat("Sample size per genotype:", nrow(cly2025_mi21), "total plots\n")

## Sample size per genotype: 32 total plots

# Statistical analysis for CLY2025 MI21
cly2025_results <- cly2025_mi21 %>%
  pivot_longer(cols = all_of(shared_phenotypes), 
               names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  t_test(value ~ genotype) %>%
  adjust_pvalue(method = "fdr") %>%
  add_significance("p.adj")

# Calculate effect sizes
cly2025_cohens_d <- cly2025_mi21 %>%
  pivot_longer(cols = all_of(shared_phenotypes), 
               names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  rstatix::cohens_d(value ~ genotype, ref.group = "Inv4m",   ci = TRUE)

# Combine results
cly2025_summary <- cly2025_results %>%
  left_join(cly2025_cohens_d %>% select(trait, effsize, magnitude), by = "trait")

print("CLY2025 MI21 Results:")

## [1] "CLY2025 MI21 Results:"

print(cly2025_summary)

## # A tibble: 3 × 13
##   trait .y.   group1 group2    n1    n2 statistic    df       p   p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 DTA   value CTRL   Inv4m     16    16      2.19  29.7 0.0362  0.0362 
## 2 DTS   value CTRL   Inv4m     16    16      3.16  30.0 0.00357 0.00536
## 3 PH    value CTRL   Inv4m     16    16      3.62  29.7 0.00109 0.00327
## # ℹ 3 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>

3. Phase 2: Cross-Environment GxE Analysis

3.1 Data Integration for GxE Analysis

# Combine datasets for GxE analysis
gxe_data <- bind_rows(
  psu2022_highp %>% select(plotId, genotype, environment, all_of(shared_phenotypes)),
  cly2025_mi21 %>% select(plotId, genotype, environment, all_of(shared_phenotypes))
) %>%
  mutate(
    genotype = factor(genotype, levels = c("CTRL", "Inv4m")),
    environment = factor(environment, levels = c("PSU2022_HighP", "CLY2025"))
  ) %>%
  filter(complete.cases(.))

cat("Combined GxE dataset dimensions:", dim(gxe_data), "\n")

## Combined GxE dataset dimensions: 64 6

cat("Sample sizes by environment and genotype:\n")

## Sample sizes by environment and genotype:

print(table(gxe_data$environment, gxe_data$genotype))

##                
##                 CTRL Inv4m
##   PSU2022_HighP   16    16
##   CLY2025         16    16

3.2 GxE Statistical Analysis

# Two-way ANOVA for each trait
gxe_anova_results <- list()
gxe_emmeans_results <- list()
gxe_effect_sizes <- list()

for (trait in shared_phenotypes) {
  cat("\n=== Analyzing", trait, "===\n")
  
  # Prepare data for this trait
  trait_data <- gxe_data %>%
    select(plotId, genotype, environment, value = all_of(trait)) %>%
    filter(!is.na(value))
  
  # Two-way ANOVA: Genotype × Environment
  anova_model <- aov(value ~ genotype * environment, data = trait_data)
  anova_summary <- summary(anova_model)
  
  # Effect sizes (eta-squared)
  effect_sizes <- effectsize::eta_squared(anova_model)
  
  # Store results
  gxe_anova_results[[trait]] <- anova_summary
  gxe_effect_sizes[[trait]] <- effect_sizes
  
  # Print ANOVA results
  cat("ANOVA Results for", trait, ":\n")
  print(anova_summary)
  cat("Effect Sizes:\n")
  print(effect_sizes)
  
  # Post-hoc analysis with emmeans
  emmeans_result <- emmeans(anova_model, ~ genotype | environment)
  emmeans_contrast <- pairs(emmeans_result)
  
  gxe_emmeans_results[[trait]] <- list(
    means = emmeans_result,
    contrasts = emmeans_contrast
  )
  
  cat("Estimated Marginal Means:\n")
  print(emmeans_result)
  cat("Pairwise Comparisons:\n")
  print(emmeans_contrast)
}

## 
## === Analyzing PH ===
## ANOVA Results for PH :
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype              1      3       3   0.184     0.67    
## environment           1   5949    5949 359.482  < 2e-16 ***
## genotype:environment  1    424     424  25.595 4.26e-06 ***
## Residuals            60    993      17                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Effect Sizes:
## # Effect Size for ANOVA (Type I)
## 
## Parameter            | Eta2 (partial) |       95% CI
## ----------------------------------------------------
## genotype             |       3.05e-03 | [0.00, 1.00]
## environment          |           0.86 | [0.80, 1.00]
## genotype:environment |           0.30 | [0.15, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].Estimated Marginal Means:
## environment = PSU2022_HighP:
##  genotype emmean   SE df lower.CL upper.CL
##  CTRL        163 1.02 60      161      165
##  Inv4m       168 1.02 60      166      170
## 
## environment = CLY2025:
##  genotype emmean   SE df lower.CL upper.CL
##  CTRL        187 1.02 60      185      189
##  Inv4m       183 1.02 60      180      185
## 
## Confidence level used: 0.95 
## Pairwise Comparisons:
## environment = PSU2022_HighP:
##  contrast     estimate   SE df t.ratio p.value
##  CTRL - Inv4m    -5.58 1.44 60  -3.880  0.0003
## 
## environment = CLY2025:
##  contrast     estimate   SE df t.ratio p.value
##  CTRL - Inv4m     4.71 1.44 60   3.274  0.0018
## 
## 
## === Analyzing DTA ===
## ANOVA Results for DTA :
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype              1  14.27   14.27  17.408 9.87e-05 ***
## environment           1 297.16  297.16 362.607  < 2e-16 ***
## genotype:environment  1   3.86    3.86   4.712   0.0339 *  
## Residuals            60  49.17    0.82                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Effect Sizes:
## # Effect Size for ANOVA (Type I)
## 
## Parameter            | Eta2 (partial) |       95% CI
## ----------------------------------------------------
## genotype             |           0.22 | [0.09, 1.00]
## environment          |           0.86 | [0.80, 1.00]
## genotype:environment |           0.07 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].Estimated Marginal Means:
## environment = PSU2022_HighP:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       73.9 0.226 60     73.4     74.3
##  Inv4m      72.5 0.226 60     72.0     72.9
## 
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL       77.7 0.226 60     77.3     78.2
##  Inv4m      77.3 0.226 60     76.8     77.7
## 
## Confidence level used: 0.95 
## Pairwise Comparisons:
## environment = PSU2022_HighP:
##  contrast     estimate   SE df t.ratio p.value
##  CTRL - Inv4m    1.436 0.32 60   4.485  <.0001
## 
## environment = CLY2025:
##  contrast     estimate   SE df t.ratio p.value
##  CTRL - Inv4m    0.453 0.32 60   1.415  0.1621
## 
## 
## === Analyzing DTS ===
## ANOVA Results for DTS :
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## genotype              1  17.55   17.55  32.976 3.29e-07 ***
## environment           1 176.92  176.92 332.481  < 2e-16 ***
## genotype:environment  1   1.66    1.66   3.115   0.0827 .  
## Residuals            60  31.93    0.53                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Effect Sizes:
## # Effect Size for ANOVA (Type I)
## 
## Parameter            | Eta2 (partial) |       95% CI
## ----------------------------------------------------
## genotype             |           0.35 | [0.20, 1.00]
## environment          |           0.85 | [0.79, 1.00]
## genotype:environment |           0.05 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].Estimated Marginal Means:
## environment = PSU2022_HighP:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL      75.36 0.182 60    74.99    75.72
##  Inv4m     73.99 0.182 60    73.62    74.35
## 
## environment = CLY2025:
##  genotype emmean    SE df lower.CL upper.CL
##  CTRL      78.36 0.182 60    77.99    78.72
##  Inv4m     77.63 0.182 60    77.27    78.00
## 
## Confidence level used: 0.95 
## Pairwise Comparisons:
## environment = PSU2022_HighP:
##  contrast     estimate    SE df t.ratio p.value
##  CTRL - Inv4m    1.369 0.258 60   5.309  <.0001
## 
## environment = CLY2025:
##  contrast     estimate    SE df t.ratio p.value
##  CTRL - Inv4m    0.725 0.258 60   2.813  0.0066

3.3 GxE Interaction Effects

# Extract interaction p-values and effect sizes
gxe_interaction_summary <- data.frame(
  trait = shared_phenotypes,
  interaction_p = numeric(length(shared_phenotypes)),
  interaction_eta2 = numeric(length(shared_phenotypes)),
  genotype_p = numeric(length(shared_phenotypes)),
  environment_p = numeric(length(shared_phenotypes))
)

for (i in seq_along(shared_phenotypes)) {
  trait <- shared_phenotypes[i]
  
  # Extract p-values from ANOVA
  anova_df <- as.data.frame(gxe_anova_results[[trait]][[1]])
  gxe_interaction_summary$interaction_p[i] <- anova_df["genotype:environment", "Pr(>F)"]
  gxe_interaction_summary$genotype_p[i] <- anova_df["genotype", "Pr(>F)"]
  gxe_interaction_summary$environment_p[i] <- anova_df["environment", "Pr(>F)"]
  
  # Extract eta-squared for interaction
  interaction_row <- gxe_effect_sizes[[trait]] %>%
    filter(Parameter == "genotype:environment")
  gxe_interaction_summary$interaction_eta2[i] <- 
    if(nrow(interaction_row) > 0) interaction_row$Eta2[1] else NA
}

# Add significance indicators
gxe_interaction_summary <- gxe_interaction_summary %>%
  mutate(
    interaction_sig = case_when(
      interaction_p < 0.001 ~ "***",
      interaction_p < 0.01 ~ "**", 
      interaction_p < 0.05 ~ "*",
      TRUE ~ "ns"
    ),
    genotype_sig = case_when(
      genotype_p < 0.001 ~ "***",
      genotype_p < 0.01 ~ "**",
      genotype_p < 0.05 ~ "*", 
      TRUE ~ "ns"
    ),
    environment_sig = case_when(
      environment_p < 0.001 ~ "***",
      environment_p < 0.01 ~ "**",
      environment_p < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  )

print("GxE Interaction Summary:")

## [1] "GxE Interaction Summary:"

print(gxe_interaction_summary)

##   trait interaction_p interaction_eta2 genotype_p environment_p interaction_sig
## 1    PH  4.258027e-06       0.29902143         NA  5.088033e-27             ***
## 2   DTA  3.392581e-02       0.07281053         NA  4.070043e-27               *
## 3   DTS  8.266769e-02       0.04935305         NA  3.765287e-26              ns
##   genotype_sig environment_sig
## 1           ns             ***
## 2           ns             ***
## 3           ns             ***

4. Phase 3: Comprehensive Visualization

4.1 Interaction Plots

# Calculate means and SE for interaction plots
interaction_stats <- gxe_data %>%
  pivot_longer(cols = all_of(shared_phenotypes), 
               names_to = "trait", values_to = "value") %>%
  group_by(trait, genotype, environment) %>%
  summarise(
    mean_value = mean(value, na.rm = TRUE),
    se_value = sd(value, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  )

pheno_theme <- ggpubr::theme_classic2(base_size = 20) +
  ggplot2::theme(
    plot.title = ggtext::element_markdown(hjust = 0.5, face ="bold"),
    axis.title.y = ggtext::element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text( angle=45, face="bold", color= "black", hjust = 1),
    strip.background = element_blank(),
    strip.text = ggtext::element_markdown(size=20),
    legend.position = "top")

interaction_stats$trait <- factor(interaction_stats$trait, levels = c("PH","DTA","DTS"))

interaction_stats$environment <- factor(interaction_stats$environment,
                                  levels = c( "CLY2025","PSU2022_HighP"))

# Create interaction plots
interaction_plot <- ggplot(interaction_stats, 
                          aes(x = environment, y = mean_value, 
                              color = genotype, group = genotype)) +
  geom_line(size = 1.2, alpha = 0.8) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = mean_value - se_value, ymax = mean_value + se_value),
                width = 0.1, size = 1) +
  scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
  facet_wrap(~ trait, scales = "free_y", ncol = 3) +
  labs(
    title = "**Genotype × Environment Interaction Effects, Mi21 donor**",
    subtitle = "Genotype means ± SE across environments",
    x = "Environment",
    y = "Trait Value",
    color = "Genotype"
  ) + pheno_theme

print(interaction_plot)

4.2 Effect Size Comparisons

# Calculate environment-specific effect sizes
env_effect_sizes <- bind_rows(
  psu2022_cohens_d %>% mutate(environment = "PSU2022_HighP"),
  cly2025_cohens_d %>% mutate(environment = "CLY2025")
) %>%
  select(trait, environment, magnitude, effsize, conf.low, conf.high) %>%
  mutate(trait = factor(trait, levels = c( "DTS","DTA","PH")))


pheno_theme2 <- ggpubr::theme_classic2(base_size = 20) +
  ggplot2::theme(
    plot.title = ggtext::element_markdown(hjust = 0.5, face ="bold"),
    axis.title.y = ggtext::element_markdown(),
    axis.title.x = element_blank(),
    axis.text.x = element_text( face="bold", color= "black"),
    strip.background = element_blank(),
    strip.text = ggtext::element_markdown(size=20),
    legend.position = "top")

# Forest plot of effect sizes
forest_plot <- env_effect_sizes  %>%
ggplot(aes(x = effsize, y = trait, color = environment)) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
  geom_errorbarh(aes(xmin = conf.low, 
                     xmax = conf.high),
                 height = 0.2, position = position_dodge(width = 0.4)) +
  geom_point(size = 4, position = position_dodge(width = 0.4)) +
  geom_text(aes(label = paste0("(", magnitude, ")")),
            position = position_dodge(width = 0.4),
            hjust = -0.2, size = 3) +
  scale_color_manual(values= c("tomato", "royalblue")) +
  labs(
    title = "**Effect Sizes (Cohen's d) of inv4m Across Environments**",
    subtitle = "Positive values indicate Inv4m > CTRL",
    x = "Cohen's d (Effect Size)",
    y = "Trait",
    color = "Environment"
  ) +
 pheno_theme2

print(forest_plot)

5. Summary and Export Results

5.1 Summary Tables

# Create comprehensive summary table
final_summary <- list(
  PSU2022_HighP = psu2022_summary,
  CLY2025_MI21 = cly2025_summary,
  GxE_Interactions = gxe_interaction_summary,
  Effect_Sizes_by_Environment = env_effect_sizes
)

# Print final summary
cat("=== FINAL SUMMARY ===\n\n")

## === FINAL SUMMARY ===

cat("1. PSU2022 High-P:\n")

## 1. PSU2022 High-P:

print(final_summary$PSU2022_HighP)

## # A tibble: 3 × 13
##   trait .y.   group1 group2    n1    n2 statistic    df         p    p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>     <dbl>    <dbl>
## 1 DTA   value CTRL   Inv4m     16    16      3.56  28.9 0.00129   0.00129 
## 2 DTS   value CTRL   Inv4m     16    16      4.83  29.1 0.0000408 0.000122
## 3 PH    value CTRL   Inv4m     16    16     -3.57  29.7 0.00123   0.00129 
## # ℹ 3 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>

cat("\n2. CLY2025 MI21:\n")

## 
## 2. CLY2025 MI21:

print(final_summary$CLY2025_MI21)

## # A tibble: 3 × 13
##   trait .y.   group1 group2    n1    n2 statistic    df       p   p.adj
##   <chr> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>   <dbl>   <dbl>
## 1 DTA   value CTRL   Inv4m     16    16      2.19  29.7 0.0362  0.0362 
## 2 DTS   value CTRL   Inv4m     16    16      3.16  30.0 0.00357 0.00536
## 3 PH    value CTRL   Inv4m     16    16      3.62  29.7 0.00109 0.00327
## # ℹ 3 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>

cat("\n3. GxE Interaction Effects:\n")

## 
## 3. GxE Interaction Effects:

print(final_summary$GxE_Interactions)

##   trait interaction_p interaction_eta2 genotype_p environment_p interaction_sig
## 1    PH  4.258027e-06       0.29902143         NA  5.088033e-27             ***
## 2   DTA  3.392581e-02       0.07281053         NA  4.070043e-27               *
## 3   DTS  8.266769e-02       0.04935305         NA  3.765287e-26              ns
##   genotype_sig environment_sig
## 1           ns             ***
## 2           ns             ***
## 3           ns             ***

cat("\n4. Effect Sizes by Environment:\n")

## 
## 4. Effect Sizes by Environment:

print(final_summary$Effect_Sizes_by_Environment)

## # A tibble: 6 × 6
##   trait environment   magnitude effsize conf.low conf.high
##   <fct> <chr>         <ord>       <dbl>    <dbl>     <dbl>
## 1 DTA   PSU2022_HighP large      -1.26     -2.31     -0.6 
## 2 DTS   PSU2022_HighP large      -1.71     -2.84     -1.06
## 3 PH    PSU2022_HighP large       1.26      0.52      2.21
## 4 DTA   CLY2025       moderate   -0.776    -1.64     -0.05
## 5 DTS   CLY2025       large      -1.12     -2.1      -0.41
## 6 PH    CLY2025       large      -1.28     -2.11     -0.64

5.2 Export Results

# Export summary tables
write.csv(final_summary$PSU2022_HighP, "PSU2022_HighP_inv4m_results.csv", row.names = FALSE)
write.csv(final_summary$CLY2025_MI21, "CLY2025_MI21_inv4m_results.csv", row.names = FALSE)
write.csv(final_summary$GxE_Interactions, "GxE_interaction_results.csv", row.names = FALSE)
write.csv(final_summary$Effect_Sizes_by_Environment, "effect_sizes_by_environment.csv", row.names = FALSE)

# Export combined dataset
write.csv(gxe_data, "combined_gxe_dataset.csv", row.names = FALSE)

cat("Results exported to CSV files:\n")

## Results exported to CSV files:

cat("- PSU2022_HighP_inv4m_results.csv\n")

## - PSU2022_HighP_inv4m_results.csv

cat("- CLY2025_MI21_inv4m_results.csv\n")

## - CLY2025_MI21_inv4m_results.csv

cat("- GxE_interaction_results.csv\n")

## - GxE_interaction_results.csv

cat("- effect_sizes_by_environment.csv\n")

## - effect_sizes_by_environment.csv

cat("- combined_gxe_dataset.csv\n")

## - combined_gxe_dataset.csv

6. Key Findings

cat("=== KEY FINDINGS ===\n\n")

## === KEY FINDINGS ===

# Identify significant GxE interactions
sig_interactions <- gxe_interaction_summary %>%
  filter(interaction_p < 0.05) %>%
  pull(trait)

if (length(sig_interactions) > 0) {
  cat("Traits with significant GxE interactions (p < 0.05):\n")
  cat(paste(sig_interactions, collapse = ", "), "\n\n")
} else {
  cat("No significant GxE interactions detected (p < 0.05)\n\n")
}

## Traits with significant GxE interactions (p < 0.05):
## PH, DTA

# Identify consistent effects across environments
consistent_effects <- env_effect_sizes %>%
  group_by(trait) %>%
  summarise(
    same_direction = all(sign(effsize) == sign(effsize[1])),
    min_effect = min(abs(effsize)),
    max_effect = max(abs(effsize)),
    .groups = "drop"
  )

cat("Traits with consistent inv4m effects across environments:\n")

## Traits with consistent inv4m effects across environments:

if (nrow(consistent_effects) > 0) {
  print(consistent_effects)
} else {
  cat("No traits show consistent effects across environments\n")
}

## # A tibble: 3 × 4
##   trait same_direction min_effect max_effect
##   <fct> <lgl>               <dbl>      <dbl>
## 1 DTS   TRUE                1.12        1.71
## 2 DTA   TRUE                0.776       1.26
## 3 PH    FALSE               1.26        1.28