This analysis examines Genotype × Environment (GxE) interactions for the inv4m introgression across two environments:
Research Questions: 1. Does inv4m show consistent effects across environments? 2. Which traits (PH, DTA, DTS) exhibit significant GxE interactions? 3. How does genetic background (donor) modify inv4m effects?
library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggtext)
library(ggbeeswarm)
library(emmeans)
library(effectsize)
library(broom)
library(patchwork)
library(viridis)
# Load spatially corrected datasets
psu2025 <- read.csv("PSU2025_spatially_corrected_phenotypes.csv", stringsAsFactors = FALSE)
cly2025 <- read.csv("CLY2025_spatially_corrected_phenotypes.csv", stringsAsFactors = FALSE)
# Check data structure
cat("PSU2025 dimensions:", dim(psu2025), "\n")
## PSU2025 dimensions: 64 8
cat("CLY2025 dimensions:", dim(cly2025), "\n")
## CLY2025 dimensions: 64 17
cat("PSU2025 genotypes:", paste(unique(psu2025$genotype), collapse = ", "), "\n")
## PSU2025 genotypes: Inv4m, CTRL
cat("CLY2025 genotypes:", paste(unique(cly2025$genotype), collapse = ", "), "\n")
## CLY2025 genotypes: INV4M, CTRL
cat("PSU2025 donors:", paste(unique(psu2025$donor), collapse = ", "), "\n")
## PSU2025 donors: TMEX, MI21
cat("CLY2025 donors:", paste(unique(cly2025$donor), collapse = ", "), "\n")
## CLY2025 donors: TMEX, MI21
# Standardize donor names if needed
# Check if we need to standardize donor names
psu2025_donors_orig <- unique(psu2025$donor)
cly2025_donors_orig <- unique(cly2025$donor)
cat("\nChecking for donor name standardization needs...\n")
##
## Checking for donor name standardization needs...
cat("PSU2025 original donors:", paste(psu2025_donors_orig, collapse = ", "), "\n")
## PSU2025 original donors: TMEX, MI21
cat("CLY2025 original donors:", paste(cly2025_donors_orig, collapse = ", "), "\n")
## CLY2025 original donors: TMEX, MI21
# Apply any necessary donor standardization
# Donors are already standardized: TMEX, MI21 in both datasets
# No donor name changes needed
# Standardize genotype names - PSU2025 uses "Inv4m", CLY2025 uses "INV4M"
# Let's standardize both to "INV4M"
psu2025 <- psu2025 %>%
mutate(
genotype = case_when(
genotype == "Inv4m" ~ "INV4M",
genotype == "CTRL" ~ "CTRL",
TRUE ~ genotype
)
)
# CLY2025 genotypes are already correct (INV4M, CTRL)
# Report final names after standardization
cat("\nAfter standardization:\n")
##
## After standardization:
cat("PSU2025 genotypes:", paste(unique(psu2025$genotype), collapse = ", "), "\n")
## PSU2025 genotypes: INV4M, CTRL
cat("CLY2025 genotypes:", paste(unique(cly2025$genotype), collapse = ", "), "\n")
## CLY2025 genotypes: INV4M, CTRL
cat("PSU2025 donors:", paste(unique(psu2025$donor), collapse = ", "), "\n")
## PSU2025 donors: TMEX, MI21
cat("CLY2025 donors:", paste(unique(cly2025$donor), collapse = ", "), "\n")
## CLY2025 donors: TMEX, MI21
# Define shared phenotypes
shared_phenotypes <- c("PH", "DTA", "DTS")
# PSU2025 analysis for each donor separately
psu2025_results_list <- list()
# Process each donor
for (current_donor in c("TMEX", "MI21")) {
cat("\n=== PSU2025", current_donor, "Analysis ===\n")
# Filter PSU2025 to current donor and relevant genotypes
psu2025_donor <- psu2025 %>%
filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
select(plotId, genotype, all_of(shared_phenotypes)) %>%
mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
filter(complete.cases(.))
if (nrow(psu2025_donor) < 6) {
cat("Insufficient data for", current_donor, "(n =", nrow(psu2025_donor), ")\n")
next
}
cat("Sample size:", nrow(psu2025_donor), "plots\n")
print(table(psu2025_donor$genotype))
# Statistical analysis
psu_results <- psu2025_donor %>%
pivot_longer(cols = all_of(shared_phenotypes),
names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
t_test(value ~ genotype) %>%
adjust_pvalue(method = "fdr") %>%
add_significance("p.adj")
# Calculate effect sizes (Cohen's d) - using your original approach
psu_cohens_d <- psu2025_donor %>%
pivot_longer(cols = all_of(shared_phenotypes),
names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", ci = TRUE)
# Combine results
psu_summary <- psu_results %>%
left_join(psu_cohens_d %>% select(trait, effsize, magnitude), by = "trait") %>%
mutate(donor = current_donor, environment = "PSU2025")
psu2025_results_list[[current_donor]] <- psu_summary
cat("Results for", current_donor, ":\n")
print(psu_summary)
}
##
## === PSU2025 TMEX Analysis ===
## Sample size: 32 plots
##
## CTRL INV4M
## 16 16
## Results for TMEX :
## # A tibble: 3 × 15
## trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 DTA value CTRL INV4M 16 16 2.08 30.0 4.6 e- 2 6.9 e- 2
## 2 DTS value CTRL INV4M 16 16 1.52 30.0 1.39e- 1 1.39e- 1
## 3 PH value CTRL INV4M 16 16 -17.7 28.7 5.28e-17 1.58e-16
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## # donor <chr>, environment <chr>
##
## === PSU2025 MI21 Analysis ===
## Sample size: 32 plots
##
## CTRL INV4M
## 16 16
## Results for MI21 :
## # A tibble: 3 × 15
## trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 DTA value CTRL INV4M 16 16 -1.92 24.7 0.0663 0.0994
## 2 DTS value CTRL INV4M 16 16 -0.584 25.8 0.564 0.564
## 3 PH value CTRL INV4M 16 16 -6.30 27.9 0.000000847 0.00000254
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## # donor <chr>, environment <chr>
# Combine all PSU2025 results
psu2025_all_results <- bind_rows(psu2025_results_list)
# CLY2025 analysis for each donor separately
cly2025_results_list <- list()
# Process each donor
for (current_donor in c("TMEX", "MI21")) {
cat("\n=== CLY2025", current_donor, "Analysis ===\n")
# Filter CLY2025 to current donor and relevant genotypes
cly2025_donor <- cly2025 %>%
filter(donor == current_donor, genotype %in% c("CTRL", "INV4M")) %>%
select(plotId, genotype, all_of(shared_phenotypes)) %>%
mutate(genotype = factor(genotype, levels = c("CTRL", "INV4M"))) %>%
filter(complete.cases(.))
if (nrow(cly2025_donor) < 6) {
cat("Insufficient data for", current_donor, "(n =", nrow(cly2025_donor), ")\n")
next
}
cat("Sample size:", nrow(cly2025_donor), "plots\n")
print(table(cly2025_donor$genotype))
# Statistical analysis
cly_results <- cly2025_donor %>%
pivot_longer(cols = all_of(shared_phenotypes),
names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
t_test(value ~ genotype) %>%
adjust_pvalue(method = "fdr") %>%
add_significance("p.adj")
# Calculate effect sizes (Cohen's d)
cly_cohens_d <- cly2025_donor %>%
pivot_longer(cols = all_of(shared_phenotypes),
names_to = "trait", values_to = "value") %>%
group_by(trait) %>%
rstatix::cohens_d(value ~ genotype, ref.group = "INV4M", ci = TRUE)
# Combine results
cly_summary <- cly_results %>%
left_join(cly_cohens_d %>% select(trait, effsize, magnitude), by = "trait") %>%
mutate(donor = current_donor, environment = "CLY2025")
cly2025_results_list[[current_donor]] <- cly_summary
cat("Results for", current_donor, ":\n")
print(cly_summary)
}
##
## === CLY2025 TMEX Analysis ===
## Sample size: 32 plots
##
## CTRL INV4M
## 16 16
## Results for TMEX :
## # A tibble: 3 × 15
## trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 DTA value CTRL INV4M 16 16 -8.05 29.3 0.00000000661 1.98e-8
## 2 DTS value CTRL INV4M 16 16 -7.86 26.2 0.000000023 3.45e-8
## 3 PH value CTRL INV4M 16 16 -4.70 25.5 0.0000766 7.66e-5
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## # donor <chr>, environment <chr>
##
## === CLY2025 MI21 Analysis ===
## Sample size: 32 plots
##
## CTRL INV4M
## 16 16
## Results for MI21 :
## # A tibble: 3 × 15
## trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 DTA value CTRL INV4M 16 16 2.19 29.7 0.0362 0.0362
## 2 DTS value CTRL INV4M 16 16 3.16 30.0 0.00357 0.00536
## 3 PH value CTRL INV4M 16 16 3.62 29.7 0.00109 0.00327
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## # donor <chr>, environment <chr>
# Combine all CLY2025 results
cly2025_all_results <- bind_rows(cly2025_results_list)
# Find donors present in both environments
common_donors <- intersect(names(psu2025_results_list), names(cly2025_results_list))
cat("Common donors for GxE analysis:", paste(common_donors, collapse = ", "), "\n")
## Common donors for GxE analysis: TMEX, MI21
if (length(common_donors) == 0) {
stop("No common donors found between PSU2025 and CLY2025")
}
# Combine datasets for GxE analysis
gxe_data_list <- list()
for (donor in common_donors) {
cat("\n=== Preparing GxE data for", donor, "===\n")
# PSU2025 data
psu_donor <- psu2025 %>%
filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
select(plotId, genotype, all_of(shared_phenotypes)) %>%
mutate(environment = "PSU2025", donor = donor) %>%
filter(complete.cases(.))
# CLY2025 data
cly_donor <- cly2025 %>%
filter(donor == !!donor, genotype %in% c("CTRL", "INV4M")) %>%
select(plotId, genotype, all_of(shared_phenotypes)) %>%
mutate(environment = "CLY2025", donor = donor) %>%
filter(complete.cases(.))
# Combine
donor_gxe <- bind_rows(psu_donor, cly_donor) %>%
mutate(
genotype = factor(genotype, levels = c("CTRL", "INV4M")),
environment = factor(environment, levels = c("CLY2025", "PSU2025"))
)
gxe_data_list[[donor]] <- donor_gxe
cat("Sample sizes by environment and genotype:\n")
print(table(donor_gxe$environment, donor_gxe$genotype))
}
##
## === Preparing GxE data for TMEX ===
## Sample sizes by environment and genotype:
##
## CTRL INV4M
## CLY2025 16 16
## PSU2025 16 16
##
## === Preparing GxE data for MI21 ===
## Sample sizes by environment and genotype:
##
## CTRL INV4M
## CLY2025 16 16
## PSU2025 16 16
# Combine all donors for overall analysis
gxe_all_data <- bind_rows(gxe_data_list, .id = "donor")
gxe_results_by_donor <- list()
for (donor in common_donors) {
cat("\n=== GxE Analysis for", donor, "===\n")
donor_data <- gxe_data_list[[donor]]
gxe_anova_results <- list()
gxe_effect_sizes <- list()
gxe_emmeans_results <- list()
for (trait in shared_phenotypes) {
cat("\n--- Analyzing", trait, "for", donor, "---\n")
# Prepare data for this trait
trait_data <- donor_data %>%
select(plotId, genotype, environment, value = all_of(trait)) %>%
filter(!is.na(value))
if (nrow(trait_data) < 8) {
cat("Insufficient data for", trait, "in", donor, "\n")
next
}
# Two-way ANOVA: Genotype × Environment
anova_model <- aov(value ~ genotype * environment, data = trait_data)
anova_summary <- summary(anova_model)
# Effect sizes (eta-squared)
effect_sizes <- eta_squared(anova_model)
# Store results
gxe_anova_results[[trait]] <- anova_summary
gxe_effect_sizes[[trait]] <- effect_sizes
# Print ANOVA results
cat("ANOVA Results for", trait, "in", donor, ":\n")
print(anova_summary)
# Post-hoc analysis with emmeans
if (length(unique(trait_data$environment)) > 1 && length(unique(trait_data$genotype)) > 1) {
emmeans_result <- emmeans(anova_model, ~ genotype | environment)
emmeans_contrast <- pairs(emmeans_result)
gxe_emmeans_results[[trait]] <- list(
means = emmeans_result,
contrasts = emmeans_contrast
)
cat("Estimated Marginal Means:\n")
print(emmeans_result)
}
}
gxe_results_by_donor[[donor]] <- list(
anova = gxe_anova_results,
effect_sizes = gxe_effect_sizes,
emmeans = gxe_emmeans_results
)
}
##
## === GxE Analysis for TMEX ===
##
## --- Analyzing PH for TMEX ---
## ANOVA Results for PH in TMEX :
## Df Sum Sq Mean Sq F value Pr(>F)
## genotype 1 4943 4943 182.12 < 2e-16 ***
## environment 1 31579 31579 1163.50 < 2e-16 ***
## genotype:environment 1 840 840 30.94 6.53e-07 ***
## Residuals 60 1628 27
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 194 1.3 60 191 196
## INV4M 204 1.3 60 202 207
##
## environment = PSU2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 231 1.3 60 228 234
## INV4M 256 1.3 60 253 258
##
## Confidence level used: 0.95
##
## --- Analyzing DTA for TMEX ---
## ANOVA Results for DTA in TMEX :
## Df Sum Sq Mean Sq F value Pr(>F)
## genotype 1 3.63 3.633 5.355 0.024099 *
## environment 1 9.74 9.742 14.360 0.000352 ***
## genotype:environment 1 23.59 23.586 34.764 1.83e-07 ***
## Residuals 60 40.71 0.678
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 77.7 0.206 60 77.2 78.1
## INV4M 79.3 0.206 60 78.9 79.8
##
## environment = PSU2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 79.7 0.206 60 79.2 80.1
## INV4M 78.9 0.206 60 78.5 79.3
##
## Confidence level used: 0.95
##
## --- Analyzing DTS for TMEX ---
## ANOVA Results for DTS in TMEX :
## Df Sum Sq Mean Sq F value Pr(>F)
## genotype 1 4.35 4.353 3.846 0.0545 .
## environment 1 7.90 7.901 6.980 0.0105 *
## genotype:environment 1 25.15 25.155 22.224 1.49e-05 ***
## Residuals 60 67.91 1.132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 78.8 0.266 60 78.3 79.3
## INV4M 80.6 0.266 60 80.0 81.1
##
## environment = PSU2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 80.8 0.266 60 80.2 81.3
## INV4M 80.0 0.266 60 79.5 80.6
##
## Confidence level used: 0.95
##
## === GxE Analysis for MI21 ===
##
## --- Analyzing PH for MI21 ---
## ANOVA Results for PH in MI21 :
## Df Sum Sq Mean Sq F value Pr(>F)
## genotype 1 36 36 2.785 0.1
## environment 1 42021 42021 3296.024 < 2e-16 ***
## genotype:environment 1 615 615 48.223 3.11e-09 ***
## Residuals 60 765 13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 187 0.893 60 185 189
## INV4M 183 0.893 60 181 184
##
## environment = PSU2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 232 0.893 60 230 234
## INV4M 240 0.893 60 238 242
##
## Confidence level used: 0.95
##
## --- Analyzing DTA for MI21 ---
## ANOVA Results for DTA in MI21 :
## Df Sum Sq Mean Sq F value Pr(>F)
## genotype 1 0.06 0.061 0.115 0.73557
## environment 1 5.07 5.073 9.556 0.00302 **
## genotype:environment 1 4.24 4.241 7.989 0.00639 **
## Residuals 60 31.85 0.531
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 77.71 0.182 60 77.34 78.07
## INV4M 77.26 0.182 60 76.89 77.62
##
## environment = PSU2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 77.76 0.182 60 77.39 78.12
## INV4M 78.33 0.182 60 77.97 78.70
##
## Confidence level used: 0.95
##
## --- Analyzing DTS for MI21 ---
## ANOVA Results for DTS in MI21 :
## Df Sum Sq Mean Sq F value Pr(>F)
## genotype 1 1.06 1.058 1.443 0.2344
## environment 1 1.50 1.498 2.043 0.1581
## genotype:environment 1 3.51 3.508 4.783 0.0326 *
## Residuals 60 44.00 0.733
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Estimated Marginal Means:
## environment = CLY2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 78.4 0.214 60 77.9 78.8
## INV4M 77.6 0.214 60 77.2 78.1
##
## environment = PSU2025:
## genotype emmean SE df lower.CL upper.CL
## CTRL 78.2 0.214 60 77.8 78.6
## INV4M 78.4 0.214 60 78.0 78.8
##
## Confidence level used: 0.95
# Extract interaction effects for each donor
gxe_interaction_summary_list <- list()
for (donor in common_donors) {
if (!donor %in% names(gxe_results_by_donor)) next
donor_results <- gxe_results_by_donor[[donor]]
interaction_summary <- data.frame(
donor = donor,
trait = shared_phenotypes,
interaction_p = NA,
interaction_eta2 = NA,
genotype_p = NA,
environment_p = NA
)
for (i in seq_along(shared_phenotypes)) {
trait <- shared_phenotypes[i]
if (trait %in% names(donor_results$anova)) {
# Extract p-values from ANOVA
anova_df <- as.data.frame(donor_results$anova[[trait]][[1]])
if ("genotype:environment" %in% rownames(anova_df)) {
interaction_summary$interaction_p[i] <- anova_df["genotype:environment", "Pr(>F)"]
}
if ("genotype" %in% rownames(anova_df)) {
interaction_summary$genotype_p[i] <- anova_df["genotype", "Pr(>F)"]
}
if ("environment" %in% rownames(anova_df)) {
interaction_summary$environment_p[i] <- anova_df["environment", "Pr(>F)"]
}
# Extract eta-squared for interaction
if (trait %in% names(donor_results$effect_sizes)) {
interaction_row <- donor_results$effect_sizes[[trait]] %>%
filter(Parameter == "genotype:environment")
interaction_summary$interaction_eta2[i] <-
if(nrow(interaction_row) > 0) interaction_row$Eta2[1] else NA
}
}
}
# Add significance indicators
interaction_summary <- interaction_summary %>%
mutate(
interaction_sig = case_when(
interaction_p < 0.001 ~ "***",
interaction_p < 0.01 ~ "**",
interaction_p < 0.05 ~ "*",
TRUE ~ "ns"
),
genotype_sig = case_when(
genotype_p < 0.001 ~ "***",
genotype_p < 0.01 ~ "**",
genotype_p < 0.05 ~ "*",
TRUE ~ "ns"
),
environment_sig = case_when(
environment_p < 0.001 ~ "***",
environment_p < 0.01 ~ "**",
environment_p < 0.05 ~ "*",
TRUE ~ "ns"
)
)
gxe_interaction_summary_list[[donor]] <- interaction_summary
cat("\nGxE Interaction Summary for", donor, ":\n")
print(interaction_summary)
}
##
## GxE Interaction Summary for TMEX :
## donor trait interaction_p interaction_eta2 genotype_p environment_p
## 1 TMEX PH 6.530813e-07 0.3401953 NA NA
## 2 TMEX DTA 1.830926e-07 0.3668486 NA NA
## 3 TMEX DTS 1.487395e-05 0.2702889 NA NA
## interaction_sig genotype_sig environment_sig
## 1 *** ns ns
## 2 *** ns ns
## 3 *** ns ns
##
## GxE Interaction Summary for MI21 :
## donor trait interaction_p interaction_eta2 genotype_p environment_p
## 1 MI21 PH 3.112875e-09 0.4455912 NA NA
## 2 MI21 DTA 6.385088e-03 0.1175011 NA NA
## 3 MI21 DTS 3.264420e-02 0.0738368 NA NA
## interaction_sig genotype_sig environment_sig
## 1 *** ns ns
## 2 ** ns ns
## 3 * ns ns
# Combine all donor summaries
gxe_all_interactions <- bind_rows(gxe_interaction_summary_list)
# Plot theme
pheno_theme <- theme_classic2(base_size = 16) +
theme(
plot.title = element_markdown(hjust = 0.5, face = "bold"),
axis.title.y = element_markdown(),
axis.title.x = element_blank(),
axis.text.x = element_text(face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = element_markdown(size = 14),
legend.position = "none"
)
for (donor in common_donors) {
cat("\n=== Creating interaction plots for", donor, "===\n")
if (!donor %in% names(gxe_data_list)) next
# Calculate means and SE for interaction plots
interaction_stats <- gxe_data_list[[donor]] %>%
pivot_longer(cols = all_of(shared_phenotypes),
names_to = "trait", values_to = "value") %>%
group_by(trait, genotype, environment) %>%
summarise(
mean_value = mean(value, na.rm = TRUE),
se_value = sd(value, na.rm = TRUE) / sqrt(n()),
n = n(),
.groups = "drop"
) %>%
mutate(
trait = factor(trait, levels = shared_phenotypes),
environment = factor(environment, levels = c("CLY2025", "PSU2025"))
)
# Create interaction plot
interaction_plot <- ggplot(interaction_stats,
aes(x = environment, y = mean_value,
color = genotype, group = genotype)) +
geom_line(size = 1.2, alpha = 0.8) +
geom_point(size = 3) +
geom_errorbar(aes(ymin = mean_value - se_value, ymax = mean_value + se_value),
width = 0.1, size = 1) +
scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
facet_wrap(~ trait, scales = "free_y", ncol = 3) +
labs(
title = paste("**Genotype × Environment Interaction Effects:**", donor, "**donor**"),
subtitle = "Genotype means ± SE across environments",
x = "Environment",
y = "Trait Value",
color = "Genotype"
) +
pheno_theme +
theme(legend.position = "top")
print(interaction_plot)
}
##
## === Creating interaction plots for TMEX ===
##
## === Creating interaction plots for MI21 ===
# Combine environment-specific effect sizes from both analyses
all_effect_sizes <- bind_rows(
psu2025_all_results %>% select(trait, donor, environment, magnitude, effsize),
cly2025_all_results %>% select(trait, donor, environment, magnitude, effsize)
) %>%
filter(donor %in% common_donors) %>%
mutate(
trait = factor(trait, levels = shared_phenotypes),
environment = factor(environment, levels = c("CLY2025", "PSU2025"))
)
# Forest plot of effect sizes
forest_plot <- ggplot(all_effect_sizes,
aes(x = effsize, y = interaction(trait, donor),
color = environment)) +
geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.6) +
geom_point(size = 3, position = position_dodge(width = 0.4)) +
geom_text(aes(label = paste0("(", magnitude, ")")),
position = position_dodge(width = 0.4),
hjust = -0.2, size = 3) +
scale_color_manual(values = c("CLY2025" = "tomato", "PSU2025" = "royalblue")) +
labs(
title = "**Effect Sizes (Cohen's d) of inv4m Across Environments**",
subtitle = "Positive values indicate Inv4m > CTRL",
x = "Cohen's d (Effect Size)",
y = "Trait × Donor",
color = "Environment"
) +
pheno_theme +
theme(
legend.position = "top",
axis.text.y = element_text(size = 10)
)
print(forest_plot)
# Create heatmap of interaction p-values
if (nrow(gxe_all_interactions) > 0) {
heatmap_data <- gxe_all_interactions %>%
select(donor, trait, interaction_p) %>%
mutate(
log_p = -log10(interaction_p),
significance = case_when(
interaction_p < 0.001 ~ "p < 0.001",
interaction_p < 0.01 ~ "p < 0.01",
interaction_p < 0.05 ~ "p < 0.05",
TRUE ~ "ns"
)
)
heatmap_plot <- ggplot(heatmap_data,
aes(x = trait, y = donor, fill = log_p)) +
geom_tile(color = "white", size = 0.5) +
geom_text(aes(label = significance), size = 3, fontface = "bold") +
scale_fill_viridis_c(name = "-log10(p)", option = "plasma") +
labs(
title = "**GxE Interaction Significance Heatmap**",
subtitle = "Darker colors indicate stronger evidence for GxE interactions",
x = "Trait",
y = "Donor"
) +
pheno_theme +
theme(
axis.text.x = element_text(angle = 0),
legend.position = "right"
)
print(heatmap_plot)
}
cat("=== FINAL SUMMARY ===\n\n")
## === FINAL SUMMARY ===
cat("1. PSU2025 Results by Donor:\n")
## 1. PSU2025 Results by Donor:
if (nrow(psu2025_all_results) > 0) {
print(psu2025_all_results)
} else {
cat("No PSU2025 results available\n")
}
## # A tibble: 6 × 15
## trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 DTA value CTRL INV4M 16 16 2.08 30.0 4.6 e- 2 6.9 e- 2
## 2 DTS value CTRL INV4M 16 16 1.52 30.0 1.39e- 1 1.39e- 1
## 3 PH value CTRL INV4M 16 16 -17.7 28.7 5.28e-17 1.58e-16
## 4 DTA value CTRL INV4M 16 16 -1.92 24.7 6.63e- 2 9.94e- 2
## 5 DTS value CTRL INV4M 16 16 -0.584 25.8 5.64e- 1 5.64e- 1
## 6 PH value CTRL INV4M 16 16 -6.30 27.9 8.47e- 7 2.54e- 6
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## # donor <chr>, environment <chr>
cat("\n2. CLY2025 Results by Donor:\n")
##
## 2. CLY2025 Results by Donor:
if (nrow(cly2025_all_results) > 0) {
print(cly2025_all_results)
} else {
cat("No CLY2025 results available\n")
}
## # A tibble: 6 × 15
## trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 DTA value CTRL INV4M 16 16 -8.05 29.3 0.00000000661 1.98e-8
## 2 DTS value CTRL INV4M 16 16 -7.86 26.2 0.000000023 3.45e-8
## 3 PH value CTRL INV4M 16 16 -4.70 25.5 0.0000766 7.66e-5
## 4 DTA value CTRL INV4M 16 16 2.19 29.7 0.0362 3.62e-2
## 5 DTS value CTRL INV4M 16 16 3.16 30.0 0.00357 5.36e-3
## 6 PH value CTRL INV4M 16 16 3.62 29.7 0.00109 3.27e-3
## # ℹ 5 more variables: p.adj.signif <chr>, effsize <dbl>, magnitude <ord>,
## # donor <chr>, environment <chr>
cat("\n3. GxE Interaction Effects:\n")
##
## 3. GxE Interaction Effects:
if (nrow(gxe_all_interactions) > 0) {
print(gxe_all_interactions)
} else {
cat("No GxE interaction results available\n")
}
## donor trait interaction_p interaction_eta2 genotype_p environment_p
## 1 TMEX PH 6.530813e-07 0.3401953 NA NA
## 2 TMEX DTA 1.830926e-07 0.3668486 NA NA
## 3 TMEX DTS 1.487395e-05 0.2702889 NA NA
## 4 MI21 PH 3.112875e-09 0.4455912 NA NA
## 5 MI21 DTA 6.385088e-03 0.1175011 NA NA
## 6 MI21 DTS 3.264420e-02 0.0738368 NA NA
## interaction_sig genotype_sig environment_sig
## 1 *** ns ns
## 2 *** ns ns
## 3 *** ns ns
## 4 *** ns ns
## 5 ** ns ns
## 6 * ns ns
# Export summary tables
if (nrow(psu2025_all_results) > 0) {
write.csv(psu2025_all_results, "PSU2025_inv4m_results_by_donor.csv", row.names = FALSE)
}
if (nrow(cly2025_all_results) > 0) {
write.csv(cly2025_all_results, "CLY2025_inv4m_results_by_donor.csv", row.names = FALSE)
}
if (nrow(gxe_all_interactions) > 0) {
write.csv(gxe_all_interactions, "CLY25_PSU25_GxE_interaction_results.csv", row.names = FALSE)
}
if (nrow(all_effect_sizes) > 0) {
write.csv(all_effect_sizes, "CLY25_PSU25_effect_sizes_by_environment.csv", row.names = FALSE)
}
# Export combined dataset
if (nrow(gxe_all_data) > 0) {
write.csv(gxe_all_data, "CLY25_PSU25_combined_gxe_dataset.csv", row.names = FALSE)
}
cat("Results exported to CSV files\n")
## Results exported to CSV files
cat("=== KEY FINDINGS ===\n\n")
## === KEY FINDINGS ===
if (nrow(gxe_all_interactions) > 0) {
# Identify significant GxE interactions
sig_interactions <- gxe_all_interactions %>%
filter(interaction_p < 0.05, !is.na(interaction_p))
if (nrow(sig_interactions) > 0) {
cat("Significant GxE interactions (p < 0.05):\n")
print(sig_interactions %>% select(donor, trait, interaction_p, interaction_sig))
} else {
cat("No significant GxE interactions detected (p < 0.05)\n")
}
# Summary by donor
cat("\nSummary by donor:\n")
donor_summary <- gxe_all_interactions %>%
group_by(donor) %>%
summarise(
traits_tested = sum(!is.na(interaction_p)),
significant_interactions = sum(interaction_p < 0.05, na.rm = TRUE),
.groups = "drop"
)
print(donor_summary)
} else {
cat("No GxE analysis results available\n")
}
## Significant GxE interactions (p < 0.05):
## donor trait interaction_p interaction_sig
## 1 TMEX PH 6.530813e-07 ***
## 2 TMEX DTA 1.830926e-07 ***
## 3 TMEX DTS 1.487395e-05 ***
## 4 MI21 PH 3.112875e-09 ***
## 5 MI21 DTA 6.385088e-03 **
## 6 MI21 DTS 3.264420e-02 *
##
## Summary by donor:
## # A tibble: 2 × 3
## donor traits_tested significant_interactions
## <chr> <int> <int>
## 1 MI21 3 3
## 2 TMEX 3 3
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Bogota
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] viridis_0.6.5 viridisLite_0.4.2 patchwork_1.3.2 broom_1.0.9
## [5] effectsize_1.0.1 emmeans_1.11.2 ggbeeswarm_0.7.2 ggtext_0.1.2
## [9] ggpubr_0.6.1 rstatix_0.7.2 lubridate_1.9.4 forcats_1.0.0
## [13] stringr_1.5.1 dplyr_1.1.4 purrr_1.1.0 readr_2.1.5
## [17] tidyr_1.3.1 tibble_3.3.0 ggplot2_3.5.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 beeswarm_0.4.0 xfun_0.53 bslib_0.9.0
## [5] bayestestR_0.16.1 insight_1.4.0 tzdb_0.5.0 vctrs_0.6.5
## [9] tools_4.5.1 generics_0.1.4 datawizard_1.2.0 pkgconfig_2.0.3
## [13] RColorBrewer_1.1-3 lifecycle_1.0.4 compiler_4.5.1 farver_2.1.2
## [17] carData_3.0-5 litedown_0.7 vipor_0.4.7 htmltools_0.5.8.1
## [21] sass_0.4.10 yaml_2.3.10 Formula_1.2-5 pillar_1.11.0
## [25] car_3.1-3 jquerylib_0.1.4 cachem_1.1.0 boot_1.3-31
## [29] abind_1.4-8 commonmark_2.0.0 tidyselect_1.2.1 digest_0.6.37
## [33] mvtnorm_1.3-3 stringi_1.8.7 labeling_0.4.3 fastmap_1.2.0
## [37] grid_4.5.1 cli_3.6.5 magrittr_2.0.3 utf8_1.2.6
## [41] withr_3.0.2 scales_1.4.0 backports_1.5.0 timechange_0.3.0
## [45] estimability_1.5.1 rmarkdown_2.29 gridExtra_2.3 ggsignif_0.6.4
## [49] hms_1.1.3 evaluate_1.0.4 knitr_1.50 parameters_0.28.0
## [53] markdown_2.0 rlang_1.1.6 gridtext_0.1.5 Rcpp_1.1.0
## [57] xtable_1.8-4 glue_1.8.0 xml2_1.4.0 rstudioapi_0.17.1
## [61] jsonlite_2.0.0 R6_2.6.1