PSU 2022 Spatial Analysis: SpATS Spatial Correction for Both P Treatments

Overview

This notebook fits a SpATS PSANOVA surface per phenotype and treatment and returns spatially-corrected plot-level values for each phenotype. The approach uses statgenHTP::fitModels() (which wraps SpATS when provided a TimePoints object) and getCorrected() to extract corrected plot-level observations. The workflow:

Load and clean data
Create plot_data (plot-level means)
Build TimePoints objects for both +P and -P treatments
Loop over usable phenotypes and apply fitModels() → getCorrected() for each treatment
Merge corrected values from both treatments into a combined dataset
Perform second-stage treatment × genotype analysis
Save corrected table for downstream analyses

Warning: fitting many SpATS models can be computationally intensive. Run interactively on a machine with sufficient RAM/CPU.

1. Load libraries

library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggtext)
library(ggfx)
library(nlme)          # Mixed-effects models (if needed)
library(gstat)         # Variograms
library(rstatix)
library(ggpubr)
library(VIM)
library(knitr)
library(SpATS)         # SpATS package (used internally by statgenHTP)
library(statgenHTP)    # Provides fitModels() and getCorrected()

2. Load and prepare data

DATA_DIR <- "/Users/fvrodriguez/Desktop/inv4mHighland/data"
plant_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")
ear_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field_ear_pheno.csv")

field_data_raw <- read.csv(plant_csv, na.strings = c("", "#N/A", "NA"))
ear_data_raw <- read.csv(ear_csv, na.strings = c("", "n/a", "NA"), skip = 1)

field_data <- field_data_raw %>%
  filter(P22. >= 3004, P22. <= 4192) %>%
  rename(
    rowid = P22.,
    Genotype = Who.What,
    PH = Height_Anthesis,
    STW40 = X40_DAP_dw,
    STW50 = X50_DAP_dw, 
    STW60 = X60_DAP_dw,
    STWHV = harvest_dw
  ) %>%
  mutate(
    # Set +P as reference level (first level)
    Treatment = factor(Treatment, levels = c("HighP", "LowP")),
    Genotype = factor(Genotype),
    Rep = as.factor(Rep)
  ) %>%
  mutate(
    Treatment = fct_recode(Treatment, "+P" = "HighP", "-P" = "LowP"),
    # Set CTRL as reference genotype
    Genotype = fct_recode(Genotype, "Inv4m" = "INV4M"),
    Genotype = fct_relevel(Genotype, "CTRL")
  )

ear_data <- ear_data_raw %>%
  select(-description, -RK, -CC, -NIR, ear_rep = rep) %>%
  rename(rowid = row) %>%
  arrange(rowid) %>%
  group_by(rowid) %>%
  select(-ear_rep) %>%
  summarise_all(mean, na.rm = TRUE) %>%
  droplevels()

field_data_complete <- field_data %>%
  left_join(ear_data, by = "rowid") %>%
  mutate(HI = TKW / STWHV) %>%
  mutate(
    Plot_Column = case_when(
      Plot == "PlotVIII" ~ Plot_Column + 10,
      Plot == "PlotVI" ~ Plot_Column,
      TRUE ~ Plot_Column
    )
  ) %>%
  filter(!is.na(PH)) %>%
  droplevels()

# Define phenotypes
base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")
all_phenotypes <- unique(c(base_phenotypes, ear_phenotypes))

plot_data <- field_data_complete %>%
  group_by(rowid, Rep, Plot_Row, Plot_Column, Genotype, Treatment) %>%
  summarise(
    across(all_of(all_phenotypes), ~ mean(.x, na.rm = TRUE)),
    n_plants = n(),
    .groups = 'drop'
  ) %>%
  mutate(across(all_of(all_phenotypes), ~ ifelse(is.nan(.x), NA, .x)))

plot_data$Plot_Row <- as.numeric(plot_data$Plot_Row)
plot_data$Plot_Column <- as.numeric(plot_data$Plot_Column)
plot_data$Rep <- as.factor(plot_data$Rep)

cat("Plot-level summary dimensions:", dim(plot_data), "\n")

## Plot-level summary dimensions: 127 23

cat("Average plants per plot:", round(mean(plot_data$n_plants, na.rm = TRUE), 1), "\n")

## Average plants per plot: 1

3. Identify usable phenotypes (<50% missing)

missing_summary <- plot_data %>%
  select(all_of(all_phenotypes)) %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
  mutate(
    total_obs = nrow(plot_data),
    missing_pct = round(missing_count / total_obs * 100, 1),
    available_n = total_obs - missing_count
  ) %>%
  arrange(missing_pct)

print(missing_summary)

## # A tibble: 16 × 5
##    phenotype missing_count total_obs missing_pct available_n
##    <chr>             <int>     <int>       <dbl>       <int>
##  1 PH                    0       127         0           127
##  2 DTA                   0       127         0           127
##  3 DTS                   0       127         0           127
##  4 STW40                 2       127         1.6         125
##  5 STWHV                 2       127         1.6         125
##  6 STW50                15       127        11.8         112
##  7 STW60                20       127        15.7         107
##  8 EW                   22       127        17.3         105
##  9 EL                   22       127        17.3         105
## 10 ED                   22       127        17.3         105
## 11 KRN                  22       127        17.3         105
## 12 CD                   22       127        17.3         105
## 13 TKW                  22       127        17.3         105
## 14 KW50                 22       127        17.3         105
## 15 TKN                  22       127        17.3         105
## 16 HI                   22       127        17.3         105

usable_phenotypes <- missing_summary %>%
  filter(missing_pct < 50) %>%
  pull(phenotype)

cat("Usable phenotypes (<50% missing):", paste(usable_phenotypes, collapse = ", "), "\n")

## Usable phenotypes (<50% missing): PH, DTA, DTS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

4. Spatial correction for +P treatment

# Prepare data for TimePoints
single_time <- plot_data %>%
  mutate(
    Genotype = as.character(Genotype),
    Treatment = as.character(Treatment),
    Rep = as.character(Rep),
    Plot_Row = as.numeric(Plot_Row),
    Plot_Column = as.numeric(Plot_Column)
  ) %>%
  mutate(time = as.POSIXct("2022-05-22 10:00:00"))

# Create TimePoints object for +P treatment
tp_data_plus_p <- createTimePoints(
  dat = single_time %>% filter(Treatment == "+P"),
  experimentName = "PSU2022_PlusP",
  genotype = "Genotype",
  timePoint = "time",
  plotId = "rowid",
  repId = "Rep",
  rowNum = "Plot_Row",
  colNum = "Plot_Column",
  checkGenotypes = c("NUE", "B73"),
  addCheck = TRUE
)

summary(tp_data_plus_p)

# Initialize lists for +P treatment
corrected_list_plus_p <- list()
fit_info_plus_p <- list()
all_models_plus_p <- list()

for (phen in usable_phenotypes) {
  message("Fitting SpATS model for +P treatment, phenotype: ", phen)
  
  # Check data availability
  no_nas_plus_p <- single_time %>%
    filter(Treatment == "+P", !is.na(.data[[phen]]))
  
  if (nrow(no_nas_plus_p) < 10) {
    message("Insufficient data for ", phen, " in +P treatment (n=", nrow(no_nas_plus_p), ")")
    next
  }
  
  # Fit model
  fit_try <- try(
    fitModels(
      TP = tp_data_plus_p,
      trait = phen,
      timePoints = 1,
      what = "fixed"
    ),
    silent = TRUE
  )
  
  if (inherits(fit_try, "try-error")) {
    message("Fit failed for ", phen, " in +P treatment: ", fit_try)
    next
  }
  
  all_models_plus_p[[phen]] <- fit_try
  
  # Extract corrected values
  corr <- try(getCorrected(fit_try), silent = TRUE)
  if (inherits(corr, "try-error") || is.null(corr)) {
    message("getCorrected() failed for ", phen, " in +P treatment")
    next
  } 
  
  corr <- corr %>% select(-phen) 
  corrected_list_plus_p[[phen]] <- corr
  fit_info_plus_p[[phen]] <- list(n_obs = nrow(no_nas_plus_p), status = "success")
}

# Combine all corrected data for +P treatment
sp_corrected_plus_p <- lapply(
  corrected_list_plus_p,
  function(corr_df) {
    corr_df %>%
      select(timeNumber, timePoint, genotype, rowId, colId, plotId) 
  }
) %>%
  bind_rows() %>%
  arrange(plotId) %>%
  distinct()  

# Join each corrected phenotype for +P
for (phen in names(corrected_list_plus_p)) {
  phen_corr <- paste0(phen, "_corr")
  corr_df <- corrected_list_plus_p[[phen]] %>%
    select(plotId, all_of(phen_corr))
  sp_corrected_plus_p <- left_join(
    sp_corrected_plus_p,
    corr_df,
    by = "plotId"
  )
}

# Clean column names (remove _corr suffix)
colnames(sp_corrected_plus_p) <- gsub(
  "_corr", "", 
  colnames(sp_corrected_plus_p)
)

# Add treatment information
sp_corrected_plus_p <- sp_corrected_plus_p %>% 
  inner_join(
    tp_data_plus_p[[1]] %>% select(plotId, Treatment),
    by = "plotId"
  ) %>%
  select(timeNumber:genotype, Treatment, everything())

cat("Final sp_corrected_plus_p dimensions:", dim(sp_corrected_plus_p), "\n")

5. Spatial correction for -P treatment

# Create TimePoints object for -P treatment
tp_data_minus_p <- createTimePoints(
  dat = single_time %>% filter(Treatment == "-P"),
  experimentName = "PSU2022_MinusP",
  genotype = "Genotype",
  timePoint = "time",
  plotId = "rowid",
  repId = "Rep",
  rowNum = "Plot_Row",
  colNum = "Plot_Column",
  checkGenotypes = c("NUE", "B73"),
  addCheck = TRUE
)

summary(tp_data_minus_p)

# Initialize lists for -P treatment
corrected_list_minus_p <- list()
fit_info_minus_p <- list()
all_models_minus_p <- list()

for (phen in usable_phenotypes) {
  message("Fitting SpATS model for -P treatment, phenotype: ", phen)
  
  # Check if we have enough data for this phenotype in -P
  no_nas_minus_p <- single_time %>%
    filter(Treatment == "-P", !is.na(.data[[phen]]))
  
  if (nrow(no_nas_minus_p) < 10) {
    message("Insufficient data for ", phen, " in -P treatment (n=", nrow(no_nas_minus_p), ")")
    next
  }
  
  # Fit model
  fit_try <- try(
    fitModels(
      TP = tp_data_minus_p,
      trait = phen,
      timePoints = 1,
      what = "fixed"
    ),
    silent = TRUE
  )
  
  if (inherits(fit_try, "try-error")) {
    message("Fit failed for ", phen, " in -P treatment: ", fit_try)
    next
  }
  
  all_models_minus_p[[phen]] <- fit_try
  
  # Extract corrected values
  corr <- try(getCorrected(fit_try), silent = TRUE)
  if (inherits(corr, "try-error") || is.null(corr)) {
    message("getCorrected() failed for ", phen, " in -P treatment")
    next
  } 
  
  corr <- corr %>% select(-phen) 
  corrected_list_minus_p[[phen]] <- corr
  fit_info_minus_p[[phen]] <- list(n_obs = nrow(no_nas_minus_p), status = "success")
}

# Combine all corrected data for -P treatment
sp_corrected_minus_p <- lapply(
  corrected_list_minus_p,
  function(corr_df) {
    corr_df %>%
      select(timeNumber, timePoint, genotype, rowId, colId, plotId) 
  }
) %>%
  bind_rows() %>%
  arrange(plotId) %>%
  distinct()  

# Join each corrected phenotype for -P
for (phen in names(corrected_list_minus_p)) {
  phen_corr <- paste0(phen, "_corr")
  corr_df <- corrected_list_minus_p[[phen]] %>%
    select(plotId, all_of(phen_corr))
  sp_corrected_minus_p <- left_join(
    sp_corrected_minus_p,
    corr_df,
    by = "plotId"
  )
}

# Clean column names (remove _corr suffix)
colnames(sp_corrected_minus_p) <- gsub(
  "_corr", "", 
  colnames(sp_corrected_minus_p)
)

# Add treatment information
sp_corrected_minus_p <- sp_corrected_minus_p %>% 
  inner_join(
    tp_data_minus_p[[1]] %>% select(plotId, Treatment),
    by = "plotId"
  ) %>%
  select(timeNumber:genotype, Treatment, everything())

cat("Final sp_corrected_minus_p dimensions:", dim(sp_corrected_minus_p), "\n")

6. Merge both spatially corrected datasets

# Combine +P and -P corrected data
sp_corrected_combined <- bind_rows(
  sp_corrected_plus_p,    # +P treatment
  sp_corrected_minus_p    # -P treatment
) %>%
  arrange(plotId)

cat("Combined sp_corrected dimensions:", dim(sp_corrected_combined), "\n")

## Combined sp_corrected dimensions: 127 23

cat("Treatment distribution:\n")

## Treatment distribution:

table(sp_corrected_combined$Treatment)

## 
## -P +P 
## 63 64

# Verify we have data for both treatments
treatment_summary <- sp_corrected_combined %>%
  group_by(Treatment) %>%
  summarise(
    n_plots = n(),
    n_genotypes = n_distinct(genotype),
    .groups = "drop"
  )

print(treatment_summary)

## # A tibble: 2 × 3
##   Treatment n_plots n_genotypes
##   <chr>       <int>       <int>
## 1 +P             64           4
## 2 -P             63           4

# Check for overlapping phenotypes between treatments
plus_p_phenos <- names(sp_corrected_plus_p)[!names(sp_corrected_plus_p) %in% 
                                            c("timeNumber", "timePoint", "genotype", 
                                              "rowId", "colId", "plotId", "Treatment")]
minus_p_phenos <- names(sp_corrected_minus_p)[!names(sp_corrected_minus_p) %in% 
                                             c("timeNumber", "timePoint", "genotype", 
                                               "rowId", "colId", "plotId", "Treatment")]

common_phenos <- intersect(plus_p_phenos, minus_p_phenos)
cat("Phenotypes corrected in both treatments:", paste(common_phenos, collapse = ", "), "\n")

## Phenotypes corrected in both treatments: PH, DTA, DTS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI

7. Spatial correction diagnostic plots

# Example spatial diagnostic plots for key phenotypes
key_phenotypes <- intersect(c("PH", "HI", "DTS", "TKW"), common_phenos)

for (phen in key_phenotypes[1:2]) {  # Limit to first 2 to avoid too many plots
  if (phen %in% names(all_models_plus_p)) {
    cat("\n### +P Treatment:", phen, "\n")
    plot(all_models_plus_p[[phen]], timePoints = 1)
    plot(all_models_plus_p[[phen]], timePoints = 1, plotType = "spatial", spaTrend = "raw")
  }
  
  if (phen %in% names(all_models_minus_p)) {
    cat("\n### -P Treatment:", phen, "\n")
    plot(all_models_minus_p[[phen]], timePoints = 1)
    plot(all_models_minus_p[[phen]], timePoints = 1, plotType = "spatial", spaTrend = "raw")
  }
}

## 
## ### +P Treatment: PH

## 
## ### -P Treatment: PH

## 
## ### +P Treatment: HI

## 
## ### -P Treatment: HI

8. Second-stage analysis: Treatment and Genotype × Treatment effects

# Filter to NILs for treatment × genotype analysis
nil_corrected_combined <- sp_corrected_combined %>%
  filter(genotype %in% c("Inv4m", "CTRL")) %>%
  mutate(
    # Ensure proper factor levels with +P and CTRL as references
    Treatment = factor(Treatment, levels = c("+P", "-P")),
    genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
  ) %>%
  droplevels()

cat("NIL data dimensions:", dim(nil_corrected_combined), "\n")

## NIL data dimensions: 64 23

cat("Treatment × Genotype combinations:\n")

## Treatment × Genotype combinations:

table(nil_corrected_combined$Treatment, nil_corrected_combined$genotype)

##     
##      CTRL Inv4m
##   +P   16    16
##   -P   16    16

# Extract model coefficients for all common phenotypes
model_coefficients <- list()

cat("\n=== Treatment and Genotype × Treatment Models ===\n")

## 
## === Treatment and Genotype × Treatment Models ===

for (phen in common_phenos) {
  if (sum(!is.na(nil_corrected_combined[[phen]])) < 20) {
    message("Skipping ", phen, " - insufficient data")
    next
  }
  
  model_formula <- as.formula(paste(phen, "~ genotype * Treatment"))
  
  lm_result <- try(
    lm(model_formula, data = nil_corrected_combined),
    silent = TRUE
  )
  
  if (!inherits(lm_result, "try-error")) {
    # Extract coefficients
    coef_summary <- summary(lm_result)$coefficients
    coef_df <- data.frame(
      trait = phen,
      term = rownames(coef_summary),
      estimate = coef_summary[, "Estimate"],
      std_error = coef_summary[, "Std. Error"],
      t_value = coef_summary[, "t value"],
      p_value = coef_summary[, "Pr(>|t|)"],
      stringsAsFactors = FALSE
    )
    
    model_coefficients[[phen]] <- coef_df
    
    cat("\n=== ", phen, " ===\n")
    print(summary(lm_result))
  }
}

## 
## ===  PH  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8349 -2.7792  0.4735  2.8230  9.6399 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                161.658      1.047 154.406  < 2e-16 ***
## genotypeInv4m                5.570      1.481   3.762 0.000385 ***
## Treatment-P                  2.588      1.481   1.748 0.085629 .  
## genotypeInv4m:Treatment-P    1.677      2.094   0.801 0.426346    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.188 on 60 degrees of freedom
## Multiple R-squared:  0.4486, Adjusted R-squared:  0.421 
## F-statistic: 16.27 on 3 and 60 DF,  p-value: 7.49e-08
## 
## 
## ===  DTA  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.35736 -0.70516 -0.01931  0.60844  2.88214 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                72.1200     0.2599 277.458  < 2e-16 ***
## genotypeInv4m              -1.4778     0.3676  -4.020 0.000165 ***
## Treatment-P                 3.4329     0.3676   9.339 2.67e-13 ***
## genotypeInv4m:Treatment-P   0.3374     0.5199   0.649 0.518774    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.04 on 60 degrees of freedom
## Multiple R-squared:  0.784,  Adjusted R-squared:  0.7732 
## F-statistic: 72.59 on 3 and 60 DF,  p-value: < 2.2e-16
## 
## 
## ===  DTS  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.30539 -0.58763  0.05908  0.52957  2.51458 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                73.6544     0.2277 323.494  < 2e-16 ***
## genotypeInv4m              -1.2842     0.3220  -3.988 0.000183 ***
## Treatment-P                 3.0621     0.3220   9.510 1.38e-13 ***
## genotypeInv4m:Treatment-P   0.7087     0.4554   1.556 0.124886    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9107 on 60 degrees of freedom
## Multiple R-squared:  0.8028, Adjusted R-squared:  0.7929 
## F-statistic: 81.42 on 3 and 60 DF,  p-value: < 2.2e-16
## 
## 
## ===  STW40  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.338 -1.303 -0.032  1.450  6.476 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                12.0737     0.7239  16.679  < 2e-16 ***
## genotypeInv4m              -2.9345     1.0076  -2.912  0.00506 ** 
## Treatment-P                -6.5170     1.0076  -6.468 2.14e-08 ***
## genotypeInv4m:Treatment-P   2.8619     1.4134   2.025  0.04741 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.804 on 59 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.5015, Adjusted R-squared:  0.4761 
## F-statistic: 19.78 on 3 and 59 DF,  p-value: 5.389e-09
## 
## 
## ===  STWHV  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.370  -7.546   1.087   5.429  43.449 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                106.989      3.433  31.163  < 2e-16 ***
## genotypeInv4m              -11.655      4.701  -2.479   0.0161 *  
## Treatment-P                -22.407      4.701  -4.766  1.3e-05 ***
## genotypeInv4m:Treatment-P    7.421      6.537   1.135   0.2609    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.85 on 58 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3988, Adjusted R-squared:  0.3677 
## F-statistic: 12.82 on 3 and 58 DF,  p-value: 1.557e-06
## 
## 
## ===  STW50  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.2540  -2.2394   0.1365   2.5199  11.9398 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 35.984      1.159  31.060   <2e-16 ***
## genotypeInv4m               -4.113      1.638  -2.510   0.0152 *  
## Treatment-P                -18.759      1.583 -11.851   <2e-16 ***
## genotypeInv4m:Treatment-P    4.620      2.222   2.079   0.0425 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.177 on 53 degrees of freedom
##   (7 observations deleted due to missingness)
## Multiple R-squared:  0.8097, Adjusted R-squared:  0.7989 
## F-statistic: 75.15 on 3 and 53 DF,  p-value: < 2.2e-16
## 
## 
## ===  STW60  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.086  -7.884  -0.080   4.937  35.809 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 77.003      3.517  21.896  < 2e-16 ***
## genotypeInv4m               -2.906      5.076  -0.572    0.570    
## Treatment-P                -33.343      4.973  -6.704 1.74e-08 ***
## genotypeInv4m:Treatment-P   10.807      6.941   1.557    0.126    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12.68 on 50 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.5715, Adjusted R-squared:  0.5458 
## F-statistic: 22.23 on 3 and 50 DF,  p-value: 2.768e-09
## 
## 
## ===  EW  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -54.605 -10.703   1.946  12.888  21.022 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 86.460      4.788  18.056   <2e-16 ***
## genotypeInv4m               12.388      6.399   1.936   0.0587 .  
## Treatment-P                 -2.487      6.304  -0.395   0.6949    
## genotypeInv4m:Treatment-P   -7.283      8.784  -0.829   0.4111    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.88 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1261, Adjusted R-squared:  0.07264 
## F-statistic: 2.358 on 3 and 49 DF,  p-value: 0.08307
## 
## 
## ===  EL  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6282 -0.2277  0.0289  0.4594  1.3351 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                14.7579     0.2905  50.804   <2e-16 ***
## genotypeInv4m               0.8936     0.3882   2.302   0.0256 *  
## Treatment-P                 0.8272     0.3824   2.163   0.0354 *  
## genotypeInv4m:Treatment-P  -1.0603     0.5329  -1.990   0.0522 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9634 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1154, Adjusted R-squared:  0.06126 
## F-statistic: 2.131 on 3 and 49 DF,  p-value: 0.1083
## 
## 
## ===  ED  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.3132  -0.6363   0.1322   1.1502   4.9629 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               39.04209    0.84882  45.996   <2e-16 ***
## genotypeInv4m             -0.13902    1.13429  -0.123    0.903    
## Treatment-P                0.07826    1.11753   0.070    0.944    
## genotypeInv4m:Treatment-P -2.13106    1.55712  -1.369    0.177    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.815 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1073, Adjusted R-squared:  0.05259 
## F-statistic: 1.962 on 3 and 49 DF,  p-value: 0.1319
## 
## 
## ===  KRN  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3323 -0.3699  0.0803  0.6031  1.7838 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               16.73541    0.28661  58.390   <2e-16 ***
## genotypeInv4m             -0.23275    0.38300  -0.608    0.546    
## Treatment-P                0.09206    0.37734   0.244    0.808    
## genotypeInv4m:Treatment-P -0.56062    0.52578  -1.066    0.292    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9506 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1014, Adjusted R-squared:  0.04643 
## F-statistic: 1.844 on 3 and 49 DF,  p-value: 0.1515
## 
## 
## ===  CD  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6747 -0.7966  0.0991  0.8650  2.9659 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                26.1623     0.5325  49.135   <2e-16 ***
## genotypeInv4m               0.1583     0.7115   0.222    0.825    
## Treatment-P                -0.1906     0.7010  -0.272    0.787    
## genotypeInv4m:Treatment-P  -2.6177     0.9768  -2.680    0.010 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.766 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.3109, Adjusted R-squared:  0.2687 
## F-statistic:  7.37 on 3 and 49 DF,  p-value: 0.0003591
## 
## 
## ===  TKW  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -46.299  -8.307   2.268   9.737  19.859 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                66.6977     4.0227  16.580   <2e-16 ***
## genotypeInv4m              11.8710     5.3755   2.208   0.0319 *  
## Treatment-P                -0.3746     5.2961  -0.071   0.9439    
## genotypeInv4m:Treatment-P  -8.0556     7.3794  -1.092   0.2803    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.34 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1326, Adjusted R-squared:  0.07952 
## F-statistic: 2.497 on 3 and 49 DF,  p-value: 0.07059
## 
## 
## ===  KW50  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2755 -0.9140 -0.2116  0.7555  4.7007 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                10.4346     0.4073  25.617  < 2e-16 ***
## genotypeInv4m               0.2029     0.5443   0.373  0.71094    
## Treatment-P                -1.8014     0.5363  -3.359  0.00152 ** 
## genotypeInv4m:Treatment-P  -0.1211     0.7472  -0.162  0.87193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.351 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.3437, Adjusted R-squared:  0.3035 
## F-statistic: 8.553 on 3 and 49 DF,  p-value: 0.0001139
## 
## 
## ===  TKN  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -255.83  -52.03   11.53   64.12  150.58 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 338.75      26.40  12.833   <2e-16 ***
## genotypeInv4m                39.63      35.27   1.123    0.267    
## Treatment-P                  69.22      34.75   1.992    0.052 .  
## genotypeInv4m:Treatment-P   -21.07      48.42  -0.435    0.665    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 87.55 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1236, Adjusted R-squared:  0.06995 
## F-statistic: 2.304 on 3 and 49 DF,  p-value: 0.0885
## 
## 
## ===  HI  ===
## 
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45944 -0.11480  0.02008  0.12552  0.37169 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.66198    0.04909  13.485   <2e-16 ***
## genotypeInv4m              0.17185    0.06560   2.620   0.0117 *  
## Treatment-P                0.12871    0.06463   1.991   0.0520 .  
## genotypeInv4m:Treatment-P -0.09458    0.09006  -1.050   0.2988    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1628 on 49 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:   0.18,  Adjusted R-squared:  0.1298 
## F-statistic: 3.586 on 3 and 49 DF,  p-value: 0.02013

# Combine all coefficients into single data frame
if (length(model_coefficients) > 0) {
  all_coefficients <- bind_rows(model_coefficients) %>%
    mutate(
      term = case_when(
        term == "(Intercept)" ~ "Intercept (CTRL, +P)",
        term == "genotypeInv4m" ~ "Genotype Effect (Inv4m vs CTRL)",
        term == "Treatment-P" ~ "Treatment Effect (-P vs +P)",
        term == "genotypeInv4m:Treatment-P" ~ "Inv4m:-P",
        TRUE ~ term
      )
    )
  
  # Apply FDR correction within each term type
  all_coefficients <- all_coefficients %>%
    group_by(term) %>%
    mutate(
      p_adj_fdr = p.adjust(p_value, method = "fdr")
    ) %>%
    ungroup()
  
  cat("\n=== All Model Coefficients ===\n")
  print(all_coefficients %>% filter(grepl("Interaction",term)) %>% arrange(p_value))
}

## 
## === All Model Coefficients ===
## # A tibble: 0 × 7
## # ℹ 7 variables: trait <chr>, term <chr>, estimate <dbl>, std_error <dbl>,
## #   t_value <dbl>, p_value <dbl>, p_adj_fdr <dbl>

9. Effect tables: Treatment effects and Genotype × Treatment interactions

if (exists("all_coefficients") && nrow(all_coefficients) > 0) {
  
  # Treatment effects table (main effect of -P vs +P)
  treatment_effects <- all_coefficients %>%
    filter(term == "Treatment Effect (-P vs +P)") %>%
    arrange(p_value) %>%
    mutate(
      significance = case_when(
        p_adj_fdr < 0.001 ~ "***",
        p_adj_fdr < 0.01 ~ "**", 
        p_adj_fdr < 0.05 ~ "*",
        p_adj_fdr < 0.1 ~ ".",
        TRUE ~ ""
      )
    )
  
  cat("\n=== Treatment Effects: -P vs +P (Reference) ===\n")
  cat("Negative estimates indicate reduction under -P treatment\n")
  cat("Positive estimates indicate increase under -P treatment\n\n")
  
  print(treatment_effects %>%
        select(trait, estimate, std_error, t_value, p_value, p_adj_fdr, significance) %>%
        mutate(across(where(is.numeric), ~ round(.x, 4))))
  
  # Genotype × Treatment interaction effects table
  interaction_effects <- all_coefficients %>%
    filter(term == "Inv4m:-P") %>%
    arrange(p_value) %>%
    mutate(
      significance = case_when(
        p_adj_fdr < 0.001 ~ "***",
        p_adj_fdr < 0.01 ~ "**", 
        p_adj_fdr < 0.05 ~ "*",
        p_adj_fdr < 0.1 ~ ".",
        TRUE ~ ""
      )
    )
  
  cat("\n=== Inv4m:-P Effects ===\n")
  cat("Tests whether Inv4m responds differently to -P vs +P compared to CTRL\n")
  cat("Negative estimates: Inv4m shows greater reduction under -P than CTRL\n")
  cat("Positive estimates: Inv4m shows less reduction (or greater increase) under -P than CTRL\n\n")
  
  print(interaction_effects %>%
        select(trait, estimate, std_error, t_value, p_value, p_adj_fdr, significance) %>%
        mutate(across(where(is.numeric), ~ round(.x, 4))))
  
  # Genotype main effects table
  genotype_effects <- all_coefficients %>%
    filter(term == "Genotype Effect (Inv4m vs CTRL)") %>%
    arrange(p_value) %>%
    mutate(
      significance = case_when(
        p_adj_fdr < 0.001 ~ "***",
        p_adj_fdr < 0.01 ~ "**", 
        p_adj_fdr < 0.05 ~ "*",
        p_adj_fdr < 0.1 ~ ".",
        TRUE ~ ""
      )
    )
  
  cat("\n=== Genotype Effects: Inv4m vs CTRL (at +P reference level) ===\n")
  print(genotype_effects %>%
        select(trait, estimate, std_error, t_value, p_value, p_adj_fdr, significance) %>%
        mutate(across(where(is.numeric), ~ round(.x, 4))))
  
  # Summary counts
  cat("\n=== Summary of Significant Effects (FDR < 0.05) ===\n")
  cat("Treatment effects:", sum(treatment_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(treatment_effects), "\n")
  cat("Interaction effects:", sum(interaction_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(interaction_effects), "\n")
  cat("Genotype effects:", sum(genotype_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(genotype_effects), "\n")
}

## 
## === Treatment Effects: -P vs +P (Reference) ===
## Negative estimates indicate reduction under -P treatment
## Positive estimates indicate increase under -P treatment
## 
## # A tibble: 16 × 7
##    trait estimate std_error  t_value p_value p_adj_fdr significance
##    <chr>    <dbl>     <dbl>    <dbl>   <dbl>     <dbl> <chr>       
##  1 STW50 -18.8       1.58   -11.9     0         0      "***"       
##  2 DTS     3.06      0.322    9.51    0         0      "***"       
##  3 DTA     3.43      0.368    9.34    0         0      "***"       
##  4 STW60 -33.3       4.97    -6.70    0         0      "***"       
##  5 STW40  -6.52      1.01    -6.47    0         0      "***"       
##  6 STWHV -22.4       4.70    -4.77    0         0      "***"       
##  7 KW50   -1.80      0.536   -3.36    0.0015    0.0035 "**"        
##  8 EL      0.827     0.382    2.16    0.0354    0.0709 "."         
##  9 TKN    69.2      34.8      1.99    0.052     0.0832 "."         
## 10 HI      0.129     0.0646   1.99    0.052     0.0832 "."         
## 11 PH      2.59      1.48     1.75    0.0856    0.125  ""          
## 12 EW     -2.49      6.30    -0.394   0.695     0.924  ""          
## 13 CD     -0.191     0.701   -0.272   0.787     0.924  ""          
## 14 KRN     0.0921    0.377    0.244   0.808     0.924  ""          
## 15 TKW    -0.375     5.30    -0.0707  0.944     0.944  ""          
## 16 ED      0.0783    1.12     0.07    0.944     0.944  ""          
## 
## === Inv4m:-P Effects ===
## Tests whether Inv4m responds differently to -P vs +P compared to CTRL
## Negative estimates: Inv4m shows greater reduction under -P than CTRL
## Positive estimates: Inv4m shows less reduction (or greater increase) under -P than CTRL
## 
## # A tibble: 16 × 7
##    trait estimate std_error t_value p_value p_adj_fdr significance
##    <chr>    <dbl>     <dbl>   <dbl>   <dbl>     <dbl> <chr>       
##  1 CD     -2.62      0.977   -2.68   0.01       0.16  ""          
##  2 STW50   4.62      2.22     2.08   0.0425     0.209 ""          
##  3 STW40   2.86      1.41     2.02   0.0474     0.209 ""          
##  4 EL     -1.06      0.533   -1.99   0.0522     0.209 ""          
##  5 DTS     0.709     0.455    1.56   0.125      0.336 ""          
##  6 STW60  10.8       6.94     1.56   0.126      0.336 ""          
##  7 ED     -2.13      1.56    -1.37   0.177      0.405 ""          
##  8 STWHV   7.42      6.54     1.14   0.261      0.435 ""          
##  9 TKW    -8.06      7.38    -1.09   0.280      0.435 ""          
## 10 KRN    -0.561     0.526   -1.07   0.292      0.435 ""          
## 11 HI     -0.0946    0.0901  -1.05   0.299      0.435 ""          
## 12 EW     -7.28      8.78    -0.829  0.411      0.525 ""          
## 13 PH      1.68      2.09     0.801  0.426      0.525 ""          
## 14 DTA     0.337     0.520    0.649  0.519      0.593 ""          
## 15 TKN   -21.1      48.4     -0.435  0.665      0.710 ""          
## 16 KW50   -0.121     0.747   -0.162  0.872      0.872 ""          
## 
## === Genotype Effects: Inv4m vs CTRL (at +P reference level) ===
## # A tibble: 16 × 7
##    trait estimate std_error t_value p_value p_adj_fdr significance
##    <chr>    <dbl>     <dbl>   <dbl>   <dbl>     <dbl> <chr>       
##  1 DTA     -1.48     0.368   -4.02   0.0002    0.0015 "**"        
##  2 DTS     -1.28     0.322   -3.99   0.0002    0.0015 "**"        
##  3 PH       5.57     1.48     3.76   0.0004    0.0021 "**"        
##  4 STW40   -2.93     1.01    -2.91   0.0051    0.0202 "*"         
##  5 HI       0.172    0.0656   2.62   0.0117    0.0368 "*"         
##  6 STW50   -4.11     1.64    -2.51   0.0152    0.0368 "*"         
##  7 STWHV  -11.7      4.70    -2.48   0.0161    0.0368 "*"         
##  8 EL       0.894    0.388    2.30   0.0256    0.0512 "."         
##  9 TKW     11.9      5.38     2.21   0.0319    0.0568 "."         
## 10 EW      12.4      6.40     1.94   0.0587    0.0938 "."         
## 11 TKN     39.6     35.3      1.12   0.267     0.388  ""          
## 12 KRN     -0.233    0.383   -0.608  0.546     0.701  ""          
## 13 STW60   -2.91     5.08    -0.572  0.570     0.701  ""          
## 14 KW50     0.203    0.544    0.373  0.711     0.812  ""          
## 15 CD       0.158    0.712    0.222  0.825     0.880  ""          
## 16 ED      -0.139    1.13    -0.123  0.903     0.903  ""          
## 
## === Summary of Significant Effects (FDR < 0.05) ===
## Treatment effects: 7 of 16 
## Interaction effects: 0 of 16 
## Genotype effects: 7 of 16

10. Visualization: Treatment and Genotype × Treatment effects

# Define plotting theme
pheno_theme2 <- ggpubr::theme_classic2(base_size = 16) +
  ggplot2::theme(
    plot.title = ggtext::element_markdown(hjust = 0.5, face = "bold"),
    axis.title.y = ggtext::element_markdown(),
    axis.title.x = element_text(face = "bold"),
    axis.text.x = element_text(size = 14, face = "bold", color = "black"),
    strip.background = element_blank(),
    strip.text = ggtext::element_markdown(size = 14),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 12)
  )

if (length(common_phenos) > 0 && exists("all_coefficients")) {
  
  # Prepare data for plotting
  plot_data_combined <- nil_corrected_combined %>%
    pivot_longer(
      cols = all_of(common_phenos),
      names_to = "trait",
      values_to = "value"
    ) %>%
    filter(!is.na(value)) %>%
    mutate(trait = as.character(trait))
  
  # Treatment × Genotype interaction plot (emphasizing treatment effect first)
  interaction_plot <- ggplot(
    plot_data_combined, 
    aes(x = Treatment, y = value, color = genotype, group = genotype)
  ) +
    stat_summary(
      fun = mean, 
      geom = "point", 
      size = 4
    ) +
    stat_summary(
      fun = mean, 
      geom = "line", 
      linewidth = 1.5
    ) +
    stat_summary(
      fun.data = mean_se, 
      geom = "errorbar", 
      width = 0.1,
      linewidth = 1
    ) +
    scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
    scale_x_discrete(labels = c("+P" = "+P\n(Reference)", "-P" = "-P\n(Low P)")) +
    labs(
      title = "Treatment Effects and Inv4m:-Ps",
      subtitle = "+P is reference treatment; Lines show genotype-specific responses",
      y = "Spatially Corrected Value",
      x = "Phosphorus Treatment",
      color = "Genotype"
    ) +
    facet_wrap(~ trait, scales = "free_y", ncol = 4) +
    pheno_theme2
  
  print(interaction_plot)
  
  # Focus on significant treatment effects
  if (exists("treatment_effects")) {
    sig_treatment_traits <- treatment_effects %>%
      filter(p_adj_fdr < 0.05) %>%
      pull(trait)
    
    if (length(sig_treatment_traits) > 0) {
      treatment_plot_data <- plot_data_combined %>%
        filter(trait %in% sig_treatment_traits)
      
      treatment_plot <- ggplot(
        treatment_plot_data,
        aes(x = Treatment, y = value, fill = Treatment)
      ) +
        geom_boxplot(width = 0.6, alpha = 0.7, linewidth = 0.8) +
        geom_point(
          position = position_jitter(width = 0.2),
          size = 2,
          alpha = 0.6
        ) +
        scale_fill_manual(
          values = c("+P" = "lightblue", "-P" = "lightcoral"),
          labels = c("+P" = "+P (Reference)", "-P" = "-P (Low P)")
        ) +
        labs(
          title = "Significant Treatment Effects (FDR < 0.05)",
          subtitle = "Traits showing significant response to phosphorus limitation",
          y = "Spatially Corrected Value",
          x = "Phosphorus Treatment",
          fill = "Treatment"
        ) +
        facet_wrap(~ trait, scales = "free_y", ncol = 3) +
        pheno_theme2
      
      print(treatment_plot)
    }
  }
  
  # Focus on significant interaction effects
  if (exists("interaction_effects")) {
    sig_interaction_traits <- interaction_effects %>%
      filter(p_adj_fdr < 0.05) %>%
      pull(trait)
    
    if (length(sig_interaction_traits) > 0) {
      interaction_plot_data <- plot_data_combined %>%
        filter(trait %in% sig_interaction_traits)
      
      interaction_detailed_plot <- ggplot(
        interaction_plot_data, 
        aes(x = genotype, y = value, fill = Treatment)
      ) +
        geom_boxplot(
          width = 0.6, 
          alpha = 0.7,
          position = position_dodge(width = 0.8),
          linewidth = 0.8
        ) +
        geom_point(
          aes(color = Treatment),
          position = position_jitterdodge(
            dodge.width = 0.8,
            jitter.width = 0.2
          ),
          size = 1.5,
          alpha = 0.6
        ) +
        scale_fill_manual(
          values = c("+P" = "lightblue", "-P" = "lightcoral"),
          labels = c("+P" = "+P (Reference)", "-P" = "-P (Low P)")
        ) +
        scale_color_manual(
          values = c("+P" = "blue", "-P" = "red"),
          labels = c("+P" = "+P (Reference)", "-P" = "-P (Low P)")
        ) +
        labs(
          title = "Significant Inv4m:-Ps (FDR < 0.05)",
          subtitle = "Genotypes respond differently to phosphorus limitation",
          y = "Spatially Corrected Value",
          x = "Genotype",
          fill = "P Treatment",
          color = "P Treatment"
        ) +
        facet_wrap(~ trait, scales = "free_y", ncol = 3) +
        pheno_theme2 +
        guides(color = "none")  # Remove duplicate legend
      
      print(interaction_detailed_plot)
    }
  }
}

11. Summary statistics and effect sizes

# Calculate effect sizes and summary statistics
effect_summary <- nil_corrected_combined %>%
  pivot_longer(
    cols = all_of(common_phenos),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(!is.na(value)) %>%
  group_by(trait, Treatment, genotype) %>%
  summarise(
    n = n(),
    mean = mean(value),
    sd = sd(value),
    se = sd / sqrt(n),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = genotype,
    values_from = c(n, mean, sd, se),
    names_sep = "_"
  ) %>%
  mutate(
    diff_mean = mean_Inv4m - mean_CTRL,
    percent_change = (diff_mean / mean_CTRL) * 100,
    pooled_sd = sqrt(((n_Inv4m - 1) * sd_Inv4m^2 + (n_CTRL - 1) * sd_CTRL^2) / 
                     (n_Inv4m + n_CTRL - 2)),
    cohens_d = diff_mean / pooled_sd
  ) %>%
  arrange(Treatment, desc(abs(cohens_d)))

cat("\n=== Effect Size Summary ===\n")

## 
## === Effect Size Summary ===

print(effect_summary %>% 
       select(trait, Treatment, diff_mean, percent_change, cohens_d) %>%
       mutate(across(where(is.numeric), ~ round(.x, 3))))

## # A tibble: 32 × 5
##    trait Treatment diff_mean percent_change cohens_d
##    <chr> <fct>         <dbl>          <dbl>    <dbl>
##  1 DTS   +P           -1.28           -1.74   -1.56 
##  2 DTA   +P           -1.48           -2.05   -1.43 
##  3 PH    +P            5.57            3.44    1.29 
##  4 EL    +P            0.894           6.06    0.917
##  5 STW40 +P           -2.94          -24.3    -0.89 
##  6 HI    +P            0.172          26.0     0.826
##  7 STW50 +P           -4.11          -11.4    -0.739
##  8 STWHV +P          -11.7           -10.9    -0.736
##  9 TKW   +P           11.9            17.8     0.683
## 10 EW    +P           12.4            14.3     0.632
## # ℹ 22 more rows

12. Export results

# Export the combined spatially corrected dataset
combined_output_file <- "PSU2022_spatially_corrected_both_treatments.csv"
write.csv(sp_corrected_combined, combined_output_file, row.names = FALSE)

# Export all model coefficients
if (exists("all_coefficients")) {
  coefficients_output_file <- "PSU2022_model_coefficients_spatially_corrected.csv"
  write.csv(all_coefficients, coefficients_output_file, row.names = FALSE)
  
  # Export treatment effects table
  if (exists("treatment_effects")) {
    treatment_output_file <- "PSU2022_treatment_effects_spatially_corrected.csv"
    write.csv(treatment_effects, treatment_output_file, row.names = FALSE)
  }
  
  # Export interaction effects table
  if (exists("interaction_effects")) {
    interaction_output_file <- "PSU2022_interaction_effects_spatially_corrected.csv"
    write.csv(interaction_effects, interaction_output_file, row.names = FALSE)
  }
  
  # Export genotype effects table
  if (exists("genotype_effects")) {
    genotype_output_file <- "PSU2022_genotype_effects_spatially_corrected.csv"
    write.csv(genotype_effects, genotype_output_file, row.names = FALSE)
  }
}

# Export summary statistics with treatment contrasts
effect_summary_treatment <- nil_corrected_combined %>%
  pivot_longer(
    cols = all_of(common_phenos),
    names_to = "trait",
    values_to = "value"
  ) %>%
  filter(!is.na(value)) %>%
  group_by(trait, Treatment, genotype) %>%
  summarise(
    n = n(),
    mean = mean(value),
    sd = sd(value),
    se = sd / sqrt(n),
    .groups = "drop"
  ) %>%
  # Calculate treatment effects (relative to +P reference)
  pivot_wider(
    names_from = Treatment,
    values_from = c(n, mean, sd, se),
    names_sep = "_"
  ) %>%
  mutate(
    treatment_effect = `mean_-P` - `mean_+P`,
    treatment_percent_change = (treatment_effect / `mean_+P`) * 100
  ) %>%
  # Add genotype comparison within each treatment
  group_by(trait) %>%
  mutate(
    genotype_effect_plus_p = case_when(
      genotype == "Inv4m" ~ `mean_+P` - `mean_+P`[genotype == "CTRL"],
      TRUE ~ NA_real_
    ),
    genotype_effect_minus_p = case_when(
      genotype == "Inv4m" ~ `mean_-P` - `mean_-P`[genotype == "CTRL"],
      TRUE ~ NA_real_
    )
  ) %>%
  ungroup()

summary_output_file <- "PSU2022_effect_summary_with_treatment_reference.csv"
write.csv(effect_summary_treatment, summary_output_file, row.names = FALSE)

cat("=== Files Exported ===\n")

## === Files Exported ===

cat("Spatially corrected data:", combined_output_file, "\n")

## Spatially corrected data: PSU2022_spatially_corrected_both_treatments.csv

if (exists("all_coefficients")) {
  cat("All model coefficients:", coefficients_output_file, "\n")
  if (exists("treatment_effects")) cat("Treatment effects:", treatment_output_file, "\n")
  if (exists("interaction_effects")) cat("Interaction effects:", interaction_output_file, "\n")
  if (exists("genotype_effects")) cat("Genotype effects:", genotype_output_file, "\n")
}

## All model coefficients: PSU2022_model_coefficients_spatially_corrected.csv 
## Treatment effects: PSU2022_treatment_effects_spatially_corrected.csv 
## Interaction effects: PSU2022_interaction_effects_spatially_corrected.csv 
## Genotype effects: PSU2022_genotype_effects_spatially_corrected.csv

cat("Effect summary (treatment reference):", summary_output_file, "\n")

## Effect summary (treatment reference): PSU2022_effect_summary_with_treatment_reference.csv

# Final analysis summary
cat("\n=== Final Analysis Summary ===\n")

## 
## === Final Analysis Summary ===

cat("Total phenotypes evaluated:", length(all_phenotypes), "\n")

## Total phenotypes evaluated: 16

cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")

## Usable phenotypes (<50% missing): 16

cat("Successfully fitted models (+P):", length(all_models_plus_p), "\n")

## Successfully fitted models (+P): 16

cat("Successfully fitted models (-P):", length(all_models_minus_p), "\n")

## Successfully fitted models (-P): 16

cat("Common phenotypes between treatments:", length(common_phenos), "\n")

## Common phenotypes between treatments: 16

cat("Total plots with corrected data:", nrow(sp_corrected_combined), "\n")

## Total plots with corrected data: 127

cat("Significant traits (by treatment):", length(sig_treatment_traits), "\n")

## Significant traits (by treatment): 7

# Print model fit information for both treatments
if (length(fit_info_plus_p) > 0) {
  cat("\n=== +P Model Fit Details ===\n")
  for (trait in names(fit_info_plus_p)) {
    info <- fit_info_plus_p[[trait]]
    cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
  }
}

## 
## === +P Model Fit Details ===
## PH: n=64, status=success
## DTA: n=64, status=success
## DTS: n=64, status=success
## STW40: n=62, status=success
## STWHV: n=62, status=success
## STW50: n=51, status=success
## STW60: n=50, status=success
## EW: n=49, status=success
## EL: n=49, status=success
## ED: n=49, status=success
## KRN: n=49, status=success
## CD: n=49, status=success
## TKW: n=49, status=success
## KW50: n=49, status=success
## TKN: n=49, status=success
## HI: n=49, status=success

# Final analysis summary
cat("\n=== Final Analysis Summary ===\n")

## 
## === Final Analysis Summary ===

cat("Total phenotypes evaluated:", length(all_phenotypes), "\n")

## Total phenotypes evaluated: 16

cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")

## Usable phenotypes (<50% missing): 16

cat("Successfully fitted models (+P):", length(all_models_plus_p), "\n")

## Successfully fitted models (+P): 16

cat("Successfully fitted models (-P):", length(all_models_minus_p), "\n")

## Successfully fitted models (-P): 16

cat("Common phenotypes between treatments:", length(common_phenos), "\n")

## Common phenotypes between treatments: 16

cat("Total plots with corrected data:", nrow(sp_corrected_combined), "\n")

## Total plots with corrected data: 127

if (exists("treatment_effects") && exists("interaction_effects") && exists("genotype_effects")) {
  cat("Significant treatment effects (FDR < 0.05):", sum(treatment_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(treatment_effects), "\n")
  cat("Significant interaction effects (FDR < 0.05):", sum(interaction_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(interaction_effects), "\n")
  cat("Significant genotype effects (FDR < 0.05):", sum(genotype_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(genotype_effects), "\n")
}

## Significant treatment effects (FDR < 0.05): 7 of 16 
## Significant interaction effects (FDR < 0.05): 0 of 16 
## Significant genotype effects (FDR < 0.05): 7 of 16

# Print model fit information for both treatments
if (length(fit_info_plus_p) > 0) {
  cat("\n=== +P Model Fit Details ===\n")
  for (trait in names(fit_info_plus_p)) {
    info <- fit_info_plus_p[[trait]]
    cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
  }
}

## 
## === +P Model Fit Details ===
## PH: n=64, status=success
## DTA: n=64, status=success
## DTS: n=64, status=success
## STW40: n=62, status=success
## STWHV: n=62, status=success
## STW50: n=51, status=success
## STW60: n=50, status=success
## EW: n=49, status=success
## EL: n=49, status=success
## ED: n=49, status=success
## KRN: n=49, status=success
## CD: n=49, status=success
## TKW: n=49, status=success
## KW50: n=49, status=success
## TKN: n=49, status=success
## HI: n=49, status=success

if (length(fit_info_minus_p) > 0) {
  cat("\n=== -P Model Fit Details ===\n")
  for (trait in names(fit_info_minus_p)) {
    info <- fit_info_minus_p[[trait]]
    cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
  }
}

## 
## === -P Model Fit Details ===
## PH: n=63, status=success
## DTA: n=63, status=success
## DTS: n=63, status=success
## STW40: n=63, status=success
## STWHV: n=63, status=success
## STW50: n=61, status=success
## STW60: n=57, status=success
## EW: n=56, status=success
## EL: n=56, status=success
## ED: n=56, status=success
## KRN: n=56, status=success
## CD: n=56, status=success
## TKW: n=56, status=success
## KW50: n=56, status=success
## TKN: n=56, status=success
## HI: n=56, status=success

# Check for missing data patterns in final dataset
missing_final <- sp_corrected_combined %>%
  select(all_of(common_phenos)) %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
  mutate(
    total_plots = nrow(sp_corrected_combined),
    missing_pct = round(missing_count / total_plots * 100, 1)
  ) %>%
  arrange(missing_pct)

cat("\n=== Missing Data in Final Dataset ===\n")

## 
## === Missing Data in Final Dataset ===

print(missing_final)

## # A tibble: 16 × 4
##    phenotype missing_count total_plots missing_pct
##    <chr>             <int>       <int>       <dbl>
##  1 PH                    0         127         0  
##  2 DTA                   0         127         0  
##  3 DTS                   0         127         0  
##  4 STW40                 2         127         1.6
##  5 STWHV                 2         127         1.6
##  6 STW50                15         127        11.8
##  7 STW60                20         127        15.7
##  8 EW                   22         127        17.3
##  9 EL                   22         127        17.3
## 10 ED                   22         127        17.3
## 11 KRN                  22         127        17.3
## 12 CD                   22         127        17.3
## 13 TKW                  22         127        17.3
## 14 KW50                 22         127        17.3
## 15 TKN                  22         127        17.3
## 16 HI                   22         127        17.3

# Key findings summary
if (exists("treatment_effects") && exists("interaction_effects")) {
  cat("\n=== Key Findings Summary ===\n")
  
  # Most significant treatment effects
  top_treatment <- treatment_effects %>%
    filter(p_adj_fdr < 0.05) %>%
    arrange(p_adj_fdr) %>%
    head(3)
  
  if (nrow(top_treatment) > 0) {
    cat("Top treatment effects (-P vs +P):\n")
    for (i in 1:nrow(top_treatment)) {
      trait <- top_treatment$trait[i]
      est <- round(top_treatment$estimate[i], 3)
      pval <- format(top_treatment$p_adj_fdr[i], scientific = TRUE, digits = 2)
      direction <- ifelse(est < 0, "decreased", "increased")
      cat(sprintf("  %s: %s by %s units (FDR = %s)\n", trait, direction, abs(est), pval))
    }
  }
  
  # Most significant interaction effects
  top_interactions <- interaction_effects %>%
    filter(p_adj_fdr < 0.05) %>%
    arrange(p_adj_fdr) %>%
    head(3)
  
  if (nrow(top_interactions) > 0) {
    cat("\nTop genotype × treatment interactions:\n")
    for (i in 1:nrow(top_interactions)) {
      trait <- top_interactions$trait[i]
      est <- round(top_interactions$estimate[i], 3)
      pval <- format(top_interactions$p_adj_fdr[i], scientific = TRUE, digits = 2)
      direction <- ifelse(est < 0, "more sensitive", "less sensitive")
      cat(sprintf("  %s: Inv4m is %s to -P than CTRL (interaction = %s, FDR = %s)\n", 
                  trait, direction, est, pval))
    }
  }
}

## 
## === Key Findings Summary ===
## Top treatment effects (-P vs +P):
##   STW50: decreased by 18.759 units (FDR = 2.6e-15)
##   DTS: increased by 3.062 units (FDR = 1.1e-12)
##   DTA: increased by 3.433 units (FDR = 1.4e-12)