This notebook fits a SpATS PSANOVA surface per
phenotype and returns spatially-corrected plot-level values for
each phenotype, including GDD-based traits (GDDA and GDDS). The approach
uses statgenHTP::fitModels() (which wraps SpATS when
provided a TimePoints object) and getCorrected() to extract
corrected plot-level observations. The workflow:
TimePoints object for
statgenHTP.fitModels() → getCorrected().sp_corrected.Warning: fitting many SpATS models can be computationally intensive. Run interactively on a machine with sufficient RAM/CPU.
library(tidyverse)
library(dplyr)
library(lubridate)
library(ggplot2)
library(ggtext)
library(ggfx)
library(nlme)
library(gstat)
library(rstatix)
library(ggpubr)
library(VIM)
library(knitr)
library(SpATS)
library(statgenHTP)
library(ggbeeswarm)
environment_name <- "CLY2025"
planting_date <- mdy("4/04/2025")
gdd_lookup_file <- "gdd_lookup_table.csv"
gdd_at_phenology <- read_csv(gdd_lookup_file) %>%
filter(environment == environment_name)
cat("Loaded GDD lookup for", environment_name, "with",
nrow(gdd_at_phenology), "dates\n")
## Loaded GDD lookup for CLY2025 with 175 dates
cat("GDD range:", min(gdd_at_phenology$cumulative_gdd_from_planting), "to",
max(gdd_at_phenology$cumulative_gdd_from_planting), "°C\n")
## GDD range: 13.965 to 2314.08 °C
file_path <- "/Users/fvrodriguez/Desktop/inv4mHighland/data/CLY25_Inv4m.csv"
if (!file.exists(file_path)) {
stop("Error: CLY25_Inv4m.csv not found in the working directory.")
} else {
field_data_raw <- read_csv(file_path, na = c("", "#N/A", "NA"))
field_data <- field_data_raw %>%
mutate(EBA = 0.75 * BL * BW) %>%
rename(
plant_id = plant,
plotId = plot_id,
block = rep,
x = X_pos,
y = Y_pos,
inv4m = inv4m_gt
) %>%
mutate(
across(c(plotId, block, donor, inv4m), as.factor),
x = x + runif(n(), min = 0.0, max = 0.01),
environment = environment_name,
anthesis_date = planting_date + days(as.integer(DTA)),
silking_date = planting_date + days(as.integer(DTS))
) %>%
left_join(
gdd_at_phenology %>%
rename(anthesis_date = date, GDDA = cumulative_gdd_from_planting),
by = c("environment", "anthesis_date")
) %>%
left_join(
gdd_at_phenology %>%
rename(silking_date = date, GDDS = cumulative_gdd_from_planting),
by = c("environment", "silking_date")
)
base_phenotypes <- c("DTA", "DTS", "LAE", "PH", "EN", "SL", "BL", "BW", "EBA")
available_phenotypes <- intersect(base_phenotypes, names(field_data))
all_phenotypes <- available_phenotypes
cat("Plant-level data loaded and cleaned successfully!\n")
cat("Phenotypes available for analysis:",
paste(all_phenotypes, collapse = ", "), "\n")
cat("Plant-level data dimensions:", dim(field_data), "\n")
cat("GDD columns added at plant level: GDDA (anthesis), GDDS (silking)\n")
cat("Plants with GDDA:", sum(!is.na(field_data$GDDA)), "\n")
cat("Plants with GDDS:", sum(!is.na(field_data$GDDS)), "\n")
}
## Plant-level data loaded and cleaned successfully!
## Phenotypes available for analysis: DTA, DTS, LAE, PH, EN, SL, BL, BW, EBA
## Plant-level data dimensions: 442 29
## GDD columns added at plant level: GDDA (anthesis), GDDS (silking)
## Plants with GDDA: 442
## Plants with GDDS: 442
head(field_data)
## # A tibble: 6 × 29
## plant_id row_id plotId plot_row plot_col block y x donor inv4m group
## <dbl> <dbl> <fct> <dbl> <dbl> <fct> <dbl> <dbl> <fct> <fct> <chr>
## 1 1 4573 1 1 1 3 1.7 1.00 TMEX INV4M TMEX-I…
## 2 2 4573 1 1 1 3 2.3 1.00 TMEX INV4M TMEX-I…
## 3 3 4573 1 1 1 3 3.6 1.00 TMEX INV4M TMEX-I…
## 4 4 4573 1 1 1 3 4 1.01 TMEX INV4M TMEX-I…
## 5 5 4573 1 1 1 3 4.5 1.01 TMEX INV4M TMEX-I…
## 6 6 4573 1 1 1 3 5 1.01 TMEX INV4M TMEX-I…
## # ℹ 18 more variables: DOA <dbl>, DOS <dbl>, DTA <dbl>, DTS <dbl>,
## # DTA_GDD <dbl>, DTS_GDD <dbl>, LAE <dbl>, PH <dbl>, EN <dbl>, SL <dbl>,
## # BL <dbl>, BW <dbl>, EBA <dbl>, environment <chr>, anthesis_date <date>,
## # silking_date <date>, GDDA <dbl>, GDDS <dbl>
plot_data <- field_data %>%
group_by(plotId, block, plot_row, plot_col, donor, inv4m) %>%
summarise(
across(all_of(c(all_phenotypes, "GDDA", "GDDS")), ~ mean(.x, na.rm = TRUE)),
n_plants = n(),
.groups = "drop"
) %>%
mutate(
across(all_of(c(all_phenotypes, "GDDA", "GDDS")), ~ ifelse(is.nan(.x), NA, .x))
) %>%
rename(
Rep = block,
Plot_Row = plot_row,
Plot_Column = plot_col
) %>%
mutate(
Plot_Row = as.numeric(Plot_Row),
Plot_Column = as.numeric(Plot_Column),
Rep = as.factor(Rep),
genotype = as.factor(paste(donor, inv4m, sep = "."))
) %>%
select(plotId, Rep, Plot_Row, Plot_Column, donor, inv4m, genotype,
all_of(all_phenotypes), GDDA, GDDS, n_plants)
cat("Plot-level summary dimensions:", dim(plot_data), "\n")
## Plot-level summary dimensions: 64 19
cat("Average plants per plot:",
round(mean(plot_data$n_plants, na.rm = TRUE), 1), "\n")
## Average plants per plot: 6.9
cat("GDD columns at plot level: GDDA (anthesis), GDDS (silking)\n")
## GDD columns at plot level: GDDA (anthesis), GDDS (silking)
cat("Plots with GDDA:", sum(!is.na(plot_data$GDDA)), "\n")
## Plots with GDDA: 64
cat("Plots with GDDS:", sum(!is.na(plot_data$GDDS)), "\n")
## Plots with GDDS: 64
cat("Genotype levels:", levels(plot_data$genotype), "\n")
## Genotype levels: MI21.CTRL MI21.INV4M TMEX.CTRL TMEX.INV4M
cat("Rep levels:", levels(plot_data$Rep), "\n")
## Rep levels: 1 2 3 4
head(plot_data)
## # A tibble: 6 × 19
## plotId Rep Plot_Row Plot_Column donor inv4m genotype DTA DTS LAE PH
## <fct> <fct> <dbl> <dbl> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 1 3 1 1 TMEX INV4M TMEX.IN… 78.3 79.7 5.14 211.
## 2 2 3 2 1 MI21 INV4M MI21.IN… 76.3 77.6 5.71 176.
## 3 3 3 3 1 MI21 CTRL MI21.CT… 75 75.5 5.75 193
## 4 4 3 4 1 TMEX CTRL TMEX.CT… 77.8 78.6 5.5 199.
## 5 5 1 5 1 MI21 INV4M MI21.IN… 75.9 76.8 5.62 179.
## 6 6 1 6 1 TMEX INV4M TMEX.IN… 78 78.7 5 205
## # ℹ 8 more variables: EN <dbl>, SL <dbl>, BL <dbl>, BW <dbl>, EBA <dbl>,
## # GDDA <dbl>, GDDS <dbl>, n_plants <int>
all_traits <- c(all_phenotypes, "GDDA", "GDDS")
missing_summary <- plot_data %>%
select(all_of(all_traits)) %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
mutate(
total_obs = nrow(plot_data),
missing_pct = round(missing_count / total_obs * 100, 1),
available_n = total_obs - missing_count
) %>%
arrange(missing_pct)
print(missing_summary)
## # A tibble: 11 × 5
## phenotype missing_count total_obs missing_pct available_n
## <chr> <int> <int> <dbl> <int>
## 1 DTA 0 64 0 64
## 2 DTS 0 64 0 64
## 3 LAE 0 64 0 64
## 4 PH 0 64 0 64
## 5 EN 0 64 0 64
## 6 SL 0 64 0 64
## 7 BL 0 64 0 64
## 8 BW 0 64 0 64
## 9 EBA 0 64 0 64
## 10 GDDA 0 64 0 64
## 11 GDDS 0 64 0 64
usable_phenotypes <- missing_summary %>%
filter(missing_pct < 50) %>%
pull(phenotype)
cat("Usable phenotypes (<50% missing):",
paste(usable_phenotypes, collapse = ", "), "\n")
## Usable phenotypes (<50% missing): DTA, DTS, LAE, PH, EN, SL, BL, BW, EBA, GDDA, GDDS
single_time <- plot_data %>%
mutate(
plotId = as.character(plotId),
Rep = as.character(Rep),
Plot_Row = as.numeric(Plot_Row),
Plot_Column = as.numeric(Plot_Column),
time = as.POSIXct("2025-01-15 10:00:00")
)
tp_data <- createTimePoints(
dat = single_time,
experimentName = environment_name,
genotype = "genotype",
timePoint = "time",
plotId = "plotId",
repId = "Rep",
rowNum = "Plot_Row",
colNum = "Plot_Column",
addCheck = FALSE
)
summary(tp_data)
## tp_data contains data for experiment CLY2025.
##
## It contains 1 time points.
## First time point: 2025-01-15 10:00:00
## Last time point: 2025-01-15 10:00:00
##
## No check genotypes are defined.
corrected_list <- list()
fit_info <- list()
all_models <- list()
for (phen in usable_phenotypes) {
message("Fitting SpATS model for: ", phen)
no_nas <- single_time %>%
filter(!is.na(.data[[phen]]))
if (nrow(no_nas) < 10) {
message("Insufficient data for ", phen, " (n=", nrow(no_nas), ")")
next
}
fit_try <- try(
fitModels(
TP = tp_data,
trait = phen,
extraFixedFactors = c("repId"),
timePoints = 1,
what = "fixed"
),
silent = TRUE
)
if (inherits(fit_try, "try-error")) {
message("Fit failed for ", phen, ": ", fit_try)
next
}
all_models[[phen]] <- fit_try
corr <- try(getCorrected(fit_try), silent = TRUE)
if (inherits(corr, "try-error") || is.null(corr)) {
message("getCorrected() failed for ", phen)
next
}
corr <- corr %>%
select(-phen)
corrected_list[[phen]] <- corr
fit_info[[phen]] <- list(n_obs = nrow(no_nas), status = "success")
}
if (length(corrected_list) > 0) {
sp_corrected <- lapply(
corrected_list,
function(corr_df) {
corr_df %>%
select(timeNumber, timePoint, genotype, repId, rowId, colId, plotId)
}
) %>%
bind_rows() %>%
arrange(plotId) %>%
distinct()
for (phen in names(corrected_list)) {
phen_corr <- paste0(phen, "_corr")
corr_df <- corrected_list[[phen]] %>%
select(plotId, all_of(phen_corr))
sp_corrected <- left_join(sp_corrected, corr_df, by = "plotId")
}
colnames(sp_corrected) <- gsub("_corr", "", colnames(sp_corrected))
sp_corrected <- sp_corrected %>%
separate(genotype, c("donor", "genotype"))
cat("Final sp_corrected dimensions:", dim(sp_corrected), "\n")
cat("Corrected traits:",
paste(intersect(usable_phenotypes, colnames(sp_corrected)),
collapse = ", "), "\n")
print(head(sp_corrected))
} else {
message("No models were successfully fitted.")
}
for (trait in usable_phenotypes) {
if (trait %in% names(all_models)) {
cat("\n### Spatial plots for", trait, "\n")
plot(all_models[[trait]], timePoints = 1)
plot(all_models[[trait]],
timePoints = 1,
plotType = "spatial",
spaTrend = "raw")
}
}
##
## ### Spatial plots for DTA
##
## ### Spatial plots for DTS
##
## ### Spatial plots for LAE
##
## ### Spatial plots for PH
##
## ### Spatial plots for EN
##
## ### Spatial plots for SL
##
## ### Spatial plots for BL
##
## ### Spatial plots for BW
##
## ### Spatial plots for EBA
##
## ### Spatial plots for GDDA
##
## ### Spatial plots for GDDS
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
corrected_phenotypes <- intersect(usable_phenotypes, colnames(sp_corrected))
if (length(corrected_phenotypes) > 0) {
comparison_genotypes <- unique(sp_corrected$genotype)
cat("Available genotypes:", paste(comparison_genotypes, collapse = ", "), "\n")
donors <- unique(sp_corrected$donor)
cat("Available donors:", paste(donors, collapse = ", "), "\n")
htest <- sp_corrected %>%
pivot_longer(
cols = all_of(corrected_phenotypes),
names_to = "trait",
values_to = "value"
) %>%
filter(!is.na(value)) %>%
group_by(donor, trait) %>%
filter(n_distinct(genotype) == 2) %>%
t_test(value ~ genotype) %>%
adjust_pvalue(method = "fdr") %>%
add_y_position(scales = "free") %>%
add_significance()
print(htest)
pheno_theme2 <- theme_classic2(base_size = 20) +
theme(
plot.title = element_markdown(hjust = 0.5, face = "bold"),
axis.title.y = element_markdown(),
axis.title.x = element_blank(),
axis.text.x = element_text(size = 25, face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = element_markdown(size = 20),
legend.position = "none"
)
for (current_donor in donors) {
cat("\n=== Processing donor:", current_donor, "===\n")
alpha <- 0.05
sig_traits_donor <- htest %>%
filter(donor == current_donor, p.adj < alpha) %>%
arrange(p.adj) %>%
pull(trait)
cat("Significant traits for", current_donor, "(FDR < 0.05):",
paste(sig_traits_donor, collapse = ", "), "\n")
if (length(sig_traits_donor) == 0) {
cat("No traits significant after FDR correction for donor",
current_donor, "\n")
next
}
plot_data_donor <- sp_corrected %>%
filter(donor == current_donor) %>%
pivot_longer(
cols = all_of(corrected_phenotypes),
names_to = "trait",
values_to = "value"
) %>%
filter(trait %in% sig_traits_donor, !is.na(value)) %>%
mutate(trait = as.character(trait))
test_to_plot_donor <- htest %>%
filter(donor == current_donor, trait %in% sig_traits_donor)
n_traits <- length(sig_traits_donor)
ncol_facets <- min(4, n_traits)
diff_plot <- ggplot(plot_data_donor,
aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) +
geom_quasirandom(size = 2) +
scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
labs(
title = paste("CLY25: ", current_donor,
" Spatially Corrected (FDR-Adjusted)"),
y = "Corrected Value",
x = "Genotype"
) +
stat_pvalue_manual(
test_to_plot_donor,
tip.length = 0.01,
step.height = 0.08,
size = 10,
bracket.size = 0.8
) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
facet_wrap(~ factor(trait, levels = sig_traits_donor),
scales = "free_y",
ncol = ncol_facets) +
pheno_theme2
print(diff_plot)
cat("\n--- Summary for", current_donor, "---\n")
summary_stats <- plot_data_donor %>%
group_by(trait, genotype) %>%
summarise(
n = n(),
mean = round(mean(value, na.rm = TRUE), 3),
sd = round(sd(value, na.rm = TRUE), 3),
.groups = "drop"
) %>%
pivot_wider(
names_from = genotype,
values_from = c(n, mean, sd),
names_sep = "_"
)
print(summary_stats)
}
cat("\n=== Overall Summary Across All Donors ===\n")
overall_sig <- htest %>%
filter(p.adj < alpha) %>%
group_by(trait) %>%
summarise(
n_donors_significant = n(),
min_pvalue = min(p.adj),
max_pvalue = max(p.adj),
.groups = "drop"
) %>%
arrange(desc(n_donors_significant), min_pvalue)
print(overall_sig)
} else {
message("No corrected phenotypes available for analysis")
}
} else {
message("No spatially corrected data available")
}
## Available genotypes: INV4M, CTRL
## Available donors: TMEX, MI21
## # A tibble: 22 × 14
## donor trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 BL value CTRL INV4M 16 16 5.95 29.9 1.63e-6 5.98e-6
## 2 MI21 BW value CTRL INV4M 16 16 5.64 30.0 3.86e-6 1.21e-5
## 3 MI21 DTA value CTRL INV4M 16 16 2.19 29.7 3.62e-2 4.73e-2
## 4 MI21 DTS value CTRL INV4M 16 16 3.16 30.0 3.57e-3 6.55e-3
## 5 MI21 EBA value CTRL INV4M 16 16 6.97 29.6 1.04e-7 4.58e-7
## 6 MI21 EN value CTRL INV4M 16 16 -0.903 29.8 3.74e-1 3.92e-1
## 7 MI21 GDDA value CTRL INV4M 16 16 2.17 29.6 3.84e-2 4.73e-2
## 8 MI21 GDDS value CTRL INV4M 16 16 3.13 30.0 3.93e-3 6.65e-3
## 9 MI21 LAE value CTRL INV4M 16 16 -2.16 29.7 3.87e-2 4.73e-2
## 10 MI21 PH value CTRL INV4M 16 16 3.62 29.7 1.09e-3 2.18e-3
## # ℹ 12 more rows
## # ℹ 3 more variables: y.position <dbl>, groups <named list>, p.adj.signif <chr>
##
## === Processing donor: TMEX ===
## Significant traits for TMEX (FDR < 0.05): DTA, GDDA, DTS, GDDS, PH, LAE, EN, BW, EBA
##
## --- Summary for TMEX ---
## # A tibble: 9 × 7
## trait n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 BW 16 16 8.62 8.19 0.336 0.485
## 2 DTA 16 16 77.7 79.3 0.545 0.639
## 3 DTS 16 16 78.8 80.6 0.503 0.75
## 4 EBA 16 16 393. 368. 24.5 27.8
## 5 EN 16 16 2.20 2.53 0.183 0.244
## 6 GDDA 16 16 877. 904. 8.70 10.3
## 7 GDDS 16 16 895. 924. 8.23 12.5
## 8 LAE 16 16 6.03 5.73 0.203 0.171
## 9 PH 16 16 194. 204. 4.74 7.40
##
## === Processing donor: MI21 ===
## Significant traits for MI21 (FDR < 0.05): EBA, BL, BW, PH, DTS, GDDS, DTA, GDDA, LAE
##
## --- Summary for MI21 ---
## # A tibble: 9 × 7
## trait n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 BL 16 16 58.3 55.3 1.49 1.39
## 2 BW 16 16 8.78 8.14 0.32 0.321
## 3 DTA 16 16 77.7 77.3 0.615 0.552
## 4 DTS 16 16 78.4 77.6 0.638 0.66
## 5 EBA 16 16 386. 338. 20.6 18.4
## 6 GDDA 16 16 877. 870. 9.68 8.60
## 7 GDDS 16 16 888. 876. 10.1 10.4
## 8 LAE 16 16 5.89 6.02 0.157 0.172
## 9 PH 16 16 187. 183. 3.50 3.86
##
## === Overall Summary Across All Donors ===
## # A tibble: 10 × 4
## trait n_donors_significant min_pvalue max_pvalue
## <chr> <int> <dbl> <dbl>
## 1 DTA 2 0.0000000758 0.0473
## 2 GDDA 2 0.0000000758 0.0473
## 3 DTS 2 0.000000169 0.00654
## 4 GDDS 2 0.000000169 0.00665
## 5 EBA 2 0.000000458 0.0201
## 6 BW 2 0.0000121 0.0122
## 7 PH 2 0.000211 0.00218
## 8 LAE 2 0.000219 0.0473
## 9 BL 1 0.00000598 0.00000598
## 10 EN 1 0.000473 0.000473
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
corrected_phenotypes <- intersect(c("PH", "DTA", "DTS", "GDDA", "GDDS"),
colnames(sp_corrected))
comparison_genotypes <- unique(sp_corrected$genotype)
cat("Available genotypes:", paste(comparison_genotypes, collapse = ", "), "\n")
donors <- unique(sp_corrected$donor)
cat("Available donors:", paste(donors, collapse = ", "), "\n")
htest <- sp_corrected %>%
pivot_longer(
cols = all_of(corrected_phenotypes),
names_to = "trait",
values_to = "value"
) %>%
filter(!is.na(value)) %>%
group_by(donor, trait) %>%
filter(n_distinct(genotype) == 2) %>%
t_test(value ~ genotype) %>%
adjust_pvalue(method = "fdr") %>%
add_y_position(scales = "free") %>%
add_significance()
print(htest)
pheno_theme2 <- theme_classic2(base_size = 22) +
theme(
plot.title = element_markdown(hjust = 0.5, face = "bold"),
axis.title.y = element_markdown(),
axis.title.x = element_blank(),
axis.text.x = element_text(size = 16, face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = element_markdown(size = 18, face = "bold"),
legend.position = "none"
)
trait_plots <- list()
for (current_trait in corrected_phenotypes) {
cat("\n=== Processing trait:", current_trait, "===\n")
plot_data_trait <- sp_corrected %>%
select(donor, genotype, all_of(current_trait)) %>%
rename(value = all_of(current_trait)) %>%
filter(!is.na(value))
test_to_plot_trait <- htest %>%
filter(trait == current_trait)
trait_plot <- ggplot(plot_data_trait,
aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) +
geom_quasirandom(size = 2.5) +
scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
labs(
title = current_trait,
y = paste("Corrected", current_trait),
x = "Genotype"
) +
stat_pvalue_manual(
test_to_plot_trait,
tip.length = 0.01,
step.height = 0.08,
size = 8,
bracket.size = 0.8
) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
facet_wrap(~ donor, ncol = 2) +
pheno_theme2
trait_plots[[current_trait]] <- trait_plot
cat("\n--- Summary for", current_trait, "---\n")
summary_stats_trait <- plot_data_trait %>%
group_by(donor, genotype) %>%
summarise(
n = n(),
mean = round(mean(value, na.rm = TRUE), 3),
sd = round(sd(value, na.rm = TRUE), 3),
.groups = "drop"
) %>%
pivot_wider(
names_from = genotype,
values_from = c(n, mean, sd),
names_sep = "_"
)
print(summary_stats_trait)
sig_results_trait <- test_to_plot_trait %>%
select(donor, statistic, p, p.adj, p.adj.signif) %>%
arrange(p.adj)
cat("Statistical test results for", current_trait, ":\n")
print(sig_results_trait)
}
if (length(trait_plots) > 0) {
cat("\n=== Creating combined plot ===\n")
ncol_arrange <- min(3, length(trait_plots))
nrow_arrange <- ceiling(length(trait_plots) / ncol_arrange)
combined_plot <- ggarrange(
plotlist = trait_plots[c("DTA","DTS", "PH","GDDA","GDDS")],
ncol = ncol_arrange,
nrow = nrow_arrange,
common.legend = TRUE,
legend = "bottom"
) %>%
annotate_figure(
top = text_grob("CLY2025 — Spatially Corrected Phenotypes",
color = "black",
face = "bold",
size = 18)
)
print(combined_plot)
}
cat("\n=== Overall Summary ===\n")
alpha <- 0.05
overall_sig <- htest %>%
mutate(significant = p.adj < alpha) %>%
group_by(trait) %>%
summarise(
n_donors_tested = n(),
n_donors_significant = sum(significant),
min_pvalue = min(p.adj),
max_pvalue = max(p.adj),
.groups = "drop"
) %>%
arrange(desc(n_donors_significant), min_pvalue)
print(overall_sig)
} else {
message("No spatially corrected data available for CLY2025")
}
## Available genotypes: INV4M, CTRL
## Available donors: TMEX, MI21
## # A tibble: 10 × 14
## donor trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 DTA value CTRL INV4M 16 16 2.19 29.7 3.62e-2 3.84e-2
## 2 MI21 DTS value CTRL INV4M 16 16 3.16 30.0 3.57e-3 4.91e-3
## 3 MI21 GDDA value CTRL INV4M 16 16 2.17 29.6 3.84e-2 3.84e-2
## 4 MI21 GDDS value CTRL INV4M 16 16 3.13 30.0 3.93e-3 4.91e-3
## 5 MI21 PH value CTRL INV4M 16 16 3.62 29.7 1.09e-3 1.82e-3
## 6 TMEX DTA value CTRL INV4M 16 16 -8.05 29.3 6.61e-9 3.45e-8
## 7 TMEX DTS value CTRL INV4M 16 16 -7.86 26.2 2.3 e-8 7.67e-8
## 8 TMEX GDDA value CTRL INV4M 16 16 -8.04 29.2 6.89e-9 3.45e-8
## 9 TMEX GDDS value CTRL INV4M 16 16 -7.77 26.0 3.07e-8 7.68e-8
## 10 TMEX PH value CTRL INV4M 16 16 -4.70 25.5 7.66e-5 1.53e-4
## # ℹ 3 more variables: y.position <dbl>, groups <named list>, p.adj.signif <chr>
##
## === Processing trait: PH ===
##
## --- Summary for PH ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 187. 183. 3.50 3.86
## 2 TMEX 16 16 194. 204. 4.74 7.40
## Statistical test results for PH :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -4.70 0.0000766 0.000153 ***
## 2 MI21 3.62 0.00109 0.00182 **
##
## === Processing trait: DTA ===
##
## --- Summary for DTA ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 77.7 77.3 0.615 0.552
## 2 TMEX 16 16 77.7 79.3 0.545 0.639
## Statistical test results for DTA :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -8.05 0.00000000661 0.0000000344 ****
## 2 MI21 2.19 0.0362 0.0384 *
##
## === Processing trait: DTS ===
##
## --- Summary for DTS ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 78.4 77.6 0.638 0.66
## 2 TMEX 16 16 78.8 80.6 0.503 0.75
## Statistical test results for DTS :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -7.86 0.000000023 0.0000000767 ****
## 2 MI21 3.16 0.00357 0.00491 **
##
## === Processing trait: GDDA ===
##
## --- Summary for GDDA ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 877. 870. 9.68 8.60
## 2 TMEX 16 16 877. 904. 8.70 10.3
## Statistical test results for GDDA :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -8.04 0.00000000689 0.0000000344 ****
## 2 MI21 2.17 0.0384 0.0384 *
##
## === Processing trait: GDDS ===
##
## --- Summary for GDDS ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 888. 876. 10.1 10.4
## 2 TMEX 16 16 895. 924. 8.23 12.5
## Statistical test results for GDDS :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -7.77 0.0000000307 0.0000000768 ****
## 2 MI21 3.13 0.00393 0.00491 **
##
## === Creating combined plot ===
##
## === Overall Summary ===
## # A tibble: 5 × 5
## trait n_donors_tested n_donors_significant min_pvalue max_pvalue
## <chr> <int> <int> <dbl> <dbl>
## 1 DTA 2 2 0.0000000344 0.0384
## 2 GDDA 2 2 0.0000000344 0.0384
## 3 DTS 2 2 0.0000000767 0.00491
## 4 GDDS 2 2 0.0000000768 0.00491
## 5 PH 2 2 0.000153 0.00182
cat("\n=== Analysis Summary ===\n")
##
## === Analysis Summary ===
cat("Total phenotypes evaluated:", length(all_traits), "\n")
## Total phenotypes evaluated: 11
cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")
## Usable phenotypes (<50% missing): 11
cat("Successfully fitted models:", length(all_models), "\n")
## Successfully fitted models: 11
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
cat("Plots with corrected data:", nrow(sp_corrected), "\n")
output_file <- paste0(environment_name, "_spatially_corrected_phenotypes.csv")
write_csv(sp_corrected, output_file)
cat("Spatially corrected data exported to:", output_file, "\n")
} else {
cat("No corrected data to export\n")
}
## Plots with corrected data: 64
## Spatially corrected data exported to: CLY2025_spatially_corrected_phenotypes.csv
if (length(fit_info) > 0) {
cat("\n=== Model Fit Details ===\n")
for (trait in names(fit_info)) {
info <- fit_info[[trait]]
cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
}
}
##
## === Model Fit Details ===
## DTA: n=64, status=success
## DTS: n=64, status=success
## LAE: n=64, status=success
## PH: n=64, status=success
## EN: n=64, status=success
## SL: n=64, status=success
## BL: n=64, status=success
## BW: n=64, status=success
## EBA: n=64, status=success
## GDDA: n=64, status=success
## GDDS: n=64, status=success
if (exists("sp_corrected") && nrow(sp_corrected) > 0) {
corrected_phenotypes <- intersect(usable_phenotypes, colnames(sp_corrected))
corrected_phenotypes <- c("DTA", "DTS",
"GDDA","GDDS",
"PH","EBA",
"BW","BL",
"LAE","EN")
if (length(corrected_phenotypes) > 0) {
comparison_genotypes <- unique(sp_corrected$genotype)
cat("Available genotypes:", paste(comparison_genotypes, collapse = ", "), "\n")
donors <- unique(sp_corrected$donor)
cat("Available donors:", paste(donors, collapse = ", "), "\n")
htest <- sp_corrected %>%
pivot_longer(
cols = all_of(corrected_phenotypes),
names_to = "trait",
values_to = "value"
) %>%
filter(!is.na(value)) %>%
group_by(donor, trait) %>%
filter(n_distinct(genotype) == 2) %>%
t_test(value ~ genotype) %>%
adjust_pvalue(method = "fdr") %>%
add_y_position(scales = "free") %>%
add_significance()
print(htest)
pheno_theme2 <- theme_classic2(base_size = 22) +
theme(
plot.title = element_markdown(hjust = 0.5, face = "bold"),
axis.title.y = element_markdown(),
axis.title.x = element_blank(),
axis.text.x = element_text(size = 18, face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = element_markdown(size = 18, face = "bold"),
legend.position = "none"
)
trait_plots <- list()
for (current_trait in corrected_phenotypes) {
cat("\n=== Processing trait:", current_trait, "===\n")
plot_data_trait <- sp_corrected %>%
select(donor, genotype, all_of(current_trait)) %>%
rename(value = all_of(current_trait)) %>%
filter(!is.na(value))
test_to_plot_trait <- htest %>%
filter(trait == current_trait)
trait_plot <- ggplot(plot_data_trait,
aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) +
geom_quasirandom(size = 2.5) +
scale_color_manual(values = c("CTRL" = "gold", "INV4M" = "purple4")) +
labs(
title = current_trait,
y = paste("Corrected", current_trait),
x = "Genotype"
) +
stat_pvalue_manual(
test_to_plot_trait,
tip.length = 0.01,
step.height = 0.08,
size = 8,
bracket.size = 0.8
) +
scale_y_continuous(expand = expansion(mult = c(0.1, 0.15))) +
facet_wrap(~ donor, ncol = 2) +
pheno_theme2
trait_plots[[current_trait]] <- trait_plot
cat("\n--- Summary for", current_trait, "---\n")
summary_stats_trait <- plot_data_trait %>%
group_by(donor, genotype) %>%
summarise(
n = n(),
mean = round(mean(value, na.rm = TRUE), 3),
sd = round(sd(value, na.rm = TRUE), 3),
.groups = "drop"
) %>%
pivot_wider(
names_from = genotype,
values_from = c(n, mean, sd),
names_sep = "_"
)
print(summary_stats_trait)
sig_results_trait <- test_to_plot_trait %>%
select(donor, statistic, p, p.adj, p.adj.signif) %>%
arrange(p.adj)
cat("Statistical test results for", current_trait, ":\n")
print(sig_results_trait)
}
if (length(trait_plots) > 0) {
cat("\n=== Creating combined plot ===\n")
n_traits <- length(trait_plots)
nrow_arrange <- ceiling(n_traits / 2)
combined_plot <- ggarrange(
plotlist = trait_plots,
ncol = 2,
nrow = nrow_arrange,
common.legend = TRUE,
legend = "bottom"
) %>%
annotate_figure(
top = text_grob(paste(environment_name,
"— Spatially Corrected Phenotypes"),
color = "black",
face = "bold",
size = 18)
)
print(combined_plot)
}
cat("\n=== Overall Summary Across All Traits and Donors ===\n")
alpha <- 0.05
overall_sig <- htest %>%
mutate(significant = p.adj < alpha) %>%
group_by(trait) %>%
summarise(
n_donors_tested = n(),
n_donors_significant = sum(significant),
min_pvalue = min(p.adj),
max_pvalue = max(p.adj),
.groups = "drop"
) %>%
arrange(desc(n_donors_significant), min_pvalue)
print(overall_sig)
} else {
message("No corrected phenotypes available for analysis")
}
} else {
message("No spatially corrected data available")
}
## Available genotypes: INV4M, CTRL
## Available donors: TMEX, MI21
## # A tibble: 20 × 14
## donor trait .y. group1 group2 n1 n2 statistic df p p.adj
## <chr> <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 BL value CTRL INV4M 16 16 5.95 29.9 1.63e-6 5.43e-6
## 2 MI21 BW value CTRL INV4M 16 16 5.64 30.0 3.86e-6 1.10e-5
## 3 MI21 DTA value CTRL INV4M 16 16 2.19 29.7 3.62e-2 4.3 e-2
## 4 MI21 DTS value CTRL INV4M 16 16 3.16 30.0 3.57e-3 5.95e-3
## 5 MI21 EBA value CTRL INV4M 16 16 6.97 29.6 1.04e-7 4.16e-7
## 6 MI21 EN value CTRL INV4M 16 16 -0.903 29.8 3.74e-1 3.74e-1
## 7 MI21 GDDA value CTRL INV4M 16 16 2.17 29.6 3.84e-2 4.3 e-2
## 8 MI21 GDDS value CTRL INV4M 16 16 3.13 30.0 3.93e-3 6.05e-3
## 9 MI21 LAE value CTRL INV4M 16 16 -2.16 29.7 3.87e-2 4.3 e-2
## 10 MI21 PH value CTRL INV4M 16 16 3.62 29.7 1.09e-3 1.98e-3
## 11 TMEX BL value CTRL INV4M 16 16 1.17 29.9 2.52e-1 2.65e-1
## 12 TMEX BW value CTRL INV4M 16 16 2.88 26.7 7.79e-3 1.11e-2
## 13 TMEX DTA value CTRL INV4M 16 16 -8.05 29.3 6.61e-9 6.89e-8
## 14 TMEX DTS value CTRL INV4M 16 16 -7.86 26.2 2.3 e-8 1.53e-7
## 15 TMEX EBA value CTRL INV4M 16 16 2.62 29.5 1.37e-2 1.83e-2
## 16 TMEX EN value CTRL INV4M 16 16 -4.25 27.9 2.15e-4 4.3 e-4
## 17 TMEX GDDA value CTRL INV4M 16 16 -8.04 29.2 6.89e-9 6.89e-8
## 18 TMEX GDDS value CTRL INV4M 16 16 -7.77 26.0 3.07e-8 1.53e-7
## 19 TMEX LAE value CTRL INV4M 16 16 4.54 29.1 8.97e-5 1.99e-4
## 20 TMEX PH value CTRL INV4M 16 16 -4.70 25.5 7.66e-5 1.92e-4
## # ℹ 3 more variables: y.position <dbl>, groups <named list>, p.adj.signif <chr>
##
## === Processing trait: DTA ===
##
## --- Summary for DTA ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 77.7 77.3 0.615 0.552
## 2 TMEX 16 16 77.7 79.3 0.545 0.639
## Statistical test results for DTA :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -8.05 0.00000000661 0.0000000689 ****
## 2 MI21 2.19 0.0362 0.043 *
##
## === Processing trait: DTS ===
##
## --- Summary for DTS ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 78.4 77.6 0.638 0.66
## 2 TMEX 16 16 78.8 80.6 0.503 0.75
## Statistical test results for DTS :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -7.86 0.000000023 0.000000153 ****
## 2 MI21 3.16 0.00357 0.00595 **
##
## === Processing trait: GDDA ===
##
## --- Summary for GDDA ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 877. 870. 9.68 8.60
## 2 TMEX 16 16 877. 904. 8.70 10.3
## Statistical test results for GDDA :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -8.04 0.00000000689 0.0000000689 ****
## 2 MI21 2.17 0.0384 0.043 *
##
## === Processing trait: GDDS ===
##
## --- Summary for GDDS ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 888. 876. 10.1 10.4
## 2 TMEX 16 16 895. 924. 8.23 12.5
## Statistical test results for GDDS :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -7.77 0.0000000307 0.000000154 ****
## 2 MI21 3.13 0.00393 0.00605 **
##
## === Processing trait: PH ===
##
## --- Summary for PH ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 187. 183. 3.50 3.86
## 2 TMEX 16 16 194. 204. 4.74 7.40
## Statistical test results for PH :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -4.70 0.0000766 0.000192 ***
## 2 MI21 3.62 0.00109 0.00198 **
##
## === Processing trait: EBA ===
##
## --- Summary for EBA ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 386. 338. 20.6 18.4
## 2 TMEX 16 16 393. 368. 24.5 27.8
## Statistical test results for EBA :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 MI21 6.97 0.000000104 0.000000416 ****
## 2 TMEX 2.62 0.0137 0.0183 *
##
## === Processing trait: BW ===
##
## --- Summary for BW ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 8.78 8.14 0.32 0.321
## 2 TMEX 16 16 8.62 8.19 0.336 0.485
## Statistical test results for BW :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 MI21 5.64 0.00000386 0.0000110 ****
## 2 TMEX 2.88 0.00779 0.0111 *
##
## === Processing trait: BL ===
##
## --- Summary for BL ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 58.3 55.3 1.49 1.39
## 2 TMEX 16 16 60.6 60.0 1.63 1.56
## Statistical test results for BL :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 MI21 5.95 0.00000163 0.00000543 ****
## 2 TMEX 1.17 0.252 0.265 ns
##
## === Processing trait: LAE ===
##
## --- Summary for LAE ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 5.89 6.02 0.157 0.172
## 2 TMEX 16 16 6.03 5.73 0.203 0.171
## Statistical test results for LAE :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX 4.54 0.0000897 0.000199 ***
## 2 MI21 -2.16 0.0387 0.043 *
##
## === Processing trait: EN ===
##
## --- Summary for EN ---
## # A tibble: 2 × 7
## donor n_CTRL n_INV4M mean_CTRL mean_INV4M sd_CTRL sd_INV4M
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 MI21 16 16 2.37 2.43 0.185 0.201
## 2 TMEX 16 16 2.20 2.53 0.183 0.244
## Statistical test results for EN :
## # A tibble: 2 × 5
## donor statistic p p.adj p.adj.signif
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 TMEX -4.25 0.000215 0.00043 ***
## 2 MI21 -0.903 0.374 0.374 ns
##
## === Creating combined plot ===
##
## === Overall Summary Across All Traits and Donors ===
## # A tibble: 10 × 5
## trait n_donors_tested n_donors_significant min_pvalue max_pvalue
## <chr> <int> <int> <dbl> <dbl>
## 1 DTA 2 2 0.0000000689 0.043
## 2 GDDA 2 2 0.0000000689 0.043
## 3 DTS 2 2 0.000000153 0.00595
## 4 GDDS 2 2 0.000000154 0.00605
## 5 EBA 2 2 0.000000416 0.0183
## 6 BW 2 2 0.0000110 0.0111
## 7 PH 2 2 0.000192 0.00198
## 8 LAE 2 2 0.000199 0.043
## 9 BL 2 1 0.00000543 0.265
## 10 EN 2 1 0.00043 0.374