Preliminary spatial correction of PA2024 (PSU2024) field phenotypes,
mirroring the PA2022 pipeline
(Corrected_phenotype_analysis_PSU2022.Rmd):
Rep and P_LEVEL as fixed factors.The field is a 2×2 of P_SQUAREs: P_SQ 1 = bottom-left (LowP), 2 = bottom-right (HighP), 3 = top-left (LowP), 4 = top-right (HighP). Each P_SQUARE is 10 rows × 24 cols, so the assembled field is 20 rows × 48 cols. RS 2001 sits at (X=1, Y=1) and RS 2960 at (X=48, Y=20).
library(tidyverse)
library(ggtext)
library(ggfx)
library(rstatix)
library(ggpubr)
library(SpATS)
library(statgenHTP)
library(ggbeeswarm)
library(cowplot)
library(emmeans)
DATA_DIR <- paths$data
plant_csv <- file.path(DATA_DIR, "PSU2024.csv")
field_raw <- read_csv(plant_csv, show_col_types = FALSE)
## Assembled X/Y physical coords (2x2 layout)
n_col_psq <- max(field_raw$COLUMN)
n_row_psq <- max(field_raw$ROW)
psq_x_offset <- c(`1` = 0L, `3` = 0L, `2` = n_col_psq, `4` = n_col_psq)
psq_y_offset <- c(`1` = 0L, `2` = 0L, `3` = n_row_psq, `4` = n_row_psq)
field_data <- field_raw %>%
rename(rowid = RS) %>%
mutate(
X = psq_x_offset[as.character(P_SQUARE)] + COLUMN,
Y = psq_y_offset[as.character(P_SQUARE)] + ROW,
Treatment = factor(P_LEVEL, levels = c("HIGH_P", "LOW_P")),
Rep = as.factor(REP),
Genotype = as.character(GENOTYPE)
) %>%
mutate(Treatment = fct_recode(Treatment, "+P" = "HIGH_P", "-P" = "LOW_P"))
cat("Field data dims:", dim(field_data), "\n")
## Field data dims: 960 90
cat("X range:", range(field_data$X), " Y range:", range(field_data$Y), "\n")
## X range: 1 48 Y range: 1 20
The PSU2024 trial has two biological tiers:
addCheck): every entry that is not part
of the segregating NIL mapping population. That is B73,
Inv4_B73, Inv4_MEX, Inv4_Mi21,
Inv4_PT, Oh43, Z031E0591,
Z031E0594 — eight entries total.NIL_xxxx segregating lines. Residual / BLUP normality
applies within this cohort.Rep,
P_LEVEL, spatial smooth on assembled X/Y.The focal contrast is Inv4_Mi21 vs
Inv4_B73. Both are checks (fixed effects), so their BLUEs
and the difference SE come straight out of the check coefficient
block.
For this paper, the contrast of interest is CTRL =
Inv4_B73 vs Inv4m =
Inv4_Mi21. Each entry has a donor
(the source of the Inv4m introgression at chromosome 4 —
B73/Mi21/MEX/PT)
that is parsed from the GENOTYPE label where possible.
check_set <- c("B73", "Inv4_B73", "Inv4_MEX", "Inv4_Mi21", "Inv4_PT",
"Oh43", "Z031E0591", "Z031E0594")
focal_pair <- c("Inv4_B73", "Inv4_Mi21")
donor_lookup <- c(
"B73" = "B73",
"Inv4_B73" = "B73",
"Inv4_MEX" = "MEX",
"Inv4_Mi21" = "Mi21",
"Inv4_PT" = "PT",
"Oh43" = "Oh43",
"Z031E0591" = "hybrid_check",
"Z031E0594" = "hybrid_check"
)
## NIL_xxxx donor not encoded in the genotype code — flagged as "NIL_pop" for now
## (user: replace if/when an external donor key becomes available)
field_data <- field_data %>%
mutate(
geno_class = if_else(Genotype %in% check_set, "Check", "NILpop"),
donor = unname(donor_lookup[Genotype]),
donor = if_else(is.na(donor), "NIL_pop", donor),
geno_label = case_when(
Genotype == "Inv4_B73" ~ "CTRL",
Genotype == "Inv4_Mi21" ~ "Inv4m",
TRUE ~ NA_character_
),
Genotype = factor(Genotype)
)
cat("\n--- genotype tier summary ---\n")
##
## --- genotype tier summary ---
print(field_data %>% count(geno_class))
## # A tibble: 2 × 2
## geno_class n
## <chr> <int>
## 1 Check 200
## 2 NILpop 760
print(field_data %>% filter(geno_class == "Check") %>%
count(Genotype, donor))
## # A tibble: 8 × 3
## Genotype donor n
## <fct> <chr> <int>
## 1 B73 B73 40
## 2 Inv4_B73 B73 20
## 3 Inv4_MEX MEX 20
## 4 Inv4_Mi21 Mi21 20
## 5 Inv4_PT PT 20
## 6 Oh43 Oh43 40
## 7 Z031E0591 hybrid_check 20
## 8 Z031E0594 hybrid_check 20
cat("\nFocal contrast: ",
"CTRL = ", focal_pair[1], ", Inv4m = ", focal_pair[2], "\n", sep = "")
##
## Focal contrast: CTRL = Inv4_B73, Inv4m = Inv4_Mi21
base_phenotypes <- c("PH", "DTA", "DTS", "SDW_11wk", "SDW_H")
ear_phenotypes <- c("TEW", "TEN", "TGW", "GWE", "HGW", "TGN", "AMF")
ndvi_phenotypes <- c("NDVI_T1", "NDVI_T2")
all_phenotypes <- intersect(
unique(c(base_phenotypes, ear_phenotypes, ndvi_phenotypes)),
names(field_data)
)
## Strip stray spaces (e.g. "SDW_11wk ")
fix_names <- setNames(names(field_data), trimws(names(field_data)))
if (!"SDW_11wk" %in% names(field_data) &&
"SDW_11wk " %in% names(field_data)) {
field_data <- field_data %>% rename(SDW_11wk = `SDW_11wk `)
}
missing_summary <- field_data %>%
select(all_of(all_phenotypes)) %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "phenotype",
values_to = "missing_count") %>%
mutate(total_obs = nrow(field_data),
missing_pct = round(missing_count / total_obs * 100, 1),
available_n = total_obs - missing_count) %>%
arrange(missing_pct)
print(missing_summary)
## # A tibble: 14 × 5
## phenotype missing_count total_obs missing_pct available_n
## <chr> <int> <int> <dbl> <int>
## 1 NDVI_T1 0 960 0 960
## 2 NDVI_T2 0 960 0 960
## 3 PH 4 960 0.4 956
## 4 DTA 4 960 0.4 956
## 5 TEN 35 960 3.6 925
## 6 TGW 35 960 3.6 925
## 7 TEW 36 960 3.8 924
## 8 GWE 37 960 3.9 923
## 9 HGW 42 960 4.4 918
## 10 TGN 42 960 4.4 918
## 11 AMF 857 960 89.3 103
## 12 SDW_H 881 960 91.8 79
## 13 DTS 960 960 100 0
## 14 SDW_11wk 960 960 100 0
usable_phenotypes <- missing_summary %>%
filter(missing_pct < 50) %>% pull(phenotype)
cat("\nUsable phenotypes (<50% missing):",
paste(usable_phenotypes, collapse = ", "), "\n")
##
## Usable phenotypes (<50% missing): NDVI_T1, NDVI_T2, PH, DTA, TEN, TGW, TEW, GWE, HGW, TGN
environment_name <- "PSU2024"
single_time <- field_data %>%
filter(!is.na(X), !is.na(Y)) %>%
mutate(time = as.POSIXct("2024-05-22 10:00:00"))
tp_data <- createTimePoints(
dat = single_time,
experimentName = environment_name,
genotype = "Genotype",
timePoint = "time",
plotId = "rowid",
repId = "Rep",
rowNum = "Y",
colNum = "X",
checkGenotypes = check_set,
addCheck = TRUE
)
summary(tp_data)
## tp_data contains data for experiment PSU2024.
##
## It contains 1 time points.
## First time point: 2024-05-22 10:00:00
## Last time point: 2024-05-22 10:00:00
##
## The following genotypes are defined as check genotypes: B73, Inv4_B73, Inv4_MEX, Inv4_Mi21, Inv4_PT, Oh43, Z031E0591, Z031E0594.
corrected_list <- list()
fit_info <- list()
all_models <- list()
for (phen in usable_phenotypes) {
message("Fitting SpATS model for: ", phen)
no_nas <- single_time %>% filter(!is.na(.data[[phen]]))
if (nrow(no_nas) < 10) {
message("Insufficient data for ", phen, " (n=", nrow(no_nas), ")")
next
}
fit_try <- try(
fitModels(
TP = tp_data,
trait = phen,
extraFixedFactors = c("repId", "Treatment"),
timePoints = 1,
what = "random" # NIL_xxxx random; checks fixed via addCheck
),
silent = TRUE
)
if (inherits(fit_try, "try-error")) {
fit_info[[phen]] <- list(n_obs = nrow(no_nas), status = "failed")
next
}
all_models[[phen]] <- fit_try
corr <- getCorrected(fit_try) %>%
rename(!!phen := !!phen)
corrected_list[[phen]] <- corr %>%
select(plotId, genotype, !!phen)
fit_info[[phen]] <- list(n_obs = nrow(no_nas), status = "ok")
}
sp_corrected <- Reduce(function(a, b)
full_join(a, b, by = c("plotId", "genotype")),
corrected_list)
## Attach Treatment, geno_class, donor, geno_label back on
sp_corrected <- sp_corrected %>%
mutate(plotId = as.character(plotId)) %>%
left_join(field_data %>%
mutate(rowid = as.character(rowid)) %>%
select(rowid, Treatment, geno_class, donor, geno_label),
by = c("plotId" = "rowid"))
cat("Plots with corrected data:", nrow(sp_corrected), "\n")
cat("Traits with corrected values:",
paste(intersect(usable_phenotypes, names(sp_corrected)), collapse = ", "),
"\n")
if ("PH" %in% names(all_models)) {
plot(all_models$PH, timePoints = 1)
plot(all_models$PH, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
if ("DTA" %in% names(all_models)) {
plot(all_models$DTA, timePoints = 1)
plot(all_models$DTA, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
if ("DTS" %in% names(all_models)) {
plot(all_models$DTS, timePoints = 1)
plot(all_models$DTS, timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
For each trait, rstatix::t_test compares the spatially-
and nuisance-corrected plot values for Inv4_B73 (20 plots)
vs Inv4_Mi21 (20 plots), FDR-adjusted across traits. This
is the same approach as PSU2022.
We also extract the genotype BLUEs from getGenoPred()
for the CSV output — useful as a summary of each genotype’s
model-implied mean, but not what drives the panel
significance stars.
get_focal_blues <- function(model_list, focal_pair) {
out <- list()
for (trait in names(model_list)) {
pred <- try(getGenoPred(model_list[[trait]]), silent = TRUE)
if (inherits(pred, "try-error")) next
pred_df <- pred$genoPred
if (!all(focal_pair %in% pred_df$genotype)) next
sub <- pred_df %>%
filter(genotype %in% focal_pair) %>%
select(genotype, predicted.values, standard.errors)
if (nrow(sub) != 2) next
ctrl <- sub %>% filter(genotype == focal_pair[1])
inv4m <- sub %>% filter(genotype == focal_pair[2])
out[[trait]] <- tibble(
trait = trait,
BLUE_CTRL = ctrl$predicted.values,
SE_CTRL = ctrl$standard.errors,
BLUE_Inv4m = inv4m$predicted.values,
SE_Inv4m = inv4m$standard.errors
)
}
bind_rows(out)
}
blue_df <- get_focal_blues(all_models, focal_pair)
focal_corrected <- sp_corrected %>%
filter(genotype %in% focal_pair) %>%
mutate(genotype = factor(geno_label, levels = c("CTRL", "Inv4m")),
Treatment = factor(Treatment))
## Per-trait lm + emmeans: marginal Genotype contrast (pooled over P) and
## conditional Genotype contrasts within each P_LEVEL. G x T interaction p
## stored as diagnostic (NOT in FDR pool).
fit_one_trait <- function(trait_name, data) {
d <- data %>%
pivot_longer(cols = all_of(trait_name),
names_to = "trait", values_to = "value") %>%
filter(!is.na(value))
if (nrow(d) < 4 || length(unique(d$genotype)) < 2 ||
length(unique(d$Treatment)) < 2) return(NULL)
m <- tryCatch(lm(value ~ genotype * Treatment, data = d),
error = function(e) NULL)
if (is.null(m)) return(NULL)
# G x T interaction p-value (diagnostic only)
anov <- anova(m)
p_int <- if ("genotype:Treatment" %in% rownames(anov))
anov["genotype:Treatment", "Pr(>F)"] else NA_real_
marg <- pairs(emmeans(m, ~ genotype), reverse = TRUE) %>%
as_tibble() %>%
transmute(trait = trait_name,
contrast_type = "marginal",
contrast_level = "marginal",
estimate, SE, df, t = t.ratio, p = p.value,
p_int_GxT = p_int)
cond <- pairs(emmeans(m, ~ genotype | Treatment), reverse = TRUE) %>%
as_tibble() %>%
transmute(trait = trait_name,
contrast_type = "conditional",
contrast_level = paste0("Genotype|", Treatment),
estimate, SE, df, t = t.ratio, p = p.value,
p_int_GxT = p_int)
bind_rows(marg, cond)
}
## restrict to traits that actually have SpATS-corrected columns
fitted_traits <- intersect(usable_phenotypes, names(focal_corrected))
all_contrasts <- lapply(fitted_traits, fit_one_trait,
data = focal_corrected) %>%
bind_rows()
## Single FDR pool: all (trait x contrast) rows together
all_contrasts <- all_contrasts %>%
mutate(p_adj = p.adjust(p, method = "fdr"),
signif = case_when(
p_adj < 0.001 ~ "***",
p_adj < 0.01 ~ "**",
p_adj < 0.05 ~ "*",
TRUE ~ "ns"
))
cat("FDR pool size:", nrow(all_contrasts), "contrasts\n")
## FDR pool size: 27 contrasts
cat("Significant (FDR < 0.05):",
sum(all_contrasts$p_adj < 0.05), "rows\n\n")
## Significant (FDR < 0.05): 10 rows
all_contrasts %>%
arrange(p_adj) %>%
select(trait, contrast_type, contrast_level,
estimate, SE, df, t,
p, p_adj, signif, p_int_GxT) %>%
knitr::kable(digits = 4,
caption = "All 27 contrasts (9 traits x 3 contrast types). p = nominal, p_adj = BH-FDR across the 27-row pool.")
| trait | contrast_type | contrast_level | estimate | SE | df | t | p | p_adj | signif | p_int_GxT |
|---|---|---|---|---|---|---|---|---|---|---|
| GWE | marginal | marginal | 46.9254 | 9.3273 | 34 | 5.0310 | 0.0000 | 0.0004 | *** | 0.1691 |
| GWE | conditional | Genotype|-P | 60.0308 | 13.1907 | 34 | 4.5510 | 0.0001 | 0.0009 | *** | 0.1691 |
| TGW | marginal | marginal | 194.3167 | 52.0284 | 35 | 3.7348 | 0.0007 | 0.0060 | ** | 0.5053 |
| PH | conditional | Genotype|+P | 24.0000 | 6.9932 | 36 | 3.4319 | 0.0015 | 0.0103 | * | 0.0270 |
| TGN | marginal | marginal | 840.8634 | 254.4127 | 35 | 3.3051 | 0.0022 | 0.0119 | * | 0.7543 |
| TGW | conditional | Genotype|-P | 229.3333 | 74.5670 | 35 | 3.0755 | 0.0041 | 0.0183 | * | 0.5053 |
| NDVI_T2 | conditional | Genotype|-P | -0.0480 | 0.0174 | 36 | -2.7649 | 0.0089 | 0.0344 | * | 0.1405 |
| PH | marginal | marginal | 12.6000 | 4.9449 | 36 | 2.5481 | 0.0152 | 0.0412 | * | 0.0270 |
| GWE | conditional | Genotype|+P | 33.8200 | 13.1907 | 34 | 2.5639 | 0.0149 | 0.0412 | * | 0.1691 |
| TGN | conditional | Genotype|+P | 921.1220 | 354.8985 | 35 | 2.5955 | 0.0137 | 0.0412 | * | 0.7543 |
| NDVI_T2 | marginal | marginal | -0.0295 | 0.0123 | 36 | -2.4031 | 0.0215 | 0.0529 | ns | 0.1405 |
| NDVI_T1 | marginal | marginal | -0.0585 | 0.0268 | 36 | -2.1811 | 0.0358 | 0.0743 | ns | 0.4280 |
| TGW | conditional | Genotype|+P | 159.3000 | 72.5782 | 35 | 2.1949 | 0.0349 | 0.0743 | ns | 0.5053 |
| NDVI_T1 | conditional | Genotype|+P | -0.0800 | 0.0379 | 36 | -2.1091 | 0.0420 | 0.0798 | ns | 0.4280 |
| TGN | conditional | Genotype|-P | 760.6048 | 364.6236 | 35 | 2.0860 | 0.0443 | 0.0798 | ns | 0.7543 |
| HGW | conditional | Genotype|+P | -3.1000 | 2.3041 | 35 | -1.3454 | 0.1871 | 0.3158 | ns | 0.1002 |
| TEN | conditional | Genotype|-P | -0.2889 | 0.2341 | 35 | -1.2338 | 0.2255 | 0.3581 | ns | 0.3826 |
| HGW | conditional | Genotype|-P | 2.4778 | 2.3673 | 35 | 1.0467 | 0.3024 | 0.4536 | ns | 0.1002 |
| NDVI_T1 | conditional | Genotype|-P | -0.0370 | 0.0379 | 36 | -0.9755 | 0.3358 | 0.4772 | ns | 0.4280 |
| TEN | marginal | marginal | -0.1444 | 0.1634 | 35 | -0.8842 | 0.3826 | 0.5166 | ns | 0.3826 |
| NDVI_T2 | conditional | Genotype|+P | -0.0110 | 0.0174 | 36 | -0.6336 | 0.5303 | 0.6746 | ns | 0.1405 |
| DTA | conditional | Genotype|-P | -0.8000 | 1.3246 | 36 | -0.6040 | 0.5496 | 0.6746 | ns | 0.4597 |
| DTA | conditional | Genotype|+P | 0.6000 | 1.3246 | 36 | 0.4530 | 0.6533 | 0.7669 | ns | 0.4597 |
| PH | conditional | Genotype|-P | 1.2000 | 6.9932 | 36 | 0.1716 | 0.8647 | 0.9339 | ns | 0.0270 |
| HGW | marginal | marginal | -0.3111 | 1.6517 | 35 | -0.1884 | 0.8517 | 0.9339 | ns | 0.1002 |
| DTA | marginal | marginal | -0.1000 | 0.9366 | 36 | -0.1068 | 0.9156 | 0.9508 | ns | 0.4597 |
| TEN | conditional | Genotype|+P | 0.0000 | 0.2279 | 35 | 0.0000 | 1.0000 | 1.0000 | ns | 0.3826 |
## contrast_df = marginal row per trait — drives the panel star
contrast_df <- all_contrasts %>%
filter(contrast_type == "marginal") %>%
select(trait, estimate, SE, df, t, p, p_adj, signif, p_int_GxT) %>%
rename(effect = estimate, se_diff = SE) %>%
left_join(blue_df, by = "trait") %>%
mutate(CTRL = focal_pair[1],
Inv4m = focal_pair[2],
donor_CTRL = donor_lookup[focal_pair[1]],
donor_Inv4m = donor_lookup[focal_pair[2]])
contrast_df %>%
arrange(p_adj) %>%
select(trait, effect, se_diff, t, p, p_adj, signif, p_int_GxT) %>%
knitr::kable(digits = 4,
caption = "Marginal Inv4m - CTRL contrast per trait (panel-star source). p = nominal, p_adj = BH-FDR across the 27-row pool from above.")
| trait | effect | se_diff | t | p | p_adj | signif | p_int_GxT |
|---|---|---|---|---|---|---|---|
| GWE | 46.9254 | 9.3273 | 5.0310 | 0.0000 | 0.0004 | *** | 0.1691 |
| TGW | 194.3167 | 52.0284 | 3.7348 | 0.0007 | 0.0060 | ** | 0.5053 |
| TGN | 840.8634 | 254.4127 | 3.3051 | 0.0022 | 0.0119 | * | 0.7543 |
| PH | 12.6000 | 4.9449 | 2.5481 | 0.0152 | 0.0412 | * | 0.0270 |
| NDVI_T2 | -0.0295 | 0.0123 | -2.4031 | 0.0215 | 0.0529 | ns | 0.1405 |
| NDVI_T1 | -0.0585 | 0.0268 | -2.1811 | 0.0358 | 0.0743 | ns | 0.4280 |
| TEN | -0.1444 | 0.1634 | -0.8842 | 0.3826 | 0.5166 | ns | 0.3826 |
| HGW | -0.3111 | 1.6517 | -0.1884 | 0.8517 | 0.9339 | ns | 0.1002 |
| DTA | -0.1000 | 0.9366 | -0.1068 | 0.9156 | 0.9508 | ns | 0.4597 |
cat("\nTraits with significant G x T interaction (raw p < 0.05):\n")
##
## Traits with significant G x T interaction (raw p < 0.05):
contrast_df %>%
filter(p_int_GxT < 0.05) %>%
select(trait, p_int_GxT) %>%
arrange(p_int_GxT) %>%
knitr::kable(digits = 4)
| trait | p_int_GxT |
|---|---|
| PH | 0.027 |
Each panel shows the spatially- and nuisance-corrected plot-level
values for the 20 Inv4_B73 and 20 Inv4_Mi21
plots. The significance star is the FDR-adjusted Welch t-test from
§8.
## Order panels by FDR-adjusted marginal contrast p (most significant first),
## with any unfitted traits appended at the end
main_traits <- c(
contrast_df %>% arrange(p_adj) %>% pull(trait),
setdiff(usable_phenotypes,
contrast_df %>% pull(trait))
)
unit_lookup <- c(PH = "cm", DTA = "Days", DTS = "Days",
SDW_H = "g", GWE = "g", HGW = "g", TGW = "g",
SDW_11wk = "g", TEW = "g",
TGN = "count", TEN = "count",
NDVI_T1 = "index", NDVI_T2 = "index",
AMF = "")
## Descriptive y-axis labels override the bare unit for selected traits
axis_label_lookup <- c(
TGW = "Total grain weight (g)",
GWE = "Grain weight / ear (g)",
TGN = "Total grain number"
)
trait_units <- setNames(
vapply(main_traits,
function(tr) {
if (tr %in% names(axis_label_lookup)) axis_label_lookup[[tr]]
else if (tr %in% names(unit_lookup)) unit_lookup[[tr]]
else ""
},
character(1)),
main_traits)
plot_data_main <- focal_corrected %>%
pivot_longer(cols = any_of(usable_phenotypes),
names_to = "trait", values_to = "value") %>%
filter(trait %in% main_traits, !is.na(value)) %>%
mutate(trait = factor(trait, levels = main_traits))
pheno_theme_panel <- theme_classic2(base_size = 20) +
theme(
plot.title = element_blank(),
axis.title.y = element_text(size = 18),
axis.title.x = element_blank(),
axis.text.x = element_markdown(size = 18, face = "bold",
color = "black"),
axis.text.y = element_text(size = 16),
strip.background = element_blank(),
strip.text = element_text(size = 20, face = "bold"),
legend.position = "none"
)
make_trait_plot <- function(trait_name, data, contrast_tbl, units) {
d <- filter(data, trait == trait_name)
if (!nrow(d)) return(NULL)
cr <- contrast_tbl %>% filter(trait == trait_name)
star <- if (nrow(cr)) cr$signif else "ns"
y_label <- units[trait_name]
ymax <- max(d$value, na.rm = TRUE)
yrng <- diff(range(d$value, na.rm = TRUE))
bracket_df <- tibble(
group1 = "CTRL",
group2 = "Inv4m",
y.position = ymax + 0.10 * yrng,
label = star
)
ggplot(d, aes(x = genotype, y = value, color = genotype)) +
geom_boxplot(width = 0.25, linewidth = 1.5, alpha = 0) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
geom_quasirandom(size = 2) %>%
with_shadow(colour = "black", x_offset = 0, y_offset = 0, sigma = 1) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
scale_x_discrete(labels = c("CTRL" = "CTRL", "Inv4m" = "*Inv4m*")) +
stat_pvalue_manual(bracket_df,
tip.length = 0.01,
size = 8,
bracket.size = 0.8,
inherit.aes = FALSE) +
labs(title = trait_name, y = y_label, x = NULL) +
scale_y_continuous(expand = expansion(mult = c(0.10, 0.20))) +
pheno_theme_panel +
theme(plot.title = element_text(size = 20, face = "bold", hjust = 0.5))
}
trait_plots <- Filter(Negate(is.null),
lapply(main_traits, make_trait_plot,
data = plot_data_main, contrast_tbl = contrast_df,
units = trait_units))
panel_ncol <- 3L
panel_nrow <- ceiling(length(trait_plots) / panel_ncol)
main_panel <- cowplot::plot_grid(plotlist = trait_plots,
nrow = panel_nrow, ncol = panel_ncol,
align = "hv", axis = "tblr")
print(main_panel)
ggsave(file.path(paths$figures, "PSU2024_field_phenotypes_panel.png"),
main_panel,
width = 3.5 * panel_ncol,
height = 3.5 * panel_nrow,
dpi = 300)
cat("Saved panel to:",
file.path(paths$figures, "PSU2024_field_phenotypes_panel.png"), "\n")
## Saved panel to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/PSU2024_field_phenotypes_panel.png
To compare effects across traits on a common scale, we standardize each BLUE difference by the pooled within-genotype SD of the spatially corrected plot-level values for Inv4_B73 and Inv4_Mi21 (20 plots each). With n=20 per group, we also report Hedges’ g (small-sample correction).
focal_long <- sp_corrected %>%
filter(genotype %in% focal_pair) %>%
pivot_longer(cols = any_of(usable_phenotypes),
names_to = "trait", values_to = "value") %>%
filter(!is.na(value))
pooled_sd_df <- focal_long %>%
group_by(trait, genotype) %>%
summarise(sd_g = sd(value), n_g = n(), .groups = "drop") %>%
group_by(trait) %>%
summarise(pooled_sd = sqrt(sum((n_g - 1) * sd_g^2) / sum(n_g - 1)),
n_total = sum(n_g),
.groups = "drop")
contrast_df <- contrast_df %>%
left_join(pooled_sd_df, by = "trait") %>%
mutate(cohens_d = effect / pooled_sd,
se_d = se_diff / pooled_sd,
hedges_g = cohens_d * (1 - 3 / (4 * n_total - 9)))
print(contrast_df %>%
select(trait, effect, se_diff, pooled_sd,
cohens_d, se_d, hedges_g, p_adj, signif))
## # A tibble: 9 × 9
## trait effect se_diff pooled_sd cohens_d se_d hedges_g p_adj signif
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 NDVI_T1 -0.0585 0.0268 0.122 -0.480 0.220 -0.470 0.0743 ns
## 2 NDVI_T2 -0.0295 0.0123 0.0404 -0.731 0.304 -0.717 0.0529 ns
## 3 PH 12.6 4.94 16.9 0.746 0.293 0.731 0.0412 *
## 4 DTA -0.1000 0.937 4.01 -0.0249 0.233 -0.0244 0.951 ns
## 5 TEN -0.144 0.163 0.534 -0.271 0.306 -0.265 0.517 ns
## 6 TGW 194. 52.0 209. 0.930 0.249 0.911 0.00601 **
## 7 GWE 46.9 9.33 34.8 1.35 0.268 1.32 0.000423 ***
## 8 HGW -0.311 1.65 5.61 -0.0555 0.294 -0.0543 0.934 ns
## 9 TGN 841. 254. 843. 0.997 0.302 0.977 0.0119 *
forest_df <- contrast_df %>%
arrange(cohens_d) %>%
mutate(trait = factor(trait, levels = trait))
p_forest <- ggplot(forest_df,
aes(x = cohens_d, y = trait,
xmin = cohens_d - 1.96 * se_d,
xmax = cohens_d + 1.96 * se_d)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_vline(xintercept = c(-0.8, -0.5, -0.2, 0.2, 0.5, 0.8),
linetype = "dotted", color = "grey80") +
geom_pointrange(size = 0.8, color = "purple4") +
geom_text(aes(label = signif), nudge_y = 0.25, size = 5,
fontface = "bold") +
labs(x = "Cohen's d (Inv4m − CTRL) ±95% CI",
y = NULL,
title = "PA2024 — focal contrast (standardized effect)") +
theme_classic2(base_size = 14)
print(p_forest)
ggsave(file.path(paths$figures, "PSU2024_focal_contrast_forest.png"),
p_forest, width = 7, height = 5, dpi = 300)
write_csv(contrast_df,
file.path(paths$intermediate,
paste0(environment_name, "_focal_contrast_marginal.csv")))
write_csv(all_contrasts,
file.path(paths$intermediate,
paste0(environment_name,
"_focal_contrast_all_27.csv")))
write_csv(sp_corrected,
file.path(paths$intermediate,
paste0(environment_name,
"_spatially_corrected_phenotypes.csv")))
cat("Outputs:\n ",
file.path(paths$intermediate,
paste0(environment_name, "_focal_contrast_marginal.csv")),
"\n ",
file.path(paths$intermediate,
paste0(environment_name, "_focal_contrast_all_27.csv")),
"\n ",
file.path(paths$intermediate,
paste0(environment_name, "_spatially_corrected_phenotypes.csv")),
"\n ",
file.path(paths$figures, "PSU2024_focal_contrast_forest.png"), "\n ",
file.path(paths$figures, "PSU2024_field_phenotypes_panel.png"), "\n")
## Outputs:
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2024_focal_contrast_marginal.csv
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2024_focal_contrast_all_27.csv
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/intermediate/PSU2024_spatially_corrected_phenotypes.csv
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/PSU2024_focal_contrast_forest.png
## /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/inversion_paper/figures/PSU2024_field_phenotypes_panel.png
Genotype random over the 41 NIL_xxxx lines
(residual normality applies here)B73, Inv4_B73,
Inv4_MEX, Inv4_Mi21, Inv4_PT,
Oh43, Z031E0591/4) added as checks (fixed via
addCheck = TRUE)Rep and Treatment (P_LEVEL) as fixed
nuisance termsPSU2022_phenotype_marginal_means.Rmd from the phosphorus
paper for cross-paper congruence.
lm(corrected ~ Genotype * Treatment) per trait on the
focal-pair plots (20 Inv4_B73 + 20 Inv4_Mi21 = 40 plots/trait)BLUE_* columns in the marginal-contrast CSV come
from getGenoPred() on the SpATS fit (reference summary, not
the primary test)._SPAT columns as a methods-parity check before finalizing
the Figure 2 hybrid panel.