This notebook fits a SpATS PSANOVA surface per phenotype and
treatment and returns spatially-corrected plot-level values for
each phenotype. The approach uses statgenHTP::fitModels()
(which wraps SpATS when provided a TimePoints object) and
getCorrected()
to extract corrected plot-level
observations. The workflow:
plot_data
(plot-level means)fitModels()
→
getCorrected()
for each treatmentWarning: fitting many SpATS models can be computationally intensive. Run interactively on a machine with sufficient RAM/CPU.
library(tidyverse)
library(dplyr)
library(ggplot2)
library(ggtext)
library(ggfx)
library(nlme) # Mixed-effects models (if needed)
library(gstat) # Variograms
library(rstatix)
library(ggpubr)
library(VIM)
library(knitr)
library(SpATS) # SpATS package (used internally by statgenHTP)
library(statgenHTP) # Provides fitModels() and getCorrected()
DATA_DIR <- "/Users/fvrodriguez/Desktop/inv4mHighland/data"
plant_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")
ear_csv <- file.path(DATA_DIR, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field_ear_pheno.csv")
field_data_raw <- read.csv(plant_csv, na.strings = c("", "#N/A", "NA"))
ear_data_raw <- read.csv(ear_csv, na.strings = c("", "n/a", "NA"), skip = 1)
field_data <- field_data_raw %>%
filter(P22. >= 3004, P22. <= 4192) %>%
rename(
rowid = P22.,
Genotype = Who.What,
PH = Height_Anthesis,
STW40 = X40_DAP_dw,
STW50 = X50_DAP_dw,
STW60 = X60_DAP_dw,
STWHV = harvest_dw
) %>%
mutate(
# Set +P as reference level (first level)
Treatment = factor(Treatment, levels = c("HighP", "LowP")),
Genotype = factor(Genotype),
Rep = as.factor(Rep)
) %>%
mutate(
Treatment = fct_recode(Treatment, "+P" = "HighP", "-P" = "LowP"),
# Set CTRL as reference genotype
Genotype = fct_recode(Genotype, "Inv4m" = "INV4M"),
Genotype = fct_relevel(Genotype, "CTRL")
)
ear_data <- ear_data_raw %>%
select(-description, -RK, -CC, -NIR, ear_rep = rep) %>%
rename(rowid = row) %>%
arrange(rowid) %>%
group_by(rowid) %>%
select(-ear_rep) %>%
summarise_all(mean, na.rm = TRUE) %>%
droplevels()
field_data_complete <- field_data %>%
left_join(ear_data, by = "rowid") %>%
mutate(HI = TKW / STWHV) %>%
mutate(
Plot_Column = case_when(
Plot == "PlotVIII" ~ Plot_Column + 10,
Plot == "PlotVI" ~ Plot_Column,
TRUE ~ Plot_Column
)
) %>%
filter(!is.na(PH)) %>%
droplevels()
# Define phenotypes
base_phenotypes <- c("PH", "DTA", "DTS", "STW40", "STW50", "STW60", "STWHV")
ear_phenotypes <- c("EW", "EL", "ED", "KRN", "CD", "TKW", "KW50", "TKN", "HI")
all_phenotypes <- unique(c(base_phenotypes, ear_phenotypes))
plot_data <- field_data_complete %>%
group_by(rowid, Rep, Plot_Row, Plot_Column, Genotype, Treatment) %>%
summarise(
across(all_of(all_phenotypes), ~ mean(.x, na.rm = TRUE)),
n_plants = n(),
.groups = 'drop'
) %>%
mutate(across(all_of(all_phenotypes), ~ ifelse(is.nan(.x), NA, .x)))
plot_data$Plot_Row <- as.numeric(plot_data$Plot_Row)
plot_data$Plot_Column <- as.numeric(plot_data$Plot_Column)
plot_data$Rep <- as.factor(plot_data$Rep)
cat("Plot-level summary dimensions:", dim(plot_data), "\n")
## Plot-level summary dimensions: 127 23
cat("Average plants per plot:", round(mean(plot_data$n_plants, na.rm = TRUE), 1), "\n")
## Average plants per plot: 1
missing_summary <- plot_data %>%
select(all_of(all_phenotypes)) %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
mutate(
total_obs = nrow(plot_data),
missing_pct = round(missing_count / total_obs * 100, 1),
available_n = total_obs - missing_count
) %>%
arrange(missing_pct)
print(missing_summary)
## # A tibble: 16 × 5
## phenotype missing_count total_obs missing_pct available_n
## <chr> <int> <int> <dbl> <int>
## 1 PH 0 127 0 127
## 2 DTA 0 127 0 127
## 3 DTS 0 127 0 127
## 4 STW40 2 127 1.6 125
## 5 STWHV 2 127 1.6 125
## 6 STW50 15 127 11.8 112
## 7 STW60 20 127 15.7 107
## 8 EW 22 127 17.3 105
## 9 EL 22 127 17.3 105
## 10 ED 22 127 17.3 105
## 11 KRN 22 127 17.3 105
## 12 CD 22 127 17.3 105
## 13 TKW 22 127 17.3 105
## 14 KW50 22 127 17.3 105
## 15 TKN 22 127 17.3 105
## 16 HI 22 127 17.3 105
usable_phenotypes <- missing_summary %>%
filter(missing_pct < 50) %>%
pull(phenotype)
cat("Usable phenotypes (<50% missing):", paste(usable_phenotypes, collapse = ", "), "\n")
## Usable phenotypes (<50% missing): PH, DTA, DTS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI
# Prepare data for TimePoints
single_time <- plot_data %>%
mutate(
Genotype = as.character(Genotype),
Treatment = as.character(Treatment),
Rep = as.character(Rep),
Plot_Row = as.numeric(Plot_Row),
Plot_Column = as.numeric(Plot_Column)
) %>%
mutate(time = as.POSIXct("2022-05-22 10:00:00"))
# Create TimePoints object for +P treatment
tp_data_plus_p <- createTimePoints(
dat = single_time %>% filter(Treatment == "+P"),
experimentName = "PSU2022_PlusP",
genotype = "Genotype",
timePoint = "time",
plotId = "rowid",
repId = "Rep",
rowNum = "Plot_Row",
colNum = "Plot_Column",
checkGenotypes = c("NUE", "B73"),
addCheck = TRUE
)
summary(tp_data_plus_p)
# Initialize lists for +P treatment
corrected_list_plus_p <- list()
fit_info_plus_p <- list()
all_models_plus_p <- list()
for (phen in usable_phenotypes) {
message("Fitting SpATS model for +P treatment, phenotype: ", phen)
# Check data availability
no_nas_plus_p <- single_time %>%
filter(Treatment == "+P", !is.na(.data[[phen]]))
if (nrow(no_nas_plus_p) < 10) {
message("Insufficient data for ", phen, " in +P treatment (n=", nrow(no_nas_plus_p), ")")
next
}
# Fit model
fit_try <- try(
fitModels(
TP = tp_data_plus_p,
trait = phen,
timePoints = 1,
what = "fixed"
),
silent = TRUE
)
if (inherits(fit_try, "try-error")) {
message("Fit failed for ", phen, " in +P treatment: ", fit_try)
next
}
all_models_plus_p[[phen]] <- fit_try
# Extract corrected values
corr <- try(getCorrected(fit_try), silent = TRUE)
if (inherits(corr, "try-error") || is.null(corr)) {
message("getCorrected() failed for ", phen, " in +P treatment")
next
}
corr <- corr %>% select(-phen)
corrected_list_plus_p[[phen]] <- corr
fit_info_plus_p[[phen]] <- list(n_obs = nrow(no_nas_plus_p), status = "success")
}
# Combine all corrected data for +P treatment
sp_corrected_plus_p <- lapply(
corrected_list_plus_p,
function(corr_df) {
corr_df %>%
select(timeNumber, timePoint, genotype, rowId, colId, plotId)
}
) %>%
bind_rows() %>%
arrange(plotId) %>%
distinct()
# Join each corrected phenotype for +P
for (phen in names(corrected_list_plus_p)) {
phen_corr <- paste0(phen, "_corr")
corr_df <- corrected_list_plus_p[[phen]] %>%
select(plotId, all_of(phen_corr))
sp_corrected_plus_p <- left_join(
sp_corrected_plus_p,
corr_df,
by = "plotId"
)
}
# Clean column names (remove _corr suffix)
colnames(sp_corrected_plus_p) <- gsub(
"_corr", "",
colnames(sp_corrected_plus_p)
)
# Add treatment information
sp_corrected_plus_p <- sp_corrected_plus_p %>%
inner_join(
tp_data_plus_p[[1]] %>% select(plotId, Treatment),
by = "plotId"
) %>%
select(timeNumber:genotype, Treatment, everything())
cat("Final sp_corrected_plus_p dimensions:", dim(sp_corrected_plus_p), "\n")
# Create TimePoints object for -P treatment
tp_data_minus_p <- createTimePoints(
dat = single_time %>% filter(Treatment == "-P"),
experimentName = "PSU2022_MinusP",
genotype = "Genotype",
timePoint = "time",
plotId = "rowid",
repId = "Rep",
rowNum = "Plot_Row",
colNum = "Plot_Column",
checkGenotypes = c("NUE", "B73"),
addCheck = TRUE
)
summary(tp_data_minus_p)
# Initialize lists for -P treatment
corrected_list_minus_p <- list()
fit_info_minus_p <- list()
all_models_minus_p <- list()
for (phen in usable_phenotypes) {
message("Fitting SpATS model for -P treatment, phenotype: ", phen)
# Check if we have enough data for this phenotype in -P
no_nas_minus_p <- single_time %>%
filter(Treatment == "-P", !is.na(.data[[phen]]))
if (nrow(no_nas_minus_p) < 10) {
message("Insufficient data for ", phen, " in -P treatment (n=", nrow(no_nas_minus_p), ")")
next
}
# Fit model
fit_try <- try(
fitModels(
TP = tp_data_minus_p,
trait = phen,
timePoints = 1,
what = "fixed"
),
silent = TRUE
)
if (inherits(fit_try, "try-error")) {
message("Fit failed for ", phen, " in -P treatment: ", fit_try)
next
}
all_models_minus_p[[phen]] <- fit_try
# Extract corrected values
corr <- try(getCorrected(fit_try), silent = TRUE)
if (inherits(corr, "try-error") || is.null(corr)) {
message("getCorrected() failed for ", phen, " in -P treatment")
next
}
corr <- corr %>% select(-phen)
corrected_list_minus_p[[phen]] <- corr
fit_info_minus_p[[phen]] <- list(n_obs = nrow(no_nas_minus_p), status = "success")
}
# Combine all corrected data for -P treatment
sp_corrected_minus_p <- lapply(
corrected_list_minus_p,
function(corr_df) {
corr_df %>%
select(timeNumber, timePoint, genotype, rowId, colId, plotId)
}
) %>%
bind_rows() %>%
arrange(plotId) %>%
distinct()
# Join each corrected phenotype for -P
for (phen in names(corrected_list_minus_p)) {
phen_corr <- paste0(phen, "_corr")
corr_df <- corrected_list_minus_p[[phen]] %>%
select(plotId, all_of(phen_corr))
sp_corrected_minus_p <- left_join(
sp_corrected_minus_p,
corr_df,
by = "plotId"
)
}
# Clean column names (remove _corr suffix)
colnames(sp_corrected_minus_p) <- gsub(
"_corr", "",
colnames(sp_corrected_minus_p)
)
# Add treatment information
sp_corrected_minus_p <- sp_corrected_minus_p %>%
inner_join(
tp_data_minus_p[[1]] %>% select(plotId, Treatment),
by = "plotId"
) %>%
select(timeNumber:genotype, Treatment, everything())
cat("Final sp_corrected_minus_p dimensions:", dim(sp_corrected_minus_p), "\n")
# Combine +P and -P corrected data
sp_corrected_combined <- bind_rows(
sp_corrected_plus_p, # +P treatment
sp_corrected_minus_p # -P treatment
) %>%
arrange(plotId)
cat("Combined sp_corrected dimensions:", dim(sp_corrected_combined), "\n")
## Combined sp_corrected dimensions: 127 23
cat("Treatment distribution:\n")
## Treatment distribution:
table(sp_corrected_combined$Treatment)
##
## -P +P
## 63 64
# Verify we have data for both treatments
treatment_summary <- sp_corrected_combined %>%
group_by(Treatment) %>%
summarise(
n_plots = n(),
n_genotypes = n_distinct(genotype),
.groups = "drop"
)
print(treatment_summary)
## # A tibble: 2 × 3
## Treatment n_plots n_genotypes
## <chr> <int> <int>
## 1 +P 64 4
## 2 -P 63 4
# Check for overlapping phenotypes between treatments
plus_p_phenos <- names(sp_corrected_plus_p)[!names(sp_corrected_plus_p) %in%
c("timeNumber", "timePoint", "genotype",
"rowId", "colId", "plotId", "Treatment")]
minus_p_phenos <- names(sp_corrected_minus_p)[!names(sp_corrected_minus_p) %in%
c("timeNumber", "timePoint", "genotype",
"rowId", "colId", "plotId", "Treatment")]
common_phenos <- intersect(plus_p_phenos, minus_p_phenos)
cat("Phenotypes corrected in both treatments:", paste(common_phenos, collapse = ", "), "\n")
## Phenotypes corrected in both treatments: PH, DTA, DTS, STW40, STWHV, STW50, STW60, EW, EL, ED, KRN, CD, TKW, KW50, TKN, HI
# Example spatial diagnostic plots for key phenotypes
key_phenotypes <- intersect(c("PH", "HI", "DTS", "TKW"), common_phenos)
for (phen in key_phenotypes[1:2]) { # Limit to first 2 to avoid too many plots
if (phen %in% names(all_models_plus_p)) {
cat("\n### +P Treatment:", phen, "\n")
plot(all_models_plus_p[[phen]], timePoints = 1)
plot(all_models_plus_p[[phen]], timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
if (phen %in% names(all_models_minus_p)) {
cat("\n### -P Treatment:", phen, "\n")
plot(all_models_minus_p[[phen]], timePoints = 1)
plot(all_models_minus_p[[phen]], timePoints = 1, plotType = "spatial", spaTrend = "raw")
}
}
##
## ### +P Treatment: PH
##
## ### -P Treatment: PH
##
## ### +P Treatment: HI
##
## ### -P Treatment: HI
# Filter to NILs for treatment × genotype analysis
nil_corrected_combined <- sp_corrected_combined %>%
filter(genotype %in% c("Inv4m", "CTRL")) %>%
mutate(
# Ensure proper factor levels with +P and CTRL as references
Treatment = factor(Treatment, levels = c("+P", "-P")),
genotype = factor(genotype, levels = c("CTRL", "Inv4m"))
) %>%
droplevels()
cat("NIL data dimensions:", dim(nil_corrected_combined), "\n")
## NIL data dimensions: 64 23
cat("Treatment × Genotype combinations:\n")
## Treatment × Genotype combinations:
table(nil_corrected_combined$Treatment, nil_corrected_combined$genotype)
##
## CTRL Inv4m
## +P 16 16
## -P 16 16
# Extract model coefficients for all common phenotypes
model_coefficients <- list()
cat("\n=== Treatment and Genotype × Treatment Models ===\n")
##
## === Treatment and Genotype × Treatment Models ===
for (phen in common_phenos) {
if (sum(!is.na(nil_corrected_combined[[phen]])) < 20) {
message("Skipping ", phen, " - insufficient data")
next
}
model_formula <- as.formula(paste(phen, "~ genotype * Treatment"))
lm_result <- try(
lm(model_formula, data = nil_corrected_combined),
silent = TRUE
)
if (!inherits(lm_result, "try-error")) {
# Extract coefficients
coef_summary <- summary(lm_result)$coefficients
coef_df <- data.frame(
trait = phen,
term = rownames(coef_summary),
estimate = coef_summary[, "Estimate"],
std_error = coef_summary[, "Std. Error"],
t_value = coef_summary[, "t value"],
p_value = coef_summary[, "Pr(>|t|)"],
stringsAsFactors = FALSE
)
model_coefficients[[phen]] <- coef_df
cat("\n=== ", phen, " ===\n")
print(summary(lm_result))
}
}
##
## === PH ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8349 -2.7792 0.4735 2.8230 9.6399
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 161.658 1.047 154.406 < 2e-16 ***
## genotypeInv4m 5.570 1.481 3.762 0.000385 ***
## Treatment-P 2.588 1.481 1.748 0.085629 .
## genotypeInv4m:Treatment-P 1.677 2.094 0.801 0.426346
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.188 on 60 degrees of freedom
## Multiple R-squared: 0.4486, Adjusted R-squared: 0.421
## F-statistic: 16.27 on 3 and 60 DF, p-value: 7.49e-08
##
##
## === DTA ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.35736 -0.70516 -0.01931 0.60844 2.88214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.1200 0.2599 277.458 < 2e-16 ***
## genotypeInv4m -1.4778 0.3676 -4.020 0.000165 ***
## Treatment-P 3.4329 0.3676 9.339 2.67e-13 ***
## genotypeInv4m:Treatment-P 0.3374 0.5199 0.649 0.518774
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.04 on 60 degrees of freedom
## Multiple R-squared: 0.784, Adjusted R-squared: 0.7732
## F-statistic: 72.59 on 3 and 60 DF, p-value: < 2.2e-16
##
##
## === DTS ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.30539 -0.58763 0.05908 0.52957 2.51458
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.6544 0.2277 323.494 < 2e-16 ***
## genotypeInv4m -1.2842 0.3220 -3.988 0.000183 ***
## Treatment-P 3.0621 0.3220 9.510 1.38e-13 ***
## genotypeInv4m:Treatment-P 0.7087 0.4554 1.556 0.124886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9107 on 60 degrees of freedom
## Multiple R-squared: 0.8028, Adjusted R-squared: 0.7929
## F-statistic: 81.42 on 3 and 60 DF, p-value: < 2.2e-16
##
##
## === STW40 ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.338 -1.303 -0.032 1.450 6.476
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.0737 0.7239 16.679 < 2e-16 ***
## genotypeInv4m -2.9345 1.0076 -2.912 0.00506 **
## Treatment-P -6.5170 1.0076 -6.468 2.14e-08 ***
## genotypeInv4m:Treatment-P 2.8619 1.4134 2.025 0.04741 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.804 on 59 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5015, Adjusted R-squared: 0.4761
## F-statistic: 19.78 on 3 and 59 DF, p-value: 5.389e-09
##
##
## === STWHV ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.370 -7.546 1.087 5.429 43.449
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 106.989 3.433 31.163 < 2e-16 ***
## genotypeInv4m -11.655 4.701 -2.479 0.0161 *
## Treatment-P -22.407 4.701 -4.766 1.3e-05 ***
## genotypeInv4m:Treatment-P 7.421 6.537 1.135 0.2609
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.85 on 58 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.3988, Adjusted R-squared: 0.3677
## F-statistic: 12.82 on 3 and 58 DF, p-value: 1.557e-06
##
##
## === STW50 ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.2540 -2.2394 0.1365 2.5199 11.9398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.984 1.159 31.060 <2e-16 ***
## genotypeInv4m -4.113 1.638 -2.510 0.0152 *
## Treatment-P -18.759 1.583 -11.851 <2e-16 ***
## genotypeInv4m:Treatment-P 4.620 2.222 2.079 0.0425 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.177 on 53 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.8097, Adjusted R-squared: 0.7989
## F-statistic: 75.15 on 3 and 53 DF, p-value: < 2.2e-16
##
##
## === STW60 ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.086 -7.884 -0.080 4.937 35.809
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 77.003 3.517 21.896 < 2e-16 ***
## genotypeInv4m -2.906 5.076 -0.572 0.570
## Treatment-P -33.343 4.973 -6.704 1.74e-08 ***
## genotypeInv4m:Treatment-P 10.807 6.941 1.557 0.126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.68 on 50 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.5715, Adjusted R-squared: 0.5458
## F-statistic: 22.23 on 3 and 50 DF, p-value: 2.768e-09
##
##
## === EW ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.605 -10.703 1.946 12.888 21.022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 86.460 4.788 18.056 <2e-16 ***
## genotypeInv4m 12.388 6.399 1.936 0.0587 .
## Treatment-P -2.487 6.304 -0.395 0.6949
## genotypeInv4m:Treatment-P -7.283 8.784 -0.829 0.4111
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.88 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1261, Adjusted R-squared: 0.07264
## F-statistic: 2.358 on 3 and 49 DF, p-value: 0.08307
##
##
## === EL ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6282 -0.2277 0.0289 0.4594 1.3351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.7579 0.2905 50.804 <2e-16 ***
## genotypeInv4m 0.8936 0.3882 2.302 0.0256 *
## Treatment-P 0.8272 0.3824 2.163 0.0354 *
## genotypeInv4m:Treatment-P -1.0603 0.5329 -1.990 0.0522 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9634 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1154, Adjusted R-squared: 0.06126
## F-statistic: 2.131 on 3 and 49 DF, p-value: 0.1083
##
##
## === ED ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3132 -0.6363 0.1322 1.1502 4.9629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.04209 0.84882 45.996 <2e-16 ***
## genotypeInv4m -0.13902 1.13429 -0.123 0.903
## Treatment-P 0.07826 1.11753 0.070 0.944
## genotypeInv4m:Treatment-P -2.13106 1.55712 -1.369 0.177
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.815 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1073, Adjusted R-squared: 0.05259
## F-statistic: 1.962 on 3 and 49 DF, p-value: 0.1319
##
##
## === KRN ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3323 -0.3699 0.0803 0.6031 1.7838
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.73541 0.28661 58.390 <2e-16 ***
## genotypeInv4m -0.23275 0.38300 -0.608 0.546
## Treatment-P 0.09206 0.37734 0.244 0.808
## genotypeInv4m:Treatment-P -0.56062 0.52578 -1.066 0.292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9506 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1014, Adjusted R-squared: 0.04643
## F-statistic: 1.844 on 3 and 49 DF, p-value: 0.1515
##
##
## === CD ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6747 -0.7966 0.0991 0.8650 2.9659
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.1623 0.5325 49.135 <2e-16 ***
## genotypeInv4m 0.1583 0.7115 0.222 0.825
## Treatment-P -0.1906 0.7010 -0.272 0.787
## genotypeInv4m:Treatment-P -2.6177 0.9768 -2.680 0.010 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.766 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.3109, Adjusted R-squared: 0.2687
## F-statistic: 7.37 on 3 and 49 DF, p-value: 0.0003591
##
##
## === TKW ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.299 -8.307 2.268 9.737 19.859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.6977 4.0227 16.580 <2e-16 ***
## genotypeInv4m 11.8710 5.3755 2.208 0.0319 *
## Treatment-P -0.3746 5.2961 -0.071 0.9439
## genotypeInv4m:Treatment-P -8.0556 7.3794 -1.092 0.2803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.34 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1326, Adjusted R-squared: 0.07952
## F-statistic: 2.497 on 3 and 49 DF, p-value: 0.07059
##
##
## === KW50 ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2755 -0.9140 -0.2116 0.7555 4.7007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.4346 0.4073 25.617 < 2e-16 ***
## genotypeInv4m 0.2029 0.5443 0.373 0.71094
## Treatment-P -1.8014 0.5363 -3.359 0.00152 **
## genotypeInv4m:Treatment-P -0.1211 0.7472 -0.162 0.87193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.351 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.3437, Adjusted R-squared: 0.3035
## F-statistic: 8.553 on 3 and 49 DF, p-value: 0.0001139
##
##
## === TKN ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -255.83 -52.03 11.53 64.12 150.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 338.75 26.40 12.833 <2e-16 ***
## genotypeInv4m 39.63 35.27 1.123 0.267
## Treatment-P 69.22 34.75 1.992 0.052 .
## genotypeInv4m:Treatment-P -21.07 48.42 -0.435 0.665
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 87.55 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.1236, Adjusted R-squared: 0.06995
## F-statistic: 2.304 on 3 and 49 DF, p-value: 0.0885
##
##
## === HI ===
##
## Call:
## lm(formula = model_formula, data = nil_corrected_combined)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45944 -0.11480 0.02008 0.12552 0.37169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.66198 0.04909 13.485 <2e-16 ***
## genotypeInv4m 0.17185 0.06560 2.620 0.0117 *
## Treatment-P 0.12871 0.06463 1.991 0.0520 .
## genotypeInv4m:Treatment-P -0.09458 0.09006 -1.050 0.2988
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1628 on 49 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 0.18, Adjusted R-squared: 0.1298
## F-statistic: 3.586 on 3 and 49 DF, p-value: 0.02013
# Combine all coefficients into single data frame
if (length(model_coefficients) > 0) {
all_coefficients <- bind_rows(model_coefficients) %>%
mutate(
term = case_when(
term == "(Intercept)" ~ "Intercept (CTRL, +P)",
term == "genotypeInv4m" ~ "Genotype Effect (Inv4m vs CTRL)",
term == "Treatment-P" ~ "Treatment Effect (-P vs +P)",
term == "genotypeInv4m:Treatment-P" ~ "Inv4m:-P",
TRUE ~ term
)
)
# Apply FDR correction within each term type
all_coefficients <- all_coefficients %>%
group_by(term) %>%
mutate(
p_adj_fdr = p.adjust(p_value, method = "fdr")
) %>%
ungroup()
cat("\n=== All Model Coefficients ===\n")
print(all_coefficients %>% filter(grepl("Interaction",term)) %>% arrange(p_value))
}
##
## === All Model Coefficients ===
## # A tibble: 0 × 7
## # ℹ 7 variables: trait <chr>, term <chr>, estimate <dbl>, std_error <dbl>,
## # t_value <dbl>, p_value <dbl>, p_adj_fdr <dbl>
if (exists("all_coefficients") && nrow(all_coefficients) > 0) {
# Treatment effects table (main effect of -P vs +P)
treatment_effects <- all_coefficients %>%
filter(term == "Treatment Effect (-P vs +P)") %>%
arrange(p_value) %>%
mutate(
significance = case_when(
p_adj_fdr < 0.001 ~ "***",
p_adj_fdr < 0.01 ~ "**",
p_adj_fdr < 0.05 ~ "*",
p_adj_fdr < 0.1 ~ ".",
TRUE ~ ""
)
)
cat("\n=== Treatment Effects: -P vs +P (Reference) ===\n")
cat("Negative estimates indicate reduction under -P treatment\n")
cat("Positive estimates indicate increase under -P treatment\n\n")
print(treatment_effects %>%
select(trait, estimate, std_error, t_value, p_value, p_adj_fdr, significance) %>%
mutate(across(where(is.numeric), ~ round(.x, 4))))
# Genotype × Treatment interaction effects table
interaction_effects <- all_coefficients %>%
filter(term == "Inv4m:-P") %>%
arrange(p_value) %>%
mutate(
significance = case_when(
p_adj_fdr < 0.001 ~ "***",
p_adj_fdr < 0.01 ~ "**",
p_adj_fdr < 0.05 ~ "*",
p_adj_fdr < 0.1 ~ ".",
TRUE ~ ""
)
)
cat("\n=== Inv4m:-P Effects ===\n")
cat("Tests whether Inv4m responds differently to -P vs +P compared to CTRL\n")
cat("Negative estimates: Inv4m shows greater reduction under -P than CTRL\n")
cat("Positive estimates: Inv4m shows less reduction (or greater increase) under -P than CTRL\n\n")
print(interaction_effects %>%
select(trait, estimate, std_error, t_value, p_value, p_adj_fdr, significance) %>%
mutate(across(where(is.numeric), ~ round(.x, 4))))
# Genotype main effects table
genotype_effects <- all_coefficients %>%
filter(term == "Genotype Effect (Inv4m vs CTRL)") %>%
arrange(p_value) %>%
mutate(
significance = case_when(
p_adj_fdr < 0.001 ~ "***",
p_adj_fdr < 0.01 ~ "**",
p_adj_fdr < 0.05 ~ "*",
p_adj_fdr < 0.1 ~ ".",
TRUE ~ ""
)
)
cat("\n=== Genotype Effects: Inv4m vs CTRL (at +P reference level) ===\n")
print(genotype_effects %>%
select(trait, estimate, std_error, t_value, p_value, p_adj_fdr, significance) %>%
mutate(across(where(is.numeric), ~ round(.x, 4))))
# Summary counts
cat("\n=== Summary of Significant Effects (FDR < 0.05) ===\n")
cat("Treatment effects:", sum(treatment_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(treatment_effects), "\n")
cat("Interaction effects:", sum(interaction_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(interaction_effects), "\n")
cat("Genotype effects:", sum(genotype_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(genotype_effects), "\n")
}
##
## === Treatment Effects: -P vs +P (Reference) ===
## Negative estimates indicate reduction under -P treatment
## Positive estimates indicate increase under -P treatment
##
## # A tibble: 16 × 7
## trait estimate std_error t_value p_value p_adj_fdr significance
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 STW50 -18.8 1.58 -11.9 0 0 "***"
## 2 DTS 3.06 0.322 9.51 0 0 "***"
## 3 DTA 3.43 0.368 9.34 0 0 "***"
## 4 STW60 -33.3 4.97 -6.70 0 0 "***"
## 5 STW40 -6.52 1.01 -6.47 0 0 "***"
## 6 STWHV -22.4 4.70 -4.77 0 0 "***"
## 7 KW50 -1.80 0.536 -3.36 0.0015 0.0035 "**"
## 8 EL 0.827 0.382 2.16 0.0354 0.0709 "."
## 9 TKN 69.2 34.8 1.99 0.052 0.0832 "."
## 10 HI 0.129 0.0646 1.99 0.052 0.0832 "."
## 11 PH 2.59 1.48 1.75 0.0856 0.125 ""
## 12 EW -2.49 6.30 -0.394 0.695 0.924 ""
## 13 CD -0.191 0.701 -0.272 0.787 0.924 ""
## 14 KRN 0.0921 0.377 0.244 0.808 0.924 ""
## 15 TKW -0.375 5.30 -0.0707 0.944 0.944 ""
## 16 ED 0.0783 1.12 0.07 0.944 0.944 ""
##
## === Inv4m:-P Effects ===
## Tests whether Inv4m responds differently to -P vs +P compared to CTRL
## Negative estimates: Inv4m shows greater reduction under -P than CTRL
## Positive estimates: Inv4m shows less reduction (or greater increase) under -P than CTRL
##
## # A tibble: 16 × 7
## trait estimate std_error t_value p_value p_adj_fdr significance
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 CD -2.62 0.977 -2.68 0.01 0.16 ""
## 2 STW50 4.62 2.22 2.08 0.0425 0.209 ""
## 3 STW40 2.86 1.41 2.02 0.0474 0.209 ""
## 4 EL -1.06 0.533 -1.99 0.0522 0.209 ""
## 5 DTS 0.709 0.455 1.56 0.125 0.336 ""
## 6 STW60 10.8 6.94 1.56 0.126 0.336 ""
## 7 ED -2.13 1.56 -1.37 0.177 0.405 ""
## 8 STWHV 7.42 6.54 1.14 0.261 0.435 ""
## 9 TKW -8.06 7.38 -1.09 0.280 0.435 ""
## 10 KRN -0.561 0.526 -1.07 0.292 0.435 ""
## 11 HI -0.0946 0.0901 -1.05 0.299 0.435 ""
## 12 EW -7.28 8.78 -0.829 0.411 0.525 ""
## 13 PH 1.68 2.09 0.801 0.426 0.525 ""
## 14 DTA 0.337 0.520 0.649 0.519 0.593 ""
## 15 TKN -21.1 48.4 -0.435 0.665 0.710 ""
## 16 KW50 -0.121 0.747 -0.162 0.872 0.872 ""
##
## === Genotype Effects: Inv4m vs CTRL (at +P reference level) ===
## # A tibble: 16 × 7
## trait estimate std_error t_value p_value p_adj_fdr significance
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 DTA -1.48 0.368 -4.02 0.0002 0.0015 "**"
## 2 DTS -1.28 0.322 -3.99 0.0002 0.0015 "**"
## 3 PH 5.57 1.48 3.76 0.0004 0.0021 "**"
## 4 STW40 -2.93 1.01 -2.91 0.0051 0.0202 "*"
## 5 HI 0.172 0.0656 2.62 0.0117 0.0368 "*"
## 6 STW50 -4.11 1.64 -2.51 0.0152 0.0368 "*"
## 7 STWHV -11.7 4.70 -2.48 0.0161 0.0368 "*"
## 8 EL 0.894 0.388 2.30 0.0256 0.0512 "."
## 9 TKW 11.9 5.38 2.21 0.0319 0.0568 "."
## 10 EW 12.4 6.40 1.94 0.0587 0.0938 "."
## 11 TKN 39.6 35.3 1.12 0.267 0.388 ""
## 12 KRN -0.233 0.383 -0.608 0.546 0.701 ""
## 13 STW60 -2.91 5.08 -0.572 0.570 0.701 ""
## 14 KW50 0.203 0.544 0.373 0.711 0.812 ""
## 15 CD 0.158 0.712 0.222 0.825 0.880 ""
## 16 ED -0.139 1.13 -0.123 0.903 0.903 ""
##
## === Summary of Significant Effects (FDR < 0.05) ===
## Treatment effects: 7 of 16
## Interaction effects: 0 of 16
## Genotype effects: 7 of 16
# Define plotting theme
pheno_theme2 <- ggpubr::theme_classic2(base_size = 16) +
ggplot2::theme(
plot.title = ggtext::element_markdown(hjust = 0.5, face = "bold"),
axis.title.y = ggtext::element_markdown(),
axis.title.x = element_text(face = "bold"),
axis.text.x = element_text(size = 14, face = "bold", color = "black"),
strip.background = element_blank(),
strip.text = ggtext::element_markdown(size = 14),
legend.position = "bottom",
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 12)
)
if (length(common_phenos) > 0 && exists("all_coefficients")) {
# Prepare data for plotting
plot_data_combined <- nil_corrected_combined %>%
pivot_longer(
cols = all_of(common_phenos),
names_to = "trait",
values_to = "value"
) %>%
filter(!is.na(value)) %>%
mutate(trait = as.character(trait))
# Treatment × Genotype interaction plot (emphasizing treatment effect first)
interaction_plot <- ggplot(
plot_data_combined,
aes(x = Treatment, y = value, color = genotype, group = genotype)
) +
stat_summary(
fun = mean,
geom = "point",
size = 4
) +
stat_summary(
fun = mean,
geom = "line",
linewidth = 1.5
) +
stat_summary(
fun.data = mean_se,
geom = "errorbar",
width = 0.1,
linewidth = 1
) +
scale_color_manual(values = c("CTRL" = "gold", "Inv4m" = "purple4")) +
scale_x_discrete(labels = c("+P" = "+P\n(Reference)", "-P" = "-P\n(Low P)")) +
labs(
title = "Treatment Effects and Inv4m:-Ps",
subtitle = "+P is reference treatment; Lines show genotype-specific responses",
y = "Spatially Corrected Value",
x = "Phosphorus Treatment",
color = "Genotype"
) +
facet_wrap(~ trait, scales = "free_y", ncol = 4) +
pheno_theme2
print(interaction_plot)
# Focus on significant treatment effects
if (exists("treatment_effects")) {
sig_treatment_traits <- treatment_effects %>%
filter(p_adj_fdr < 0.05) %>%
pull(trait)
if (length(sig_treatment_traits) > 0) {
treatment_plot_data <- plot_data_combined %>%
filter(trait %in% sig_treatment_traits)
treatment_plot <- ggplot(
treatment_plot_data,
aes(x = Treatment, y = value, fill = Treatment)
) +
geom_boxplot(width = 0.6, alpha = 0.7, linewidth = 0.8) +
geom_point(
position = position_jitter(width = 0.2),
size = 2,
alpha = 0.6
) +
scale_fill_manual(
values = c("+P" = "lightblue", "-P" = "lightcoral"),
labels = c("+P" = "+P (Reference)", "-P" = "-P (Low P)")
) +
labs(
title = "Significant Treatment Effects (FDR < 0.05)",
subtitle = "Traits showing significant response to phosphorus limitation",
y = "Spatially Corrected Value",
x = "Phosphorus Treatment",
fill = "Treatment"
) +
facet_wrap(~ trait, scales = "free_y", ncol = 3) +
pheno_theme2
print(treatment_plot)
}
}
# Focus on significant interaction effects
if (exists("interaction_effects")) {
sig_interaction_traits <- interaction_effects %>%
filter(p_adj_fdr < 0.05) %>%
pull(trait)
if (length(sig_interaction_traits) > 0) {
interaction_plot_data <- plot_data_combined %>%
filter(trait %in% sig_interaction_traits)
interaction_detailed_plot <- ggplot(
interaction_plot_data,
aes(x = genotype, y = value, fill = Treatment)
) +
geom_boxplot(
width = 0.6,
alpha = 0.7,
position = position_dodge(width = 0.8),
linewidth = 0.8
) +
geom_point(
aes(color = Treatment),
position = position_jitterdodge(
dodge.width = 0.8,
jitter.width = 0.2
),
size = 1.5,
alpha = 0.6
) +
scale_fill_manual(
values = c("+P" = "lightblue", "-P" = "lightcoral"),
labels = c("+P" = "+P (Reference)", "-P" = "-P (Low P)")
) +
scale_color_manual(
values = c("+P" = "blue", "-P" = "red"),
labels = c("+P" = "+P (Reference)", "-P" = "-P (Low P)")
) +
labs(
title = "Significant Inv4m:-Ps (FDR < 0.05)",
subtitle = "Genotypes respond differently to phosphorus limitation",
y = "Spatially Corrected Value",
x = "Genotype",
fill = "P Treatment",
color = "P Treatment"
) +
facet_wrap(~ trait, scales = "free_y", ncol = 3) +
pheno_theme2 +
guides(color = "none") # Remove duplicate legend
print(interaction_detailed_plot)
}
}
}
# Calculate effect sizes and summary statistics
effect_summary <- nil_corrected_combined %>%
pivot_longer(
cols = all_of(common_phenos),
names_to = "trait",
values_to = "value"
) %>%
filter(!is.na(value)) %>%
group_by(trait, Treatment, genotype) %>%
summarise(
n = n(),
mean = mean(value),
sd = sd(value),
se = sd / sqrt(n),
.groups = "drop"
) %>%
pivot_wider(
names_from = genotype,
values_from = c(n, mean, sd, se),
names_sep = "_"
) %>%
mutate(
diff_mean = mean_Inv4m - mean_CTRL,
percent_change = (diff_mean / mean_CTRL) * 100,
pooled_sd = sqrt(((n_Inv4m - 1) * sd_Inv4m^2 + (n_CTRL - 1) * sd_CTRL^2) /
(n_Inv4m + n_CTRL - 2)),
cohens_d = diff_mean / pooled_sd
) %>%
arrange(Treatment, desc(abs(cohens_d)))
cat("\n=== Effect Size Summary ===\n")
##
## === Effect Size Summary ===
print(effect_summary %>%
select(trait, Treatment, diff_mean, percent_change, cohens_d) %>%
mutate(across(where(is.numeric), ~ round(.x, 3))))
## # A tibble: 32 × 5
## trait Treatment diff_mean percent_change cohens_d
## <chr> <fct> <dbl> <dbl> <dbl>
## 1 DTS +P -1.28 -1.74 -1.56
## 2 DTA +P -1.48 -2.05 -1.43
## 3 PH +P 5.57 3.44 1.29
## 4 EL +P 0.894 6.06 0.917
## 5 STW40 +P -2.94 -24.3 -0.89
## 6 HI +P 0.172 26.0 0.826
## 7 STW50 +P -4.11 -11.4 -0.739
## 8 STWHV +P -11.7 -10.9 -0.736
## 9 TKW +P 11.9 17.8 0.683
## 10 EW +P 12.4 14.3 0.632
## # ℹ 22 more rows
# Export the combined spatially corrected dataset
combined_output_file <- "PSU2022_spatially_corrected_both_treatments.csv"
write.csv(sp_corrected_combined, combined_output_file, row.names = FALSE)
# Export all model coefficients
if (exists("all_coefficients")) {
coefficients_output_file <- "PSU2022_model_coefficients_spatially_corrected.csv"
write.csv(all_coefficients, coefficients_output_file, row.names = FALSE)
# Export treatment effects table
if (exists("treatment_effects")) {
treatment_output_file <- "PSU2022_treatment_effects_spatially_corrected.csv"
write.csv(treatment_effects, treatment_output_file, row.names = FALSE)
}
# Export interaction effects table
if (exists("interaction_effects")) {
interaction_output_file <- "PSU2022_interaction_effects_spatially_corrected.csv"
write.csv(interaction_effects, interaction_output_file, row.names = FALSE)
}
# Export genotype effects table
if (exists("genotype_effects")) {
genotype_output_file <- "PSU2022_genotype_effects_spatially_corrected.csv"
write.csv(genotype_effects, genotype_output_file, row.names = FALSE)
}
}
# Export summary statistics with treatment contrasts
effect_summary_treatment <- nil_corrected_combined %>%
pivot_longer(
cols = all_of(common_phenos),
names_to = "trait",
values_to = "value"
) %>%
filter(!is.na(value)) %>%
group_by(trait, Treatment, genotype) %>%
summarise(
n = n(),
mean = mean(value),
sd = sd(value),
se = sd / sqrt(n),
.groups = "drop"
) %>%
# Calculate treatment effects (relative to +P reference)
pivot_wider(
names_from = Treatment,
values_from = c(n, mean, sd, se),
names_sep = "_"
) %>%
mutate(
treatment_effect = `mean_-P` - `mean_+P`,
treatment_percent_change = (treatment_effect / `mean_+P`) * 100
) %>%
# Add genotype comparison within each treatment
group_by(trait) %>%
mutate(
genotype_effect_plus_p = case_when(
genotype == "Inv4m" ~ `mean_+P` - `mean_+P`[genotype == "CTRL"],
TRUE ~ NA_real_
),
genotype_effect_minus_p = case_when(
genotype == "Inv4m" ~ `mean_-P` - `mean_-P`[genotype == "CTRL"],
TRUE ~ NA_real_
)
) %>%
ungroup()
summary_output_file <- "PSU2022_effect_summary_with_treatment_reference.csv"
write.csv(effect_summary_treatment, summary_output_file, row.names = FALSE)
cat("=== Files Exported ===\n")
## === Files Exported ===
cat("Spatially corrected data:", combined_output_file, "\n")
## Spatially corrected data: PSU2022_spatially_corrected_both_treatments.csv
if (exists("all_coefficients")) {
cat("All model coefficients:", coefficients_output_file, "\n")
if (exists("treatment_effects")) cat("Treatment effects:", treatment_output_file, "\n")
if (exists("interaction_effects")) cat("Interaction effects:", interaction_output_file, "\n")
if (exists("genotype_effects")) cat("Genotype effects:", genotype_output_file, "\n")
}
## All model coefficients: PSU2022_model_coefficients_spatially_corrected.csv
## Treatment effects: PSU2022_treatment_effects_spatially_corrected.csv
## Interaction effects: PSU2022_interaction_effects_spatially_corrected.csv
## Genotype effects: PSU2022_genotype_effects_spatially_corrected.csv
cat("Effect summary (treatment reference):", summary_output_file, "\n")
## Effect summary (treatment reference): PSU2022_effect_summary_with_treatment_reference.csv
# Final analysis summary
cat("\n=== Final Analysis Summary ===\n")
##
## === Final Analysis Summary ===
cat("Total phenotypes evaluated:", length(all_phenotypes), "\n")
## Total phenotypes evaluated: 16
cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")
## Usable phenotypes (<50% missing): 16
cat("Successfully fitted models (+P):", length(all_models_plus_p), "\n")
## Successfully fitted models (+P): 16
cat("Successfully fitted models (-P):", length(all_models_minus_p), "\n")
## Successfully fitted models (-P): 16
cat("Common phenotypes between treatments:", length(common_phenos), "\n")
## Common phenotypes between treatments: 16
cat("Total plots with corrected data:", nrow(sp_corrected_combined), "\n")
## Total plots with corrected data: 127
cat("Significant traits (by treatment):", length(sig_treatment_traits), "\n")
## Significant traits (by treatment): 7
# Print model fit information for both treatments
if (length(fit_info_plus_p) > 0) {
cat("\n=== +P Model Fit Details ===\n")
for (trait in names(fit_info_plus_p)) {
info <- fit_info_plus_p[[trait]]
cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
}
}
##
## === +P Model Fit Details ===
## PH: n=64, status=success
## DTA: n=64, status=success
## DTS: n=64, status=success
## STW40: n=62, status=success
## STWHV: n=62, status=success
## STW50: n=51, status=success
## STW60: n=50, status=success
## EW: n=49, status=success
## EL: n=49, status=success
## ED: n=49, status=success
## KRN: n=49, status=success
## CD: n=49, status=success
## TKW: n=49, status=success
## KW50: n=49, status=success
## TKN: n=49, status=success
## HI: n=49, status=success
# Final analysis summary
cat("\n=== Final Analysis Summary ===\n")
##
## === Final Analysis Summary ===
cat("Total phenotypes evaluated:", length(all_phenotypes), "\n")
## Total phenotypes evaluated: 16
cat("Usable phenotypes (<50% missing):", length(usable_phenotypes), "\n")
## Usable phenotypes (<50% missing): 16
cat("Successfully fitted models (+P):", length(all_models_plus_p), "\n")
## Successfully fitted models (+P): 16
cat("Successfully fitted models (-P):", length(all_models_minus_p), "\n")
## Successfully fitted models (-P): 16
cat("Common phenotypes between treatments:", length(common_phenos), "\n")
## Common phenotypes between treatments: 16
cat("Total plots with corrected data:", nrow(sp_corrected_combined), "\n")
## Total plots with corrected data: 127
if (exists("treatment_effects") && exists("interaction_effects") && exists("genotype_effects")) {
cat("Significant treatment effects (FDR < 0.05):", sum(treatment_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(treatment_effects), "\n")
cat("Significant interaction effects (FDR < 0.05):", sum(interaction_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(interaction_effects), "\n")
cat("Significant genotype effects (FDR < 0.05):", sum(genotype_effects$p_adj_fdr < 0.05, na.rm = TRUE), "of", nrow(genotype_effects), "\n")
}
## Significant treatment effects (FDR < 0.05): 7 of 16
## Significant interaction effects (FDR < 0.05): 0 of 16
## Significant genotype effects (FDR < 0.05): 7 of 16
# Print model fit information for both treatments
if (length(fit_info_plus_p) > 0) {
cat("\n=== +P Model Fit Details ===\n")
for (trait in names(fit_info_plus_p)) {
info <- fit_info_plus_p[[trait]]
cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
}
}
##
## === +P Model Fit Details ===
## PH: n=64, status=success
## DTA: n=64, status=success
## DTS: n=64, status=success
## STW40: n=62, status=success
## STWHV: n=62, status=success
## STW50: n=51, status=success
## STW60: n=50, status=success
## EW: n=49, status=success
## EL: n=49, status=success
## ED: n=49, status=success
## KRN: n=49, status=success
## CD: n=49, status=success
## TKW: n=49, status=success
## KW50: n=49, status=success
## TKN: n=49, status=success
## HI: n=49, status=success
if (length(fit_info_minus_p) > 0) {
cat("\n=== -P Model Fit Details ===\n")
for (trait in names(fit_info_minus_p)) {
info <- fit_info_minus_p[[trait]]
cat(sprintf("%s: n=%d, status=%s\n", trait, info$n_obs, info$status))
}
}
##
## === -P Model Fit Details ===
## PH: n=63, status=success
## DTA: n=63, status=success
## DTS: n=63, status=success
## STW40: n=63, status=success
## STWHV: n=63, status=success
## STW50: n=61, status=success
## STW60: n=57, status=success
## EW: n=56, status=success
## EL: n=56, status=success
## ED: n=56, status=success
## KRN: n=56, status=success
## CD: n=56, status=success
## TKW: n=56, status=success
## KW50: n=56, status=success
## TKN: n=56, status=success
## HI: n=56, status=success
# Check for missing data patterns in final dataset
missing_final <- sp_corrected_combined %>%
select(all_of(common_phenos)) %>%
summarise(across(everything(), ~ sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "phenotype", values_to = "missing_count") %>%
mutate(
total_plots = nrow(sp_corrected_combined),
missing_pct = round(missing_count / total_plots * 100, 1)
) %>%
arrange(missing_pct)
cat("\n=== Missing Data in Final Dataset ===\n")
##
## === Missing Data in Final Dataset ===
print(missing_final)
## # A tibble: 16 × 4
## phenotype missing_count total_plots missing_pct
## <chr> <int> <int> <dbl>
## 1 PH 0 127 0
## 2 DTA 0 127 0
## 3 DTS 0 127 0
## 4 STW40 2 127 1.6
## 5 STWHV 2 127 1.6
## 6 STW50 15 127 11.8
## 7 STW60 20 127 15.7
## 8 EW 22 127 17.3
## 9 EL 22 127 17.3
## 10 ED 22 127 17.3
## 11 KRN 22 127 17.3
## 12 CD 22 127 17.3
## 13 TKW 22 127 17.3
## 14 KW50 22 127 17.3
## 15 TKN 22 127 17.3
## 16 HI 22 127 17.3
# Key findings summary
if (exists("treatment_effects") && exists("interaction_effects")) {
cat("\n=== Key Findings Summary ===\n")
# Most significant treatment effects
top_treatment <- treatment_effects %>%
filter(p_adj_fdr < 0.05) %>%
arrange(p_adj_fdr) %>%
head(3)
if (nrow(top_treatment) > 0) {
cat("Top treatment effects (-P vs +P):\n")
for (i in 1:nrow(top_treatment)) {
trait <- top_treatment$trait[i]
est <- round(top_treatment$estimate[i], 3)
pval <- format(top_treatment$p_adj_fdr[i], scientific = TRUE, digits = 2)
direction <- ifelse(est < 0, "decreased", "increased")
cat(sprintf(" %s: %s by %s units (FDR = %s)\n", trait, direction, abs(est), pval))
}
}
# Most significant interaction effects
top_interactions <- interaction_effects %>%
filter(p_adj_fdr < 0.05) %>%
arrange(p_adj_fdr) %>%
head(3)
if (nrow(top_interactions) > 0) {
cat("\nTop genotype × treatment interactions:\n")
for (i in 1:nrow(top_interactions)) {
trait <- top_interactions$trait[i]
est <- round(top_interactions$estimate[i], 3)
pval <- format(top_interactions$p_adj_fdr[i], scientific = TRUE, digits = 2)
direction <- ifelse(est < 0, "more sensitive", "less sensitive")
cat(sprintf(" %s: Inv4m is %s to -P than CTRL (interaction = %s, FDR = %s)\n",
trait, direction, est, pval))
}
}
}
##
## === Key Findings Summary ===
## Top treatment effects (-P vs +P):
## STW50: decreased by 18.759 units (FDR = 2.6e-15)
## DTS: increased by 3.062 units (FDR = 1.1e-12)
## DTA: increased by 3.433 units (FDR = 1.4e-12)