library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)
These ANOVA models quantify the extent of a Genotype × Quantile interaction at each time point (measured in days after transplanting, DAT).
# Note - the distributional data files are too large to be stored on GitHub. Once you download them, save them to their own directory and replace the value of data_path_server with that path.
# Replace data_path with the folder location where you downloaded all other data related to the repository.
# You can optionally specify a path to save RData when the loop completes one round of processing ANOVAs for all DATs associated with a specific vegetation index.
data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'
data_path_server <- '//murray_lab/FLIGHTS2/Processed2/Texas/CS24/MULTI/CS24-STEL/Data_Products/Individual_VIs/'
RData_path <- 'C:/Users/aaron.desalvio/Downloads/Temp_RData/'
csvs <- list.files(path = data_path_server, pattern = '\\.csv$')
all_vis <- lapply(strsplit(as.character(csvs), '-'), '[', 6) %>% unlist()
# We will change the csvs vector to only contain one element to reduce the duration of producing this RMarkdown file. If you'd like to run the entire loop with all VIs, comment out the next line.
csvs <- csvs[1]
# We'll also only run a few DATs (time points) to speed up processing of this file. If you'd like to run the entire analysis, first comment out the next line, and then UNCOMMENT the line of code marked with # UNCOMMENT HERE below.
dat.loop <- c(107, 108, 109)
# Initialize empty lists to store results
G_list <- list()
VC_list <- list()
for (i in 1:length(csvs)) {
current_vi <- all_vis[i]
print(paste('Current VI:', current_vi))
# Read data from CSV files
print(paste('Loading VI data frame for', current_vi))
df <- fread(paste0(data_path_server, 'CS24-STEL-MULTI-RTK-Distributions-', current_vi, '-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
print(paste0('VI data frame loaded for ', current_vi, '!'))
# Remove low-quality DATs
DATs_exclude <- c(46, 71:72, 99:100)
df <- df %>% filter(!DAT %in% DATs_exclude)
# Remove plant that died early in the season
df <- df %>% filter(!Row_Range == '413_15')
# Basic plot for inspection of data
sort(unique(df$DAT))
df.plot <- df %>% filter(Pedigree == 5016, DAT == 140)
plot(df.plot$probability, df.plot$quantile)
# DAT vector for loop
# dat.loop <- sort(unique(df$DAT)) # UNCOMMENT HERE
# Loop over each environment
for (dat in dat.loop) {
# Subset data for the current DAT
df.subset <- df[df$DAT == dat, ]
# Convert variables to appropriate data types
df.subset$Range <- as.factor(df.subset$Range)
df.subset$Row <- as.factor(df.subset$Row)
df.subset$Replicate <- as.factor(df.subset$Replicate)
df.subset$Pedigree <- as.factor(df.subset$Pedigree)
df.subset$probability <- as.factor(df.subset$probability)
# Progress report
print(paste('ANOVA Model:', 'DAT', dat, 'VI', current_vi))
# Fit the linear mixed-effects model after setting "Inf" and NaN as NA
response <- df.subset$quantile
response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
anova <- lmer(response ~
(1 | Pedigree) +
(1 | probability) +
(1 | Pedigree:probability) +
(1 | Range:probability) +
(1 | Row:probability) +
(1 | Replicate:probability),
data = df.subset)
# Calculate RMSE
rmse_value <- sqrt(mean(residuals(anova)^2))
# Calculate R-squared
R <- MuMIn::r.squaredGLMM(anova)[, "R2c"] # Conditional R-squared
# Extract variance components
VC <- as.data.frame(VarCorr(anova))
VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
# Calculate heritability
nRep <- length(unique(df.subset$Replicate))
nProb <- length(unique(df.subset$probability))
Vg <- VC[VC$grp == "Pedigree", "vcov"]
Vgp <- VC[VC$grp == "Pedigree:probability", "vcov"]
Ve <- VC[VC$grp == "Residual", "vcov"]
#heritability <- Vg / (Vg + (Vgp / nProb) + (Ve / (nRep * nProb)))
#heritability <- round(heritability, 3)
heritability <- Vg / (Vg + (Vgp) + (Ve / (nRep)))
heritability <- round(heritability, 3)
# Add additional information to VC data frame
VC$Rmse <- rmse_value
VC$Heritability <- heritability
VC$R_squared <- R
VC$Vegetation.Index <- current_vi
VC$DAT <- dat
print(VC)
# Store VC in the list
VC_list[[paste(dat, current_vi, sep = '_')]] <- VC
# Extract random effects for 'Pedigree'
G <- coef(anova)$Pedigree
G$Pedigree <- rownames(G)
# Store G in the list
G_list[[paste(dat, current_vi, sep = '_')]] <- G
}
# Save RData for current loop iteration
save.image(paste0(RData_path, 'ANOVA_Within_DAT_Across_Quantile_', current_vi, '.RData'))
}
## [1] "Current VI: BCC"
## [1] "Loading VI data frame for BCC"
## [1] "VI data frame loaded for BCC!"
## [1] "ANOVA Model: DAT 107 VI BCC"
## grp var1 var2 vcov sdcor Percent
## 1 Row:probability (Intercept) <NA> 2.627023e-05 0.0051254490 3.61
## 2 Replicate:probability (Intercept) <NA> 3.157604e-08 0.0001776965 0.00
## 3 Pedigree:probability (Intercept) <NA> 0.000000e+00 0.0000000000 0.00
## 4 Range:probability (Intercept) <NA> 1.980610e-06 0.0014073415 0.27
## 5 probability (Intercept) <NA> 6.350885e-04 0.0252009626 87.36
## 6 Pedigree (Intercept) <NA> 1.737181e-05 0.0041679508 2.39
## 7 Residual <NA> <NA> 4.623500e-05 0.0067996325 6.36
## Rmse Heritability R_squared Vegetation.Index DAT
## 1 0.006399464 0.919 0.9364011 BCC 107
## 2 0.006399464 0.919 0.9364011 BCC 107
## 3 0.006399464 0.919 0.9364011 BCC 107
## 4 0.006399464 0.919 0.9364011 BCC 107
## 5 0.006399464 0.919 0.9364011 BCC 107
## 6 0.006399464 0.919 0.9364011 BCC 107
## 7 0.006399464 0.919 0.9364011 BCC 107
## [1] "ANOVA Model: DAT 108 VI BCC"
## grp var1 var2 vcov sdcor Percent
## 1 Row:probability (Intercept) <NA> 6.388885e-05 0.007993050 5.67
## 2 Replicate:probability (Intercept) <NA> 3.885413e-06 0.001971145 0.34
## 3 Pedigree:probability (Intercept) <NA> 1.792040e-06 0.001338671 0.16
## 4 Range:probability (Intercept) <NA> 2.949287e-06 0.001717349 0.26
## 5 probability (Intercept) <NA> 9.326327e-04 0.030539035 82.73
## 6 Pedigree (Intercept) <NA> 1.925773e-05 0.004388363 1.71
## 7 Residual <NA> <NA> 1.029599e-04 0.010146916 9.13
## Rmse Heritability R_squared Vegetation.Index DAT
## 1 0.009417045 0.787 0.9086721 BCC 108
## 2 0.009417045 0.787 0.9086721 BCC 108
## 3 0.009417045 0.787 0.9086721 BCC 108
## 4 0.009417045 0.787 0.9086721 BCC 108
## 5 0.009417045 0.787 0.9086721 BCC 108
## 6 0.009417045 0.787 0.9086721 BCC 108
## 7 0.009417045 0.787 0.9086721 BCC 108
## [1] "ANOVA Model: DAT 109 VI BCC"
## grp var1 var2 vcov sdcor Percent
## 1 Row:probability (Intercept) <NA> 3.712641e-05 0.006093145 4.96
## 2 Replicate:probability (Intercept) <NA> 1.362821e-06 0.001167399 0.18
## 3 Pedigree:probability (Intercept) <NA> 0.000000e+00 0.000000000 0.00
## 4 Range:probability (Intercept) <NA> 2.276986e-06 0.001508969 0.30
## 5 probability (Intercept) <NA> 6.429619e-04 0.025356693 85.87
## 6 Pedigree (Intercept) <NA> 1.546753e-05 0.003932878 2.07
## 7 Residual <NA> <NA> 4.957899e-05 0.007041235 6.62
## Rmse Heritability R_squared Vegetation.Index DAT
## 1 0.006565553 0.903 0.9337865 BCC 109
## 2 0.006565553 0.903 0.9337865 BCC 109
## 3 0.006565553 0.903 0.9337865 BCC 109
## 4 0.006565553 0.903 0.9337865 BCC 109
## 5 0.006565553 0.903 0.9337865 BCC 109
## 6 0.006565553 0.903 0.9337865 BCC 109
## 7 0.006565553 0.903 0.9337865 BCC 109
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)
# View the first few rows of the results
head(G_df)
## (Intercept) Pedigree
## 107_BCC.5001 0.3339126 5001
## 107_BCC.5002 0.3372073 5002
## 107_BCC.5003 0.3328357 5003
## 107_BCC.5004 0.3340158 5004
## 107_BCC.5005 0.3314073 5005
## 107_BCC.5006 0.3390244 5006
head(VC_df)
## grp var1 var2 vcov sdcor
## 107_BCC.1 Row:probability (Intercept) <NA> 2.627023e-05 0.0051254490
## 107_BCC.2 Replicate:probability (Intercept) <NA> 3.157604e-08 0.0001776965
## 107_BCC.3 Pedigree:probability (Intercept) <NA> 0.000000e+00 0.0000000000
## 107_BCC.4 Range:probability (Intercept) <NA> 1.980610e-06 0.0014073415
## 107_BCC.5 probability (Intercept) <NA> 6.350885e-04 0.0252009626
## 107_BCC.6 Pedigree (Intercept) <NA> 1.737181e-05 0.0041679508
## Percent Rmse Heritability R_squared Vegetation.Index DAT
## 107_BCC.1 3.61 0.006399464 0.919 0.9364011 BCC 107
## 107_BCC.2 0.00 0.006399464 0.919 0.9364011 BCC 107
## 107_BCC.3 0.00 0.006399464 0.919 0.9364011 BCC 107
## 107_BCC.4 0.27 0.006399464 0.919 0.9364011 BCC 107
## 107_BCC.5 87.36 0.006399464 0.919 0.9364011 BCC 107
## 107_BCC.6 2.39 0.006399464 0.919 0.9364011 BCC 107
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$DAT_Prob_VI <- rownames(VC_df)
colnames(G_df)[1] <- 'BLUP'
G_df$DAT_VI.Pedigree <- rownames(G_df)
G_df$VI.Pedigree <- lapply(strsplit(as.character(G_df$DAT_VI.Pedigree), '_'), '[', 2) %>% unlist()
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$VI.Pedigree), '\\.'), '[', 1)
# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Within_DAT_Across_Quantile_Revised_Herit.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_DAT_Across_Quantile_Revised_Herit.csv'))