library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)
These ANOVA models are one of the key analyses presented in this paper. They explore heritability and genotypic variance at every quantile level WITHIN flight dates - proposing the concept of within-day variance decomposition. The ANOVA model itself is identical to AM2 (and HM3 is identical to HM2), but the difference is that instead of using this model only at the median (or mean) vegetation index values for each flight date, we’re now investigating this model at all the remaining quantile levels from 0.01, 0.02, … , 1.00.
# Set working directory and source external functions
data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'
data_path_server <- '//murray_lab/FLIGHTS2/Processed2/Texas/CS24/MULTI/CS24-STEL/Data_Products/Individual_VIs/'
RData_path <- 'C:/Users/aaron.desalvio/Downloads/Temp/'
# Note - the distributional data files are too large to be stored on GitHub. Once you download them, save them to their own directory and replace the path above for data_path_server.
# The RData_path can be changed to a location where you'd like to store the RData files associated with each loop iteration. This is done because the computations take a long time and in the event of a crash or power outage, the loop can be restarted with the next vegetation index based on the last RData file that was exported.
csvs <- list.files(path = data_path_server, pattern = '\\.csv$')
all_vis <- lapply(strsplit(as.character(csvs), '-'), '[', 6) %>% unlist()
CSV_VI <- data.frame(CSV = csvs, VI = all_vis)
# Make sure VIs and CSVs match
CSV_VI$Check <- lapply(strsplit(as.character(CSV_VI$CSV), '-'), '[', 6) %>% unlist()
identical(CSV_VI$Check, CSV_VI$VI)
## [1] TRUE
# Changing the csvs vector to only contain one element to reduce the duration of producing this RMarkdown file. If you'd like to run the entire loop with all VIs, comment out the next line
csvs <- csvs[1]
# We'll also change the number of DATs to be just one - comment out this line too and UNCOMMENT the line within the loop (marked with UNCOMMENT HERE #1) if you'd like to run the entire loop.
dat.loop <- 118
# Lastly, we'll work with one vegetation index for brevity as well - comment out this line and UNCOMMENT the line within the loop (marked with UNCOMMENT HERE #2) if you'd like to run the entire loop.
VI.Names <- 'BCC'
# Initialize empty lists to store results
G_list <- list()
VC_list <- list()
for (i in 1:length(csvs)) {
current_vi <- all_vis[i]
print(paste('Current VI:', current_vi))
# Read data from CSV files
print(paste('Loading VI data frame for', current_vi))
df <- fread(paste0(data_path_server, 'CS24-STEL-MULTI-RTK-Distributions-', current_vi, '-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
print(paste0('VI data frame loaded for ', current_vi, '!'))
# Remove low-quality DATs
DATs_exclude <- c(46, 71:72, 99:100)
df <- df %>% filter(!DAT %in% DATs_exclude)
# Remove plant that died early in the season
df <- df %>% filter(!Row_Range == '413_15')
# Vector containing DATs to loop through
#dat.loop <- sort(unique(df$DAT)) # UNCOMMENT HERE #1
# Vector for inner loop containing VI names
# VI.Names <- sort(unique(df$vi_name)) # UNCOMMENT HERE #2
# Vector for probability steps
probabilities <- sort(unique(df$probability))
# Loop over each environment
for (dat in dat.loop) {
# Subset data for the current DAT
df.subset <- df[df$DAT == dat, ]
for (vi.name in VI.Names) {
for (prob in probabilities) {
# Progress report
print(paste('ANOVA Model:', 'DAT', dat, 'Quantile level', prob, 'VI', vi.name))
# Subset data for the current probability step
df.subset.prob <- df.subset[df.subset$probability == prob, ]
# Convert variables to appropriate data types
df.subset.prob$Range <- as.factor(df.subset.prob$Range)
df.subset.prob$Row <- as.factor(df.subset.prob$Row)
df.subset.prob$Replicate <- as.factor(df.subset.prob$Replicate)
df.subset.prob$Pedigree <- as.factor(df.subset.prob$Pedigree)
# Fit the linear mixed-effects model after setting "Inf" and NaN as NA
response <- df.subset.prob$quantile
response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
anova <- lmer(response ~
(1 | Pedigree) +
(1 | Range) +
(1 | Row) +
(1 | Replicate),
data = df.subset.prob)
# Calculate RMSE
rmse_value <- sqrt(mean(residuals(anova)^2))
# Calculate R-squared
R <- MuMIn::r.squaredGLMM(anova)[, "R2c"] # Conditional R-squared
# Extract variance components
VC <- as.data.frame(VarCorr(anova))
VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
# Calculate heritability
nRep <- length(unique(df.subset.prob$Replicate))
Vg <- VC[VC$grp == "Pedigree", "vcov"]
Ve <- VC[VC$grp == "Residual", "vcov"]
heritability <- Vg / (Vg + (Ve / nRep))
heritability <- round(heritability, 3)
# Add additional information to VC data frame
VC$Rmse <- rmse_value
VC$Heritability <- heritability
VC$R_squared <- R
VC$DAT <- dat
VC$Vegetation.Index <- vi.name
# Store VC in the list
VC_list[[paste(dat, prob, vi.name, sep = '_')]] <- VC
# Extract random effects for 'Pedigree'
G <- coef(anova)$Pedigree
G$Pedigree <- rownames(G)
G$DAT <- dat
# Store G in the list
G_list[[paste(dat, prob, vi.name, sep = '_')]] <- G
}
}
}
# Save RData for current loop iteration
save.image(paste0(RData_path, 'ANOVA_per_Quantile_Within_DAT_', current_vi, '.RData'))
}
## [1] "Current VI: BCC"
## [1] "Loading VI data frame for BCC"
## [1] "VI data frame loaded for BCC!"
## [1] "ANOVA Model: DAT 118 Quantile level 0.01 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.02 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.03 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.04 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.05 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.06 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.07 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.08 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.09 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.1 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.11 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.12 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.13 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.14 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.15 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.16 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.17 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.18 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.19 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.2 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.21 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.22 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.23 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.24 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.25 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.26 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.27 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.28 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.29 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.3 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.31 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.32 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.33 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.34 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.35 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.36 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.37 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.38 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.39 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.4 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.41 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.42 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.43 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.44 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.45 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.46 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.47 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.48 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.49 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.5 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.51 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.52 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.53 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.54 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.55 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.56 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.57 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.58 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.59 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.6 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.61 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.62 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.63 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.64 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.65 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.66 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.67 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.68 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.69 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.7 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.71 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.72 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.73 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.74 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.75 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.76 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.77 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.78 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.79 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.8 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.81 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.82 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.83 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.84 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.85 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.86 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.87 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.88 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.89 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.9 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.91 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.92 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.93 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.94 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.95 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.96 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.97 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.98 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.99 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 1 VI BCC"
# Combine the lists into data frames
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)
# View the first few rows of the results
head(G_df)
## (Intercept) Pedigree DAT
## 118_0.01_BCC.5001 0.2659521 5001 118
## 118_0.01_BCC.5002 0.2675456 5002 118
## 118_0.01_BCC.5003 0.2654205 5003 118
## 118_0.01_BCC.5004 0.2602625 5004 118
## 118_0.01_BCC.5005 0.2643899 5005 118
## 118_0.01_BCC.5006 0.2708853 5006 118
head(VC_df)
## grp var1 var2 vcov sdcor Percent
## 118_0.01_BCC.1 Row (Intercept) <NA> 1.015454e-04 0.010076972 38.34
## 118_0.01_BCC.2 Replicate (Intercept) <NA> 0.000000e+00 0.000000000 0.00
## 118_0.01_BCC.3 Pedigree (Intercept) <NA> 2.002861e-05 0.004475333 7.56
## 118_0.01_BCC.4 Range (Intercept) <NA> 6.783357e-06 0.002604488 2.56
## 118_0.01_BCC.5 Residual <NA> <NA> 1.364964e-04 0.011683165 51.54
## 118_0.02_BCC.1 Row (Intercept) <NA> 8.467381e-05 0.009201837 39.21
## Rmse Heritability R_squared DAT Vegetation.Index
## 118_0.01_BCC.1 0.01081575 0.815 0.4846349 118 BCC
## 118_0.01_BCC.2 0.01081575 0.815 0.4846349 118 BCC
## 118_0.01_BCC.3 0.01081575 0.815 0.4846349 118 BCC
## 118_0.01_BCC.4 0.01081575 0.815 0.4846349 118 BCC
## 118_0.01_BCC.5 0.01081575 0.815 0.4846349 118 BCC
## 118_0.02_BCC.1 0.00936769 0.851 0.5199328 118 BCC
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$DAT_Prob_VI <- rownames(VC_df)
VC_df$Probability <- lapply(strsplit(as.character(VC_df$DAT_Prob_VI), '_'), '[', 2) %>% unlist() %>% as.numeric()
colnames(G_df)[1] <- 'Quantile.BLUP'
G_df$DAT_Prob_VI.Pedigree <- rownames(G_df)
G_df$VI.Pedigree <- lapply(strsplit(as.character(G_df$DAT_Prob_VI.Pedigree), '_'), '[', 3) %>% unlist()
G_df$Probability <- lapply(strsplit(as.character(G_df$DAT_Prob_VI.Pedigree), '_'), '[', 2) %>% unlist() %>% as.numeric()
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$VI.Pedigree), '\\.'), '[', 1)
# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_per_Quantile_Within_DAT.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_per_Quantile_Within_DAT.csv'))