library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)

Analysis of Variance - AM3 & HM3

These ANOVA models are one of the key analyses presented in this paper. They explore heritability and genotypic variance at every quantile level WITHIN flight dates - proposing the concept of within-day variance decomposition. The ANOVA model itself is identical to AM2 (and HM3 is identical to HM2), but the difference is that instead of using this model only at the median (or mean) vegetation index values for each flight date, we’re now investigating this model at all the remaining quantile levels from 0.01, 0.02, … , 1.00.

# Set working directory and source external functions
data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'
data_path_server <- '//murray_lab/FLIGHTS2/Processed2/Texas/CS24/MULTI/CS24-STEL/Data_Products/Individual_VIs/'
RData_path <- 'C:/Users/aaron.desalvio/Downloads/Temp/'

# Note - the distributional data files are too large to be stored on GitHub. Once you download them, save them to their own directory and replace the path above for data_path_server.
# The RData_path can be changed to a location where you'd like to store the RData files associated with each loop iteration. This is done because the computations take a long time and in the event of a crash or power outage, the loop can be restarted with the next vegetation index based on the last RData file that was exported.

csvs <- list.files(path = data_path_server, pattern = '\\.csv$')
all_vis <- lapply(strsplit(as.character(csvs), '-'), '[', 6) %>% unlist()
CSV_VI <- data.frame(CSV = csvs, VI = all_vis)

# Make sure VIs and CSVs match
CSV_VI$Check <- lapply(strsplit(as.character(CSV_VI$CSV), '-'), '[', 6) %>% unlist()
identical(CSV_VI$Check, CSV_VI$VI)
## [1] TRUE
# Changing the csvs vector to only contain one element to reduce the duration of producing this RMarkdown file. If you'd like to run the entire loop with all VIs, comment out the next line
csvs <- csvs[1]

# We'll also change the number of DATs to be just one - comment out this line too and UNCOMMENT the line within the loop (marked with UNCOMMENT HERE #1) if you'd like to run the entire loop.
dat.loop <- 118

# Lastly, we'll work with one vegetation index for brevity as well - comment out this line and UNCOMMENT the line within the loop (marked with UNCOMMENT HERE #2) if you'd like to run the entire loop.
VI.Names <- 'BCC'

Loop to run ANOVA models for each VI/DAT/Quantile level combination

# Initialize empty lists to store results
G_list <- list()
VC_list <- list()

for (i in 1:length(csvs)) {

  current_vi <- all_vis[i]
  print(paste('Current VI:', current_vi))

  # Read data from CSV files
  print(paste('Loading VI data frame for', current_vi))
  df <- fread(paste0(data_path_server, 'CS24-STEL-MULTI-RTK-Distributions-', current_vi, '-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
  print(paste0('VI data frame loaded for ', current_vi, '!'))

  # Remove low-quality DATs
  DATs_exclude <- c(46, 71:72, 99:100)
  df <- df %>% filter(!DAT %in% DATs_exclude)

  # Remove plant that died early in the season
  df <- df %>% filter(!Row_Range == '413_15')

  # Vector containing DATs to loop through
  #dat.loop <- sort(unique(df$DAT)) # UNCOMMENT HERE #1

  # Vector for inner loop containing VI names
  # VI.Names <- sort(unique(df$vi_name)) # UNCOMMENT HERE #2

  # Vector for probability steps
  probabilities <- sort(unique(df$probability))

  # Loop over each environment
  for (dat in dat.loop) {
  
    # Subset data for the current DAT
    df.subset <- df[df$DAT == dat, ]
  
    for (vi.name in VI.Names) {
    
      for (prob in probabilities) {
      
        # Progress report
        print(paste('ANOVA Model:', 'DAT', dat, 'Quantile level', prob, 'VI', vi.name))
      
        # Subset data for the current probability step
        df.subset.prob <- df.subset[df.subset$probability == prob, ]
        
        # Convert variables to appropriate data types
        df.subset.prob$Range <- as.factor(df.subset.prob$Range)
        df.subset.prob$Row <- as.factor(df.subset.prob$Row)
        df.subset.prob$Replicate <- as.factor(df.subset.prob$Replicate)
        df.subset.prob$Pedigree <- as.factor(df.subset.prob$Pedigree)
      
        # Fit the linear mixed-effects model after setting "Inf" and NaN as NA
        response <- df.subset.prob$quantile
        response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
      
        anova <- lmer(response ~
                        (1 | Pedigree) +
                        (1 | Range) +
                        (1 | Row) +
                        (1 | Replicate),
                      data = df.subset.prob)
      
        # Calculate RMSE
        rmse_value <- sqrt(mean(residuals(anova)^2))
      
        # Calculate R-squared
        R <- MuMIn::r.squaredGLMM(anova)[, "R2c"]  # Conditional R-squared
      
        # Extract variance components
        VC <- as.data.frame(VarCorr(anova))
        VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
      
        # Calculate heritability
        nRep <- length(unique(df.subset.prob$Replicate))
        Vg <- VC[VC$grp == "Pedigree", "vcov"]
        Ve <- VC[VC$grp == "Residual", "vcov"]
        heritability <- Vg / (Vg + (Ve / nRep))
        heritability <- round(heritability, 3)
      
        # Add additional information to VC data frame
        VC$Rmse <- rmse_value
        VC$Heritability <- heritability
        VC$R_squared <- R
        VC$DAT <- dat
        VC$Vegetation.Index <- vi.name
      
        # Store VC in the list
        VC_list[[paste(dat, prob, vi.name, sep = '_')]] <- VC
      
        # Extract random effects for 'Pedigree'
        G <- coef(anova)$Pedigree
        G$Pedigree <- rownames(G)
        G$DAT <- dat
      
        # Store G in the list
        G_list[[paste(dat, prob, vi.name, sep = '_')]] <- G

      }
    }
  }
  # Save RData for current loop iteration
  save.image(paste0(RData_path, 'ANOVA_per_Quantile_Within_DAT_', current_vi, '.RData'))
}
## [1] "Current VI: BCC"
## [1] "Loading VI data frame for BCC"
## [1] "VI data frame loaded for BCC!"
## [1] "ANOVA Model: DAT 118 Quantile level 0.01 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.02 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.03 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.04 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.05 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.06 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.07 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.08 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.09 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.1 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.11 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.12 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.13 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.14 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.15 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.16 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.17 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.18 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.19 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.2 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.21 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.22 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.23 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.24 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.25 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.26 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.27 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.28 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.29 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.3 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.31 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.32 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.33 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.34 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.35 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.36 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.37 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.38 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.39 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.4 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.41 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.42 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.43 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.44 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.45 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.46 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.47 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.48 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.49 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.5 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.51 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.52 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.53 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.54 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.55 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.56 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.57 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.58 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.59 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.6 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.61 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.62 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.63 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.64 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.65 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.66 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.67 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.68 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.69 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.7 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.71 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.72 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.73 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.74 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.75 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.76 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.77 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.78 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.79 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.8 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.81 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.82 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.83 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.84 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.85 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.86 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.87 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.88 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.89 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.9 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.91 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.92 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.93 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.94 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.95 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.96 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.97 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.98 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 0.99 VI BCC"
## [1] "ANOVA Model: DAT 118 Quantile level 1 VI BCC"
# Combine the lists into data frames
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)

# View the first few rows of the results
head(G_df)
##                   (Intercept) Pedigree DAT
## 118_0.01_BCC.5001   0.2659521     5001 118
## 118_0.01_BCC.5002   0.2675456     5002 118
## 118_0.01_BCC.5003   0.2654205     5003 118
## 118_0.01_BCC.5004   0.2602625     5004 118
## 118_0.01_BCC.5005   0.2643899     5005 118
## 118_0.01_BCC.5006   0.2708853     5006 118
head(VC_df)
##                      grp        var1 var2         vcov       sdcor Percent
## 118_0.01_BCC.1       Row (Intercept) <NA> 1.015454e-04 0.010076972   38.34
## 118_0.01_BCC.2 Replicate (Intercept) <NA> 0.000000e+00 0.000000000    0.00
## 118_0.01_BCC.3  Pedigree (Intercept) <NA> 2.002861e-05 0.004475333    7.56
## 118_0.01_BCC.4     Range (Intercept) <NA> 6.783357e-06 0.002604488    2.56
## 118_0.01_BCC.5  Residual        <NA> <NA> 1.364964e-04 0.011683165   51.54
## 118_0.02_BCC.1       Row (Intercept) <NA> 8.467381e-05 0.009201837   39.21
##                      Rmse Heritability R_squared DAT Vegetation.Index
## 118_0.01_BCC.1 0.01081575        0.815 0.4846349 118              BCC
## 118_0.01_BCC.2 0.01081575        0.815 0.4846349 118              BCC
## 118_0.01_BCC.3 0.01081575        0.815 0.4846349 118              BCC
## 118_0.01_BCC.4 0.01081575        0.815 0.4846349 118              BCC
## 118_0.01_BCC.5 0.01081575        0.815 0.4846349 118              BCC
## 118_0.02_BCC.1 0.00936769        0.851 0.5199328 118              BCC
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$DAT_Prob_VI <- rownames(VC_df)
VC_df$Probability <- lapply(strsplit(as.character(VC_df$DAT_Prob_VI), '_'), '[', 2) %>% unlist() %>% as.numeric()

colnames(G_df)[1] <- 'Quantile.BLUP'
G_df$DAT_Prob_VI.Pedigree <- rownames(G_df)
G_df$VI.Pedigree <- lapply(strsplit(as.character(G_df$DAT_Prob_VI.Pedigree), '_'), '[', 3) %>% unlist()
G_df$Probability <- lapply(strsplit(as.character(G_df$DAT_Prob_VI.Pedigree), '_'), '[', 2) %>% unlist() %>% as.numeric()
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$VI.Pedigree), '\\.'), '[', 1)

# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_per_Quantile_Within_DAT.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_per_Quantile_Within_DAT.csv'))