library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)

Analysis of Variance - AM4 & HM4

These ANOVA models quantify the extent of a Genotype × Time interaction at the other 99 quantile levels not tested by AM1 and HM1, but the formulas are identical.

# Note - the distributional data files are too large to be stored on GitHub. Once you download them, save them to their own directory and replace their path below for data_path_server.
# Replace data_path with the folder location where you downloaded all other data related to the repository.

data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'
data_path_server <- '//murray_lab/FLIGHTS2/Processed2/Texas/CS24/MULTI/CS24-STEL/Data_Products/Individual_VIs/'
RData_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/RData/'
csvs <- list.files(path = data_path_server, pattern = '\\.csv$')
all_vis <- lapply(strsplit(as.character(csvs), '-'), '[', 6) %>% unlist()

# Changing the csvs vector to only contain one element to reduce the duration of producing this RMarkdown file. If you'd like to run the entire loop with all VIs, comment out the next line
csvs <- csvs[1]

# We'll also only run a subset of quantile levels to speed up computation time. Comment out this line too if you'd like to run the entire loop, and also UNCOMMENT the line of code that's annotated with # UNCOMMENT HERE
probabilities <- c(0.47, 0.48, 0.49)

Loop to run ANOVA models with Genotype × Time interaction at all 100 quantile levels

# Initialize empty lists to store results
G_list <- list()
VC_list <- list()

for (i in 1:length(csvs)) {

  current_vi <- all_vis[i]
  print(paste('Current VI:', current_vi))

  # Read data from CSV files
  print(paste('Loading VI data frame for', current_vi))
  df <- fread(paste0(data_path_server, 'CS24-STEL-MULTI-RTK-Distributions-', current_vi, '-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
  print(paste0('VI data frame loaded for ', current_vi, '!'))

  # Remove low-quality DATs
  DATs_exclude <- c(46, 71:72, 99:100)
  df <- df %>% filter(!DAT %in% DATs_exclude)

  # Remove plant that died early in the season
  df <- df %>% filter(!Row_Range == '413_15')

  # Basic plot for inspection of data
  sort(unique(df$DAT))
  df.plot <- df %>% filter(Pedigree == 5016, DAT == 140)
  plot(df.plot$probability, df.plot$quantile)

  # Vector for probability steps
  #probabilities <- sort(unique(df$probability)) # UNCOMMENT HERE

  # Loop over each environment
  for (prob in probabilities) {
  
    # Subset data for the current DAT
    df.subset <- df[df$probability == prob, ]
    
    # Convert variables to appropriate data types
    df.subset$Range <- as.factor(df.subset$Range)
    df.subset$Row <- as.factor(df.subset$Row)
    df.subset$Replicate <- as.factor(df.subset$Replicate)
    df.subset$Pedigree <- as.factor(df.subset$Pedigree)
    df.subset$DAT <- as.factor(df.subset$DAT)

    # Progress report
    print(paste('ANOVA Model:', 'Quantile level', prob, 'VI', current_vi))
      
    # Fit the linear mixed-effects model after setting "Inf" and NaN as NA
    response <- df.subset$quantile
    response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
      
    anova <- lmer(response ~
                    (1 | Pedigree) +
                    (1 | DAT) +
                    (1 | Pedigree:DAT) +
                    (1 | Range:DAT) +
                    (1 | Row:DAT) +
                    (1 | Replicate:DAT),
                  data = df.subset)
    
    # Calculate RMSE
    rmse_value <- sqrt(mean(residuals(anova)^2))
    
    # Calculate R-squared
    R <- MuMIn::r.squaredGLMM(anova)[, "R2c"]  # Conditional R-squared
    
    # Extract variance components
    VC <- as.data.frame(VarCorr(anova))
    VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
    
    # Calculate heritability
    nRep <- length(unique(df.subset$Replicate))
    nFlight <- length(unique(df.subset$DAT))
    Vg <- VC[VC$grp == "Pedigree", "vcov"]
    Vgf <- VC[VC$grp == "Pedigree:DAT", "vcov"]
    Ve <- VC[VC$grp == "Residual", "vcov"]
    #heritability <- Vg / (Vg + (Vgf / nFlight) + (Ve / (nRep * nFlight)))
    #heritability <- round(heritability, 3)
    heritability <- Vg / (Vg + (Vgf) + (Ve / (nRep)))
    heritability <- round(heritability, 3)
      
    # Add additional information to VC data frame
    VC$Rmse <- rmse_value
    VC$Heritability <- heritability
    VC$R_squared <- R
    VC$Vegetation.Index <- current_vi
    print(VC)
      
    # Store VC in the list
    VC_list[[paste(prob, current_vi, sep = '_')]] <- VC
      
    # Extract random effects for 'Pedigree'
    G <- coef(anova)$Pedigree
    G$Pedigree <- rownames(G)
      
    # Store G in the list
    G_list[[paste(prob, current_vi, sep = '_')]] <- G

  }
  # Save RData for current loop iteration
  save.image(paste0(RData_path, 'ANOVA_Within_Quantile_Across_DAT_', current_vi, '.RData'))
}
## [1] "Current VI: BCC"
## [1] "Loading VI data frame for BCC"
## [1] "VI data frame loaded for BCC!"

## [1] "ANOVA Model: Quantile level 0.47 VI BCC"
##             grp        var1 var2         vcov       sdcor Percent        Rmse
## 1       Row:DAT (Intercept) <NA> 3.126227e-05 0.005591267   14.25 0.006459121
## 2 Replicate:DAT (Intercept) <NA> 9.388234e-07 0.000968929    0.43 0.006459121
## 3  Pedigree:DAT (Intercept) <NA> 2.134529e-06 0.001461003    0.97 0.006459121
## 4     Range:DAT (Intercept) <NA> 1.273382e-06 0.001128442    0.58 0.006459121
## 5           DAT (Intercept) <NA> 1.260672e-04 0.011227967   57.46 0.006459121
## 6      Pedigree (Intercept) <NA> 9.140641e-06 0.003023349    4.17 0.006459121
## 7      Residual        <NA> <NA> 4.857686e-05 0.006969710   22.14 0.006459121
##   Heritability R_squared Vegetation.Index
## 1        0.709 0.7785859              BCC
## 2        0.709 0.7785859              BCC
## 3        0.709 0.7785859              BCC
## 4        0.709 0.7785859              BCC
## 5        0.709 0.7785859              BCC
## 6        0.709 0.7785859              BCC
## 7        0.709 0.7785859              BCC
## [1] "ANOVA Model: Quantile level 0.48 VI BCC"
##             grp        var1 var2         vcov        sdcor Percent        Rmse
## 1       Row:DAT (Intercept) <NA> 3.124878e-05 0.0055900609   14.24 0.006464575
## 2 Replicate:DAT (Intercept) <NA> 9.453900e-07 0.0009723117    0.43 0.006464575
## 3  Pedigree:DAT (Intercept) <NA> 2.138514e-06 0.0014623658    0.97 0.006464575
## 4     Range:DAT (Intercept) <NA> 1.291790e-06 0.0011365694    0.59 0.006464575
## 5           DAT (Intercept) <NA> 1.259221e-04 0.0112215006   57.40 0.006464575
## 6      Pedigree (Intercept) <NA> 9.175287e-06 0.0030290736    4.18 0.006464575
## 7      Residual        <NA> <NA> 4.866146e-05 0.0069757770   22.18 0.006464575
##   Heritability R_squared Vegetation.Index
## 1        0.709 0.7781898              BCC
## 2        0.709 0.7781898              BCC
## 3        0.709 0.7781898              BCC
## 4        0.709 0.7781898              BCC
## 5        0.709 0.7781898              BCC
## 6        0.709 0.7781898              BCC
## 7        0.709 0.7781898              BCC
## [1] "ANOVA Model: Quantile level 0.49 VI BCC"
##             grp        var1 var2         vcov        sdcor Percent        Rmse
## 1       Row:DAT (Intercept) <NA> 3.122558e-05 0.0055879857   14.23 0.006468996
## 2 Replicate:DAT (Intercept) <NA> 9.606558e-07 0.0009801305    0.44 0.006468996
## 3  Pedigree:DAT (Intercept) <NA> 2.144654e-06 0.0014644639    0.98 0.006468996
## 4     Range:DAT (Intercept) <NA> 1.312367e-06 0.0011455858    0.60 0.006468996
## 5           DAT (Intercept) <NA> 1.258411e-04 0.0112178920   57.35 0.006468996
## 6      Pedigree (Intercept) <NA> 9.207038e-06 0.0030343102    4.20 0.006468996
## 7      Residual        <NA> <NA> 4.873520e-05 0.0069810603   22.21 0.006468996
##   Heritability R_squared Vegetation.Index
## 1         0.71 0.7778975              BCC
## 2         0.71 0.7778975              BCC
## 3         0.71 0.7778975              BCC
## 4         0.71 0.7778975              BCC
## 5         0.71 0.7778975              BCC
## 6         0.71 0.7778975              BCC
## 7         0.71 0.7778975              BCC

Save the ANOVA results

# Aggregate the results
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)

# View the first few rows of the results
head(G_df)
##               (Intercept) Pedigree
## 0.47_BCC.5001   0.3201816     5001
## 0.47_BCC.5002   0.3210825     5002
## 0.47_BCC.5003   0.3183686     5003
## 0.47_BCC.5004   0.3173446     5004
## 0.47_BCC.5005   0.3177650     5005
## 0.47_BCC.5006   0.3209569     5006
head(VC_df)
##                      grp        var1 var2         vcov       sdcor Percent
## 0.47_BCC.1       Row:DAT (Intercept) <NA> 3.126227e-05 0.005591267   14.25
## 0.47_BCC.2 Replicate:DAT (Intercept) <NA> 9.388234e-07 0.000968929    0.43
## 0.47_BCC.3  Pedigree:DAT (Intercept) <NA> 2.134529e-06 0.001461003    0.97
## 0.47_BCC.4     Range:DAT (Intercept) <NA> 1.273382e-06 0.001128442    0.58
## 0.47_BCC.5           DAT (Intercept) <NA> 1.260672e-04 0.011227967   57.46
## 0.47_BCC.6      Pedigree (Intercept) <NA> 9.140641e-06 0.003023349    4.17
##                   Rmse Heritability R_squared Vegetation.Index
## 0.47_BCC.1 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.2 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.3 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.4 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.5 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.6 0.006459121        0.709 0.7785859              BCC
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$Prob_VI <- rownames(VC_df)
VC_df$Probability <- lapply(strsplit(as.character(VC_df$Prob_VI), '_'), '[', 1) %>% unlist() %>% as.numeric()

colnames(G_df)[1] <- 'BLUP'
G_df$Prob_VI.Pedigree <- rownames(G_df)
G_df$VI.Pedigree <- lapply(strsplit(as.character(G_df$Prob_VI.Pedigree), '_'), '[', 2) %>% unlist()
G_df$Probability <- lapply(strsplit(as.character(G_df$Prob_VI.Pedigree), '_'), '[', 1) %>% unlist() %>% as.numeric()
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$VI.Pedigree), '\\.'), '[', 1)

# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Within_Quantile_Across_DAT_Revised_Herit.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_Quantile_Across_DAT_Revised_Herit.csv'))