Loop to run ANOVA models with Genotype × Time interaction at all 100 quantile levels

# Initialize empty lists to store results
G_list <- list()
VC_list <- list()

for (i in 1:length(csvs)) {

  current_vi <- all_vis[i]
  print(paste('Current VI:', current_vi))

  # Read data from CSV files
  print(paste('Loading VI data frame for', current_vi))
  df <- fread(paste0(data_path_server, 'CS24-STEL-MULTI-RTK-Distributions-', current_vi, '-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
  print(paste0('VI data frame loaded for ', current_vi, '!'))

  # Remove low-quality DATs
  DATs_exclude <- c(46, 71:72, 99:100)
  df <- df %>% filter(!DAT %in% DATs_exclude)

  # Remove plant that died early in the season
  df <- df %>% filter(!Row_Range == '413_15')

  # Basic plot for inspection of data
  sort(unique(df$DAT))
  df.plot <- df %>% filter(Pedigree == 5016, DAT == 140)
  plot(df.plot$probability, df.plot$quantile)

  # Vector for probability steps
  #probabilities <- sort(unique(df$probability)) # UNCOMMENT HERE

  # Loop over each environment
  for (prob in probabilities) {
  
    # Subset data for the current DAT
    df.subset <- df[df$probability == prob, ]
    
    # Convert variables to appropriate data types
    df.subset$Range <- as.factor(df.subset$Range)
    df.subset$Row <- as.factor(df.subset$Row)
    df.subset$Replicate <- as.factor(df.subset$Replicate)
    df.subset$Pedigree <- as.factor(df.subset$Pedigree)
    df.subset$DAT <- as.factor(df.subset$DAT)

    # Progress report
    print(paste('ANOVA Model:', 'Quantile level', prob, 'VI', current_vi))
      
    # Fit the linear mixed-effects model after setting "Inf" and NaN as NA
    response <- df.subset$quantile
    response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
      
    anova <- lmer(response ~
                    (1 | Pedigree) +
                    (1 | DAT) +
                    (1 | Pedigree:DAT) +
                    (1 | Range:DAT) +
                    (1 | Row:DAT) +
                    (1 | Replicate:DAT),
                  data = df.subset)
    
    # Calculate RMSE
    rmse_value <- sqrt(mean(residuals(anova)^2))
    
    # Calculate R-squared
    R <- MuMIn::r.squaredGLMM(anova)[, "R2c"]  # Conditional R-squared
    
    # Extract variance components
    VC <- as.data.frame(VarCorr(anova))
    VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
    
    # Calculate heritability
    nRep <- length(unique(df.subset$Replicate))
    nFlight <- length(unique(df.subset$DAT))
    Vg <- VC[VC$grp == "Pedigree", "vcov"]
    Vgf <- VC[VC$grp == "Pedigree:DAT", "vcov"]
    Ve <- VC[VC$grp == "Residual", "vcov"]
    #heritability <- Vg / (Vg + (Vgf / nFlight) + (Ve / (nRep * nFlight)))
    #heritability <- round(heritability, 3)
    heritability <- Vg / (Vg + (Vgf) + (Ve / (nRep)))
    heritability <- round(heritability, 3)
      
    # Add additional information to VC data frame
    VC$Rmse <- rmse_value
    VC$Heritability <- heritability
    VC$R_squared <- R
    VC$Vegetation.Index <- current_vi
    print(VC)
      
    # Store VC in the list
    VC_list[[paste(prob, current_vi, sep = '_')]] <- VC
      
    # Extract random effects for 'Pedigree'
    G <- coef(anova)$Pedigree
    G$Pedigree <- rownames(G)
      
    # Store G in the list
    G_list[[paste(prob, current_vi, sep = '_')]] <- G

  }
  # Save RData for current loop iteration
  save.image(paste0(RData_path, 'ANOVA_Within_Quantile_Across_DAT_', current_vi, '.RData'))
}

## [1] "Current VI: BCC"
## [1] "Loading VI data frame for BCC"

## [1] "VI data frame loaded for BCC!"

## [1] "ANOVA Model: Quantile level 0.47 VI BCC"
##             grp        var1 var2         vcov       sdcor Percent        Rmse
## 1       Row:DAT (Intercept) <NA> 3.126227e-05 0.005591267   14.25 0.006459121
## 2 Replicate:DAT (Intercept) <NA> 9.388234e-07 0.000968929    0.43 0.006459121
## 3  Pedigree:DAT (Intercept) <NA> 2.134529e-06 0.001461003    0.97 0.006459121
## 4     Range:DAT (Intercept) <NA> 1.273382e-06 0.001128442    0.58 0.006459121
## 5           DAT (Intercept) <NA> 1.260672e-04 0.011227967   57.46 0.006459121
## 6      Pedigree (Intercept) <NA> 9.140641e-06 0.003023349    4.17 0.006459121
## 7      Residual        <NA> <NA> 4.857686e-05 0.006969710   22.14 0.006459121
##   Heritability R_squared Vegetation.Index
## 1        0.709 0.7785859              BCC
## 2        0.709 0.7785859              BCC
## 3        0.709 0.7785859              BCC
## 4        0.709 0.7785859              BCC
## 5        0.709 0.7785859              BCC
## 6        0.709 0.7785859              BCC
## 7        0.709 0.7785859              BCC
## [1] "ANOVA Model: Quantile level 0.48 VI BCC"
##             grp        var1 var2         vcov        sdcor Percent        Rmse
## 1       Row:DAT (Intercept) <NA> 3.124878e-05 0.0055900609   14.24 0.006464575
## 2 Replicate:DAT (Intercept) <NA> 9.453900e-07 0.0009723117    0.43 0.006464575
## 3  Pedigree:DAT (Intercept) <NA> 2.138514e-06 0.0014623658    0.97 0.006464575
## 4     Range:DAT (Intercept) <NA> 1.291790e-06 0.0011365694    0.59 0.006464575
## 5           DAT (Intercept) <NA> 1.259221e-04 0.0112215006   57.40 0.006464575
## 6      Pedigree (Intercept) <NA> 9.175287e-06 0.0030290736    4.18 0.006464575
## 7      Residual        <NA> <NA> 4.866146e-05 0.0069757770   22.18 0.006464575
##   Heritability R_squared Vegetation.Index
## 1        0.709 0.7781898              BCC
## 2        0.709 0.7781898              BCC
## 3        0.709 0.7781898              BCC
## 4        0.709 0.7781898              BCC
## 5        0.709 0.7781898              BCC
## 6        0.709 0.7781898              BCC
## 7        0.709 0.7781898              BCC
## [1] "ANOVA Model: Quantile level 0.49 VI BCC"
##             grp        var1 var2         vcov        sdcor Percent        Rmse
## 1       Row:DAT (Intercept) <NA> 3.122558e-05 0.0055879857   14.23 0.006468996
## 2 Replicate:DAT (Intercept) <NA> 9.606558e-07 0.0009801305    0.44 0.006468996
## 3  Pedigree:DAT (Intercept) <NA> 2.144654e-06 0.0014644639    0.98 0.006468996
## 4     Range:DAT (Intercept) <NA> 1.312367e-06 0.0011455858    0.60 0.006468996
## 5           DAT (Intercept) <NA> 1.258411e-04 0.0112178920   57.35 0.006468996
## 6      Pedigree (Intercept) <NA> 9.207038e-06 0.0030343102    4.20 0.006468996
## 7      Residual        <NA> <NA> 4.873520e-05 0.0069810603   22.21 0.006468996
##   Heritability R_squared Vegetation.Index
## 1         0.71 0.7778975              BCC
## 2         0.71 0.7778975              BCC
## 3         0.71 0.7778975              BCC
## 4         0.71 0.7778975              BCC
## 5         0.71 0.7778975              BCC
## 6         0.71 0.7778975              BCC
## 7         0.71 0.7778975              BCC

Save the ANOVA results

# Aggregate the results
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)

# View the first few rows of the results
head(G_df)

##               (Intercept) Pedigree
## 0.47_BCC.5001   0.3201816     5001
## 0.47_BCC.5002   0.3210825     5002
## 0.47_BCC.5003   0.3183686     5003
## 0.47_BCC.5004   0.3173446     5004
## 0.47_BCC.5005   0.3177650     5005
## 0.47_BCC.5006   0.3209569     5006

head(VC_df)

##                      grp        var1 var2         vcov       sdcor Percent
## 0.47_BCC.1       Row:DAT (Intercept) <NA> 3.126227e-05 0.005591267   14.25
## 0.47_BCC.2 Replicate:DAT (Intercept) <NA> 9.388234e-07 0.000968929    0.43
## 0.47_BCC.3  Pedigree:DAT (Intercept) <NA> 2.134529e-06 0.001461003    0.97
## 0.47_BCC.4     Range:DAT (Intercept) <NA> 1.273382e-06 0.001128442    0.58
## 0.47_BCC.5           DAT (Intercept) <NA> 1.260672e-04 0.011227967   57.46
## 0.47_BCC.6      Pedigree (Intercept) <NA> 9.140641e-06 0.003023349    4.17
##                   Rmse Heritability R_squared Vegetation.Index
## 0.47_BCC.1 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.2 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.3 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.4 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.5 0.006459121        0.709 0.7785859              BCC
## 0.47_BCC.6 0.006459121        0.709 0.7785859              BCC

# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$Prob_VI <- rownames(VC_df)
VC_df$Probability <- lapply(strsplit(as.character(VC_df$Prob_VI), '_'), '[', 1) %>% unlist() %>% as.numeric()

colnames(G_df)[1] <- 'BLUP'
G_df$Prob_VI.Pedigree <- rownames(G_df)
G_df$VI.Pedigree <- lapply(strsplit(as.character(G_df$Prob_VI.Pedigree), '_'), '[', 2) %>% unlist()
G_df$Probability <- lapply(strsplit(as.character(G_df$Prob_VI.Pedigree), '_'), '[', 1) %>% unlist() %>% as.numeric()
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$VI.Pedigree), '\\.'), '[', 1)

# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Within_Quantile_Across_DAT_Revised_Herit.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_Quantile_Across_DAT_Revised_Herit.csv'))

ANOVA Within Quantile Level Across DAT

Aaron J. DeSalvio

2025-09-06

Analysis of Variance - AM4 & HM4

Loop to run ANOVA models with Genotype × Time interaction at all 100 quantile levels

Save the ANOVA results