Loop to run ANOVA models with Genotype × Quantile interaction at all time points

# Initialize empty lists to store results
G_list <- list()
VC_list <- list()

for (i in 1:length(csvs)) {
  
  current_vi <- all_vis[i]
  print(paste('Current VI:', current_vi))
  
  # Read data from CSV files
  print(paste('Loading VI data frame for', current_vi))
  df <- fread(paste0(data_path_server, 'CS24-STEL-MULTI-RTK-Distributions-', current_vi, '-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
  print(paste0('VI data frame loaded for ', current_vi, '!'))
  
  # Remove low-quality DATs
  DATs_exclude <- c(46, 71:72, 99:100)
  df <- df %>% filter(!DAT %in% DATs_exclude)
  
  # Remove plant that died early in the season
  df <- df %>% filter(!Row_Range == '413_15')
  
  # Basic plot for inspection of data
  sort(unique(df$DAT))
  df.plot <- df %>% filter(Pedigree == 5016, DAT == 140)
  plot(df.plot$probability, df.plot$quantile)
  
  # DAT vector for loop
  # dat.loop <- sort(unique(df$DAT)) # UNCOMMENT HERE
  
  # Loop over each environment
  for (dat in dat.loop) {
    
    # Subset data for the current DAT
    df.subset <- df[df$DAT == dat, ]
    
    # Convert variables to appropriate data types
    df.subset$Range <- as.factor(df.subset$Range)
    df.subset$Row <- as.factor(df.subset$Row)
    df.subset$Replicate <- as.factor(df.subset$Replicate)
    df.subset$Pedigree <- as.factor(df.subset$Pedigree)
    df.subset$probability <- as.factor(df.subset$probability)
    
    # Progress report
    print(paste('ANOVA Model:', 'DAT', dat, 'VI', current_vi))
    
    # Fit the linear mixed-effects model after setting "Inf" and NaN as NA
    response <- df.subset$quantile
    response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
    
    anova <- lmer(response ~
                    (1 | Pedigree) +
                    (1 | probability) +
                    (1 | Pedigree:probability) +
                    (1 | Range:probability) +
                    (1 | Row:probability) +
                    (1 | Replicate:probability),
                  data = df.subset)
    
    # Calculate RMSE
    rmse_value <- sqrt(mean(residuals(anova)^2))
    
    # Calculate R-squared
    R <- MuMIn::r.squaredGLMM(anova)[, "R2c"]  # Conditional R-squared
    
    # Extract variance components
    VC <- as.data.frame(VarCorr(anova))
    VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
    
    # Calculate heritability
    nRep <- length(unique(df.subset$Replicate))
    nProb <- length(unique(df.subset$probability))
    Vg <- VC[VC$grp == "Pedigree", "vcov"]
    Vgp <- VC[VC$grp == "Pedigree:probability", "vcov"]
    Ve <- VC[VC$grp == "Residual", "vcov"]
    #heritability <- Vg / (Vg + (Vgp / nProb) + (Ve / (nRep * nProb)))
    #heritability <- round(heritability, 3)
    heritability <- Vg / (Vg + (Vgp) + (Ve / (nRep)))
    heritability <- round(heritability, 3)
    
    # Add additional information to VC data frame
    VC$Rmse <- rmse_value
    VC$Heritability <- heritability
    VC$R_squared <- R
    VC$Vegetation.Index <- current_vi
    VC$DAT <- dat
    print(VC)
    
    # Store VC in the list
    VC_list[[paste(dat, current_vi, sep = '_')]] <- VC
    
    # Extract random effects for 'Pedigree'
    G <- coef(anova)$Pedigree
    G$Pedigree <- rownames(G)
    
    # Store G in the list
    G_list[[paste(dat, current_vi, sep = '_')]] <- G
    
  }
  # Save RData for current loop iteration
  save.image(paste0(RData_path, 'ANOVA_Within_DAT_Across_Quantile_', current_vi, '.RData'))
}

## [1] "Current VI: BCC"
## [1] "Loading VI data frame for BCC"

## [1] "VI data frame loaded for BCC!"

## [1] "ANOVA Model: DAT 107 VI BCC"

##                     grp        var1 var2         vcov        sdcor Percent
## 1       Row:probability (Intercept) <NA> 2.627023e-05 0.0051254490    3.61
## 2 Replicate:probability (Intercept) <NA> 3.157604e-08 0.0001776965    0.00
## 3  Pedigree:probability (Intercept) <NA> 0.000000e+00 0.0000000000    0.00
## 4     Range:probability (Intercept) <NA> 1.980610e-06 0.0014073415    0.27
## 5           probability (Intercept) <NA> 6.350885e-04 0.0252009626   87.36
## 6              Pedigree (Intercept) <NA> 1.737181e-05 0.0041679508    2.39
## 7              Residual        <NA> <NA> 4.623500e-05 0.0067996325    6.36
##          Rmse Heritability R_squared Vegetation.Index DAT
## 1 0.006399464        0.919 0.9364011              BCC 107
## 2 0.006399464        0.919 0.9364011              BCC 107
## 3 0.006399464        0.919 0.9364011              BCC 107
## 4 0.006399464        0.919 0.9364011              BCC 107
## 5 0.006399464        0.919 0.9364011              BCC 107
## 6 0.006399464        0.919 0.9364011              BCC 107
## 7 0.006399464        0.919 0.9364011              BCC 107
## [1] "ANOVA Model: DAT 108 VI BCC"
##                     grp        var1 var2         vcov       sdcor Percent
## 1       Row:probability (Intercept) <NA> 6.388885e-05 0.007993050    5.67
## 2 Replicate:probability (Intercept) <NA> 3.885413e-06 0.001971145    0.34
## 3  Pedigree:probability (Intercept) <NA> 1.792040e-06 0.001338671    0.16
## 4     Range:probability (Intercept) <NA> 2.949287e-06 0.001717349    0.26
## 5           probability (Intercept) <NA> 9.326327e-04 0.030539035   82.73
## 6              Pedigree (Intercept) <NA> 1.925773e-05 0.004388363    1.71
## 7              Residual        <NA> <NA> 1.029599e-04 0.010146916    9.13
##          Rmse Heritability R_squared Vegetation.Index DAT
## 1 0.009417045        0.787 0.9086721              BCC 108
## 2 0.009417045        0.787 0.9086721              BCC 108
## 3 0.009417045        0.787 0.9086721              BCC 108
## 4 0.009417045        0.787 0.9086721              BCC 108
## 5 0.009417045        0.787 0.9086721              BCC 108
## 6 0.009417045        0.787 0.9086721              BCC 108
## 7 0.009417045        0.787 0.9086721              BCC 108
## [1] "ANOVA Model: DAT 109 VI BCC"

##                     grp        var1 var2         vcov       sdcor Percent
## 1       Row:probability (Intercept) <NA> 3.712641e-05 0.006093145    4.96
## 2 Replicate:probability (Intercept) <NA> 1.362821e-06 0.001167399    0.18
## 3  Pedigree:probability (Intercept) <NA> 0.000000e+00 0.000000000    0.00
## 4     Range:probability (Intercept) <NA> 2.276986e-06 0.001508969    0.30
## 5           probability (Intercept) <NA> 6.429619e-04 0.025356693   85.87
## 6              Pedigree (Intercept) <NA> 1.546753e-05 0.003932878    2.07
## 7              Residual        <NA> <NA> 4.957899e-05 0.007041235    6.62
##          Rmse Heritability R_squared Vegetation.Index DAT
## 1 0.006565553        0.903 0.9337865              BCC 109
## 2 0.006565553        0.903 0.9337865              BCC 109
## 3 0.006565553        0.903 0.9337865              BCC 109
## 4 0.006565553        0.903 0.9337865              BCC 109
## 5 0.006565553        0.903 0.9337865              BCC 109
## 6 0.006565553        0.903 0.9337865              BCC 109
## 7 0.006565553        0.903 0.9337865              BCC 109

Save the ANOVA results

G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)

# View the first few rows of the results
head(G_df)

##              (Intercept) Pedigree
## 107_BCC.5001   0.3339126     5001
## 107_BCC.5002   0.3372073     5002
## 107_BCC.5003   0.3328357     5003
## 107_BCC.5004   0.3340158     5004
## 107_BCC.5005   0.3314073     5005
## 107_BCC.5006   0.3390244     5006

head(VC_df)

##                             grp        var1 var2         vcov        sdcor
## 107_BCC.1       Row:probability (Intercept) <NA> 2.627023e-05 0.0051254490
## 107_BCC.2 Replicate:probability (Intercept) <NA> 3.157604e-08 0.0001776965
## 107_BCC.3  Pedigree:probability (Intercept) <NA> 0.000000e+00 0.0000000000
## 107_BCC.4     Range:probability (Intercept) <NA> 1.980610e-06 0.0014073415
## 107_BCC.5           probability (Intercept) <NA> 6.350885e-04 0.0252009626
## 107_BCC.6              Pedigree (Intercept) <NA> 1.737181e-05 0.0041679508
##           Percent        Rmse Heritability R_squared Vegetation.Index DAT
## 107_BCC.1    3.61 0.006399464        0.919 0.9364011              BCC 107
## 107_BCC.2    0.00 0.006399464        0.919 0.9364011              BCC 107
## 107_BCC.3    0.00 0.006399464        0.919 0.9364011              BCC 107
## 107_BCC.4    0.27 0.006399464        0.919 0.9364011              BCC 107
## 107_BCC.5   87.36 0.006399464        0.919 0.9364011              BCC 107
## 107_BCC.6    2.39 0.006399464        0.919 0.9364011              BCC 107

# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$DAT_Prob_VI <- rownames(VC_df)

colnames(G_df)[1] <- 'BLUP'
G_df$DAT_VI.Pedigree <- rownames(G_df)
G_df$VI.Pedigree <- lapply(strsplit(as.character(G_df$DAT_VI.Pedigree), '_'), '[', 2) %>% unlist()
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$VI.Pedigree), '\\.'), '[', 1)

# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Within_DAT_Across_Quantile_Revised_Herit.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_DAT_Across_Quantile_Revised_Herit.csv'))

ANOVA Within DAT Across Quantile Levels

Aaron J. DeSalvio

2025-09-06

Analysis of Variance - AM5 & HM5

Loop to run ANOVA models with Genotype × Quantile interaction at all time points

Save the ANOVA results