library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)

Analysis of Variance - AM2 & HM2

The following ANOVA models are primarily aimed at quantifying the heritability of each VI and the genotypic variance for each VI at each time point (measured in days after transplanting).

# Set working directory and source external functions
data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'

# Read data from CSV files
df <- fread(paste0(data_path, 'CS24-STEL-MULTI-RTK-VIs-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)

# Remove low-quality DATs
DATs_exclude <- c(46, 71:72, 99:100)
df <- df %>% filter(!DAT %in% DATs_exclude)

# Remove plant that died early in the season
df <- df %>% filter(!Row_Range == '413_15')

# Basic plot for inspection of data
sort(unique(df$DAT))
##   [1]  35  36  37  38  39  40  41  42  43  44  45  47  48  49  50  51  52  53
##  [19]  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  73
##  [37]  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91
##  [55]  92  93  94  95  96  97  98 101 102 103 104 105 106 107 108 109 110 111
##  [73] 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
##  [91] 130 131 132 133 134 135 136 137 138 139 140
df.plot <- df %>% filter(Pedigree == 5001)
plot(df.plot$DAT, df.plot$NDVI_mean, ylim = c(0.0, 1.0))

# Vector containing DATs to loop through
dat.loop <- sort(unique(df$DAT))

# Vector for inner loop containing VI names
VI.Names <- sort(unique(colnames(df)[19:ncol(df)]))

# For the purposes of this document, we'll only perform one iteration of the loop.
# Comment out the following line if you'd like to run the entire loop
VI.Names <- 'NGRDI_median'

Loop to run ANOVA models for each VI

# Initialize empty lists to store results
G_list <- list()
VC_list <- list()

# Loop over each environment
for (dat in dat.loop) {
  
  # Subset data for the current DAT
  df.subset <- df[df$DAT == dat, ]
  
  # Convert variables to appropriate data types
  df.subset$Range <- as.factor(df.subset$Range)
  df.subset$Row <- as.factor(df.subset$Row)
  df.subset$Replicate <- as.factor(df.subset$Replicate)
  df.subset$Pedigree <- as.factor(df.subset$Pedigree)
  
  for (vi.name in VI.Names) {
      
      # Progress report
      print(paste('ANOVA Model:', 'DAT', dat, 'VI', vi.name))
      
      # Fit the linear mixed-effects model after setting "Inf" and NaN as NA
      response <- df.subset[,vi.name]
      response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
      
      anova <- lmer(response ~
                      (1 | Pedigree) +
                      (1 | Range) +
                      (1 | Row) +
                      (1 | Replicate),
                    data = df.subset)
      
      # Calculate RMSE
      rmse_value <- sqrt(mean(residuals(anova)^2))
      
      # Calculate R-squared
      R <- MuMIn::r.squaredGLMM(anova)[, "R2c"]  # Conditional R-squared
      
      # Extract variance components
      VC <- as.data.frame(VarCorr(anova))
      VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
      
      # Calculate heritability
      nRep <- length(unique(df.subset$Replicate))
      Vg <- VC[VC$grp == "Pedigree", "vcov"]
      Ve <- VC[VC$grp == "Residual", "vcov"]
      heritability <- Vg / (Vg + (Ve / nRep))
      heritability <- round(heritability, 3)
      
      # Add additional information to VC data frame
      VC$Rmse <- rmse_value
      VC$Heritability <- heritability
      VC$R_squared <- R
      VC$DAT <- dat
      VC$Vegetation.Index <- vi.name
      
      # Store VC in the list
      VC_list[[paste(dat, vi.name, sep = '_')]] <- VC
      
      # Extract random effects for 'Pedigree'
      G <- coef(anova)$Pedigree
      G$Pedigree <- rownames(G)
      G$DAT <- dat
      
      # Store G in the list
      G_list[[paste(dat, vi.name, sep = '_')]] <- G
      
  }
}
## [1] "ANOVA Model: DAT 35 VI NGRDI_median"
## [1] "ANOVA Model: DAT 36 VI NGRDI_median"
## [1] "ANOVA Model: DAT 37 VI NGRDI_median"
## [1] "ANOVA Model: DAT 38 VI NGRDI_median"
## [1] "ANOVA Model: DAT 39 VI NGRDI_median"
## [1] "ANOVA Model: DAT 40 VI NGRDI_median"
## [1] "ANOVA Model: DAT 41 VI NGRDI_median"
## [1] "ANOVA Model: DAT 42 VI NGRDI_median"
## [1] "ANOVA Model: DAT 43 VI NGRDI_median"
## [1] "ANOVA Model: DAT 44 VI NGRDI_median"
## [1] "ANOVA Model: DAT 45 VI NGRDI_median"
## [1] "ANOVA Model: DAT 47 VI NGRDI_median"
## [1] "ANOVA Model: DAT 48 VI NGRDI_median"
## [1] "ANOVA Model: DAT 49 VI NGRDI_median"
## [1] "ANOVA Model: DAT 50 VI NGRDI_median"
## [1] "ANOVA Model: DAT 51 VI NGRDI_median"
## [1] "ANOVA Model: DAT 52 VI NGRDI_median"
## [1] "ANOVA Model: DAT 53 VI NGRDI_median"
## [1] "ANOVA Model: DAT 54 VI NGRDI_median"
## [1] "ANOVA Model: DAT 55 VI NGRDI_median"
## [1] "ANOVA Model: DAT 56 VI NGRDI_median"
## [1] "ANOVA Model: DAT 57 VI NGRDI_median"
## [1] "ANOVA Model: DAT 58 VI NGRDI_median"
## [1] "ANOVA Model: DAT 59 VI NGRDI_median"
## [1] "ANOVA Model: DAT 60 VI NGRDI_median"
## [1] "ANOVA Model: DAT 61 VI NGRDI_median"
## [1] "ANOVA Model: DAT 62 VI NGRDI_median"
## [1] "ANOVA Model: DAT 63 VI NGRDI_median"
## [1] "ANOVA Model: DAT 64 VI NGRDI_median"
## [1] "ANOVA Model: DAT 65 VI NGRDI_median"
## [1] "ANOVA Model: DAT 66 VI NGRDI_median"
## [1] "ANOVA Model: DAT 67 VI NGRDI_median"
## [1] "ANOVA Model: DAT 68 VI NGRDI_median"
## [1] "ANOVA Model: DAT 69 VI NGRDI_median"
## [1] "ANOVA Model: DAT 70 VI NGRDI_median"
## [1] "ANOVA Model: DAT 73 VI NGRDI_median"
## [1] "ANOVA Model: DAT 74 VI NGRDI_median"
## [1] "ANOVA Model: DAT 75 VI NGRDI_median"
## [1] "ANOVA Model: DAT 76 VI NGRDI_median"
## [1] "ANOVA Model: DAT 77 VI NGRDI_median"
## [1] "ANOVA Model: DAT 78 VI NGRDI_median"
## [1] "ANOVA Model: DAT 79 VI NGRDI_median"
## [1] "ANOVA Model: DAT 80 VI NGRDI_median"
## [1] "ANOVA Model: DAT 81 VI NGRDI_median"
## [1] "ANOVA Model: DAT 82 VI NGRDI_median"
## [1] "ANOVA Model: DAT 83 VI NGRDI_median"
## [1] "ANOVA Model: DAT 84 VI NGRDI_median"
## [1] "ANOVA Model: DAT 85 VI NGRDI_median"
## [1] "ANOVA Model: DAT 86 VI NGRDI_median"
## [1] "ANOVA Model: DAT 87 VI NGRDI_median"
## [1] "ANOVA Model: DAT 88 VI NGRDI_median"
## [1] "ANOVA Model: DAT 89 VI NGRDI_median"
## [1] "ANOVA Model: DAT 90 VI NGRDI_median"
## [1] "ANOVA Model: DAT 91 VI NGRDI_median"
## [1] "ANOVA Model: DAT 92 VI NGRDI_median"
## [1] "ANOVA Model: DAT 93 VI NGRDI_median"
## [1] "ANOVA Model: DAT 94 VI NGRDI_median"
## [1] "ANOVA Model: DAT 95 VI NGRDI_median"
## [1] "ANOVA Model: DAT 96 VI NGRDI_median"
## [1] "ANOVA Model: DAT 97 VI NGRDI_median"
## [1] "ANOVA Model: DAT 98 VI NGRDI_median"
## [1] "ANOVA Model: DAT 101 VI NGRDI_median"
## [1] "ANOVA Model: DAT 102 VI NGRDI_median"
## [1] "ANOVA Model: DAT 103 VI NGRDI_median"
## [1] "ANOVA Model: DAT 104 VI NGRDI_median"
## [1] "ANOVA Model: DAT 105 VI NGRDI_median"
## [1] "ANOVA Model: DAT 106 VI NGRDI_median"
## [1] "ANOVA Model: DAT 107 VI NGRDI_median"
## [1] "ANOVA Model: DAT 108 VI NGRDI_median"
## [1] "ANOVA Model: DAT 109 VI NGRDI_median"
## [1] "ANOVA Model: DAT 110 VI NGRDI_median"
## [1] "ANOVA Model: DAT 111 VI NGRDI_median"
## [1] "ANOVA Model: DAT 112 VI NGRDI_median"
## [1] "ANOVA Model: DAT 113 VI NGRDI_median"
## [1] "ANOVA Model: DAT 114 VI NGRDI_median"
## [1] "ANOVA Model: DAT 115 VI NGRDI_median"
## [1] "ANOVA Model: DAT 116 VI NGRDI_median"
## [1] "ANOVA Model: DAT 117 VI NGRDI_median"
## [1] "ANOVA Model: DAT 118 VI NGRDI_median"
## [1] "ANOVA Model: DAT 119 VI NGRDI_median"
## [1] "ANOVA Model: DAT 120 VI NGRDI_median"
## [1] "ANOVA Model: DAT 121 VI NGRDI_median"
## [1] "ANOVA Model: DAT 122 VI NGRDI_median"
## [1] "ANOVA Model: DAT 123 VI NGRDI_median"
## [1] "ANOVA Model: DAT 124 VI NGRDI_median"
## [1] "ANOVA Model: DAT 125 VI NGRDI_median"
## [1] "ANOVA Model: DAT 126 VI NGRDI_median"
## [1] "ANOVA Model: DAT 127 VI NGRDI_median"
## [1] "ANOVA Model: DAT 128 VI NGRDI_median"
## [1] "ANOVA Model: DAT 129 VI NGRDI_median"
## [1] "ANOVA Model: DAT 130 VI NGRDI_median"
## [1] "ANOVA Model: DAT 131 VI NGRDI_median"
## [1] "ANOVA Model: DAT 132 VI NGRDI_median"
## [1] "ANOVA Model: DAT 133 VI NGRDI_median"
## [1] "ANOVA Model: DAT 134 VI NGRDI_median"
## [1] "ANOVA Model: DAT 135 VI NGRDI_median"
## [1] "ANOVA Model: DAT 136 VI NGRDI_median"
## [1] "ANOVA Model: DAT 137 VI NGRDI_median"
## [1] "ANOVA Model: DAT 138 VI NGRDI_median"
## [1] "ANOVA Model: DAT 139 VI NGRDI_median"
## [1] "ANOVA Model: DAT 140 VI NGRDI_median"

Save the ANOVA results

# Combine the lists into data frames
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)

# View the first few rows of the results
head(G_df)
##                      (Intercept) Pedigree DAT
## 35_NGRDI_median.5001   0.1702508     5001  35
## 35_NGRDI_median.5002   0.1720844     5002  35
## 35_NGRDI_median.5003   0.1716521     5003  35
## 35_NGRDI_median.5004   0.1665713     5004  35
## 35_NGRDI_median.5005   0.1678183     5005  35
## 35_NGRDI_median.5006   0.1650658     5006  35
head(VC_df)
##                         grp        var1 var2         vcov       sdcor Percent
## 35_NGRDI_median.1       Row (Intercept) <NA> 3.827564e-04 0.019564161   36.25
## 35_NGRDI_median.2 Replicate (Intercept) <NA> 7.693568e-06 0.002773728    0.73
## 35_NGRDI_median.3  Pedigree (Intercept) <NA> 7.452550e-05 0.008632815    7.06
## 35_NGRDI_median.4     Range (Intercept) <NA> 5.305965e-06 0.002303468    0.50
## 35_NGRDI_median.5  Residual        <NA> <NA> 5.854611e-04 0.024196304   55.45
## 36_NGRDI_median.1       Row (Intercept) <NA> 6.592677e-04 0.025676208   44.19
##                         Rmse Heritability R_squared DAT Vegetation.Index
## 35_NGRDI_median.1 0.02234519        0.792 0.4454509  35     NGRDI_median
## 35_NGRDI_median.2 0.02234519        0.792 0.4454509  35     NGRDI_median
## 35_NGRDI_median.3 0.02234519        0.792 0.4454509  35     NGRDI_median
## 35_NGRDI_median.4 0.02234519        0.792 0.4454509  35     NGRDI_median
## 35_NGRDI_median.5 0.02234519        0.792 0.4454509  35     NGRDI_median
## 36_NGRDI_median.1 0.02508136        0.756 0.5032436  36     NGRDI_median
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$DAT_Prob_VI <- rownames(VC_df)
VC_df <- dplyr::select(VC_df, -c(DAT_Prob_VI))

colnames(G_df)[1] <- 'VI.BLUP'
G_df$DAT_VI.Pedigree <- rownames(G_df)
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$DAT_VI.Pedigree), '_'), '[', 2) %>% unlist()
G_df$Vegetation.Index.Type <- lapply(strsplit(as.character(G_df$DAT_VI.Pedigree), '_'), '[', 3) %>% unlist()
G_df$Type <- lapply(strsplit(as.character(G_df$Vegetation.Index.Type), '\\.'), '[', 1) %>% unlist()
G_df$Vegetation.Index <- paste(G_df$Vegetation.Index, G_df$Type, sep = '.')
G_df <- dplyr::select(G_df, -c(Vegetation.Index.Type, Type))

# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Within_DAT_Standard_VIs.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_DAT_Standard_VIs.csv'))