library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)
The following ANOVA models are primarily aimed at quantifying the degree of Genotype x Time interaction present in the vegetation indices. We’re using “standard” VIs, meaning only the median value is being tested.
# Set working directory and source external functions
# Replace this path with the actual path where you saved the repository's files
data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'
# Read data from CSV files
df <- data.table::fread(paste0(data_path, 'CS24-STEL-MULTI-RTK-VIs-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
# Remove low-quality DATs
DATs_exclude <- c(46, 71:72, 99:100)
df <- df %>% filter(!DAT %in% DATs_exclude)
# Remove plant that died early in the season
df <- df %>% filter(!Row_Range == '413_15')
# Basic plot for inspection of data
sort(unique(df$DAT))
## [1] 35 36 37 38 39 40 41 42 43 44 45 47 48 49 50 51 52 53
## [19] 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 73
## [37] 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
## [55] 92 93 94 95 96 97 98 101 102 103 104 105 106 107 108 109 110 111
## [73] 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
## [91] 130 131 132 133 134 135 136 137 138 139 140
df.plot <- df %>% filter(Pedigree == 5001)
plot(df.plot$DAT, df.plot$NDVI_mean, ylim = c(0.0, 1.0))
# Vector of VI names for loop
VI.Names <- sort(unique(colnames(df)[19:ncol(df)]))
# For the purposes of this document, we'll only perform one iteration of the loop.
# Comment out the following line if you'd like to run the entire loop
VI.Names <- 'NGRDI_median'
# Initialize empty lists to store results
G_list <- list()
VC_list <- list()
for (vi.name in VI.Names) {
# Subset data for the current VI
df.subset <- df %>% select(all_of(c('Range', 'Row', 'Replicate', 'Pedigree', 'DAT', vi.name)))
# Convert variables to appropriate data types
df.subset$Range <- as.factor(df.subset$Range)
df.subset$Row <- as.factor(df.subset$Row)
df.subset$Replicate <- as.factor(df.subset$Replicate)
df.subset$Pedigree <- as.factor(df.subset$Pedigree)
df.subset$DAT <- as.factor(df.subset$DAT)
# Progress report
print(paste('ANOVA Model:', vi.name))
# Fit the linear mixed-effects model after setting "Inf" and NaN as NA
response <- df.subset[,vi.name]
response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
anova <- lmer(response ~
(1 | Pedigree) +
(1 | DAT) +
(1 | Pedigree:DAT) +
(1 | Range:DAT) +
(1 | Row:DAT) +
(1 | Replicate:DAT),
data = df.subset)
# Calculate RMSE
rmse_value <- sqrt(mean(residuals(anova)^2))
# Calculate R-squared
R <- MuMIn::r.squaredGLMM(anova)[, "R2c"] # Conditional R-squared
# Extract variance components
VC <- as.data.frame(VarCorr(anova))
VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
# Calculate heritability
nRep <- length(unique(df.subset$Replicate))
nFlight <- length(unique(df.subset$DAT))
Vg <- VC[VC$grp == "Pedigree", "vcov"]
Vgf <- VC[VC$grp == "Pedigree:DAT", "vcov"]
Ve <- VC[VC$grp == "Residual", "vcov"]
#heritability <- Vg / (Vg + (Vgf / nFlight) + (Ve / (nRep * nFlight)))
#heritability <- round(heritability, 3)
heritability <- Vg / (Vg + (Vgf) + (Ve / (nRep)))
heritability <- round(heritability, 3)
# Add additional information to VC data frame
VC$Rmse <- rmse_value
VC$Heritability <- heritability
VC$R_squared <- R
VC$Vegetation.Index <- vi.name
print(VC)
# Store VC in the list
VC_list[[vi.name]] <- VC
# Extract random effects for 'Pedigree'
G <- coef(anova)$Pedigree
G$Pedigree <- rownames(G)
# Store G in the list
G_list[[vi.name]] <- G
}
## [1] "ANOVA Model: NGRDI_median"
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00357318 (tol = 0.002, component 1)
## grp var1 var2 vcov sdcor Percent Rmse
## 1 Row:DAT (Intercept) <NA> 4.839831e-04 0.021999617 9.53 0.02931909
## 2 Replicate:DAT (Intercept) <NA> 1.935293e-05 0.004399197 0.38 0.02931909
## 3 Pedigree:DAT (Intercept) <NA> 6.108809e-05 0.007815887 1.20 0.02931909
## 4 Range:DAT (Intercept) <NA> 4.429150e-05 0.006655186 0.87 0.02931909
## 5 DAT (Intercept) <NA> 3.290852e-03 0.057365946 64.81 0.02931909
## 6 Pedigree (Intercept) <NA> 1.785007e-04 0.013360414 3.52 0.02931909
## 7 Residual <NA> <NA> 9.995879e-04 0.031616261 19.69 0.02931909
## Heritability R_squared Vegetation.Index
## 1 0.654 0.8031399 NGRDI_median
## 2 0.654 0.8031399 NGRDI_median
## 3 0.654 0.8031399 NGRDI_median
## 4 0.654 0.8031399 NGRDI_median
## 5 0.654 0.8031399 NGRDI_median
## 6 0.654 0.8031399 NGRDI_median
## 7 0.654 0.8031399 NGRDI_median
# Combine the lists into data frames
G_df <- rbindlist(G_list, fill = T) %>% as.data.frame()
VC_df <- rbindlist(VC_list, fill = T) %>% as.data.frame()
# View the first few rows of the results
head(G_df)
## (Intercept) Pedigree
## 1 0.2495714 5001
## 2 0.2552577 5002
## 3 0.2531053 5003
## 4 0.2560163 5004
## 5 0.2573020 5005
## 6 0.2550449 5006
head(VC_df)
## grp var1 var2 vcov sdcor Percent Rmse
## 1 Row:DAT (Intercept) <NA> 4.839831e-04 0.021999617 9.53 0.02931909
## 2 Replicate:DAT (Intercept) <NA> 1.935293e-05 0.004399197 0.38 0.02931909
## 3 Pedigree:DAT (Intercept) <NA> 6.108809e-05 0.007815887 1.20 0.02931909
## 4 Range:DAT (Intercept) <NA> 4.429150e-05 0.006655186 0.87 0.02931909
## 5 DAT (Intercept) <NA> 3.290852e-03 0.057365946 64.81 0.02931909
## 6 Pedigree (Intercept) <NA> 1.785007e-04 0.013360414 3.52 0.02931909
## Heritability R_squared Vegetation.Index
## 1 0.654 0.8031399 NGRDI_median
## 2 0.654 0.8031399 NGRDI_median
## 3 0.654 0.8031399 NGRDI_median
## 4 0.654 0.8031399 NGRDI_median
## 5 0.654 0.8031399 NGRDI_median
## 6 0.654 0.8031399 NGRDI_median
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
colnames(G_df)[1] <- 'VI.BLUP'
VI_vec <- rep(VI.Names, each = length(unique(df.subset$Pedigree)))
length(VI_vec) == nrow(G_df)
## [1] TRUE
G_df$Vegetation.Index <- VI_vec
G_df$Vegetation.Index.Type <- lapply(strsplit(as.character(G_df$Vegetation.Index), '_'), '[', 2) %>% unlist()
# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Across_All_DATs_Standard_VIs_Revised_Herit.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_DAT_Standard_VIs_Revised_Herit.csv'))