library(lubridate)
library(lme4)
library(lmerTest)
library(ggplot2)
library(dplyr)
library(tidyr)
library(data.table)
The following ANOVA models are primarily aimed at quantifying the heritability of each VI and the genotypic variance for each VI at each time point (measured in days after transplanting).
# Set working directory and source external functions
data_path <- 'D:/Users/aaron.desalvio/My Drive/TAMU/Murray Lab/Writing/Manuscripts/2025_Distributional_Data/CS24_Cotton/0.0_Data/'
# Read data from CSV files
df <- fread(paste0(data_path, 'CS24-STEL-MULTI-RTK-VIs-FieldCoords.csv')) %>% as.data.frame() %>% arrange(DAT)
# Remove low-quality DATs
DATs_exclude <- c(46, 71:72, 99:100)
df <- df %>% filter(!DAT %in% DATs_exclude)
# Remove plant that died early in the season
df <- df %>% filter(!Row_Range == '413_15')
# Basic plot for inspection of data
sort(unique(df$DAT))
## [1] 35 36 37 38 39 40 41 42 43 44 45 47 48 49 50 51 52 53
## [19] 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 73
## [37] 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
## [55] 92 93 94 95 96 97 98 101 102 103 104 105 106 107 108 109 110 111
## [73] 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
## [91] 130 131 132 133 134 135 136 137 138 139 140
df.plot <- df %>% filter(Pedigree == 5001)
plot(df.plot$DAT, df.plot$NDVI_mean, ylim = c(0.0, 1.0))
# Vector containing DATs to loop through
dat.loop <- sort(unique(df$DAT))
# Vector for inner loop containing VI names
VI.Names <- sort(unique(colnames(df)[19:ncol(df)]))
# For the purposes of this document, we'll only perform one iteration of the loop.
# Comment out the following line if you'd like to run the entire loop
VI.Names <- 'NGRDI_median'
# Initialize empty lists to store results
G_list <- list()
VC_list <- list()
# Loop over each environment
for (dat in dat.loop) {
# Subset data for the current DAT
df.subset <- df[df$DAT == dat, ]
# Convert variables to appropriate data types
df.subset$Range <- as.factor(df.subset$Range)
df.subset$Row <- as.factor(df.subset$Row)
df.subset$Replicate <- as.factor(df.subset$Replicate)
df.subset$Pedigree <- as.factor(df.subset$Pedigree)
for (vi.name in VI.Names) {
# Progress report
print(paste('ANOVA Model:', 'DAT', dat, 'VI', vi.name))
# Fit the linear mixed-effects model after setting "Inf" and NaN as NA
response <- df.subset[,vi.name]
response[is.na(response) | response == 'Inf' | is.nan(response) | response == '-Inf'] <- NA
anova <- lmer(response ~
(1 | Pedigree) +
(1 | Range) +
(1 | Row) +
(1 | Replicate),
data = df.subset)
# Calculate RMSE
rmse_value <- sqrt(mean(residuals(anova)^2))
# Calculate R-squared
R <- MuMIn::r.squaredGLMM(anova)[, "R2c"] # Conditional R-squared
# Extract variance components
VC <- as.data.frame(VarCorr(anova))
VC$Percent <- round(VC$vcov / sum(VC$vcov) * 100, 2)
# Calculate heritability
nRep <- length(unique(df.subset$Replicate))
Vg <- VC[VC$grp == "Pedigree", "vcov"]
Ve <- VC[VC$grp == "Residual", "vcov"]
heritability <- Vg / (Vg + (Ve / nRep))
heritability <- round(heritability, 3)
# Add additional information to VC data frame
VC$Rmse <- rmse_value
VC$Heritability <- heritability
VC$R_squared <- R
VC$DAT <- dat
VC$Vegetation.Index <- vi.name
# Store VC in the list
VC_list[[paste(dat, vi.name, sep = '_')]] <- VC
# Extract random effects for 'Pedigree'
G <- coef(anova)$Pedigree
G$Pedigree <- rownames(G)
G$DAT <- dat
# Store G in the list
G_list[[paste(dat, vi.name, sep = '_')]] <- G
}
}
## [1] "ANOVA Model: DAT 35 VI NGRDI_median"
## [1] "ANOVA Model: DAT 36 VI NGRDI_median"
## [1] "ANOVA Model: DAT 37 VI NGRDI_median"
## [1] "ANOVA Model: DAT 38 VI NGRDI_median"
## [1] "ANOVA Model: DAT 39 VI NGRDI_median"
## [1] "ANOVA Model: DAT 40 VI NGRDI_median"
## [1] "ANOVA Model: DAT 41 VI NGRDI_median"
## [1] "ANOVA Model: DAT 42 VI NGRDI_median"
## [1] "ANOVA Model: DAT 43 VI NGRDI_median"
## [1] "ANOVA Model: DAT 44 VI NGRDI_median"
## [1] "ANOVA Model: DAT 45 VI NGRDI_median"
## [1] "ANOVA Model: DAT 47 VI NGRDI_median"
## [1] "ANOVA Model: DAT 48 VI NGRDI_median"
## [1] "ANOVA Model: DAT 49 VI NGRDI_median"
## [1] "ANOVA Model: DAT 50 VI NGRDI_median"
## [1] "ANOVA Model: DAT 51 VI NGRDI_median"
## [1] "ANOVA Model: DAT 52 VI NGRDI_median"
## [1] "ANOVA Model: DAT 53 VI NGRDI_median"
## [1] "ANOVA Model: DAT 54 VI NGRDI_median"
## [1] "ANOVA Model: DAT 55 VI NGRDI_median"
## [1] "ANOVA Model: DAT 56 VI NGRDI_median"
## [1] "ANOVA Model: DAT 57 VI NGRDI_median"
## [1] "ANOVA Model: DAT 58 VI NGRDI_median"
## [1] "ANOVA Model: DAT 59 VI NGRDI_median"
## [1] "ANOVA Model: DAT 60 VI NGRDI_median"
## [1] "ANOVA Model: DAT 61 VI NGRDI_median"
## [1] "ANOVA Model: DAT 62 VI NGRDI_median"
## [1] "ANOVA Model: DAT 63 VI NGRDI_median"
## [1] "ANOVA Model: DAT 64 VI NGRDI_median"
## [1] "ANOVA Model: DAT 65 VI NGRDI_median"
## [1] "ANOVA Model: DAT 66 VI NGRDI_median"
## [1] "ANOVA Model: DAT 67 VI NGRDI_median"
## [1] "ANOVA Model: DAT 68 VI NGRDI_median"
## [1] "ANOVA Model: DAT 69 VI NGRDI_median"
## [1] "ANOVA Model: DAT 70 VI NGRDI_median"
## [1] "ANOVA Model: DAT 73 VI NGRDI_median"
## [1] "ANOVA Model: DAT 74 VI NGRDI_median"
## [1] "ANOVA Model: DAT 75 VI NGRDI_median"
## [1] "ANOVA Model: DAT 76 VI NGRDI_median"
## [1] "ANOVA Model: DAT 77 VI NGRDI_median"
## [1] "ANOVA Model: DAT 78 VI NGRDI_median"
## [1] "ANOVA Model: DAT 79 VI NGRDI_median"
## [1] "ANOVA Model: DAT 80 VI NGRDI_median"
## [1] "ANOVA Model: DAT 81 VI NGRDI_median"
## [1] "ANOVA Model: DAT 82 VI NGRDI_median"
## [1] "ANOVA Model: DAT 83 VI NGRDI_median"
## [1] "ANOVA Model: DAT 84 VI NGRDI_median"
## [1] "ANOVA Model: DAT 85 VI NGRDI_median"
## [1] "ANOVA Model: DAT 86 VI NGRDI_median"
## [1] "ANOVA Model: DAT 87 VI NGRDI_median"
## [1] "ANOVA Model: DAT 88 VI NGRDI_median"
## [1] "ANOVA Model: DAT 89 VI NGRDI_median"
## [1] "ANOVA Model: DAT 90 VI NGRDI_median"
## [1] "ANOVA Model: DAT 91 VI NGRDI_median"
## [1] "ANOVA Model: DAT 92 VI NGRDI_median"
## [1] "ANOVA Model: DAT 93 VI NGRDI_median"
## [1] "ANOVA Model: DAT 94 VI NGRDI_median"
## [1] "ANOVA Model: DAT 95 VI NGRDI_median"
## [1] "ANOVA Model: DAT 96 VI NGRDI_median"
## [1] "ANOVA Model: DAT 97 VI NGRDI_median"
## [1] "ANOVA Model: DAT 98 VI NGRDI_median"
## [1] "ANOVA Model: DAT 101 VI NGRDI_median"
## [1] "ANOVA Model: DAT 102 VI NGRDI_median"
## [1] "ANOVA Model: DAT 103 VI NGRDI_median"
## [1] "ANOVA Model: DAT 104 VI NGRDI_median"
## [1] "ANOVA Model: DAT 105 VI NGRDI_median"
## [1] "ANOVA Model: DAT 106 VI NGRDI_median"
## [1] "ANOVA Model: DAT 107 VI NGRDI_median"
## [1] "ANOVA Model: DAT 108 VI NGRDI_median"
## [1] "ANOVA Model: DAT 109 VI NGRDI_median"
## [1] "ANOVA Model: DAT 110 VI NGRDI_median"
## [1] "ANOVA Model: DAT 111 VI NGRDI_median"
## [1] "ANOVA Model: DAT 112 VI NGRDI_median"
## [1] "ANOVA Model: DAT 113 VI NGRDI_median"
## [1] "ANOVA Model: DAT 114 VI NGRDI_median"
## [1] "ANOVA Model: DAT 115 VI NGRDI_median"
## [1] "ANOVA Model: DAT 116 VI NGRDI_median"
## [1] "ANOVA Model: DAT 117 VI NGRDI_median"
## [1] "ANOVA Model: DAT 118 VI NGRDI_median"
## [1] "ANOVA Model: DAT 119 VI NGRDI_median"
## [1] "ANOVA Model: DAT 120 VI NGRDI_median"
## [1] "ANOVA Model: DAT 121 VI NGRDI_median"
## [1] "ANOVA Model: DAT 122 VI NGRDI_median"
## [1] "ANOVA Model: DAT 123 VI NGRDI_median"
## [1] "ANOVA Model: DAT 124 VI NGRDI_median"
## [1] "ANOVA Model: DAT 125 VI NGRDI_median"
## [1] "ANOVA Model: DAT 126 VI NGRDI_median"
## [1] "ANOVA Model: DAT 127 VI NGRDI_median"
## [1] "ANOVA Model: DAT 128 VI NGRDI_median"
## [1] "ANOVA Model: DAT 129 VI NGRDI_median"
## [1] "ANOVA Model: DAT 130 VI NGRDI_median"
## [1] "ANOVA Model: DAT 131 VI NGRDI_median"
## [1] "ANOVA Model: DAT 132 VI NGRDI_median"
## [1] "ANOVA Model: DAT 133 VI NGRDI_median"
## [1] "ANOVA Model: DAT 134 VI NGRDI_median"
## [1] "ANOVA Model: DAT 135 VI NGRDI_median"
## [1] "ANOVA Model: DAT 136 VI NGRDI_median"
## [1] "ANOVA Model: DAT 137 VI NGRDI_median"
## [1] "ANOVA Model: DAT 138 VI NGRDI_median"
## [1] "ANOVA Model: DAT 139 VI NGRDI_median"
## [1] "ANOVA Model: DAT 140 VI NGRDI_median"
# Combine the lists into data frames
G_df <- do.call(rbind, G_list)
VC_df <- do.call(rbind, VC_list)
# View the first few rows of the results
head(G_df)
## (Intercept) Pedigree DAT
## 35_NGRDI_median.5001 0.1702508 5001 35
## 35_NGRDI_median.5002 0.1720844 5002 35
## 35_NGRDI_median.5003 0.1716521 5003 35
## 35_NGRDI_median.5004 0.1665713 5004 35
## 35_NGRDI_median.5005 0.1678183 5005 35
## 35_NGRDI_median.5006 0.1650658 5006 35
head(VC_df)
## grp var1 var2 vcov sdcor Percent
## 35_NGRDI_median.1 Row (Intercept) <NA> 3.827564e-04 0.019564161 36.25
## 35_NGRDI_median.2 Replicate (Intercept) <NA> 7.693568e-06 0.002773728 0.73
## 35_NGRDI_median.3 Pedigree (Intercept) <NA> 7.452550e-05 0.008632815 7.06
## 35_NGRDI_median.4 Range (Intercept) <NA> 5.305965e-06 0.002303468 0.50
## 35_NGRDI_median.5 Residual <NA> <NA> 5.854611e-04 0.024196304 55.45
## 36_NGRDI_median.1 Row (Intercept) <NA> 6.592677e-04 0.025676208 44.19
## Rmse Heritability R_squared DAT Vegetation.Index
## 35_NGRDI_median.1 0.02234519 0.792 0.4454509 35 NGRDI_median
## 35_NGRDI_median.2 0.02234519 0.792 0.4454509 35 NGRDI_median
## 35_NGRDI_median.3 0.02234519 0.792 0.4454509 35 NGRDI_median
## 35_NGRDI_median.4 0.02234519 0.792 0.4454509 35 NGRDI_median
## 35_NGRDI_median.5 0.02234519 0.792 0.4454509 35 NGRDI_median
## 36_NGRDI_median.1 0.02508136 0.756 0.5032436 36 NGRDI_median
# Data wrangling before exporting
VC_df_cols <- colnames(VC_df)
VC_df_cols <- VC_df_cols[!VC_df_cols %in% c('var1', 'var2')]
VC_df <- VC_df[,VC_df_cols]
VC_df$DAT_Prob_VI <- rownames(VC_df)
VC_df <- dplyr::select(VC_df, -c(DAT_Prob_VI))
colnames(G_df)[1] <- 'VI.BLUP'
G_df$DAT_VI.Pedigree <- rownames(G_df)
G_df$Vegetation.Index <- lapply(strsplit(as.character(G_df$DAT_VI.Pedigree), '_'), '[', 2) %>% unlist()
G_df$Vegetation.Index.Type <- lapply(strsplit(as.character(G_df$DAT_VI.Pedigree), '_'), '[', 3) %>% unlist()
G_df$Type <- lapply(strsplit(as.character(G_df$Vegetation.Index.Type), '\\.'), '[', 1) %>% unlist()
G_df$Vegetation.Index <- paste(G_df$Vegetation.Index, G_df$Type, sep = '.')
G_df <- dplyr::select(G_df, -c(Vegetation.Index.Type, Type))
# Save the data frames
#fwrite(VC_df, paste0(data_path, 'VarComp_Within_DAT_Standard_VIs.csv'))
#fwrite(G_df, paste0(data_path, 'Quantile_BLUPs_Within_DAT_Standard_VIs.csv'))