Descriptive analysis of wheat dataset

A desciptive summary of genomic relationships in wheat dataset

Published

February 29, 2024

All Z and G matrices output are based on filtered genotype and phenotype data containing no duplicates

Raw phenotypes file

raw_pheno <- read_xlsx("Raw_phenotypes.xlsx")
dim(raw_pheno)
[1] 11424    19
raw_pheno[1:5, 1:5]
# A tibble: 5 × 5
  Genotype   Range   Row Location    Year
  <chr>      <dbl> <dbl> <chr>      <dbl>
1 AYT-O-1848     1     1 Roseworthy  2014
2 AYT-O-0974     1     2 Roseworthy  2014
3 AYT-S-0579     1     3 Roseworthy  2014
4 AYT-S-0299     1     4 Roseworthy  2014
5 AYT-S-0664     1     5 Roseworthy  2014

Ajusted phenotypes file

adj_pheno <- read.csv("adj_phenotypes.csv")
adj_pheno <- adj_pheno[,-c(8:20)]
dim(adj_pheno)
[1] 11424    20
adj_pheno[1:5, 1:5]
  X         ID Range Row   Location
1 1 AYT-O-1848     1   1 Roseworthy
2 2 AYT-O-0974     1   2 Roseworthy
3 3 AYT-S-0579     1   3 Roseworthy
4 4 AYT-S-0299     1   4 Roseworthy
5 5 AYT-S-0664     1   5 Roseworthy
#Unique phenotype
unique_pheno <- adj_pheno[!duplicated(adj_pheno$ID), ]
dim(unique_pheno)
[1] 10375    20
summary(unique_pheno)
       X              ID                Range            Row       
 Min.   :    1   Length:10375       Min.   : 1.00   Min.   :  1.0  
 1st Qu.: 2846   Class :character   1st Qu.: 6.00   1st Qu.:119.0  
 Median : 5702   Mode  :character   Median :12.00   Median :238.0  
 Mean   : 5705                      Mean   :12.48   Mean   :238.3  
 3rd Qu.: 8562                      3rd Qu.:18.00   3rd Qu.:357.0  
 Max.   :11423                      Max.   :24.00   Max.   :476.0  
                                                                   
   Location              Year       GrowthHabit       adj_NDVI        
 Length:10375       Min.   :2014   Min.   :1.000   Min.   :-0.495391  
 Class :character   1st Qu.:2014   1st Qu.:2.000   1st Qu.:-0.048276  
 Mode  :character   Median :2014   Median :2.000   Median : 0.004227  
                    Mean   :2014   Mean   :2.365   Mean   :-0.000125  
                    3rd Qu.:2014   3rd Qu.:3.000   3rd Qu.: 0.050554  
                    Max.   :2014   Max.   :8.000   Max.   : 0.266729  
                                   NA's   :9       NA's   :14         
 adj_PlantHeight      adj_LeafLoss       adj_Glaucousness   
 Min.   :-4.064010   Min.   :-4.538660   Min.   :-4.027074  
 1st Qu.:-0.627596   1st Qu.:-1.150594   1st Qu.:-1.485844  
 Median :-0.026288   Median :-0.113439   Median :-0.373189  
 Mean   :-0.000589   Mean   : 0.000217   Mean   : 0.001182  
 3rd Qu.: 0.727739   3rd Qu.: 1.175993   3rd Qu.: 1.251936  
 Max.   : 3.513630   Max.   : 4.686780   Max.   : 6.144128  
 NA's   :8           NA's   :8           NA's   :8          
 adj_Greenness       adj_LeafWidth        adj_Biomass        adj_Yellows       
 Min.   :-4.768577   Min.   :-3.942787   Min.   :-4.87902   Min.   :-1.417275  
 1st Qu.:-1.097327   1st Qu.:-0.949922   1st Qu.:-0.69445   1st Qu.:-0.605954  
 Median : 0.084101   Median : 0.018754   Median : 0.04168   Median :-0.402283  
 Mean   : 0.000695   Mean   : 0.000468   Mean   :-0.00068   Mean   : 0.000276  
 3rd Qu.: 1.049599   3rd Qu.: 0.950542   3rd Qu.: 0.72300   3rd Qu.: 0.415370  
 Max.   : 3.755015   Max.   : 3.388017   Max.   : 3.33733   Max.   : 7.033529  
 NA's   :8           NA's   :8           NA's   :115        NA's   :8          
  adj_Maturity        adj_GrainYield      adj_TestWeight    
 Min.   :-22.252293   Min.   :-2643.450   Min.   :-10.3673  
 1st Qu.: -4.352765   1st Qu.: -365.178   1st Qu.: -0.9844  
 Median :  0.584405   Median :   27.237   Median :  0.1853  
 Mean   : -0.001202   Mean   :    0.168   Mean   : -0.0002  
 3rd Qu.:  4.641852   3rd Qu.:  389.405   3rd Qu.:  1.1655  
 Max.   : 15.284852   Max.   : 2080.314   Max.   :  4.9427  
 NA's   :8            NA's   :131         NA's   :34        
 adj_GrainSizeTKW    adj_GrainProtein  
 Min.   :-14.64684   Min.   :-2.85376  
 1st Qu.: -2.92215   1st Qu.:-0.51878  
 Median : -0.13660   Median :-0.02669  
 Mean   :  0.00064   Mean   : 0.00060  
 3rd Qu.:  2.79050   3rd Qu.: 0.49063  
 Max.   : 20.14239   Max.   : 3.73942  
 NA's   :35          NA's   :80        

Quality control

  • Phenotypes with duplicates
Pheno_dup <- adj_pheno|>
  group_by(ID) |>
  summarise(count = n()) |>
  filter((count >1)) #adj_phenotypes w/ dupllicates
Pheno_dup
# A tibble: 8 × 2
  ID         count
  <chr>      <int>
1 AYT-O-1994   150
2 AYT-O-2008   150
3 AYT-O-2021   150
4 AYT-O-2031   150
5 AYT-O-2033   150
6 AYT-O-2048   150
7 AYT-O-2052     7
8 AYT-O-2062   150
#Check duplicates without NAs
#Outputs implies NA counts
check <- list()
for(i in 1:nrow(Pheno_dup)){
  ch <- adj_pheno[adj_pheno$ID == Pheno_dup$ID[i], 8]
  check[[i]] <- sum(is.na(ch))
}
check 
[[1]]
[1] 150

[[2]]
[1] 150

[[3]]
[1] 150

[[4]]
[1] 150

[[5]]
[1] 150

[[6]]
[1] 150

[[7]]
[1] 7

[[8]]
[1] 150
#Phenotype without IDs in Pheno_dup(ID==NAs)
y_check <- adj_pheno[!(adj_pheno$ID%in%Pheno_dup$ID),]
dim(y_check)
[1] 10367    20
summary(y_check)
       X              ID                Range            Row       
 Min.   :    1   Length:10367       Min.   : 1.00   Min.   :  1.0  
 1st Qu.: 2854   Class :character   1st Qu.: 6.00   1st Qu.:120.0  
 Median : 5707   Mode  :character   Median :12.00   Median :238.0  
 Mean   : 5709                      Mean   :12.49   Mean   :238.5  
 3rd Qu.: 8564                      3rd Qu.:18.00   3rd Qu.:357.0  
 Max.   :11423                      Max.   :24.00   Max.   :476.0  
                                                                   
   Location              Year       GrowthHabit       adj_NDVI        
 Length:10367       Min.   :2014   Min.   :1.000   Min.   :-0.495391  
 Class :character   1st Qu.:2014   1st Qu.:2.000   1st Qu.:-0.048276  
 Mode  :character   Median :2014   Median :2.000   Median : 0.004227  
                    Mean   :2014   Mean   :2.365   Mean   :-0.000125  
                    3rd Qu.:2014   3rd Qu.:3.000   3rd Qu.: 0.050554  
                    Max.   :2014   Max.   :8.000   Max.   : 0.266729  
                                   NA's   :9       NA's   :6          
 adj_PlantHeight      adj_LeafLoss       adj_Glaucousness   
 Min.   :-4.064010   Min.   :-4.538660   Min.   :-4.027074  
 1st Qu.:-0.627596   1st Qu.:-1.150594   1st Qu.:-1.485844  
 Median :-0.026288   Median :-0.113439   Median :-0.373189  
 Mean   :-0.000589   Mean   : 0.000217   Mean   : 0.001182  
 3rd Qu.: 0.727739   3rd Qu.: 1.175993   3rd Qu.: 1.251936  
 Max.   : 3.513630   Max.   : 4.686780   Max.   : 6.144128  
                                                            
 adj_Greenness       adj_LeafWidth        adj_Biomass        adj_Yellows       
 Min.   :-4.768577   Min.   :-3.942787   Min.   :-4.87902   Min.   :-1.417275  
 1st Qu.:-1.097327   1st Qu.:-0.949922   1st Qu.:-0.69445   1st Qu.:-0.605954  
 Median : 0.084101   Median : 0.018754   Median : 0.04168   Median :-0.402283  
 Mean   : 0.000695   Mean   : 0.000468   Mean   :-0.00068   Mean   : 0.000276  
 3rd Qu.: 1.049599   3rd Qu.: 0.950542   3rd Qu.: 0.72300   3rd Qu.: 0.415370  
 Max.   : 3.755015   Max.   : 3.388017   Max.   : 3.33733   Max.   : 7.033529  
                                         NA's   :107                           
  adj_Maturity        adj_GrainYield      adj_TestWeight      
 Min.   :-22.252293   Min.   :-2643.450   Min.   :-10.367262  
 1st Qu.: -4.352765   1st Qu.: -365.178   1st Qu.: -0.984365  
 Median :  0.584405   Median :   27.237   Median :  0.185297  
 Mean   : -0.001202   Mean   :    0.168   Mean   : -0.000197  
 3rd Qu.:  4.641852   3rd Qu.:  389.405   3rd Qu.:  1.165534  
 Max.   : 15.284852   Max.   : 2080.314   Max.   :  4.942738  
                      NA's   :123         NA's   :26          
 adj_GrainSizeTKW     adj_GrainProtein  
 Min.   :-14.646842   Min.   :-2.85376  
 1st Qu.: -2.922147   1st Qu.:-0.51878  
 Median : -0.136602   Median :-0.02669  
 Mean   :  0.000636   Mean   : 0.00060  
 3rd Qu.:  2.790503   3rd Qu.: 0.49063  
 Max.   : 20.142392   Max.   : 3.73942  
 NA's   :27           NA's   :72        

Genotypes file

R_file <- readRDS("Genotypes.rds")
R_file[1:5, 1:5] #non-standardized markers
           AXAFFSNP-00001 AXAFFSNP-00002 AXAFFSNP-00003 AXAFFSNP-00004
AYT-O-1848              1             -1             -1              1
AYT-O-0974             -1             -1             -1              1
AYT-S-0579              1             -1             -1              1
AYT-S-0299              1             -1             -1              1
AYT-S-0664              1             -1             -1              1
           AXAFFSNP-00005
AYT-O-1848              1
AYT-O-0974              1
AYT-S-0579              1
AYT-S-0299              1
AYT-S-0664              1

Compare variables between phenotypes and genomic marker

length(unique(adj_pheno$ID))
[1] 10375
all(rownames(R_file)%in%adj_pheno$ID)
[1] TRUE
all(unique(adj_pheno$ID)%in%rownames(R_file))
[1] TRUE
table(table(adj_pheno$ID))

    1     7   150 
10367     1     7 
table(table(rownames(R_file)))

    1 
10375 
#Genotype with phenotypes that contain some variables
Geno_n_pheo <- R_file[(rownames(R_file)%in%y_check$ID),]
dim(Geno_n_pheo)
[1] 10367 17181

PCA

load("PC.RData")
dim(PC)      
[1] 10375    10
head(PC)
                    [,1]          [,2]         [,3]         [,4]          [,5]
AYT-O-1848  0.0142843657  0.0008520041  0.004599534  0.006290104  0.0040835958
AYT-O-0974 -0.0020205548  0.0039306500 -0.019053867  0.005907936 -0.0052171607
AYT-S-0579 -0.0123209799 -0.0113984018  0.006133443 -0.001420917  0.0095267938
AYT-S-0299 -0.0009560686  0.0020915326 -0.007918294 -0.005763531 -0.0084143525
AYT-S-0664  0.0166148917 -0.0072749232  0.004678868  0.004706790 -0.0006557087
AYT-S-0304  0.0105904204 -0.0009692425 -0.002309381 -0.026689162 -0.0093229600
                    [,6]         [,7]          [,8]          [,9]        [,10]
AYT-O-1848  0.0025967631 -0.003812178  0.0009275762 -0.0003712614 -0.002473555
AYT-O-0974 -0.0119637740 -0.012642017 -0.0222248123 -0.0118447073  0.018452294
AYT-S-0579 -0.0009197395 -0.006211754  0.0099594666  0.0046946866  0.006762153
AYT-S-0299 -0.0147098250  0.001235230  0.0036071933  0.0063621112 -0.002532126
AYT-S-0664  0.0075995266 -0.001902767 -0.0029518912  0.0021473800  0.001026518
AYT-S-0304  0.0110086302  0.003983154 -0.0049505648 -0.0035896452 -0.003298428
set.seed(234)
clust <- kmeans(PC[,1:5], nstart = 10, centers = 8)
plot(PC[, 1:2], cex = 0.5, col = clust$cluster)

plot(PC[, c(1,3)], cex = 0.5, col = clust$cluster)

table(clust$cluster)

   1    2    3    4    5    6    7    8 
1140 1156  949 1298 2437 1250 1049 1096 
length(clust$cluster)
[1] 10375
Code
R_file <- Geno_n_pheo
cst <- clust$cluster[(names(clust$cluster)%in%rownames(R_file))]
length(cst)
[1] 10367
Code
#Check that names in cluster are equal to rownames in Genotype file
sum(names(cst) == rownames(R_file))
[1] 10367

Convert cluster to dataframe

clust_data <- tibble(ID = names(cst), cluster = cst)
head(clust_data)
# A tibble: 6 × 2
  ID         cluster
  <chr>        <int>
1 AYT-O-1848       5
2 AYT-O-0974       2
3 AYT-S-0579       6
4 AYT-S-0299       2
5 AYT-S-0664       5
6 AYT-S-0304       7
new_pheno <- left_join(y_check, clust_data, by = join_by(ID)) |>
  group_by(cluster)

Plots

#Create the mean
combn_m <- new_pheno|>
  summarise_at(vars(adj_NDVI:adj_GrainProtein), ~ mean(., na.rm = TRUE))
combn_m <- combn_m |>
  pivot_longer(cols = adj_NDVI:adj_GrainProtein, names_to = "Trait", values_to = "Mean")

#Create the standard deviation
combn_s <- new_pheno|>
  summarise_at(vars(adj_NDVI:adj_GrainProtein),~sd(., na.rm = TRUE))
combn_s <- combn_s |>
  pivot_longer(cols = adj_NDVI:adj_GrainProtein, names_to = "Trait", values_to = "SD")

plot_df <- cbind(combn_m, SD = combn_s$SD)
plot_df$cluster <- as.factor(plot_df$cluster)
ggplot(plot_df, aes(y = Mean)) +
  geom_col(aes(x = cluster)) + facet_wrap(~Trait, scales = "free") +
  geom_errorbar(aes(x = cluster, ymin = Mean - SD, ymax = Mean + SD), width = 0.2)

Genomic relationship matrices

#Genotype by cluster
c_geno <- list()
for(i in 1:8){
  c_geno[[i]] <- R_file[clust_data$cluster == i, ]
}

c_geno[[1]][1:5, 1:5] #cluster one
           AXAFFSNP-00001 AXAFFSNP-00002 AXAFFSNP-00003 AXAFFSNP-00004
AYT-O-1092             -1             -1             -1              1
AYT-O-1524              0             -1             -1              1
AYT-O-0789             -1             -1             -1              1
AYT-O-1055              1             -1             -1              1
AYT-O-1352             -1             -1             -1              0
           AXAFFSNP-00005
AYT-O-1092              1
AYT-O-1524              1
AYT-O-0789              1
AYT-O-1055              1
AYT-O-1352              1
#Allele frequnecies by cluster
pclust <- lapply(c_geno, function(x) colMeans(x+1)/2)
sapply(pclust, FUN = summary)
             [,1]      [,2]       [,3]      [,4]       [,5]      [,6]
Min.    0.0000000 0.0000000 0.00000000 0.0000000 0.00000000 0.0000000
1st Qu. 0.1357379 0.1739537 0.09335443 0.1177092 0.08081121 0.1081439
Median  0.4943008 0.4986987 0.48076294 0.5057453 0.49978777 0.4982958
Mean    0.5002393 0.4993927 0.49966665 0.5000844 0.50005717 0.5001681
3rd Qu. 0.8673331 0.8236526 0.90596242 0.8786958 0.91888703 0.8895860
Max.    1.0000000 1.0000000 1.00000000 1.0000000 1.00000000 1.0000000
              [,7]      [,8]
Min.    0.00000000 0.0000000
1st Qu. 0.09677943 0.2054046
Median  0.50136861 0.5025376
Mean    0.49944482 0.5006256
3rd Qu. 0.90113898 0.7919156
Max.    1.00000000 1.0000000
qclust <- lapply(pclust, function(x) 1-x)
sapply(qclust, FUN = summary)
             [,1]      [,2]       [,3]      [,4]       [,5]      [,6]
Min.    0.0000000 0.0000000 0.00000000 0.0000000 0.00000000 0.0000000
1st Qu. 0.1326669 0.1763474 0.09403758 0.1213042 0.08111297 0.1104140
Median  0.5056992 0.5013013 0.51923706 0.4942547 0.50021223 0.5017042
Mean    0.4997607 0.5006073 0.50033335 0.4999156 0.49994283 0.4998319
3rd Qu. 0.8642621 0.8260463 0.90664557 0.8822908 0.91918879 0.8918561
Max.    1.0000000 1.0000000 1.00000000 1.0000000 1.00000000 1.0000000
              [,7]      [,8]
Min.    0.00000000 0.0000000
1st Qu. 0.09886102 0.2080844
Median  0.49863139 0.4974624
Mean    0.50055518 0.4993744
3rd Qu. 0.90322057 0.7945954
Max.    1.00000000 1.0000000
#Compute minor allele frequencies
maf <- lapply(pclust, function(x) pmin(x, 1-x))
maf_1 <- lapply(maf, function(x) x>0.05)

#Obtain new genotype object with MAF >0.05
new_geno <- list()
for(i in seq_along(c_geno)){
  new_geno[[i]] <- c_geno[[i]][, maf_1[[i]]]
}

#Obtain new allele frequencies
pw <- lapply(new_geno, function(x) colMeans(x+1)/2)
sapply(pw, FUN = summary)
              [,1]       [,2]       [,3]       [,4]       [,5]       [,6]
Min.    0.05000483 0.05004018 0.05000427 0.05000896 0.05006137 0.05000463
1st Qu. 0.23009933 0.26063250 0.16051355 0.21993617 0.14770626 0.19264260
Median  0.49687102 0.50139067 0.51161663 0.50873067 0.50845270 0.49837105
Mean    0.50174849 0.50066385 0.50118703 0.50170389 0.50182858 0.50034948
3rd Qu. 0.77683962 0.74367034 0.84382579 0.78253646 0.85285452 0.81142281
Max.    0.94987878 0.94984258 0.94997281 0.94999920 0.94998864 0.94996741
              [,7]       [,8]
Min.    0.05004766 0.05011191
1st Qu. 0.20776171 0.27214164
Median  0.50591085 0.50357837
Mean    0.50323207 0.50172105
3rd Qu. 0.79919621 0.72878893
Max.    0.94995234 0.94983241
qw <- lapply(pw, function(x) 1-x)
sapply(qw, FUN = summary)
              [,1]       [,2]       [,3]      [,4]       [,5]       [,6]
Min.    0.05012122 0.05015742 0.05002719 0.0500008 0.05001136 0.05003259
1st Qu. 0.22316038 0.25632966 0.15617421 0.2174635 0.14714548 0.18857719
Median  0.50312898 0.49860933 0.48838337 0.4912693 0.49154730 0.50162895
Mean    0.49825151 0.49933615 0.49881297 0.4982961 0.49817142 0.49965052
3rd Qu. 0.76990067 0.73936750 0.83948645 0.7800638 0.85229374 0.80735740
Max.    0.94999517 0.94995982 0.94999573 0.9499910 0.94993863 0.94999537
              [,7]       [,8]
Min.    0.05004766 0.05016759
1st Qu. 0.20080379 0.27121107
Median  0.49408915 0.49642163
Mean    0.49676793 0.49827895
3rd Qu. 0.79223829 0.72785836
Max.    0.94995234 0.94988809

Dimensions of Genotype and Phenotype

Genotype by cluster (1 - 8): 1137x11868 1156x12773 948x11166 1297x11438 2436x10544 1250x11330 1049x10452 1094x13488 
Phenotype: 10367 20 

Obtain Z and G by cluster

  • \(P = 2(p_i - 0.5)\) as pclust_all. Check Efficient methods to compute genomic predictions for more details.

  • \(Z = M - P\), where \(M\) is the marker allele individual inherited \(\{-1/0/1\}\)

  • \(G = \frac{ZZ'}{2pq}\) or

  • \(G = \frac{ZZ'}{4pq}\) accounting for inbreeding

pclust_all <- lapply(pw, function(x) 2*(x-0.5))

Z_cl <- list()
for (i in seq_along(new_geno)) {
    Z_cl[[i]] <- sweep(new_geno[[i]], 2, pclust_all[[i]], FUN = "-")
}

Z_tc <- lapply(Z_cl, FUN = tcrossprod)

sum_2pq <- list()
for(i in seq_along(pw)){
  sum_2pq[[i]] <- sum(2*pw[[i]]*qw[[i]])
}

G_cl <- lapply(seq_along(Z_tc), function(i) Z_tc[[i]] / sum_2pq[[i]])
sum_4pq <- list()
for(i in seq_along(pw)){
  sum_4pq[[i]] <- sum(4*pw[[i]]*qw[[i]])
}

G_cl4 <- lapply(seq_along(Z_tc), function(i) Z_tc[[i]] / sum_4pq[[i]])

Summary of Z and G matrices by cluster

# Adjusted Allele frequencies
sapply(pclust_all, function(x) summary(x))
                [,1]         [,2]         [,3]         [,4]         [,5]
Min.    -0.899990349 -0.899919635 -0.899991462 -0.899982086 -0.899877256
1st Qu. -0.539801350 -0.478735007 -0.678972895 -0.560127659 -0.704587490
Median  -0.006257964  0.002781343  0.023233251  0.017461340  0.016905394
Mean     0.003496989  0.001327695  0.002374063  0.003407778  0.003657153
3rd Qu.  0.553679247  0.487340689  0.687651580  0.565072928  0.705709040
Max.     0.899757557  0.899685162  0.899945619  0.899998396  0.899977273
                 [,6]         [,7]         [,8]
Min.    -0.8999907395 -0.899904671 -0.899776170
1st Qu. -0.6147148024 -0.584476586 -0.455716729
Median  -0.0032579027  0.011821706  0.007156736
Mean     0.0006989691  0.006464136  0.003442105
3rd Qu.  0.6228456164  0.598392412  0.457577854
Max.     0.8999348120  0.899904671  0.899664824
#Z matrices
sapply(Z_cl, function(x) summary(colMeans(x)))
                 [,1]          [,2]          [,3]          [,4]          [,5]
Min.    -1.295748e-16 -1.125589e-16 -1.551736e-16 -1.118355e-16 -1.685160e-16
1st Qu. -4.071794e-17 -3.966454e-17 -4.005235e-17 -3.967528e-17 -4.034015e-17
Median  -5.004303e-19  4.802003e-19  8.197849e-19  3.637971e-19  4.557566e-20
Mean    -2.781852e-19  3.784483e-19  1.929573e-19  1.597456e-19 -1.738176e-19
3rd Qu.  3.988796e-17  4.072098e-17  4.005235e-17  4.056872e-17  4.058085e-17
Max.     1.890406e-16  1.235555e-16  1.190445e-16  1.129911e-16  1.126175e-16
                 [,6]          [,7]          [,8]
Min.    -1.111333e-16 -1.111281e-16 -1.308115e-16
1st Qu. -3.955725e-17 -4.085944e-17 -4.089761e-17
Median   4.884981e-19 -2.116726e-19 -2.029658e-19
Mean     2.079781e-19 -5.457801e-19 -1.634410e-19
3rd Qu.  4.026779e-17  3.894777e-17  3.998427e-17
Max.     1.170841e-16  1.314884e-16  1.116185e-16
#Z tcrossprod
sapply(Z_tc, function(x) summary(diag(x)))
             [,1]      [,2]     [,3]      [,4]     [,5]     [,6]      [,7]
Min.     3128.078  4542.555 1572.900  3596.515 1630.612 2829.882  2979.005
1st Qu.  6310.200  7358.895 4389.980  5477.433 3949.128 5422.668  5206.944
Median   7251.185  8135.607 5423.748  6564.930 4860.234 6350.592  6082.779
Mean     7391.636  8253.989 5490.644  6678.949 4940.544 6286.974  6160.460
3rd Qu.  8274.891  8958.878 6495.350  7784.395 5970.492 7177.905  6923.493
Max.    12008.138 11435.085 9543.387 10744.136 9106.588 9472.882 10000.699
             [,8]
Min.     5021.360
1st Qu.  8501.080
Median   9043.462
Mean     8881.487
3rd Qu.  9449.546
Max.    10665.394
#sum 2pq
sum_2pq
[[1]]
[1] 3933.18

[[2]]
[1] 4474.099

[[3]]
[1] 3044.081

[[4]]
[1] 3737.895

[[5]]
[1] 2832.316

[[6]]
[1] 3479.487

[[7]]
[1] 3350.206

[[8]]
[1] 4869.144
#sum 4pq
sum_4pq
[[1]]
[1] 7866.361

[[2]]
[1] 8948.197

[[3]]
[1] 6088.161

[[4]]
[1] 7475.791

[[5]]
[1] 5664.633

[[6]]
[1] 6958.973

[[7]]
[1] 6700.413

[[8]]
[1] 9738.288
# GRM with 2pq
sapply(G_cl, dim)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1137 1156  948 1297 2436 1250 1049 1094
[2,] 1137 1156  948 1297 2436 1250 1049 1094
sapply(G_cl, function(x) summary(diag(x)))
             [,1]     [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
Min.    0.7953049 1.015301 0.5167077 0.9621764 0.5757169 0.8133046 0.8892006
1st Qu. 1.6043505 1.644777 1.4421367 1.4653789 1.3943103 1.5584680 1.5542158
Median  1.8435933 1.818379 1.7817359 1.7563173 1.7159927 1.8251520 1.8156432
Mean    1.8793026 1.844838 1.8037116 1.7868207 1.7443475 1.8068682 1.8388300
3rd Qu. 2.1038677 2.002387 2.1337642 2.0825610 2.1079889 2.0629207 2.0665870
Max.    3.0530351 2.555841 3.1350638 2.8743812 3.2152438 2.7224941 2.9850993
            [,8]
Min.    1.031261
1st Qu. 1.745908
Median  1.857300
Mean    1.824035
3rd Qu. 1.940700
Max.    2.190404
# GRM with 4pq
sapply(G_cl4, dim)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1137 1156  948 1297 2436 1250 1049 1094
[2,] 1137 1156  948 1297 2436 1250 1049 1094
sapply(G_cl4, function(x) summary(diag(x)))
             [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
Min.    0.3976525 0.5076503 0.2583538 0.4810882 0.2878584 0.4066523 0.4446003
1st Qu. 0.8021752 0.8223885 0.7210684 0.7326895 0.6971551 0.7792340 0.7771079
Median  0.9217966 0.9091895 0.8908679 0.8781586 0.8579963 0.9125760 0.9078216
Mean    0.9396513 0.9224192 0.9018558 0.8934104 0.8721737 0.9034341 0.9194150
3rd Qu. 1.0519339 1.0011936 1.0668821 1.0412805 1.0539944 1.0314603 1.0332935
Max.    1.5265176 1.2779205 1.5675319 1.4371906 1.6076219 1.3612470 1.4925496
             [,8]
Min.    0.5156307
1st Qu. 0.8729542
Median  0.9286501
Mean    0.9120173
3rd Qu. 0.9703498
Max.    1.0952021

Alternatively, using scaled Z matrix

  • \(Z^* = scale(Z)\)

  • \(G^* = \frac{Z^*{Z^*}'}{p}\), p is the number of columns in \(Z^*\)

Z_alt <- lapply(Z_cl, function(x) scale(x))
sapply(Z_alt, function(x) summary(colMeans(x)))
                 [,1]          [,2]          [,3]          [,4]          [,5]
Min.    -1.183335e-16 -1.507769e-16 -1.167535e-16 -1.188894e-16 -1.358610e-16
1st Qu. -2.255598e-17 -2.373390e-17 -2.019635e-17 -2.246982e-17 -1.958169e-17
Median   6.102809e-20 -1.440601e-19  2.653322e-19 -4.065967e-19 -1.595148e-19
Mean     4.499391e-19  1.349149e-19  2.714749e-19  2.345482e-19  3.208878e-21
3rd Qu.  2.334935e-17  2.360184e-17  2.098503e-17  2.241766e-17  1.993561e-17
Max.     1.465773e-16  1.379953e-16  1.205743e-16  1.394159e-16  1.116547e-16
                 [,6]          [,7]          [,8]
Min.    -1.270359e-16 -1.262892e-16 -1.339828e-16
1st Qu. -2.170486e-17 -2.194863e-17 -2.405145e-17
Median   2.692291e-19 -2.579760e-19 -9.640876e-19
Mean    -1.378772e-19  1.391687e-19 -4.290848e-19
3rd Qu.  2.171319e-17  2.253321e-17  2.317933e-17
Max.     1.165845e-16  1.311610e-16  1.409661e-16
Z_trans <- lapply(Z_alt, FUN = tcrossprod)
sapply(Z_trans, function(x) summary(diag(x)))
             [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
Min.     5031.637  6464.258  2635.339  4879.816  2827.216  4016.239  3986.458
1st Qu.  9503.992 10579.287  8807.958  9256.664  7935.705  9420.770  8322.558
Median  11531.552 12563.944 11007.572 11227.615 10230.546 11400.253 10229.281
Mean    11857.562 12761.951 11154.222 11429.181 10539.672 11320.936 10442.036
3rd Qu. 13864.848 14450.498 13401.581 13646.197 13183.871 13176.901 12220.785
Max.    22802.686 20600.626 20554.402 19808.034 20747.528 17916.448 19204.892
             [,8]
Min.     7456.676
1st Qu. 12521.257
Median  13577.480
Mean    13475.671
3rd Qu. 14499.559
Max.    17907.692
#Number of columns per cluster
colm <- lapply(Z_alt, function(x) ncol(x))

#GRM by cluster
G_alt <- lapply(seq_along(Z_trans), function(i) Z_trans[[i]] / colm[[i]])
sapply(G_alt, dim)
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] 1137 1156  948 1297 2436 1250 1049 1094
[2,] 1137 1156  948 1297 2436 1250 1049 1094
sapply(G_alt, function(x) summary(diag(x)))
             [,1]      [,2]      [,3]      [,4]      [,5]      [,6]      [,7]
Min.    0.4239667 0.5060877 0.2360146 0.4266319 0.2681351 0.3544783 0.3814063
1st Qu. 0.8008082 0.8282539 0.7888195 0.8092904 0.7526275 0.8314889 0.7962646
Median  0.9716509 0.9836330 0.9858116 0.9816065 0.9702718 1.0062006 0.9786912
Mean    0.9991205 0.9991349 0.9989451 0.9992290 0.9995895 0.9992000 0.9990467
3rd Qu. 1.1682548 1.1313315 1.2002132 1.1930580 1.2503671 1.1630098 1.1692293
Max.    1.9213588 1.6128260 1.8408026 1.7317743 1.9677094 1.5813281 1.8374371
             [,8]
Min.    0.5528378
1st Qu. 0.9283257
Median  1.0066341
Mean    0.9990859
3rd Qu. 1.0749970
Max.    1.3276758