Introduction


Here, we employed resampling and transfer learning approaches to better understand population structure and for model validation. This was done using ‘meat redness(CIE a*)’ as the phenotypic variable.

Load Libraries

library(data.table)
library(gwaR)
library(regress)
library(Matrix)
library(knitr)
library(magrittr)
library(ggplot2)
library(kableExtra)
library(qqman)

Load Necessary Functions and datasets

Basically, the gwaR package was used in computing these functions to perform the genome-wide prediction from genomic best linear unbiased prediction (GBLUP).

source("Toolkit_function.R")
source("toolkit_function_Meta_v2.R")

PARTITIONING POPULATION 1


set.seed(904)
cat <- sample(1:2, nrow(com_pheno), replace = T)
table(cat)
## cat
##   1   2 
## 971 948

Sub-population analysis

Sub-population 1

#Manhattan Plot 1
pvalue1<-getpvalue(com_Gwt1,log.p = F)
manhattan_plot(pvalue1, map=com_mp)

#Predicted breeding value for Sub-population 1  
c1 <- summary(com_gbt1)$uhat
head(c1)
##            [,1]
## 10   -0.2831910
## 1001 -0.2196702
## 1005 -0.0657902
## 1006  0.2199628
## 1007 -0.1298514
## 1008 -0.2753362
#Variance component for Subpopulation 1
varcomp(com_gbt1)
##     Estimate   StdError  prop.var         se
## G  0.3879295 0.11098691 0.1845088 0.05703041
## cg 0.8286072 0.30667190 0.3941060 0.09231982
## In 0.8859618 0.08399623 0.4213852 0.07880826
#Transfer learning (TL) 
com1_2_p <- get_polys(com_filter1$geno, com_Gwt2)
head(com1_2_p)
##              [,1]
## 10   -0.018086779
## 1001 -0.048568155
## 1005  0.051350709
## 1006  0.151040939
## 1007 -0.002328433
## 1008 -0.104864357
#Sub-population 1 with TL_Pheno 
y_P1 <- add_poly(com_filter1$pheno, com1_2_p)

#Sub-population 1 Genomic prediction with TL
com_gb_p1<- model_gb(G=com_matr1, pheno=y_P1, y_name=phenotype1,fixed=c("poly1"), random=cg_name1)
## .......................................................... 
## summary gblup analysis of trait: plantA 
## 
## fixed effects equation:
## y ~ poly1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: -577.0897 converged in: 7 iterations 
## 
## estimated fixed effects:
##               Estimate  StdError   test.st      p.value
## (Intercept) -0.9388214 0.2213348 -4.241634 2.218984e-05
## poly1        0.9331605 0.3424383  2.725047 6.429237e-03
## 
## estimated variance components:
##     Estimate   StdError  prop.var
## G  0.3615603 0.10894856 0.1746332
## cg 0.8124836 0.30089088 0.3924286
## In 0.8963547 0.08351242 0.4329382
## 
## breeding value predicted for 959 first 5 animals shown:
##            [,1]
## 10   -0.2999273
## 1001 -0.2313401
## 1005 -0.1157141
## 1006  0.1479159
## 1007 -0.1522491
## 1008 -0.2463351
## ..........................................................
#Sub-population 1 Estimated SNP-effects with TL
com_Gw_p1<-run_gwa(com_gb_p1, com_filter1$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat    var_ghat
## MARC0044150 -0.045452799 0.002795781
## ASGA0000014 -0.025444027 0.001992046
## ASGA0000021  0.004200506 0.002852422
## ALGA0000009  0.011522688 0.002511035
## ALGA0000014  0.011522407 0.002511014
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Sub-population 1 TL Predicted breeding value 
c1_p <- summary(com_gb_p1)$uhat
head(c1_p)
##            [,1]
## 10   -0.2999273
## 1001 -0.2313401
## 1005 -0.1157141
## 1006  0.1479159
## 1007 -0.1522491
## 1008 -0.2463351
#Sub-population 1 TL variance component
varcomp(com_gb_p1)
##     Estimate   StdError  prop.var         se
## G  0.3615603 0.10894856 0.1746332 0.05630812
## cg 0.8124836 0.30089088 0.3924286 0.09222661
## In 0.8963547 0.08351242 0.4329382 0.08031404

Sub-population 2

#Manhattan Plot 2
pvalue2<-getpvalue(com_Gwt2,log.p = F)
manhattan_plot(pvalue2, map=com_mp)

#Predicted breeding value for Sub-population 2 
c2 <- summary(com_gbt2)$uhat
head(c2)
##              [,1]
## 1     0.014958563
## 1003 -0.009658086
## 1004 -0.295510682
## 1009  0.336859810
## 1010  0.043002697
## 1011  0.124811677
#Variance component for Sub-population 2
varcomp(com_gbt2)
##     Estimate   StdError   prop.var         se
## G  0.2120275 0.09572963 0.09270471 0.04404780
## cg 1.1058942 0.40583901 0.48352965 0.09555015
## In 0.9692063 0.08136283 0.42376565 0.08579585
#Transfer learning (TL) 
com2_1_p <- get_polys(com_filter2$geno, com_Gwt1)
head(com2_1_p)
##              [,1]
## 1    -0.185125827
## 1003  0.174341713
## 1004 -0.319766401
## 1009 -0.006256207
## 1010 -0.165677185
## 1011 -0.137574913
#Sub-population 2 with TL_Pheno
y_P2 <- add_poly(com_filter2$pheno, com2_1_p)

#Sub-population 2 Genomic prediction with TL
com_gb_p2<- model_gb(G=com_matr2, pheno=y_P2, y_name=phenotype1,fixed=c("poly1"), random=cg_name1)
## .......................................................... 
## summary gblup analysis of trait: plantA 
## 
## fixed effects equation:
## y ~ poly1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: -547.1365 converged in: 6 iterations 
## 
## estimated fixed effects:
##               Estimate  StdError   test.st      p.value
## (Intercept) -1.0483256 0.2564849 -4.087279 4.364613e-05
## poly1        0.6002514 0.2007185  2.990513 2.785092e-03
## 
## estimated variance components:
##     Estimate   StdError   prop.var
## G  0.1711907 0.08991781 0.07649579
## cg 1.0772359 0.39546829 0.48135796
## In 0.9894836 0.07935234 0.44214625
## 
## breeding value predicted for 932 first 5 animals shown:
##             [,1]
## 1     0.07176061
## 1003 -0.04418299
## 1004 -0.22315859
## 1009  0.27443537
## 1010  0.06398593
## 1011  0.10515657
## ..........................................................
#Sub-population 2 Estimated SNP-effects with TL
com_Gw_p2<-run_gwa(com_gb_p2, com_filter2$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150  0.014659735 0.0006257262
## ASGA0000014 -0.006151563 0.0005038594
## ASGA0000021 -0.010140501 0.0006453725
## ALGA0000009 -0.008081123 0.0005845130
## ALGA0000014 -0.008081017 0.0005845102
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    1
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Sub-population 2 TL Predicted breeding value 
c2_p <- summary(com_gb_p2)$uhat
head(c2_p)
##             [,1]
## 1     0.07176061
## 1003 -0.04418299
## 1004 -0.22315859
## 1009  0.27443537
## 1010  0.06398593
## 1011  0.10515657
#Sub-population 2 TL variance component
varcomp(com_gb_p2)
##     Estimate   StdError   prop.var         se
## G  0.1711907 0.08991781 0.07649579 0.04165928
## cg 1.0772359 0.39546829 0.48135796 0.09534506
## In 0.9894836 0.07935234 0.44214625 0.08813793

Correction between predicted breeding values

#Sub-population 1 and sub-population 1 TL
cor(c1, c1_p)
##           [,1]
## [1,] 0.9718469
#Sub-population 2 and sub-population 2 TL
cor(c2, c2_p)
##         [,1]
## [1,] 0.97159

Cross-validation for sub-population and sub-population_TL

Prediction accuracy under cross-validation
subpop1 subpop2 subpop1_TL subpop2_TL POP1
0.22±0.095 0.175±0.1 0.241±0.097 0.213±0.111 0.229±0.067
a values are mean ± standard deviation

Meta analysis of Population 1 using its subpopulations

com_met <- meta_gp(N= c(959, 932), com_gbt1$sigma[1], com_Gwt1, com_gbt2$sigma[1], com_Gwt2)
com_met$table
Meta analysis table (First 6 rows)
SE_Meta Z_Meta p_Meta \(\beta_Meta\) \(\hat{g}_Meta\)
4.867369 -0.2811542 0.7785921 -1.3684813 -0.0174003
5.589525 -0.5669638 0.5707388 -3.1690585 -0.0305553
4.817894 -0.1972860 0.8436037 -0.9505032 -0.0123352
5.093444 0.0109843 0.9912360 0.0559477 0.0006496
5.093461 0.0109819 0.9912379 0.0559361 0.0006495
5.706303 0.8424085 0.3995593 4.8070384 0.0444707
#Plot
plot(com_gw$ghat, com_met$ghat, xlab = expression(hat(g)[POP1]), 
     ylab = expression(hat(g)[Meta]), cex.lab= 1.3, col="darkred",
     main= "Estimated SNP-effects of Population 1 and Meta_Sub_population 1")

#Correlation between population1 ghat and partitioned population1 meta_ghat
cor(com_gw$ghat,com_met$ghat) 
## [1] 0.9858568