Introduction


This is an extension to the previous report titled “Meta-analysis of genome-wide predictions- part2”. Here, we employed resampling and transfer learning approaches to better understand population structure and model effectiveness.

Load Libraries

library(data.table)
library(gwaR)
library(regress)
library(Matrix)
library(knitr)
library(magrittr)
library(ggplot2)
library(kableExtra)
library(qqman)

Load Necessary Functions and datasets

Basically, the gwaR package was used in computing these functions to perform the genome-wide prediction from genomic best linear unbiased prediction (GBLUP).

source("Toolkit_function.R")
source("toolkit_function_Meta_v2.R")
load("Pop1.Rdata")
load("comm_g_var.Rdata")
load("Pop2.Rdata")
load("msu_g_var.Rdata")
load("Pop3.Rdata")
load("marc_g_var.Rdata")

#Required population datasets were loaded as in part2

CROSS VALIDATION


#Population 1
com <- read_cv(com_gb$model$G, com_filter$pheno, com_filter$geno, y_name=phenotype1, rand_var = cg_name1)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ 1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 2058.437 converged in: 5 iterations 
## 
## estimated fixed effects:
## [1] test.st p.value
## <0 rows> (or 0-length row.names)
## 
## estimated variance components:
## [1] prop.var
## <0 rows> (or 0-length row.names)
## 
## breeding value predicted for 1485 first 5 animals shown:
##              [,1]
## 2172  0.074752071
## 3035 -0.020949968
## 5057 -0.009117249
## 2257 -0.024352633
## 1272  0.006455878
## 108  -0.035397801
## .......................................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.019081922 9.105899e-05
## ASGA0000014 -0.009936004 7.230686e-05
## ASGA0000021  0.008358675 9.157354e-05
## ALGA0000009 -0.004028245 8.405175e-05
## ALGA0000014 -0.004027084 8.405125e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      5     6      6     6    6
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Population 2
ms <- read_cv(msu_gb$model$G, msu_filter$pheno,msu_filter$geno, y_name= phenotype2, fixed=c(fe_cov2,fe_fct2),rand_var=cg_name2)
## .......................................................... 
## summary gblup analysis of trait: ph24 
## 
## fixed effects equation:
## y ~ age_slg + sex
## 
## random effects equation:
## ~G + cgsl + In
## 
## log-likelihood: 1151.402 converged in: 5 iterations 
## 
## estimated fixed effects:
##                 Estimate    StdError   test.st     p.value
## (Intercept)  5.764751177 0.205720708 28.022221 0.000000000
## age_slg     -0.001405453 0.001241829 -1.131760 0.257735199
## sex2        -0.026336168 0.009090525 -2.897101 0.003766285
## 
## estimated variance components:
##         Estimate     StdError  prop.var
## G    0.002481777 0.0008578938 0.1319836
## cgsl 0.004251821 0.0013005144 0.2261165
## In   0.012070075 0.0008213470 0.6418999
## 
## breeding value predicted for 723 first 5 animals shown:
##                [,1]
## 991640 -0.031707474
## 991870 -0.036421203
## 991316  0.010059359
## 991752 -0.004674971
## 991314 -0.032111270
## 991099 -0.028024867
## .......................................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.001152574 4.516546e-06
## ASGA0000014  0.003356674 4.035863e-06
## ASGA0000021  0.001157903 4.514264e-06
## ALGA0000009  0.001094442 4.413839e-06
## ALGA0000014  0.001128480 4.482338e-06
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      4     4      4     6    6
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Population 3
mc <- read_cv(mac_gb$model$G, mac_filter$pheno, mac_filter$geno, y_name=phenotype3,fixed=c(fe_cov3), rand_var = cg_name3)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ age
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 556.5498 converged in: 6 iterations 
## 
## estimated fixed effects:
##                 Estimate     StdError   test.st   p.value
## (Intercept)  6.031501477 0.1528336909 39.464476 0.0000000
## age         -0.001108438 0.0007755991 -1.429138 0.1529645
## 
## estimated variance components:
##       Estimate    StdError   prop.var
## G  0.005494334 0.002398227 0.19398143
## cg 0.002451048 0.001150948 0.08653604
## In 0.020378638 0.002251864 0.71948253
## 
## breeding value predicted for 424 first 5 animals shown:
##                  [,1]
## 200558003  0.01070540
## 200537906  0.01406244
## 200537903  0.06806932
## 200526103  0.05959723
## 200553104 -0.02594420
## 200528903 -0.06126413
## .......................................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.004757849 1.276905e-05
## ASGA0000014 -0.005136264 7.220255e-06
## ASGA0000021  0.004757849 1.276905e-05
## ALGA0000009  0.005428632 1.287493e-05
## ALGA0000014  0.005428632 1.287493e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Meta_analysis
meta <- meta_gp(N= c(1485, 723,424), unname(com$gb$sigma[1]), com$gwa, unname(ms$gb$sigma[1]), ms$gwa, unname(mc$gb$sigma[1]), mc$gwa)
meta$table
Meta analysis table (First 6 rows)
SE_Meta Z_Meta p_Meta \(\beta_Meta\) \(\hat{g}_Meta\)
0.6096353 -2.3211107 0.0202809 -1.4150311 -0.0225171
0.6879668 -0.5999645 0.5485299 -0.4127556 -0.0051576
0.6086972 1.4715553 0.1411410 0.8957316 0.0142976
0.6250704 0.5663954 0.5711250 0.3540370 0.0053589
0.6237178 0.5738182 0.5660908 0.3579006 0.0054409
0.6444176 -1.3092864 0.1904373 -0.8437272 -0.0120159

Estimated breeding values using test set with GWA (Test_BV)

  • The test set is the remaining 20% of the sample size in each population. Estimated Breeding values was obtained using the test set and the individual population SNP-effects
Pop1 Pop2 Pop3
-0.0010243 -0.0337288 -0.0129651
0.0361363 -0.0665441 -0.0140222
0.0675089 -0.0363723 -0.0023115
0.0242680 -0.0377739 -0.0293494
-0.0192831 -0.0399810 -0.0671329
0.0341076 0.0282616 -0.0618337

Estimated breeding values using test set with meta_GWA (Meta_BV)

  • Estimated Breeding values was obtained using the test set and the meta-analysis SNP-effects
Pop1 Pop2 Pop3
0.0236127 -0.0835597 -0.0202352
0.0185148 -0.1766177 0.0156304
0.0575872 -0.1025981 -0.0153853
0.0322101 -0.1118418 -0.0508079
-0.0064854 -0.0955318 -0.0889278
0.0363007 0.0749474 -0.0717655

Correlation of Breeding values between Test_BV and Meta_BV

Correlation between Test_BV and Meta_BV
Pop1 Pop2 Pop3
0.7721364 0.9723881 0.9117705

kable(cbind(rbind(cor_c,cor_cm),rbind(cor_s ,cor_sm),
            rbind(cor_r,cor_rm)), col.names = c("Pop1", "Pop2", "Pop3"), caption = "Correlation between breeding value and phenotype")%>%kable_styling(latex_options = "scale_down")%>%
  add_footnote("Ind: Test_BV ; Meta: Meta_BV")
Correlation between breeding value and phenotype
Pop1 Pop2 Pop3
Ind 0.300697 0.2951063 0.2765412
Meta 0.258237 0.2907489 0.2225283
a Ind: Test_BV ; Meta: Meta_BV

Obtaining breeding values from training and testing set of the sample size

  • The training set is denoted as Animals w/ records and the testing set is denoted as Animals w/o records.
#Animal with records
head(uhat_train)
##              [,1]
## 2172  0.074752071
## 3035 -0.020949968
## 5057 -0.009117249
## 2257 -0.024352633
## 1272  0.006455878
## 108  -0.035397801
#Animal w/o records
head(uhat_test)
##              [,1]
## 1004 -0.002844066
## 1006  0.034246259
## 1012  0.065559619
## 1029  0.022400412
## 103  -0.021068310
## 1033  0.032221388
#Comparing breeding values obtained from GBLUP with Animals w/o records
plot(com_pol, uhat_test, col="darkred", xlab="GBLUP", ylab="Animals w/o records", 
     main= "Breeding values obtained from GBLUP vs Animals w/o records")

cor(com_pol, uhat_test)
##      [,1]
## [1,]    1
#Although they (GBLUP vs Animals w/o records) are correlated, they do not contain exact values
all.equal(com_pol, uhat_test)
## [1] "Mean relative difference: 0.06617495"

PARTITIONING POPULATION 1


set.seed(904)
cat <- sample(1:2, nrow(com_pheno), replace = T)
table(cat)
## cat
##   1   2 
## 957 928

Sub-population analysis

Sub-population 1

#Manhattan Plot 1
pvalue1<-getpvalue(com_Gwt1,log.p = F)
manhattan_plot(pvalue1, map=com_mp)

#Predicted breeding value for Sub-population 1  
c1 <- summary(com_gbt1)$uhat
head(c1)
##              [,1]
## 10    0.016554245
## 1001  0.004126127
## 1005  0.012545231
## 1006  0.025632978
## 1007  0.013790618
## 1008 -0.002513178
#Transfer learning (TL) 
com1_2_p <- get_polys(com_filter1$geno, com_Gwt2)
head(com1_2_p)
##             [,1]
## 10   0.002641256
## 1001 0.067513259
## 1005 0.039548757
## 1006 0.012680162
## 1007 0.026605863
## 1008 0.034791442
#Sub-population 1 with TL_Pheno 
y_P1 <- add_poly(com_filter1$pheno, com1_2_p)

#Sub-population 1 Genomic prediction with TL
com_gb_p1<- model_gb(G=com_matr1, pheno=y_P1, y_name=phenotype1,fixed=c("poly1"), random=cg_name1)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ poly1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 1284.179 converged in: 6 iterations 
## 
## estimated fixed effects:
##             Estimate   StdError    test.st      p.value
## (Intercept) 5.660731 0.01684685 336.011146 0.000000e+00
## poly1       1.178858 0.20884246   5.644723 1.654469e-08
## 
## estimated variance components:
##       Estimate    StdError  prop.var
## G  0.006324179 0.002131077 0.2163540
## cg 0.004157434 0.001737635 0.1422283
## In 0.018749086 0.001685454 0.6414176
## 
## breeding value predicted for 947 first 5 animals shown:
##              [,1]
## 10    0.016210925
## 1001 -0.030967599
## 1005 -0.009780158
## 1006  0.012745714
## 1007  0.003855220
## 1008 -0.022632182
## ..........................................................
#Sub-population 1 Estimated SNP-effects with TL
com_Gw_p1<-run_gwa(com_gb_p1, com_filter1$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.015892434 4.071745e-05
## ASGA0000014 -0.008044152 3.070702e-05
## ASGA0000021  0.006791416 4.169914e-05
## ALGA0000009 -0.002991308 3.665302e-05
## ALGA0000014 -0.002990653 3.665271e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Sub-population 1 TL Predicted breeding value 
c1_p <- summary(com_gb_p1)$uhat
head(c1_p)
##              [,1]
## 10    0.016210925
## 1001 -0.030967599
## 1005 -0.009780158
## 1006  0.012745714
## 1007  0.003855220
## 1008 -0.022632182

Sub-population 2

#Manhattan Plot 2
pvalue2<-getpvalue(com_Gwt2,log.p = F)
manhattan_plot(pvalue2, map=com_mp)

#Predicted breeding value for Sub-population 2 
c2 <- summary(com_gbt2)$uhat
head(c2)
##              [,1]
## 1     0.033787750
## 1003  0.038860245
## 1004 -0.036775038
## 1009 -0.032508227
## 1010  0.006205191
## 1011  0.008111481
#Transfer learning (TL) 
com2_1_p <- get_polys(com_filter2$geno, com_Gwt1)
head(com2_1_p)
##               [,1]
## 1    -0.0003837912
## 1003  0.0088032200
## 1004 -0.0038636916
## 1009 -0.0125945150
## 1010 -0.0100969409
## 1011  0.0035560795
#Sub-population 2 with TL_Pheno
y_P2 <- add_poly(com_filter2$pheno, com2_1_p)

#Sub-population 2 Genomic prediction with TL
com_gb_p2<- model_gb(G=com_matr2, pheno=y_P2, y_name=phenotype1,fixed=c("poly1"), random=cg_name1)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ poly1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 1266.337 converged in: 7 iterations 
## 
## estimated fixed effects:
##             Estimate   StdError    test.st      p.value
## (Intercept) 5.654528 0.01685741 335.432772 0.000000e+00
## poly1       1.183378 0.20215130   5.853924 4.801077e-09
## 
## estimated variance components:
##       Estimate    StdError  prop.var
## G  0.005648056 0.002058812 0.2066096
## cg 0.004119595 0.001716894 0.1506975
## In 0.017569200 0.001639199 0.6426929
## 
## breeding value predicted for 910 first 5 animals shown:
##              [,1]
## 1     0.030195656
## 1003  0.028471621
## 1004 -0.032222667
## 1009 -0.028484597
## 1010 -0.000304713
## 1011  0.003009086
## ..........................................................
#Sub-population 2 Estimated SNP-effects with TL
com_Gw_p2<-run_gwa(com_gb_p2, com_filter2$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                      ghat     var_ghat
## MARC0044150  2.654754e-04 3.543194e-05
## ASGA0000014 -8.429682e-05 2.598296e-05
## ASGA0000021  1.910681e-03 3.628252e-05
## ALGA0000009  2.273164e-03 3.261391e-05
## ALGA0000014  2.273408e-03 3.261381e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Sub-population 2 TL Predicted breeding value 
c2_p <- summary(com_gb_p2)$uhat
head(c2_p)
##              [,1]
## 1     0.030195656
## 1003  0.028471621
## 1004 -0.032222667
## 1009 -0.028484597
## 1010 -0.000304713
## 1011  0.003009086

Correction between predicted breeding values

#Sub-population 1 and sub-population 1 TL
cor(c1, c1_p)
##           [,1]
## [1,] 0.9091838
#Sub-population 2 and sub-population 2 TL
cor(c2, c2_p)
##           [,1]
## [1,] 0.9316224

Cross-validation for sub-population and sub-population_TL

Prediction accuracy under cross-validation
subpop1 subpop2 subpop1_TL subpop2_TL
0.225±0.09 0.188±0.097 0.24±0.082 0.279±0.082
a values are mean ± standard deviation

Meta analysis of Population 1 using its subpopulations

#Meta_analysis
com_met <- meta_gp(N= c(947, 910), com_gbt1$sigma[1], com_Gwt1, com_gbt2$sigma[1], com_Gwt2)
com_met$table
Meta analysis table (First 6 rows)
SE_Meta Z_Meta p_Meta \(\beta_Meta\) \(\hat{g}_Meta\)
0.7054193 -1.8063865 0.0708580 -1.2742599 -0.0209618
0.8137637 -1.0846330 0.2780842 -0.8826350 -0.0109107
0.7002158 1.0902156 0.2756182 0.7633862 0.0127452
0.7412763 0.1715888 0.8637608 0.1271947 0.0018949
0.7412783 0.1717038 0.8636704 0.1272803 0.0018961
0.8278155 -0.4356580 0.6630849 -0.3606445 -0.0043080
#Plot
plot(com_gw$ghat, com_met$ghat, xlab = expression(hat(g)[POP1]), 
     ylab = expression(hat(g)[Meta]), cex.lab= 1.3, col="darkred",
     main= "Estimated SNP-effects of Population 1 and Meta_Sub_population 1")

#Correlation between population1 ghat and partitioned population1 meta_ghat
cor(com_gw$ghat,com_met$ghat) 
## [1] 0.9778922

Predicting test animals in Sub-populations using Meta_analysis


Cross-validation with Sub-populations

##### Training set
#Sub_population 1
Sub_com1 <- read_cv(com_gbt1$model$G, com_filter1$pheno, com_filter1$geno, y_name=phenotype1, rand_var = cg_name1)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ 1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 1005.28 converged in: 5 iterations 
## 
## estimated fixed effects:
## [1] test.st p.value
## <0 rows> (or 0-length row.names)
## 
## estimated variance components:
## [1] prop.var
## <0 rows> (or 0-length row.names)
## 
## breeding value predicted for 757 first 5 animals shown:
##              [,1]
## 4002  0.053915939
## 6283 -0.028716699
## 5038  0.008479372
## 2200 -0.008532503
## 4171  0.004851873
## 2196 -0.013317779
## .......................................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.015162579 5.291354e-05
## ASGA0000014 -0.003960409 3.943470e-05
## ASGA0000021  0.005392140 5.440300e-05
## ALGA0000009  0.001113502 4.813478e-05
## ALGA0000014  0.001113834 4.813445e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Sub_population 2
Sub_com2 <- read_cv(com_gbt2$model$G, com_filter2$pheno, com_filter2$geno, y_name=phenotype1, rand_var = cg_name1)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ 1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 1001.886 converged in: 7 iterations 
## 
## estimated fixed effects:
## [1] test.st p.value
## <0 rows> (or 0-length row.names)
## 
## estimated variance components:
## [1] prop.var
## <0 rows> (or 0-length row.names)
## 
## breeding value predicted for 728 first 5 animals shown:
##             [,1]
## 4048 -0.00943938
## 5092  0.03221101
## 2257 -0.02049025
## 4229 -0.01079700
## 2253 -0.03951310
## 1202  0.02272243
## .......................................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150  0.001535618 4.248199e-05
## ASGA0000014 -0.001214137 3.089973e-05
## ASGA0000021 -0.003074442 4.332568e-05
## ALGA0000009 -0.001870648 3.867555e-05
## ALGA0000014 -0.001870309 3.867535e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#Meta_analysis for Sub_population 1 and Sub_populaion 2
Sub_meta <- meta_gp(N= c(757,728), unname(Sub_com1$gb$sigma[1]), Sub_com1$gwa, unname(Sub_com2$gb$sigma[1]), Sub_com2$gwa)
Sub_meta$table
Meta analysis table (First 6 rows)
SE_Meta Z_Meta p_Meta \(\beta_Meta\) \(\hat{g}_Meta\)
0.7809154 -1.2638336 0.2062898 -0.9869471 -0.0123755
0.9103557 -0.5943948 0.5522481 -0.5411107 -0.0049928
0.7717901 0.1660849 0.8680901 0.1281827 0.0016455
0.8185940 -0.1077957 0.9141577 -0.0882409 -0.0010070
0.8185964 -0.1077236 0.9142150 -0.0881821 -0.0010063
0.9172416 -0.3533348 0.7238375 -0.3240934 -0.0029456
Breeding values
Pop1 Pop2 Pop1_Meta Pop2_Meta
-0.0277650 0.0302303 -0.0056705 0.0569013
0.0048827 -0.0150898 0.0477059 -0.0619549
-0.0270898 0.0277710 -0.0406073 0.0195021
-0.0070633 -0.0922323 -0.0252119 -0.1263839
-0.0060241 0.0173443 0.0192863 0.0310989
-0.0114434 0.0401475 0.0505408 0.0629839
a Pop1 & Pop2: Sub_population with its SNP-effects; Pop1_Meta &Pop2_Meta: Sub_population with Meta_SNP effects
Correlation between breeding values and response variable
Sub_Pop1 Sub_Pop2
0.2353979 0.195054
a Breeding values for testing sets

Correlations between Individual Sub-population and Meta_Sub_populations

#Sub_population 1
cor(Sub_pol1, Sub_poM1)
##           [,1]
## [1,] 0.7961295
#Sub_population 2
cor(Sub_pol2, Sub_poM2)
##           [,1]
## [1,] 0.8037521