Resampling of Genomic prediction analysis

Author

Blessing Olabosoye

Published

July 24, 2023

Introduction

Here, we employed resampling and transfer learning approaches to better understand population structure and model effectiveness. The main difference here is the resampling of the meta-analysis.

Load Libraries

Code

library(data.table)
library(gwaR)
library(regress)
library(Matrix)
library(knitr)
library(magrittr)
library(ggplot2)
library(kableExtra)
library(qqman)

Load Necessary Functions and datasets

Basically, the gwaR package was used in computing these functions to perform the genome-wide prediction from genomic best linear unbiased prediction (GBLUP).

Code

source("Toolkit_function.R")
source("toolkit_function_Meta_v3.R")

Code

#COMM
source_pop1 = 'comm'
pheno_name1= 'https://www.dropbox.com/s/1e2bw6yb9viha1w/pheno_comm.csv?dl=1'
geno_name1 = 'https://www.dropbox.com/s/oas827ngl39yxrd/genos_commercial.csv?dl=1'
map_name1 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name1 = 'cg'
id_name1 = 'animal'
phenotype1 = "ph"
phenotype2 = "plantA"

na_per_row = 0.05
na_per_col = 0.05
MAF=0.0

Code

#COMM_ph
com_geno <-read_genotypes(geno_name1)
com_pheno<-read_pheno(pheno_name1, source_pop1, id_name1, 
                       cg_name=cg_name1,phenotype = phenotype1)
com_mp<- read_map(map_name1)
com_ft <- filter_and_match(com_geno, com_pheno, com_mp, n_maf = MAF,
                                n_row=na_per_row, n_col=na_per_col)
com_mat <- read_G_matrix(com_ft$geno)
com_gb<- model_gb(com_mat, com_ft$pheno,phenotype1,random=cg_name1)
com_gw<-run_gwa(com_gb, com_ft$geno)

#COMM_plantA
com_genoA <-read_genotypes(geno_name1)
com_phenoA<-read_pheno(pheno_name1, source_pop1, id_name1, 
                       cg_name=cg_name1,phenotype = phenotype2)
com_mpA<- read_map(map_name1)
com_ftA <- filter_and_match(com_genoA, com_phenoA, com_mpA, n_maf = MAF,
                                n_row=na_per_row, n_col=na_per_col)
com_matA <- read_G_matrix(com_ftA$geno)
com_gbA<- model_gb(com_matA, com_ftA$pheno,phenotype2,random=cg_name1)
com_gwA<-run_gwa(com_gbA, com_ftA$geno)

Partitioning population for pH

Population 1 was randomly partitioned to create two closely-related sub-populations and performing analysis using ultimate meat pH as the phenotype.

Code

set.seed(904)
ph <- sample(1:2, nrow(com_pheno), replace = T)
table(ph)

ph
  1   2 
957 928

Code

ph1 <-com_pheno[ph==1, ]
ph2  <-com_pheno[ph==2, ]

ph_ft1 <- filter_and_match(com_geno, ph1, map=com_mp, n_maf = MAF,
                            n_row=na_per_row, n_col=na_per_col)
ph_ft2 <- filter_and_match(com_geno, ph2, map=com_mp, n_maf = MAF,
                            n_row=na_per_row, n_col=na_per_col)
ph_matr1 <- read_G_matrix(ph_ft1$geno)
ph_matr2 <- read_G_matrix(ph_ft2$geno)
ph_gbt1<- model_gb(ph_matr1, ph_ft1$pheno, y_name=phenotype1,random=cg_name1)
ph_gbt2<- model_gb(ph_matr2, ph_ft2$pheno, y_name=phenotype1,random=cg_name1)
ph_Gwt1<-run_gwa(ph_gbt1, ph_ft1$geno)
ph_Gwt2<-run_gwa(ph_gbt2, ph_ft2$geno)

Sub-population analysis

In this analysis, transfer learning approach was employed to determine prediction improvement with parameters transfer between closely-related populations

Sub-population 1

#Manhattan Plot 1
pvalue1<-getpvalue(ph_Gwt1,log.p = F)
manhattan_plot(pvalue1, map=com_mp)

#Predicted breeding value for Sub-population 1  
p1 <- summary(ph_gbt1)$uhat
head(p1)

             [,1]
10    0.016554245
1001  0.004126127
1005  0.012545231
1006  0.025632978
1007  0.013790618
1008 -0.002513178

#Variance component for Subpopulation 1
varcomp(ph_gbt1)

      Estimate    StdError  prop.var         se
G  0.008457395 0.002348583 0.2747147 0.07398355
cg 0.004286183 0.001800863 0.1392246 0.05223169
In 0.018042523 0.001753117 0.5860607 0.08798344

#Transfer learning (TL) 
ph1_2_p <- get_polys(ph_ft1$geno, ph_Gwt2)
head(ph1_2_p)

            [,1]
10   0.002641256
1001 0.067513259
1005 0.039548757
1006 0.012680162
1007 0.026605863
1008 0.034791442

#Sub-population 1 with TL_Pheno 
y_P1 <- add_poly(ph_ft1$pheno, ph1_2_p)

#Sub-population 1 Genomic prediction with TL
ph_gb_p1<- model_gb(G=ph_matr1, pheno=y_P1, y_name=phenotype1,fixed=c("poly1"), random=cg_name1)

.......................................................... 
summary gblup analysis of trait: ph 

fixed effects equation:
y ~ poly1

random effects equation:
~G + cg + In

log-likelihood: 1284.179 converged in: 6 iterations 

estimated fixed effects:
            Estimate   StdError    test.st      p.value
(Intercept) 5.660731 0.01684685 336.011146 0.000000e+00
poly1       1.178858 0.20884246   5.644723 1.654469e-08

estimated variance components:
      Estimate    StdError  prop.var
G  0.006324179 0.002131077 0.2163540
cg 0.004157434 0.001737635 0.1422283
In 0.018749086 0.001685454 0.6414176

breeding value predicted for 947 first 5 animals shown:
             [,1]
10    0.016210925
1001 -0.030967599
1005 -0.009780158
1006  0.012745714
1007  0.003855220
1008 -0.022632182
..........................................................

#Sub-population 1 Estimated SNP-effects with TL
ph_Gw_p1<-run_gwa(ph_gb_p1, ph_ft1$geno)

......................................... 
Estimated SNP Effect and its variance: 
                    ghat     var_ghat
MARC0044150 -0.015892434 4.071745e-05
ASGA0000014 -0.008044152 3.070702e-05
ASGA0000021  0.006791416 4.169914e-05
ALGA0000009 -0.002991308 3.665302e-05
ALGA0000014 -0.002990653 3.665271e-05
......................................... 
Summary of GWA: 
              <0.001 <0.01 <0.025 <0.05 <0.1
signif.values      0     0      0     0    0
attr(,"class")
[1] "summary.gwas"
.........................................

#Sub-population 1 TL Predicted breeding value 
p1_p <- summary(ph_gb_p1)$uhat
head(p1_p)

             [,1]
10    0.016210925
1001 -0.030967599
1005 -0.009780158
1006  0.012745714
1007  0.003855220
1008 -0.022632182

#Sub-population 1 TL variance component
varcomp(ph_gb_p1)

      Estimate    StdError  prop.var         se
G  0.006324179 0.002131077 0.2163540 0.07088010
cg 0.004157434 0.001737635 0.1422283 0.05284408
In 0.018749086 0.001685454 0.6414176 0.09091368

Sub-population 2

#Manhattan Plot 2
pvalue2<-getpvalue(ph_Gwt2,log.p = F)
manhattan_plot(pvalue2, map=com_mp)

#Predicted breeding value for Sub-population 2 
p2 <- summary(ph_gbt2)$uhat
head(p2)

             [,1]
1     0.033787750
1003  0.038860245
1004 -0.036775038
1009 -0.032508227
1010  0.006205191
1011  0.008111481

#Variance component for Subpopulation 2
varcomp(ph_gbt2)

      Estimate    StdError  prop.var         se
G  0.007903345 0.002302598 0.2719105 0.07713626
cg 0.004373521 0.001821427 0.1504687 0.05544754
In 0.016789118 0.001720322 0.5776208 0.09146270

#Transfer learning (TL) 
ph2_1_p <- get_polys(ph_ft2$geno, ph_Gwt1)
head(ph2_1_p)

              [,1]
1    -0.0003837912
1003  0.0088032200
1004 -0.0038636916
1009 -0.0125945150
1010 -0.0100969409
1011  0.0035560795

#Sub-population 2 with TL_Pheno
y_P2 <- add_poly(ph_ft2$pheno, ph2_1_p)

#Sub-population 2 Genomic prediction with TL
ph_gb_p2<- model_gb(G=ph_matr2, pheno=y_P2, y_name=phenotype1,fixed=c("poly1"), random=cg_name1)

.......................................................... 
summary gblup analysis of trait: ph 

fixed effects equation:
y ~ poly1

random effects equation:
~G + cg + In

log-likelihood: 1266.337 converged in: 7 iterations 

estimated fixed effects:
            Estimate   StdError    test.st      p.value
(Intercept) 5.654528 0.01685741 335.432772 0.000000e+00
poly1       1.183378 0.20215130   5.853924 4.801077e-09

estimated variance components:
      Estimate    StdError  prop.var
G  0.005648056 0.002058812 0.2066096
cg 0.004119595 0.001716894 0.1506975
In 0.017569200 0.001639199 0.6426929

breeding value predicted for 910 first 5 animals shown:
             [,1]
1     0.030195656
1003  0.028471621
1004 -0.032222667
1009 -0.028484597
1010 -0.000304713
1011  0.003009086
..........................................................

#Sub-population 2 Estimated SNP-effects with TL
ph_Gw_p2<-run_gwa(ph_gb_p2, ph_ft2$geno)

......................................... 
Estimated SNP Effect and its variance: 
                     ghat     var_ghat
MARC0044150  2.654754e-04 3.543194e-05
ASGA0000014 -8.429682e-05 2.598296e-05
ASGA0000021  1.910681e-03 3.628252e-05
ALGA0000009  2.273164e-03 3.261391e-05
ALGA0000014  2.273408e-03 3.261381e-05
......................................... 
Summary of GWA: 
              <0.001 <0.01 <0.025 <0.05 <0.1
signif.values      0     0      0     0    0
attr(,"class")
[1] "summary.gwas"
.........................................

#Sub-population 2 TL Predicted breeding value 
p2_p <- summary(ph_gb_p2)$uhat
head(p2_p)

             [,1]
1     0.030195656
1003  0.028471621
1004 -0.032222667
1009 -0.028484597
1010 -0.000304713
1011  0.003009086

#Sub-population 2 TL variance component
varcomp(ph_gb_p2)

      Estimate    StdError  prop.var         se
G  0.005648056 0.002058812 0.2066096 0.07341335
cg 0.004119595 0.001716894 0.1506975 0.05545169
In 0.017569200 0.001639199 0.6426929 0.09491811

Correction between predicted breeding values

#Sub-population 1 and sub-population 1 TL
cor(p1, p1_p)

          [,1]
[1,] 0.9091838

#Sub-population 2 and sub-population 2 TL
cor(p2, p2_p)

          [,1]
[1,] 0.9316224

Cross-validation for sub-population and sub-population_TL

Code

#Cross-validation
ph_cv_1 <- cross_validation(ph_gbt1$model$G, iterate=10, ph_ft1$pheno, y_name=phenotype1, rand_var = cg_name1)
ph_cv_2 <- cross_validation(ph_gbt2$model$G, iterate=10, ph_ft2$pheno, y_name=phenotype1, rand_var = cg_name1)
ph_cv_Main <- cross_validation(com_gb$model$G, iterate=10, com_ft$pheno, y_name=phenotype1, rand_var = cg_name1)

ph_cv_p1 <- cross_validation(ph_gb_p1$model$G, iterate=10, y_P1, y_name=phenotype1, fixed=c("poly1"), rand_var = cg_name1, poly_name= "poly1")
ph_cv_p2 <- cross_validation(ph_gb_p2$model$G, iterate=10, y_P2, y_name=phenotype1, fixed=c("poly1"), rand_var = cg_name1, poly_name= "poly1")

Prediction accuracy under cross-validation
subpop1	subpop2	subpop1_TL	subpop2_TL	POP1
0.225±0.09	0.188±0.097	0.24±0.082	0.279±0.082	0.223±0.06
^a values are mean ± standard deviation

Meta analysis of Population 1 using its subpopulations

Code

#Meta_analysis
ph_met <- meta_gp(N= c(nrow(ph_ft1$pheno), nrow(ph_ft2$pheno)), ph_gbt1$sigma[1], ph_Gwt1, ph_gbt2$sigma[1], ph_Gwt2)
ph_met$table

Meta analysis table (First 6 rows)
SE_meta	Z_meta	p_meta	beta_meta	ghat_meta
0.7054193	-1.8063865	0.0708580	-1.2742599	-0.0209618
0.8137637	-1.0846330	0.2780842	-0.8826350	-0.0109107
0.7002158	1.0902156	0.2756182	0.7633862	0.0127452
0.7412763	0.1715888	0.8637608	0.1271947	0.0018949
0.7412783	0.1717038	0.8636704	0.1272803	0.0018961
0.8278155	-0.4356580	0.6630849	-0.3606445	-0.0043080

Cross-validation between meta_analysis and phenotype

Code

# Perform k-fold cross-validation
k <- 10
COR <- matrix(NA, k, 2)
colnames(COR) <- c("Pop1", "Pop2")

getZ <-function(g,scale=TRUE){
  if(scale){
    p <- colMeans(g)/2
    Z <-sweep(g,2,2*p)/sqrt(sum(2*p*(1-p)))
  }
return(Z)
}

for (i in 1:k) {
  # Get the training and testing indices for i
  tr_ind1 <- which((1:nrow(ph_ft1$pheno)) %% k != i- 1)
  ts_ind1 <- which((1:nrow(ph_ft1$pheno)) %% k == i - 1)
  tr_ind2 <- which((1:nrow(ph_ft2$pheno)) %% k != i - 1)
  ts_ind2 <- which((1:nrow(ph_ft2$pheno)) %% k == i - 1)
  
  # Extract training and testing data for phenotype
  tr_pop1 <- ph_ft1$pheno[tr_ind1,]
  ts_pop1 <- ph_ft1$pheno[ts_ind1, ]
  
  tr_pop2 <- ph_ft2$pheno[tr_ind2, ]
  ts_pop2 <- ph_ft2$pheno[ts_ind2, ]
  
  # Extract training and testing data for genotype
  tr_geno1 <- ph_ft1$geno[tr_ind1, ]
  ts_geno1 <- ph_ft1$geno[ts_ind1, ]
  
  tr_geno2 <- ph_ft2$geno[tr_ind2, ]
  ts_geno2 <- ph_ft2$geno[ts_ind2, ]
  
  # Obtain standardized genotype for training and testing data
  zp1 <- getZ(tr_geno1)
  zt1 <- getZ(ts_geno1)
  
  zp2 <- getZ(tr_geno2)
  zt2 <- getZ(ts_geno2)
  
  # Perform meta-analysis
  gb1<-gblup(rsp = phenotype1,data = tr_pop1,
                  design =  c(formula(y~1), (formula(paste("~",cg_name1)))),G = ph_gbt1$model$G)
  gb2<-gblup(rsp = phenotype1,data = tr_pop2,
              design =  c(formula(y~1), (formula(paste("~",cg_name1)))),G = ph_gbt2$model$G)
  
  gw1 <- gwas(gb1, t(zp1))
  
  gw2 <- gwas(gb2, t(zp2))
  
  ph_met <- meta_gp(N= c(nrow(tr_pop1), nrow(tr_pop2)), gb1$sigma[1], gw1, gb2$sigma[1], gw2)
  
  # Calculate the uhat1 and uhat2 values
  uhat1 <- zt1%*%ph_met$ghat
  uhat2 <- zt2%*%ph_met$ghat
  
  # Calculate the correlation between observed phenotype and predicted values for i
  COR[i, "Pop1"] <- cor(ts_pop1$ph, uhat1)
  COR[i, "Pop2"] <- cor(ts_pop2$ph, uhat2)
}

Prediction accuracy under cross-validation
Subpop1	Subpop2
0.267±0.121	0.285±0.05
^a values are mean ± standard deviation

Partitioning population for CieA*

Population 1 was randomly partitioned to create two closely-related sub-populations and performing analysis using meat redness (CieA*) as the phenotype.

Code

set.seed(904)
cat <- sample(1:2, nrow(com_phenoA), replace = T)
table(cat)

cat
  1   2 
971 948

Code

com1 <-com_phenoA[cat==1, ]
com2  <-com_phenoA[cat==2, ]

com_ft1 <- filter_and_match(com_genoA, com1, map=com_mpA, n_maf = MAF,
                            n_row=na_per_row, n_col=na_per_col)
com_ft2 <- filter_and_match(com_genoA, com2, map=com_mpA, n_maf = MAF,
                            n_row=na_per_row, n_col=na_per_col)
com_matr1 <- read_G_matrix(com_ft1$geno)
com_matr2 <- read_G_matrix(com_ft2$geno)
com_gbt1<- model_gb(com_matr1, com_ft1$pheno, y_name=phenotype2,random=cg_name1)
com_gbt2<- model_gb(com_matr2, com_ft2$pheno, y_name=phenotype2,random=cg_name1)
com_Gwt1<-run_gwa(com_gbt1, com_ft1$geno)
com_Gwt2<-run_gwa(com_gbt2, com_ft2$geno)

Sub-population analysis

In this analysis, transfer learning approach was employed to determine prediction improvement with parameters transfer between closely-related populations

Sub-population 1

#Manhattan Plot 1
pvalue1<-getpvalue(com_Gwt1,log.p = F)
manhattan_plot(pvalue1, map=com_mpA)

#Predicted breeding value for Sub-population 1  
c1 <- summary(com_gbt1)$uhat
head(c1)

           [,1]
10   -0.2831910
1001 -0.2196702
1005 -0.0657902
1006  0.2199628
1007 -0.1298514
1008 -0.2753362

#Variance component for Subpopulation 1
varcomp(com_gbt1)

    Estimate   StdError  prop.var         se
G  0.3879295 0.11098691 0.1845088 0.05703041
cg 0.8286072 0.30667190 0.3941060 0.09231982
In 0.8859618 0.08399623 0.4213852 0.07880826

#Transfer learning (TL) 
com1_2_p <- get_polys(com_ft1$geno, com_Gwt2)
head(com1_2_p)

             [,1]
10   -0.018086779
1001 -0.048568155
1005  0.051350709
1006  0.151040939
1007 -0.002328433
1008 -0.104864357

#Sub-population 1 with TL_Pheno 
y.P1 <- add_poly(com_ft1$pheno, com1_2_p)

#Sub-population 1 Genomic prediction with TL
com_gb_p1<- model_gb(G=com_matr1, pheno=y.P1, y_name=phenotype2,fixed=c("poly1"), random=cg_name1)

.......................................................... 
summary gblup analysis of trait: plantA 

fixed effects equation:
y ~ poly1

random effects equation:
~G + cg + In

log-likelihood: -577.0897 converged in: 7 iterations 

estimated fixed effects:
              Estimate  StdError   test.st      p.value
(Intercept) -0.9388214 0.2213348 -4.241634 2.218984e-05
poly1        0.9331605 0.3424383  2.725047 6.429237e-03

estimated variance components:
    Estimate   StdError  prop.var
G  0.3615603 0.10894856 0.1746332
cg 0.8124836 0.30089088 0.3924286
In 0.8963547 0.08351242 0.4329382

breeding value predicted for 959 first 5 animals shown:
           [,1]
10   -0.2999273
1001 -0.2313401
1005 -0.1157141
1006  0.1479159
1007 -0.1522491
1008 -0.2463351
..........................................................

#Sub-population 1 Estimated SNP-effects with TL
com_Gw_p1<-run_gwa(com_gb_p1, com_ft1$geno)

......................................... 
Estimated SNP Effect and its variance: 
                    ghat    var_ghat
MARC0044150 -0.045452799 0.002795781
ASGA0000014 -0.025444027 0.001992046
ASGA0000021  0.004200506 0.002852422
ALGA0000009  0.011522688 0.002511035
ALGA0000014  0.011522407 0.002511014
......................................... 
Summary of GWA: 
              <0.001 <0.01 <0.025 <0.05 <0.1
signif.values      0     0      0     0    0
attr(,"class")
[1] "summary.gwas"
.........................................

#Sub-population 1 TL Predicted breeding value 
c1_p <- summary(com_gb_p1)$uhat
head(c1_p)

           [,1]
10   -0.2999273
1001 -0.2313401
1005 -0.1157141
1006  0.1479159
1007 -0.1522491
1008 -0.2463351

#Sub-population 1 TL variance component
varcomp(com_gb_p1)

    Estimate   StdError  prop.var         se
G  0.3615603 0.10894856 0.1746332 0.05630812
cg 0.8124836 0.30089088 0.3924286 0.09222661
In 0.8963547 0.08351242 0.4329382 0.08031404

Sub-population 2

#Manhattan Plot 2
pvalue2<-getpvalue(com_Gwt2,log.p = F)
manhattan_plot(pvalue2, map=com_mp)

#Predicted breeding value for Sub-population 2 
c2 <- summary(com_gbt2)$uhat
head(c2)

             [,1]
1     0.014958563
1003 -0.009658086
1004 -0.295510682
1009  0.336859810
1010  0.043002697
1011  0.124811677

#Variance component for Sub-population 2
varcomp(com_gbt2)

    Estimate   StdError   prop.var         se
G  0.2120275 0.09572963 0.09270471 0.04404780
cg 1.1058942 0.40583901 0.48352965 0.09555015
In 0.9692063 0.08136283 0.42376565 0.08579585

#Transfer learning (TL) 
com2_1_p <- get_polys(com_ft2$geno, com_Gwt1)
head(com2_1_p)

             [,1]
1    -0.185125827
1003  0.174341713
1004 -0.319766401
1009 -0.006256207
1010 -0.165677185
1011 -0.137574913

#Sub-population 2 with TL_Pheno
y.P2 <- add_poly(com_ft2$pheno, com2_1_p)

#Sub-population 2 Genomic prediction with TL
com_gb_p2<- model_gb(G=com_matr2, pheno=y.P2, y_name=phenotype2,fixed=c("poly1"), random=cg_name1)

.......................................................... 
summary gblup analysis of trait: plantA 

fixed effects equation:
y ~ poly1

random effects equation:
~G + cg + In

log-likelihood: -547.1365 converged in: 6 iterations 

estimated fixed effects:
              Estimate  StdError   test.st      p.value
(Intercept) -1.0483256 0.2564849 -4.087279 4.364613e-05
poly1        0.6002514 0.2007185  2.990513 2.785092e-03

estimated variance components:
    Estimate   StdError   prop.var
G  0.1711907 0.08991781 0.07649579
cg 1.0772359 0.39546829 0.48135796
In 0.9894836 0.07935234 0.44214625

breeding value predicted for 932 first 5 animals shown:
            [,1]
1     0.07176061
1003 -0.04418299
1004 -0.22315859
1009  0.27443537
1010  0.06398593
1011  0.10515657
..........................................................

#Sub-population 2 Estimated SNP-effects with TL
com_Gw_p2<-run_gwa(com_gb_p2, com_ft2$geno)

......................................... 
Estimated SNP Effect and its variance: 
                    ghat     var_ghat
MARC0044150  0.014659735 0.0006257262
ASGA0000014 -0.006151563 0.0005038594
ASGA0000021 -0.010140501 0.0006453725
ALGA0000009 -0.008081123 0.0005845130
ALGA0000014 -0.008081017 0.0005845102
......................................... 
Summary of GWA: 
              <0.001 <0.01 <0.025 <0.05 <0.1
signif.values      0     0      0     0    1
attr(,"class")
[1] "summary.gwas"
.........................................

#Sub-population 2 TL Predicted breeding value 
c2_p <- summary(com_gb_p2)$uhat
head(c2_p)

            [,1]
1     0.07176061
1003 -0.04418299
1004 -0.22315859
1009  0.27443537
1010  0.06398593
1011  0.10515657

#Sub-population 2 TL variance component
varcomp(com_gb_p2)

    Estimate   StdError   prop.var         se
G  0.1711907 0.08991781 0.07649579 0.04165928
cg 1.0772359 0.39546829 0.48135796 0.09534506
In 0.9894836 0.07935234 0.44214625 0.08813793

Correction between predicted breeding values

#Sub-population 1 and sub-population 1 TL
cor(c1, c1_p)

          [,1]
[1,] 0.9718469

#Sub-population 2 and sub-population 2 TL
cor(c2, c2_p)

        [,1]
[1,] 0.97159

Cross-validation for sub-population and sub-population_TL

Code

#Cross-validation
com_cv_1 <- cross_validation(com_gbt1$model$G, iterate=10, com_ft1$pheno, y_name=phenotype2, rand_var = cg_name1)
com_cv_2 <- cross_validation(com_gbt2$model$G, iterate=10, com_ft2$pheno, y_name=phenotype2, rand_var = cg_name1)
com_cv_Main <- cross_validation(com_gbA$model$G, iterate=10, com_ftA$pheno, y_name=phenotype2, rand_var = cg_name1)


com_cv_p1 <- cross_validation(com_gb_p1$model$G, iterate=10, y.P1, y_name=phenotype2, fixed=c("poly1"), rand_var = cg_name1, poly_name= "poly1")
com_cv_p2 <- cross_validation(com_gb_p2$model$G, iterate=10, y.P2, y_name=phenotype2, fixed=c("poly1"), rand_var = cg_name1, poly_name= "poly1")

Prediction accuracy under cross-validation
subpop1	subpop2	subpop1_TL	subpop2_TL	POP1
0.22±0.095	0.175±0.1	0.241±0.097	0.213±0.111	0.229±0.067
^a values are mean ± standard deviation

Meta analysis of Population 1 using its subpopulations

Code

#Meta_analysis
com_met <- meta_gp(N= c(nrow(com_ft1$pheno), nrow(com_ft2$pheno)), com_gbt1$sigma[1], com_Gwt1, com_gbt2$sigma[1], com_Gwt2)
com_met$table

Meta analysis table (First 6 rows)
SE_meta	Z_meta	p_meta	beta_meta	ghat_meta
4.867369	-0.2811542	0.7785921	-1.3684813	-0.0174003
5.589525	-0.5669638	0.5707388	-3.1690585	-0.0305553
4.817894	-0.1972860	0.8436037	-0.9505032	-0.0123352
5.093444	0.0109843	0.9912360	0.0559477	0.0006496
5.093461	0.0109819	0.9912379	0.0559361	0.0006495
5.706303	0.8424085	0.3995593	4.8070384	0.0444707

Cross-validation between meta_analysis and phenotype

Code

# Perform k-fold cross-validation
k <- 10
COR_1 <- matrix(NA, k, 2)
colnames(COR_1) <- c("Pop1", "Pop2")

getZ <-function(g,scale=TRUE){
  if(scale){
    p <- colMeans(g)/2
    Z <-sweep(g,2,2*p)/sqrt(sum(2*p*(1-p)))
  }
return(Z)
}

for (i in 1:k) {
  # Get the training and testing indices for i
  tr.1 <- which((1:nrow(com_ft1$pheno)) %% k != i- 1)
  ts.1 <- which((1:nrow(com_ft1$pheno)) %% k == i - 1)
  tr.2 <- which((1:nrow(com_ft2$pheno)) %% k != i - 1)
  ts.2 <- which((1:nrow(com_ft2$pheno)) %% k == i - 1)
  
  # Extract training and testing data for phenotype
  tr.pop1 <- com_ft1$pheno[tr.1,]
  ts.pop1 <- com_ft1$pheno[ts.1, ]
  
  tr.pop2 <- com_ft2$pheno[tr.2, ]
  ts.pop2 <- com_ft2$pheno[ts.2, ]
  
  # Extract training and testing data for genotype
  tr.geno1 <- com_ft1$geno[tr.1, ]
  ts.geno1 <- com_ft1$geno[ts.1, ]
  
  tr.geno2 <- com_ft2$geno[tr.2, ]
  ts.geno2 <- com_ft2$geno[ts.2, ]
  
  #Obtain standardized genotype for training and testing data
  tr.z1 <- getZ(tr.geno1)
  ts.z1 <- getZ(ts.geno1)
  
  tr.z2 <- getZ(tr.geno2)
  ts.z2 <- getZ(ts.geno2)
  
  # Perform meta-analysis
  
  gb.1<-gblup(rsp = phenotype2,data = tr.pop1,
                  design =  c(formula(y~1), (formula(paste("~",cg_name1)))),G = com_gbt1$model$G)
  gb.2<-gblup(rsp = phenotype2,data = tr.pop2,
              design =  c(formula(y~1), (formula(paste("~",cg_name1)))),G = com_gbt2$model$G)
  
  gw.1 <- gwas(gb.1, t(tr.z1))
  
  gw.2 <- gwas(gb.2, t(tr.z2))
  
  com_met <- meta_gp(N= c(nrow(tr.pop1), nrow(tr.pop2)), gb.1$sigma[1], gw.1, gb.2$sigma[1], gw.2)
  
  # Calculate the uhat1 and uhat2 values
  
  uhat.1 <- ts.z1%*%com_met$ghat
  uhat.2 <- ts.z2%*%com_met$ghat
  
  # Calculate the correlation between observed phenotype and predicted values for i
  COR_1[i, "Pop1"] <- cor(ts.pop1$plantA, uhat.1)
  COR_1[i, "Pop2"] <- cor(ts.pop2$plantA, uhat.2)
}

Prediction accuracy under cross-validation
Subpop1	Subpop2
0.256±0.12	0.24±0.086
^a values are mean ± standard deviation