Introduction

This short walkthrough uses genotype and phenotype datasets from three pork populations. We incorporated individual analysis that returns GBLUP objects and the estimated SNP effects and their variances in performing the meta-analysis. The functions used in this note provide an opportunity for concise scripting and computationally efficient tasks while displaying minimum needed outputs for prompt understanding.

Load Libraries

library(data.table)
library(gwaR)
library(regress)
library(Matrix)
library(knitr)
library(magrittr)
library(ggplot2)
library(kableExtra)

Load Necessary Functions

Basically, the gwaR package was used in computing these functions to perform the genome-wide prediction from genomic best linear unbiased prediction (GBLUP).

source("Toolkit_function.R")
source("toolkit_function_Meta.R")

Key Parameters

#COM
source_pop1 = 'comm'
pheno_name1= 'https://www.dropbox.com/s/1e2bw6yb9viha1w/pheno_comm.csv?dl=1'
geno_name1 = 'https://www.dropbox.com/s/oas827ngl39yxrd/genos_commercial.csv?dl=1'
map_name1 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name1 = 'cg'
id_name1 = 'animal'
phenotype1 = "ph"

#MSU
source_pop2 = 'msu'
pheno_name2 = 'https://www.dropbox.com/s/nh0dgcsuxkhtvvd/pheno_msu.csv?dl=1'
geno_name2 = 'https://www.dropbox.com/s/jjzgpvrecjd9e5g/genos_msu.csv?dl=1'
map_name2 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name2 = 'cgsl'
id_name2 = 'animal'
fe_cov2 = c('age_slg')
fe_fct2 = c('sex')
phenotype2 = "ph24"

#MAC
source_pop3 = 'marc'
pheno_name3='https://www.dropbox.com/s/lksmz0izaaifgxw/pheno_marc.csv?dl=1'
geno_name3 = 'https://www.dropbox.com/s/gx4jxoumij5lhtv/genos_marc.csv?dl=1'
map_name3 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name3 = 'cg'
id_name3 = 'animal'
fe_cov3 = c('age')
phenotype3 = "ph"

na_per_row = 0.05
na_per_col = 0.05
MAF=0.0

Read the objects to execute necessary parameters

#COMM
com_geno <-read_genotypes(geno_name1)
## ------------------------------- 
## Read genotype file https://www.dropbox.com/s/oas827ngl39yxrd/genos_commercial.csv?dl=1 
## sample output: 
##      MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 1              1           2           1           1           1
## 10             0           2           2           2           2
## 1001           2           2           0           0           0
## 1003           0           0           0           0           0
## 1004           1           1           1           1           1
## number of subjects: 1892 
## number of SNP: 37069 
## -------------------------------
com_pheno<-read_pheno(pheno_name1, source_pop1, id_name1, 
                       cg_name=cg_name1,phenotype = phenotype1)
## ------------------------------- 
## Read phenotype file https://www.dropbox.com/s/1e2bw6yb9viha1w/pheno_comm.csv?dl=1 
## sample output: 
##     cg   ph
## 1 7003 5.68
## 2 7003 5.52
## 3 7005 5.48
## 4 7003 6.00
## 5 7005 5.50
## 6 7003 5.61
## number of subjects: 1920 
## deleting rows with missing values 
## ------------------------------- 
## number of subjects with complete observations: 1885 
## ------------------------------- 
## Phenotypic summary 
##        cg            ph       
##  31     :304   Min.   :5.180  
##  41     :304   1st Qu.:5.540  
##  11     :150   Median :5.630  
##  23     :141   Mean   :5.654  
##  12     :115   3rd Qu.:5.740  
##  21     :102   Max.   :6.500  
##  (Other):769                  
## -------------------------------
com_mp<- read_map(map_name3)
## .................................. 
## Summary of map: 
##             chr    pos
## MARC0044150   1 286933
## ASGA0000014   1 342481
## ASGA0000021   1 489855
## ALGA0000009   1 538161
## ALGA0000014   1 565627
## H3GA0000032   1 573088
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416 
##   17   18 
## 1211  837 
## .................................
com_filter <- filter_and_match(com_geno, com_pheno, com_mp, n_maf = MAF,
                                n_row=na_per_row, n_col=na_per_col)
## ------------------------------- 
## number of deleted row: 0 
## number of deleted column: 0 
## number of deleted MAF: 0 
## ------------------------------- 
## Sample Output for genotype: 
## [1]  1857 37069
##      MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 1              1           2           1           1           1
## 10             0           2           2           2           2
## 1001           2           2           0           0           0
## 1003           0           0           0           0           0
## 1004           1           1           1           1           1
## Sample Output for phenotype: 
##        cg   ph
## 1    7003 5.68
## 10   7003 5.76
## 1001   12 5.55
## 1003   12 5.60
## 1004   12 5.39
## number of subjects for genotype: 1857 
## number of subjects for phenotype: 1857 
## ------------------------------- 
## [1] "A map file was provided!"
##             chr    pos
## MARC0044150   1 286933
## ASGA0000014   1 342481
## ASGA0000021   1 489855
## ALGA0000009   1 538161
## ALGA0000014   1 565627
## H3GA0000032   1 573088
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416 
##   17   18 
## 1211  837
com_mat <- read_G_matrix(com_filter$geno)
## --------------------------------- 
## Genomic relationship matrix 
## [1] 1857 1857
## Summary for diagonal: 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7095  0.7984  0.8262  0.9357  1.1572  1.6004 
## --------------------------------- 
## Summary for off-diagonal: 
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.3674607 -0.1199162  0.0084886 -0.0005041  0.0567494  1.1201528 
## --------------------------------- 
## Normalized Z matrix: 
## [1]  1857 37069
com_gb<- model_gb(com_mat, com_filter$pheno,phenotype1,random=cg_name1)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ 1
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 2553.079 converged in: 5 iterations 
## 
## estimated fixed effects:
## [1] test.st p.value
## <0 rows> (or 0-length row.names)
## 
## estimated variance components:
## [1] prop.var
## <0 rows> (or 0-length row.names)
## 
## breeding value predicted for 1857 first 5 animals shown:
##             [,1]
## 1     0.02739802
## 10    0.03379701
## 1001  0.04445830
## 1003  0.04195778
## 1004 -0.05189923
## 1005  0.03457575
## ..........................................................
com_gw<-run_gwa(com_gb, com_filter$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                      ghat     var_ghat
## MARC0044150 -0.0259500989 0.0001663922
## ASGA0000014 -0.0121420243 0.0001336983
## ASGA0000021  0.0153958497 0.0001658754
## ALGA0000009  0.0007611284 0.0001531351
## ALGA0000014  0.0007625065 0.0001531344
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      5     6      7     7    7
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#MSU
msu_geno <-read_genotypes(geno_name2)
## ------------------------------- 
## Read genotype file https://www.dropbox.com/s/jjzgpvrecjd9e5g/genos_msu.csv?dl=1 
## sample output: 
##        MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 991001           1           1           1           1           1
## 991002           1           1           1           1           1
## 991003           1           1           1           1           1
## 991004           1           1           1           1           1
## 991006           0           1           2           2           2
## number of subjects: 922 
## number of SNP: 37069 
## -------------------------------
msu_pheno<-read_pheno(pheno_name2, source_pop2, id_name2,fe_cov=fe_cov2,
                      fe_fct=fe_fct2, cg_name=cg_name2, phenotype = phenotype2)
## ------------------------------- 
## Read phenotype file https://www.dropbox.com/s/nh0dgcsuxkhtvvd/pheno_msu.csv?dl=1 
## sample output: 
##        age_slg sex cgsl ph24
## 991001     182   1    3 5.54
## 991002     161   2    1 5.50
## 991003     161   1    1 5.40
## 991004     161   2    1 5.33
## 991006     179   2    3 5.52
## 991007     161   1    1 5.47
## number of subjects: 948 
## deleting rows with missing values 
## ------------------------------- 
## number of subjects with complete observations: 927 
## ------------------------------- 
## Phenotypic summary 
##     age_slg      sex          cgsl          ph24      
##  Min.   :148.0   1:492   12     : 57   Min.   :5.090  
##  1st Qu.:157.0   2:435   16     : 56   1st Qu.:5.420  
##  Median :164.0           18     : 47   Median :5.490  
##  Mean   :166.3           1      : 45   Mean   :5.513  
##  3rd Qu.:177.0           3      : 42   3rd Qu.:5.590  
##  Max.   :182.0           8      : 42   Max.   :6.350  
##                          (Other):638                  
## -------------------------------
msu_mp<- read_map(map_name2)
## .................................. 
## Summary of map: 
##             chr    pos
## MARC0044150   1 286933
## ASGA0000014   1 342481
## ASGA0000021   1 489855
## ALGA0000009   1 538161
## ALGA0000014   1 565627
## H3GA0000032   1 573088
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416 
##   17   18 
## 1211  837 
## .................................
msu_filter <- filter_and_match(msu_geno, msu_pheno, msu_mp, n_maf = MAF,
                               n_row=na_per_row, n_col=na_per_col)
## ------------------------------- 
## number of deleted row: 0 
## number of deleted column: 0 
## number of deleted MAF: 0 
## ------------------------------- 
## Sample Output for genotype: 
## [1]   904 37069
##        MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 991001           1           1           1           1           1
## 991002           1           1           1           1           1
## 991003           1           1           1           1           1
## 991004           1           1           1           1           1
## 991006           0           1           2           2           2
## Sample Output for phenotype: 
##        age_slg sex cgsl ph24
## 991001     182   1    3 5.54
## 991002     161   2    1 5.50
## 991003     161   1    1 5.40
## 991004     161   2    1 5.33
## 991006     179   2    3 5.52
## number of subjects for genotype: 904 
## number of subjects for phenotype: 904 
## ------------------------------- 
## [1] "A map file was provided!"
##             chr    pos
## MARC0044150   1 286933
## ASGA0000014   1 342481
## ASGA0000021   1 489855
## ALGA0000009   1 538161
## ALGA0000014   1 565627
## H3GA0000032   1 573088
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416 
##   17   18 
## 1211  837
msu_mat <- read_G_matrix(msu_filter$geno)
## --------------------------------- 
## Genomic relationship matrix 
## [1] 904 904
## Summary for diagonal: 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7560  0.9128  0.9482  0.9517  0.9859  1.2434 
## --------------------------------- 
## Summary for off-diagonal: 
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.284280 -0.071938 -0.019533 -0.001054  0.052246  0.635059 
## --------------------------------- 
## Normalized Z matrix: 
## [1]   904 37069
msu_gb<- model_gb(msu_mat,msu_filter$pheno,phenotype2,fixed=c(fe_cov2,fe_fct2),
                  random=cg_name2)
## .......................................................... 
## summary gblup analysis of trait: ph24 
## 
## fixed effects equation:
## y ~ age_slg + sex
## 
## random effects equation:
## ~G + cgsl + In
## 
## log-likelihood: 1427.524 converged in: 5 iterations 
## 
## estimated fixed effects:
##                 Estimate    StdError    test.st     p.value
## (Intercept)  5.731175106 0.204355560 28.0451146 0.000000000
## age_slg     -0.001208287 0.001233471 -0.9795832 0.327291885
## sex2        -0.022569259 0.008269218 -2.7293100 0.006346702
## 
## estimated variance components:
##         Estimate     StdError  prop.var
## G    0.003021366 0.0008436650 0.1527314
## cgsl 0.004376634 0.0012946806 0.2212408
## In   0.012384220 0.0007437225 0.6260278
## 
## breeding value predicted for 904 first 5 animals shown:
##                [,1]
## 991001  0.024862960
## 991002  0.016831065
## 991003 -0.048651854
## 991004 -0.002855189
## 991006  0.049104951
## 991007 -0.027439658
## ..........................................................
msu_gw<-run_gwa(msu_gb, msu_filter$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                      ghat     var_ghat
## MARC0044150 -0.0009559271 7.477286e-06
## ASGA0000014  0.0052428948 6.803276e-06
## ASGA0000021  0.0009625046 7.473270e-06
## ALGA0000009  0.0007970479 7.327064e-06
## ALGA0000014  0.0006311137 7.435277e-06
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      4     4      5     5    8
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#MARC
mac_geno<-read_genotypes(geno_name3)
## ------------------------------- 
## Read genotype file https://www.dropbox.com/s/gx4jxoumij5lhtv/genos_marc.csv?dl=1 
## sample output: 
##           MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 200521304           1           1           1           1           1
## 200521402           1           1           1           1           1
## 200521405           2           2           0           0           0
## 200521502           2           2           0           1           1
## 200521504           1           2           1           2           2
## number of subjects: 1234 
## number of SNP: 37069 
## -------------------------------
mac_pheno<-read_pheno(pheno_name3, source_pop3, id_name3,fe_cov=fe_cov3,
                       cg_name=cg_name3, phenotype = phenotype3)
## ------------------------------- 
## Read phenotype file https://www.dropbox.com/s/lksmz0izaaifgxw/pheno_marc.csv?dl=1 
## sample output: 
##           age cg  ph
## 200521304 189  4 5.9
## 200521402 201  7 5.6
## 200521405 224 11 5.8
## 200521502 207  8 5.9
## 200521504 207  8 5.9
## 200521601 215 10 5.8
## number of subjects: 1237 
## deleting rows with missing values 
## ------------------------------- 
## number of subjects with complete observations: 531 
## ------------------------------- 
## Phenotypic summary 
##       age              cg            ph       
##  Min.   :160.0   24     : 26   Min.   :5.400  
##  1st Qu.:186.0   13     : 25   1st Qu.:5.700  
##  Median :196.0   14     : 25   Median :5.800  
##  Mean   :196.3   20     : 25   Mean   :5.812  
##  3rd Qu.:207.0   21     : 25   3rd Qu.:5.900  
##  Max.   :233.0   22     : 25   Max.   :7.000  
##                  (Other):380                  
## -------------------------------
mac_mp<- read_map(map_name3)
## .................................. 
## Summary of map: 
##             chr    pos
## MARC0044150   1 286933
## ASGA0000014   1 342481
## ASGA0000021   1 489855
## ALGA0000009   1 538161
## ALGA0000014   1 565627
## H3GA0000032   1 573088
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416 
##   17   18 
## 1211  837 
## .................................
mac_filter <- filter_and_match(mac_geno, mac_pheno, map=mac_mp, n_maf = MAF,
                            n_row=na_per_row, n_col=na_per_col)
## ------------------------------- 
## number of deleted row: 0 
## number of deleted column: 0 
## number of deleted MAF: 0 
## ------------------------------- 
## Sample Output for genotype: 
## [1]   530 37069
##           MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 200521304           1           1           1           1           1
## 200521402           1           1           1           1           1
## 200521405           2           2           0           0           0
## 200521502           2           2           0           1           1
## 200521504           1           2           1           2           2
## Sample Output for phenotype: 
##           age cg  ph
## 200521304 189  4 5.9
## 200521402 201  7 5.6
## 200521405 224 11 5.8
## 200521502 207  8 5.9
## 200521504 207  8 5.9
## number of subjects for genotype: 530 
## number of subjects for phenotype: 530 
## ------------------------------- 
## [1] "A map file was provided!"
##             chr    pos
## MARC0044150   1 286933
## ASGA0000014   1 342481
## ASGA0000021   1 489855
## ALGA0000009   1 538161
## ALGA0000014   1 565627
## H3GA0000032   1 573088
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416 
##   17   18 
## 1211  837
mac_mat <- read_G_matrix(mac_filter$geno)
## --------------------------------- 
## Genomic relationship matrix 
## [1] 530 530
## Summary for diagonal: 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8928  0.9541  0.9756  0.9821  1.0093  1.1465 
## --------------------------------- 
## Summary for off-diagonal: 
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.158444 -0.053681 -0.023830 -0.001857  0.017487  0.706645 
## --------------------------------- 
## Normalized Z matrix: 
## [1]   530 37069
mac_gb <- model_gb(mac_mat,mac_filter$pheno,phenotype3,fixed=c(fe_cov3),
                    random=cg_name3)
## .......................................................... 
## summary gblup analysis of trait: ph 
## 
## fixed effects equation:
## y ~ age
## 
## random effects equation:
## ~G + cg + In
## 
## log-likelihood: 701.4861 converged in: 5 iterations 
## 
## estimated fixed effects:
##                  Estimate     StdError    test.st   p.value
## (Intercept)  5.9457267074 0.1572708952 37.8056391 0.0000000
## age         -0.0006709545 0.0007975558 -0.8412635 0.4002004
## 
## estimated variance components:
##       Estimate    StdError  prop.var
## G  0.006117629 0.002164584 0.2097627
## cg 0.003727272 0.001397493 0.1278016
## In 0.019319621 0.001899998 0.6624357
## 
## breeding value predicted for 530 first 5 animals shown:
##                  [,1]
## 200521304  0.03097201
## 200521402 -0.04838265
## 200521405 -0.01679525
## 200521502 -0.02663800
## 200521504 -0.04140536
## 200521601 -0.03101332
## ..........................................................
mac_gw<-run_gwa(mac_gb, mac_filter$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.005274609 1.884942e-05
## ASGA0000014 -0.004839716 1.115949e-05
## ASGA0000021  0.005274609 1.884942e-05
## ALGA0000009  0.005915184 1.895690e-05
## ALGA0000014  0.005915184 1.895690e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................

Meta-Analysis

meta <- meta_gp(com_gb, com_filter$geno, msu_gb, msu_filter$geno, mac_gb,
                mac_filter$geno)
## ......................................... 
## Estimated SNP Effect and its variance: 
##                      ghat     var_ghat
## MARC0044150 -0.0259500989 0.0001663922
## ASGA0000014 -0.0121420243 0.0001336983
## ASGA0000021  0.0153958497 0.0001658754
## ALGA0000009  0.0007611284 0.0001531351
## ALGA0000014  0.0007625065 0.0001531344
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      5     6      7     7    7
## attr(,"class")
## [1] "summary.gwas"
## ......................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                      ghat     var_ghat
## MARC0044150 -0.0009559271 7.477286e-06
## ASGA0000014  0.0052428948 6.803276e-06
## ASGA0000021  0.0009625046 7.473270e-06
## ALGA0000009  0.0007970479 7.327064e-06
## ALGA0000014  0.0006311137 7.435277e-06
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      4     4      5     5    8
## attr(,"class")
## [1] "summary.gwas"
## ......................................... 
## ......................................... 
## Estimated SNP Effect and its variance: 
##                     ghat     var_ghat
## MARC0044150 -0.005274609 1.884942e-05
## ASGA0000014 -0.004839716 1.115949e-05
## ASGA0000021  0.005274609 1.884942e-05
## ALGA0000009  0.005915184 1.895690e-05
## ALGA0000014  0.005915184 1.895690e-05
## ......................................... 
## Summary of GWA: 
##               <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values      0     0      0     0    0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
meta$table
Meta analysis table (First 6 rows)
SE_Meta Z_Meta p_Meta \(\beta_Meta\) \(\hat{g}_Meta\)
MARC0044150 0.5691040 -2.1916848 0.0284023 -1.2472965 -0.0280941
ASGA0000014 0.6373828 -0.1952363 0.8452079 -0.1244403 -0.0022345
ASGA0000021 0.5696505 1.5758466 0.1150612 0.8976818 0.0201806
ALGA0000009 0.5838699 0.7643431 0.4446628 0.4462769 0.0095500
ALGA0000014 0.5826945 0.7308853 0.4648492 0.4258828 0.0091503
H3GA0000032 0.6059635 -1.6794179 0.0930706 -1.0176659 -0.0202182

Comparing Individual \(\hat{g}\) with meta \(\hat{g}\)

par(mfrow= c(1, 3))
par(mar=c(6,5,6,0))
plot(com_gw[, 1], meta$ghat, xlab = expression(hat(g)[COM]), 
     ylab = expression(hat(g)[Meta]), cex.lab= 1.5, col="orangered3")
plot(msu_gw[, 1], meta$ghat, xlab = expression(hat(g)[MSU]), 
     ylab = expression(hat(g)[Meta]), cex.lab= 1.5, col="gold3")
plot(mac_gw[, 1], meta$ghat, xlab = expression(hat(g)[MAC]), 
     ylab = expression(hat(g)[Meta]), cex.lab= 1.5, col="forestgreen")
mtext("Individual SNP effect vs Meta-SNP effect", side =3, line= -4, outer = TRUE)

Numerical equivalence between GBLUP derived SNP effect and fixed SNP effect

plot((meta$beta_Meta/meta$SE_Meta),(meta$ghat/sqrt(meta$var_ghat)), xlab = "FIXED_SNP",
     ylab = "GBLUP_SNP")
abline(a=0, b=1, col="red")