This short walkthrough uses genotype and phenotype datasets from three pork populations. We incorporated individual analysis that returns GBLUP objects and the estimated SNP effects and their variances in performing the meta-analysis. The functions used in this note provide an opportunity for concise scripting and computationally efficient tasks while displaying minimum needed outputs for prompt understanding.
library(data.table)
library(gwaR)
library(regress)
library(Matrix)
library(knitr)
library(magrittr)
library(ggplot2)
library(kableExtra)
Basically, the gwaR package was used in computing these functions to perform the genome-wide prediction from genomic best linear unbiased prediction (GBLUP).
source("Toolkit_function.R")
source("toolkit_function_Meta.R")
#COM
source_pop1 = 'comm'
pheno_name1= 'https://www.dropbox.com/s/1e2bw6yb9viha1w/pheno_comm.csv?dl=1'
geno_name1 = 'https://www.dropbox.com/s/oas827ngl39yxrd/genos_commercial.csv?dl=1'
map_name1 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name1 = 'cg'
id_name1 = 'animal'
phenotype1 = "ph"
#MSU
source_pop2 = 'msu'
pheno_name2 = 'https://www.dropbox.com/s/nh0dgcsuxkhtvvd/pheno_msu.csv?dl=1'
geno_name2 = 'https://www.dropbox.com/s/jjzgpvrecjd9e5g/genos_msu.csv?dl=1'
map_name2 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name2 = 'cgsl'
id_name2 = 'animal'
fe_cov2 = c('age_slg')
fe_fct2 = c('sex')
phenotype2 = "ph24"
#MAC
source_pop3 = 'marc'
pheno_name3='https://www.dropbox.com/s/lksmz0izaaifgxw/pheno_marc.csv?dl=1'
geno_name3 = 'https://www.dropbox.com/s/gx4jxoumij5lhtv/genos_marc.csv?dl=1'
map_name3 = 'https://www.dropbox.com/s/ym39ln7qii84gfe/map.csv?dl=1'
cg_name3 = 'cg'
id_name3 = 'animal'
fe_cov3 = c('age')
phenotype3 = "ph"
na_per_row = 0.05
na_per_col = 0.05
MAF=0.0
#COMM
com_geno <-read_genotypes(geno_name1)
## -------------------------------
## Read genotype file https://www.dropbox.com/s/oas827ngl39yxrd/genos_commercial.csv?dl=1
## sample output:
## MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 1 1 2 1 1 1
## 10 0 2 2 2 2
## 1001 2 2 0 0 0
## 1003 0 0 0 0 0
## 1004 1 1 1 1 1
## number of subjects: 1892
## number of SNP: 37069
## -------------------------------
com_pheno<-read_pheno(pheno_name1, source_pop1, id_name1,
cg_name=cg_name1,phenotype = phenotype1)
## -------------------------------
## Read phenotype file https://www.dropbox.com/s/1e2bw6yb9viha1w/pheno_comm.csv?dl=1
## sample output:
## cg ph
## 1 7003 5.68
## 2 7003 5.52
## 3 7005 5.48
## 4 7003 6.00
## 5 7005 5.50
## 6 7003 5.61
## number of subjects: 1920
## deleting rows with missing values
## -------------------------------
## number of subjects with complete observations: 1885
## -------------------------------
## Phenotypic summary
## cg ph
## 31 :304 Min. :5.180
## 41 :304 1st Qu.:5.540
## 11 :150 Median :5.630
## 23 :141 Mean :5.654
## 12 :115 3rd Qu.:5.740
## 21 :102 Max. :6.500
## (Other):769
## -------------------------------
com_mp<- read_map(map_name3)
## ..................................
## Summary of map:
## chr pos
## MARC0044150 1 286933
## ASGA0000014 1 342481
## ASGA0000021 1 489855
## ALGA0000009 1 538161
## ALGA0000014 1 565627
## H3GA0000032 1 573088
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416
## 17 18
## 1211 837
## .................................
com_filter <- filter_and_match(com_geno, com_pheno, com_mp, n_maf = MAF,
n_row=na_per_row, n_col=na_per_col)
## -------------------------------
## number of deleted row: 0
## number of deleted column: 0
## number of deleted MAF: 0
## -------------------------------
## Sample Output for genotype:
## [1] 1857 37069
## MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 1 1 2 1 1 1
## 10 0 2 2 2 2
## 1001 2 2 0 0 0
## 1003 0 0 0 0 0
## 1004 1 1 1 1 1
## Sample Output for phenotype:
## cg ph
## 1 7003 5.68
## 10 7003 5.76
## 1001 12 5.55
## 1003 12 5.60
## 1004 12 5.39
## number of subjects for genotype: 1857
## number of subjects for phenotype: 1857
## -------------------------------
## [1] "A map file was provided!"
## chr pos
## MARC0044150 1 286933
## ASGA0000014 1 342481
## ASGA0000021 1 489855
## ALGA0000009 1 538161
## ALGA0000014 1 565627
## H3GA0000032 1 573088
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416
## 17 18
## 1211 837
com_mat <- read_G_matrix(com_filter$geno)
## ---------------------------------
## Genomic relationship matrix
## [1] 1857 1857
## Summary for diagonal:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7095 0.7984 0.8262 0.9357 1.1572 1.6004
## ---------------------------------
## Summary for off-diagonal:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.3674607 -0.1199162 0.0084886 -0.0005041 0.0567494 1.1201528
## ---------------------------------
## Normalized Z matrix:
## [1] 1857 37069
com_gb<- model_gb(com_mat, com_filter$pheno,phenotype1,random=cg_name1)
## ..........................................................
## summary gblup analysis of trait: ph
##
## fixed effects equation:
## y ~ 1
##
## random effects equation:
## ~G + cg + In
##
## log-likelihood: 2553.079 converged in: 5 iterations
##
## estimated fixed effects:
## [1] test.st p.value
## <0 rows> (or 0-length row.names)
##
## estimated variance components:
## [1] prop.var
## <0 rows> (or 0-length row.names)
##
## breeding value predicted for 1857 first 5 animals shown:
## [,1]
## 1 0.02739802
## 10 0.03379701
## 1001 0.04445830
## 1003 0.04195778
## 1004 -0.05189923
## 1005 0.03457575
## ..........................................................
com_gw<-run_gwa(com_gb, com_filter$geno)
## .........................................
## Estimated SNP Effect and its variance:
## ghat var_ghat
## MARC0044150 -0.0259500989 0.0001663922
## ASGA0000014 -0.0121420243 0.0001336983
## ASGA0000021 0.0153958497 0.0001658754
## ALGA0000009 0.0007611284 0.0001531351
## ALGA0000014 0.0007625065 0.0001531344
## .........................................
## Summary of GWA:
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 5 6 7 7 7
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#MSU
msu_geno <-read_genotypes(geno_name2)
## -------------------------------
## Read genotype file https://www.dropbox.com/s/jjzgpvrecjd9e5g/genos_msu.csv?dl=1
## sample output:
## MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 991001 1 1 1 1 1
## 991002 1 1 1 1 1
## 991003 1 1 1 1 1
## 991004 1 1 1 1 1
## 991006 0 1 2 2 2
## number of subjects: 922
## number of SNP: 37069
## -------------------------------
msu_pheno<-read_pheno(pheno_name2, source_pop2, id_name2,fe_cov=fe_cov2,
fe_fct=fe_fct2, cg_name=cg_name2, phenotype = phenotype2)
## -------------------------------
## Read phenotype file https://www.dropbox.com/s/nh0dgcsuxkhtvvd/pheno_msu.csv?dl=1
## sample output:
## age_slg sex cgsl ph24
## 991001 182 1 3 5.54
## 991002 161 2 1 5.50
## 991003 161 1 1 5.40
## 991004 161 2 1 5.33
## 991006 179 2 3 5.52
## 991007 161 1 1 5.47
## number of subjects: 948
## deleting rows with missing values
## -------------------------------
## number of subjects with complete observations: 927
## -------------------------------
## Phenotypic summary
## age_slg sex cgsl ph24
## Min. :148.0 1:492 12 : 57 Min. :5.090
## 1st Qu.:157.0 2:435 16 : 56 1st Qu.:5.420
## Median :164.0 18 : 47 Median :5.490
## Mean :166.3 1 : 45 Mean :5.513
## 3rd Qu.:177.0 3 : 42 3rd Qu.:5.590
## Max. :182.0 8 : 42 Max. :6.350
## (Other):638
## -------------------------------
msu_mp<- read_map(map_name2)
## ..................................
## Summary of map:
## chr pos
## MARC0044150 1 286933
## ASGA0000014 1 342481
## ASGA0000021 1 489855
## ALGA0000009 1 538161
## ALGA0000014 1 565627
## H3GA0000032 1 573088
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416
## 17 18
## 1211 837
## .................................
msu_filter <- filter_and_match(msu_geno, msu_pheno, msu_mp, n_maf = MAF,
n_row=na_per_row, n_col=na_per_col)
## -------------------------------
## number of deleted row: 0
## number of deleted column: 0
## number of deleted MAF: 0
## -------------------------------
## Sample Output for genotype:
## [1] 904 37069
## MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 991001 1 1 1 1 1
## 991002 1 1 1 1 1
## 991003 1 1 1 1 1
## 991004 1 1 1 1 1
## 991006 0 1 2 2 2
## Sample Output for phenotype:
## age_slg sex cgsl ph24
## 991001 182 1 3 5.54
## 991002 161 2 1 5.50
## 991003 161 1 1 5.40
## 991004 161 2 1 5.33
## 991006 179 2 3 5.52
## number of subjects for genotype: 904
## number of subjects for phenotype: 904
## -------------------------------
## [1] "A map file was provided!"
## chr pos
## MARC0044150 1 286933
## ASGA0000014 1 342481
## ASGA0000021 1 489855
## ALGA0000009 1 538161
## ALGA0000014 1 565627
## H3GA0000032 1 573088
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416
## 17 18
## 1211 837
msu_mat <- read_G_matrix(msu_filter$geno)
## ---------------------------------
## Genomic relationship matrix
## [1] 904 904
## Summary for diagonal:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.7560 0.9128 0.9482 0.9517 0.9859 1.2434
## ---------------------------------
## Summary for off-diagonal:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.284280 -0.071938 -0.019533 -0.001054 0.052246 0.635059
## ---------------------------------
## Normalized Z matrix:
## [1] 904 37069
msu_gb<- model_gb(msu_mat,msu_filter$pheno,phenotype2,fixed=c(fe_cov2,fe_fct2),
random=cg_name2)
## ..........................................................
## summary gblup analysis of trait: ph24
##
## fixed effects equation:
## y ~ age_slg + sex
##
## random effects equation:
## ~G + cgsl + In
##
## log-likelihood: 1427.524 converged in: 5 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) 5.731175106 0.204355560 28.0451146 0.000000000
## age_slg -0.001208287 0.001233471 -0.9795832 0.327291885
## sex2 -0.022569259 0.008269218 -2.7293100 0.006346702
##
## estimated variance components:
## Estimate StdError prop.var
## G 0.003021366 0.0008436650 0.1527314
## cgsl 0.004376634 0.0012946806 0.2212408
## In 0.012384220 0.0007437225 0.6260278
##
## breeding value predicted for 904 first 5 animals shown:
## [,1]
## 991001 0.024862960
## 991002 0.016831065
## 991003 -0.048651854
## 991004 -0.002855189
## 991006 0.049104951
## 991007 -0.027439658
## ..........................................................
msu_gw<-run_gwa(msu_gb, msu_filter$geno)
## .........................................
## Estimated SNP Effect and its variance:
## ghat var_ghat
## MARC0044150 -0.0009559271 7.477286e-06
## ASGA0000014 0.0052428948 6.803276e-06
## ASGA0000021 0.0009625046 7.473270e-06
## ALGA0000009 0.0007970479 7.327064e-06
## ALGA0000014 0.0006311137 7.435277e-06
## .........................................
## Summary of GWA:
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 4 4 5 5 8
## attr(,"class")
## [1] "summary.gwas"
## .........................................
#MARC
mac_geno<-read_genotypes(geno_name3)
## -------------------------------
## Read genotype file https://www.dropbox.com/s/gx4jxoumij5lhtv/genos_marc.csv?dl=1
## sample output:
## MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 200521304 1 1 1 1 1
## 200521402 1 1 1 1 1
## 200521405 2 2 0 0 0
## 200521502 2 2 0 1 1
## 200521504 1 2 1 2 2
## number of subjects: 1234
## number of SNP: 37069
## -------------------------------
mac_pheno<-read_pheno(pheno_name3, source_pop3, id_name3,fe_cov=fe_cov3,
cg_name=cg_name3, phenotype = phenotype3)
## -------------------------------
## Read phenotype file https://www.dropbox.com/s/lksmz0izaaifgxw/pheno_marc.csv?dl=1
## sample output:
## age cg ph
## 200521304 189 4 5.9
## 200521402 201 7 5.6
## 200521405 224 11 5.8
## 200521502 207 8 5.9
## 200521504 207 8 5.9
## 200521601 215 10 5.8
## number of subjects: 1237
## deleting rows with missing values
## -------------------------------
## number of subjects with complete observations: 531
## -------------------------------
## Phenotypic summary
## age cg ph
## Min. :160.0 24 : 26 Min. :5.400
## 1st Qu.:186.0 13 : 25 1st Qu.:5.700
## Median :196.0 14 : 25 Median :5.800
## Mean :196.3 20 : 25 Mean :5.812
## 3rd Qu.:207.0 21 : 25 3rd Qu.:5.900
## Max. :233.0 22 : 25 Max. :7.000
## (Other):380
## -------------------------------
mac_mp<- read_map(map_name3)
## ..................................
## Summary of map:
## chr pos
## MARC0044150 1 286933
## ASGA0000014 1 342481
## ASGA0000021 1 489855
## ALGA0000009 1 538161
## ALGA0000014 1 565627
## H3GA0000032 1 573088
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416
## 17 18
## 1211 837
## .................................
mac_filter <- filter_and_match(mac_geno, mac_pheno, map=mac_mp, n_maf = MAF,
n_row=na_per_row, n_col=na_per_col)
## -------------------------------
## number of deleted row: 0
## number of deleted column: 0
## number of deleted MAF: 0
## -------------------------------
## Sample Output for genotype:
## [1] 530 37069
## MARC0044150 ASGA0000014 ASGA0000021 ALGA0000009 ALGA0000014
## 200521304 1 1 1 1 1
## 200521402 1 1 1 1 1
## 200521405 2 2 0 0 0
## 200521502 2 2 0 1 1
## 200521504 1 2 1 2 2
## Sample Output for phenotype:
## age cg ph
## 200521304 189 4 5.9
## 200521402 201 7 5.6
## 200521405 224 11 5.8
## 200521502 207 8 5.9
## 200521504 207 8 5.9
## number of subjects for genotype: 530
## number of subjects for phenotype: 530
## -------------------------------
## [1] "A map file was provided!"
## chr pos
## MARC0044150 1 286933
## ASGA0000014 1 342481
## ASGA0000021 1 489855
## ALGA0000009 1 538161
## ALGA0000014 1 565627
## H3GA0000032 1 573088
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 4409 2545 1867 2648 1611 2370 2283 1901 2293 1368 1335 1217 2905 2849 2004 1416
## 17 18
## 1211 837
mac_mat <- read_G_matrix(mac_filter$geno)
## ---------------------------------
## Genomic relationship matrix
## [1] 530 530
## Summary for diagonal:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.8928 0.9541 0.9756 0.9821 1.0093 1.1465
## ---------------------------------
## Summary for off-diagonal:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.158444 -0.053681 -0.023830 -0.001857 0.017487 0.706645
## ---------------------------------
## Normalized Z matrix:
## [1] 530 37069
mac_gb <- model_gb(mac_mat,mac_filter$pheno,phenotype3,fixed=c(fe_cov3),
random=cg_name3)
## ..........................................................
## summary gblup analysis of trait: ph
##
## fixed effects equation:
## y ~ age
##
## random effects equation:
## ~G + cg + In
##
## log-likelihood: 701.4861 converged in: 5 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) 5.9457267074 0.1572708952 37.8056391 0.0000000
## age -0.0006709545 0.0007975558 -0.8412635 0.4002004
##
## estimated variance components:
## Estimate StdError prop.var
## G 0.006117629 0.002164584 0.2097627
## cg 0.003727272 0.001397493 0.1278016
## In 0.019319621 0.001899998 0.6624357
##
## breeding value predicted for 530 first 5 animals shown:
## [,1]
## 200521304 0.03097201
## 200521402 -0.04838265
## 200521405 -0.01679525
## 200521502 -0.02663800
## 200521504 -0.04140536
## 200521601 -0.03101332
## ..........................................................
mac_gw<-run_gwa(mac_gb, mac_filter$geno)
## .........................................
## Estimated SNP Effect and its variance:
## ghat var_ghat
## MARC0044150 -0.005274609 1.884942e-05
## ASGA0000014 -0.004839716 1.115949e-05
## ASGA0000021 0.005274609 1.884942e-05
## ALGA0000009 0.005915184 1.895690e-05
## ALGA0000014 0.005915184 1.895690e-05
## .........................................
## Summary of GWA:
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 0 0 0 0 0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
meta <- meta_gp(com_gb, com_filter$geno, msu_gb, msu_filter$geno, mac_gb,
mac_filter$geno)
## .........................................
## Estimated SNP Effect and its variance:
## ghat var_ghat
## MARC0044150 -0.0259500989 0.0001663922
## ASGA0000014 -0.0121420243 0.0001336983
## ASGA0000021 0.0153958497 0.0001658754
## ALGA0000009 0.0007611284 0.0001531351
## ALGA0000014 0.0007625065 0.0001531344
## .........................................
## Summary of GWA:
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 5 6 7 7 7
## attr(,"class")
## [1] "summary.gwas"
## .........................................
## .........................................
## Estimated SNP Effect and its variance:
## ghat var_ghat
## MARC0044150 -0.0009559271 7.477286e-06
## ASGA0000014 0.0052428948 6.803276e-06
## ASGA0000021 0.0009625046 7.473270e-06
## ALGA0000009 0.0007970479 7.327064e-06
## ALGA0000014 0.0006311137 7.435277e-06
## .........................................
## Summary of GWA:
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 4 4 5 5 8
## attr(,"class")
## [1] "summary.gwas"
## .........................................
## .........................................
## Estimated SNP Effect and its variance:
## ghat var_ghat
## MARC0044150 -0.005274609 1.884942e-05
## ASGA0000014 -0.004839716 1.115949e-05
## ASGA0000021 0.005274609 1.884942e-05
## ALGA0000009 0.005915184 1.895690e-05
## ALGA0000014 0.005915184 1.895690e-05
## .........................................
## Summary of GWA:
## <0.001 <0.01 <0.025 <0.05 <0.1
## signif.values 0 0 0 0 0
## attr(,"class")
## [1] "summary.gwas"
## .........................................
meta$table
| SE_Meta | Z_Meta | p_Meta | \(\beta_Meta\) | \(\hat{g}_Meta\) | |
|---|---|---|---|---|---|
| MARC0044150 | 0.5691040 | -2.1916848 | 0.0284023 | -1.2472965 | -0.0280941 |
| ASGA0000014 | 0.6373828 | -0.1952363 | 0.8452079 | -0.1244403 | -0.0022345 |
| ASGA0000021 | 0.5696505 | 1.5758466 | 0.1150612 | 0.8976818 | 0.0201806 |
| ALGA0000009 | 0.5838699 | 0.7643431 | 0.4446628 | 0.4462769 | 0.0095500 |
| ALGA0000014 | 0.5826945 | 0.7308853 | 0.4648492 | 0.4258828 | 0.0091503 |
| H3GA0000032 | 0.6059635 | -1.6794179 | 0.0930706 | -1.0176659 | -0.0202182 |
par(mfrow= c(1, 3))
par(mar=c(6,5,6,0))
plot(com_gw[, 1], meta$ghat, xlab = expression(hat(g)[COM]),
ylab = expression(hat(g)[Meta]), cex.lab= 1.5, col="orangered3")
plot(msu_gw[, 1], meta$ghat, xlab = expression(hat(g)[MSU]),
ylab = expression(hat(g)[Meta]), cex.lab= 1.5, col="gold3")
plot(mac_gw[, 1], meta$ghat, xlab = expression(hat(g)[MAC]),
ylab = expression(hat(g)[Meta]), cex.lab= 1.5, col="forestgreen")
mtext("Individual SNP effect vs Meta-SNP effect", side =3, line= -4, outer = TRUE)
plot((meta$beta_Meta/meta$SE_Meta),(meta$ghat/sqrt(meta$var_ghat)), xlab = "FIXED_SNP",
ylab = "GBLUP_SNP")
abline(a=0, b=1, col="red")