setwd("C:/Users/steibelj/OneDrive/Documents/job/ans824/2017") # Set current working directory
rm(list=ls()) # Remove all objects from work environment
library(synbreed)
library(synbreedData)
library(regress)
library(gwaR)
let’s load mice dataset and impute
data(mice)
is.heter<-function(x){
substr(x,1,1)!=substr(x,3,3)
}
miceCimp<-codeGeno(mice, label.heter = is.heter,impute=T,impute.type="random",
maf=0.05,nmiss=0.01,verbose=T)
## step 1 : 400 marker(s) removed with > 1 % missing values
## step 2 : Recoding alleles
## step 4 : 2148 marker(s) removed with maf < 0.05
## step 7 : Imputing of missing values
## step 7d : Random imputing of missing values
## step 8 : Recode alleles due to imputation
## step 9 : 0 marker(s) removed with maf < 0.05
## step 10 : No duplicated markers removed
## End : 9997 marker(s) remain after the check
##
## Summary of imputation
## total number of missing values : 20141
## number of random imputations : 20141
summary(miceCimp)
## object of class 'gpData'
## covar
## No. of individuals 2527
## phenotyped 2527
## genotyped 1940
## pheno
## No. of traits: 2
##
## weight growth.slope
## Min. :11.9 Min. :-0.08889
## 1st Qu.:17.8 1st Qu.: 0.04556
## Median :19.9 Median : 0.08024
## Mean :20.3 Mean : 0.08659
## 3rd Qu.:22.6 3rd Qu.: 0.12569
## Max. :30.2 Max. : 0.26408
## NA's :16 NA's :53
##
## geno
## No. of markers 9997
## genotypes 1 0 2
## frequencies 0.367 0.532 0.101
## NA's 0.000 %
## map
## No. of mapped markers 9997
## No. of chromosomes 20
##
## markers per chromosome
##
## 1 10 11 12 13 14 15 16 17 18 19 2 3 4 5 6 7 8
## 875 332 646 490 410 435 427 437 375 346 248 801 757 716 553 628 509 479
## 9 X
## 527 6
##
## pedigree
## NULL
Let’s compute genomic relationship matrix, and check dimension of phenotypes and genotypes
Gmat<-kin(miceCimp,ret="realized")
plot(Gmat)
summary(Gmat)
## dimension 1940 x 1940
## rank 1939
## range of off-diagonal values -0.2797885 -- 1.218987
## mean off-diagonal values -0.0005252645
## range of diagonal values 0.8121 -- 1.325
## mean diagonal values 1.018488
## number of unique values 1871722
dim(Gmat)
## [1] 1940 1940
miceCimp$covar$CageDensity<-as.factor(miceCimp$covar$CageDensity)
miceCimp$covar$birthYear<-as.factor(miceCimp$covar$birthYear)
miceCimp$covar$birthMonth<-as.factor(miceCimp$covar$birthMonth)
gb<-gblup(rsp="weight",data=miceCimp,design=c(y~CageDensity+sex+birthYear),G=Gmat)
print(gb)
## gblup analysis of trait: weight
##
## fixed effects equation:
## y ~ CageDensity + sex + birthYear
##
## random effects equation:
## ~G + In
##
## log-likelihood: -2496.608 converged in: 7 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) 18.6358719 0.5693513 32.7317636 0.000000e+00
## CageDensity3 -1.0177320 0.5142108 -1.9792114 4.779221e-02
## CageDensity4 -1.1366604 0.4874889 -2.3316641 1.971837e-02
## CageDensity5 -1.3513646 0.4914227 -2.7499028 5.961295e-03
## CageDensity6 -1.5536356 0.4906843 -3.1662632 1.544110e-03
## CageDensity7 -2.2883748 0.5719152 -4.0012487 6.300910e-05
## sexM 4.2143385 0.1006135 41.8864280 0.000000e+00
## birthYear2003 0.3318858 0.3335067 0.9951399 3.196682e-01
## birthYear2004 1.9788235 0.3575527 5.5343551 3.123755e-08
##
## estimated variance components:
## Estimate StdError prop.var
## G 2.123722 0.2697555 0.3509632
## In 3.927402 0.1536502 0.6490368
varcomp(gb)
## Estimate StdError prop.var se
## G 2.123722 0.2697555 0.3509632 0.03440950
## In 3.927402 0.1536502 0.6490368 0.03756528
EBVg<-summary(gb)$uhat
Compare GEBV to phenotypes and to corrected phenotypes
plot(EBVg,gb$model$y)
abline(0,1)
cor(gb$model$y,EBVg)
## [,1]
## [1,] 0.554764
Ycor<-gb$ehat #this is the phenotype minus fixed effects
plot(EBVg~Ycor)
abline(0,1)
cor(Ycor,EBVg)
## [,1]
## [1,] 0.6867677
Let’s set aside some observations to prediction and refit model.
#random vector of missing values
Nobs<-nrow(miceCimp$pheno)
Nmis<-round(Nobs*0.1) #10% missing
tomissing<-sample(rownames(miceCimp$pheno),Nmis)
delG<-!(rownames(Gmat)%in%tomissing)
Gmiss<-Gmat[delG,delG]
gbmiss<-gblup(rsp="weight",data=miceCimp,design=c(y~CageDensity+sex+birthYear),G=Gmiss)
print(gbmiss)
## gblup analysis of trait: weight
##
## fixed effects equation:
## y ~ CageDensity + sex + birthYear
##
## random effects equation:
## ~G + In
##
## log-likelihood: -2245.489 converged in: 7 iterations
##
## estimated fixed effects:
## Estimate StdError test.st p.value
## (Intercept) 18.8168901 0.6577242 28.6090878 0.000000e+00
## CageDensity3 -1.1199982 0.5850741 -1.9142842 5.558385e-02
## CageDensity4 -1.1850424 0.5586844 -2.1211304 3.391083e-02
## CageDensity5 -1.4599564 0.5617177 -2.5990929 9.347048e-03
## CageDensity6 -1.6767818 0.5629528 -2.9785476 2.896181e-03
## CageDensity7 -2.3589715 0.6339190 -3.7212509 1.982384e-04
## sexM 4.2336245 0.1050824 40.2886268 0.000000e+00
## birthYear2003 0.2094526 0.3526029 0.5940183 5.524999e-01
## birthYear2004 1.9067436 0.3768627 5.0595181 4.203174e-07
##
## estimated variance components:
## Estimate StdError prop.var
## G 2.070753 0.2754597 0.3489157
## In 3.864071 0.1609608 0.6510843
varcomp(gbmiss)
## Estimate StdError prop.var se
## G 2.070753 0.2754597 0.3489157 0.03621222
## In 3.864071 0.1609608 0.6510843 0.03973085