#setwd("C:/Users/steibelj/OneDrive/Documents/job/ans824/2015") # Set current working directory
setwd("C:/Users/Juan Steibel/SkyDrive/Documents/job/ans824/2015") # Set current working directory
rm(list=ls()) # Remove all objects from work environment
library(synbreed)
## Warning: package 'synbreed' was built under R version 3.0.3
## Loading required package: doBy
## Warning: package 'doBy' was built under R version 3.0.3
## Loading required package: survival
## Loading required package: splines
## Loading required package: BLR
## Loading required package: SuppDists
## Loading required package: regress
## Warning: package 'regress' was built under R version 3.0.3
## Loading required package: abind
library(synbreedData)
library(genetics)
## Warning: package 'genetics' was built under R version 3.0.3
## Loading required package: combinat
##
## Attaching package: 'combinat'
##
## The following object is masked from 'package:utils':
##
## combn
##
## Loading required package: gdata
## Warning: package 'gdata' was built under R version 3.0.3
## gdata: Unable to locate valid perl interpreter
## gdata:
## gdata: read.xls() will be unable to read Excel XLS and XLSX files
## gdata: unless the 'perl=' argument is used to specify the location
## gdata: of a valid perl intrpreter.
## gdata:
## gdata: (To avoid display of this message in the future, please
## gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLX' (Excel 97-2004) files.
##
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLSX' (Excel 2007+) files.
##
## gdata: Run the function 'installXLSXsupport()'
## gdata: to automatically download and install the perl
## gdata: libaries needed to support Excel XLS and XLSX formats.
##
## Attaching package: 'gdata'
##
## The following object is masked from 'package:stats':
##
## nobs
##
## The following object is masked from 'package:utils':
##
## object.size
##
## Loading required package: gtools
## Warning: package 'gtools' was built under R version 3.0.3
## Loading required package: MASS
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 3.0.3
##
##
## NOTE: THIS PACKAGE IS NOW OBSOLETE.
##
##
##
## The R-Genetics project has developed an set of enhanced genetics
##
## packages to replace 'genetics'. Please visit the project homepage
##
## at http://rgenetics.org for informtion.
##
##
##
##
## Attaching package: 'genetics'
##
## The following objects are masked from 'package:base':
##
## %in%, as.factor, order
library(regress)
let’s load cattle dataset and obtain a few descriptive statistics
data(cattle)
head(cattle$pedigree)
## ID Par1 Par2 gener
## 1 ID10001 0 0 0
## 2 ID10002 0 0 0
## 3 ID10003 0 0 0
## 4 ID10004 0 0 0
## 5 ID10005 0 0 0
## 6 ID10006 0 0 0
dim(cattle$pedigree)
## [1] 1929 4
head(cattle$covar)
## id phenotyped genotyped
## 1 ID10001 FALSE FALSE
## 2 ID10002 FALSE FALSE
## 3 ID10003 FALSE FALSE
## 4 ID10004 FALSE FALSE
## 5 ID10005 FALSE FALSE
## 6 ID10006 FALSE FALSE
sum(cattle$covar$phenotyped)
## [1] 500
sum(cattle$covar$genotyped)
## [1] 500
summary(cattle$pheno[,,1])
## Phenotype1 Phenotype2
## Min. :-50.07000 Min. :-424.43
## 1st Qu.:-10.25750 1st Qu.: -94.40
## Median : 0.13500 Median : -0.84
## Mean : -0.00104 Mean : 0.05
## 3rd Qu.: 10.35250 3rd Qu.: 96.99
## Max. : 49.41000 Max. : 422.63
Let’s compute the numerator relationship matrix\ This matrix contains: a) in the diagonal 1+Expected inbreeding coeffients and b) in the off-diagonals, the expected proportion of alleles shared identical by descent
The expected inbreeding and ibd sharing is computed conditional on pedigree information
Note Many other packages can be used to compute this matrix
Amat<-kin(cattle,ret="add")
dim(Amat)
## [1] 1929 1929
summary(diag(Amat-1)) #let's look at the distribution of inbreeding
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000000 0.0000000 0.0001944 0.0000000 0.1250000
summary(Amat) #a more general descriptive analysis
## dimension 1929 x 1929
## rank 1929
## range of off-diagonal values 0 -- 0.625
## mean off-diagonal values 0.002926408
## range of diagonal values 1 -- 1.125
## mean diagonal values 1.000194
## number of unique values 25
save(Amat,file="A_cattle.Rdata") #save to avoid costly re-computation
head(rownames(Amat)) #names of animals are kept in rows and columns, very useful for matching
## [1] "ID10001" "ID10002" "ID10003" "ID10004" "ID10005" "ID10006"
Extract the subset of A that corresponds to phenotyped animals and re-order it don’t assume the order matches!!! check it!
indx<-match(rownames(cattle$pheno[,,1]),rownames(Amat))
phenotypes<-cattle$pheno[,,1]
A_sub<-Amat[indx,indx]
sum(rownames(A_sub)!=rownames(phenotypes))
## [1] 0
We can now fit an animal model
In this case the only fixed effect is the population mean, because data are pre-corrected
lmm<-regress(phenotypes[,1]~1,~A_sub)
summary(lmm)
## Likelihood kernel: K = (Intercept)
##
## Maximized log likelihood with kernel K is -1613.348
##
## Linear Coefficients:
## Estimate Std. Error
## (Intercept) 0.526 1.009
##
## Variance Coefficients:
## Estimate Std. Error
## A_sub 99.565 40.589
## In 142.788 37.391
names(lmm)
## [1] "trace" "llik" "cycle"
## [4] "rdf" "beta" "beta.cov"
## [7] "beta.se" "sigma" "sigma.cov"
## [10] "W" "Q" "fitted"
## [13] "predicted" "predictedVariance" "predictedVariance2"
## [16] "pos" "Vnames" "formula"
## [19] "Vformula" "Kcolnames" "model"
## [22] "Z" "X" "Sigma"
lmm$sigma[1]/(sum(lmm$sigma)) #estimate heritability
## A_sub
## 0.4108264
To select the best bulls we could estimate breeding values
And then sort them
EBV<-BLUP(lmm)
head(sort(EBV$Mean,decreasing=T),10)
## A_sub.ID11821 A_sub.ID11451 A_sub.ID11738 A_sub.ID11677 A_sub.ID11530
## 21.12254 16.54415 15.69283 14.72569 14.20865
## A_sub.ID11496 A_sub.ID11563 A_sub.ID11645 A_sub.ID11814 A_sub.ID11620
## 13.60020 13.35008 13.34789 13.25690 12.97709
But notice this prediction only involves the animals with records. It is more interesting to include all pedigreed animals. To get an EBV (estimated breeding value) for everyone in the pedigree!
It can be done with regress, but it is a bit more involved. A much better package to deal with that is pedigreemm
notice: there is a class conflict between pedigreemm and synbreeed, no big deal if you are aware of it
Due to time constraints, we will skip this task, but if you are interested, come see me and we could implement it togetehr (outside class hours)
###Breeding values of individuals without records An alternative solution is to derive the other animals’ EBV using multivariate normal distribution theory and to condition the expected breeding value of animals without records on EBV of animals with records.
\[
\begin{bmatrix}
EBV_{no.record}\\
EBV_{record}
\end{bmatrix} \sim N \left ( \begin{bmatrix} 0\\
0 \end{bmatrix} , \sigma _{a}^{2}
\begin{bmatrix}
A_{no.record,no.record} & A_{no.record,record} \\
A_{record,no.record} & A_{record,record}
\end{bmatrix} \right )
\]
Consequently:
\[ E(EBV_{no.record} | \widehat{EBV}_{record}) = A_{no.record,record} A_{record,record}^{-1} \widehat{EBV}_{record} \]
EBV_norec<- Amat[-indx,indx]%*%solve(A_sub)%*%EBV$Mean
head(sort(EBV_norec[,1],decreasing=T),10)
## ID10764 ID10880 ID10897 ID11399 ID10906 ID11416 ID11299
## 13.208223 11.240781 10.560951 9.678954 9.299042 8.885645 8.841818
## ID10770 ID10772 ID11110
## 8.754462 8.749780 8.381084
We now could bind together both predictions and get an overall ranking.
EBV_all<-c(EBV$Mean,EBV_norec[,1])
head(sort(EBV_all,decreasing=T),10)
## A_sub.ID11821 A_sub.ID11451 A_sub.ID11738 A_sub.ID11677 A_sub.ID11530
## 21.12254 16.54415 15.69283 14.72569 14.20865
## A_sub.ID11496 A_sub.ID11563 A_sub.ID11645 A_sub.ID11814 ID10764
## 13.60020 13.35008 13.34789 13.25690 13.20822
Notice a problem with the labeling… it would be easy to solve using string functions
I will show this because it is a good way of practicing writing functions and using sapply
we know:
nms<-names(EBV$Mean)
head(nms) #we need to extract second word after '.'
## [1] "A_sub.ID11430" "A_sub.ID11431" "A_sub.ID11432" "A_sub.ID11433"
## [5] "A_sub.ID11434" "A_sub.ID11435"
strsplit(nms[1],"\\.")
## [[1]]
## [1] "A_sub" "ID11430"
strsplit(nms[1],"\\.")[[1]][2] #this is it!
## [1] "ID11430"
new_name<-sapply(nms,function(x) strsplit(nms[1],"\\.")[[1]][2])
head(new_name)
## A_sub.ID11430 A_sub.ID11431 A_sub.ID11432 A_sub.ID11433 A_sub.ID11434
## "ID11430" "ID11430" "ID11430" "ID11430" "ID11430"
## A_sub.ID11435
## "ID11430"
names(EBV$Mean)<-new_name #let's rename it
EBV_all<-c(EBV$Mean,EBV_norec[,1])
head(sort(EBV_all,decreasing=T),10) #what we wanted!!!
## ID11430 ID11430 ID11430 ID11430 ID11430 ID11430 ID11430 ID11430
## 21.12254 16.54415 15.69283 14.72569 14.20865 13.60020 13.35008 13.34789
## ID11430 ID10764
## 13.25690 13.20822