\(Date:\ 19\ March\ 2020\)

\(Description\ code:\)

This code and document, make the genomic relationship matrices for each of MSU and Scotland pig populations with the 11.1 version of marker map, and after it build the big block diagonal matrix G join both populations

1.Compute M, Z standardized matrices and G matrix for MSU population

\(The\ \mathbf{G}\ matrix\ for\ MSU\ population\ was\ computed\ as\ VanRadem (2008):\)

\[\mathbf{Z}=\frac{\mathbf{M}-\mathbf{F}}{\sqrt{2p_i(1-p_i)}} \ and\ \mathbf{G}=\mathbf{ZZ'}\]

# load marker-map MSU-Scotland populations
load("../../map_MSU_Scotland.Rdata")
## 1.1. load genomatrix MSU population contains NA elements for imputation
load("../../genomatrix_MSU.Rdata")

## 1.2. Imputation of genotypes with the expected allelic dosage of SNP  
geno_na<-genofinal.MSU
pos_NA<-which(is.na(geno_na), arr.ind = T) ### position of NA elements in the matrix

## 1.2.1. Imputation of the elements by the mean of each SNP to compute the matrix M
for (j in 1:ncol(geno_na)) {
  geno_na[is.na(geno_na[ ,j]), j] <-mean(geno_na[ ,j], na.rm = T)
}            
M<-geno_na
idmsu<-map.MSU.Scotland$snp_name
M<-M[,idmsu]
M[1:5,1:5]

##     1_10673082 1_10723065 1_11407894 1_11426075 1_13996200
## 265          2          0          0          2          0
## 284          0          0          0          2          0
## 311          1          0          0          2          0
## 336          0          0          0          2          0
## 343          1          0          0          2          0

summary(M[upper.tri(M)])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   1.000   1.009   2.000   2.000

## 1.2.2. Check NA replacement by comparison  mean M vs mean geno_na matrix
summary(colMeans(M))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1001  0.6200  1.0093  1.0091  1.4056  1.8999

summary(colMeans(geno_na,na.rm=T)) ###  expected allelic dosage of SNP

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1001  0.6200  1.0093  1.0091  1.4056  1.8999

plot(colMeans(M),colMeans(geno_na[,idmsu],na.rm=T))
abline(0,1,col="red")

# 1.3. Construct of the standardized marker matrix Z, (VanRaden 2008)    

## 1.3.1. Construc of matrix F, where the columns of F are allele frequencies expressed as
# Fi = 2pi(1-pi) ,  pi is the MAF of locus i

all_frq<-colMeans(M)/2    ### allele frequencies
all_f_e<-2*all_frq
sd_all_f<-all_f_e*(1-all_frq)   
es_f<-sum(sd_all_f)

M_F<-(sweep(M,2,all_f_e,"-"))  #### Computation (M - F)
M_F[1:5,1:5]

##     1_10673082 1_10723065 1_11407894 1_11426075 1_13996200
## 265  1.4995366 -0.2520853 -0.4637546  0.3670065  -0.102873
## 284 -0.5004634 -0.2520853 -0.4637546  0.3670065  -0.102873
## 311  0.4995366 -0.2520853 -0.4637546  0.3670065  -0.102873
## 336 -0.5004634 -0.2520853 -0.4637546  0.3670065  -0.102873
## 343  0.4995366 -0.2520853 -0.4637546  0.3670065  -0.102873

## 1.3.2. Computation of Z matrix Standardized
Z_stan<-(M_F)/sqrt(es_f)   
dim(Z_stan)

## [1]  1079 45978

Z_stan[1:5,1:5]

##       1_10673082  1_10723065   1_11407894  1_11426075    1_13996200
## 265  0.011266275 -0.00189396 -0.003484268 0.002757382 -0.0007729027
## 284 -0.003760067 -0.00189396 -0.003484268 0.002757382 -0.0007729027
## 311  0.003753104 -0.00189396 -0.003484268 0.002757382 -0.0007729027
## 336 -0.003760067 -0.00189396 -0.003484268 0.002757382 -0.0007729027
## 343  0.003753104 -0.00189396 -0.003484268 0.002757382 -0.0007729027

ZT<-t(Z_stan)  #### traspose of Z matrix (Z')

# 1.4. Computation of genomic relationship matrix  (G), G = ZZ'     
Gmsu<-Z_stan%*%ZT
dim(Gmsu)

## [1] 1079 1079

Gmsu[1:5,1:5]

##              265         284         311         336          343
## 265  1.022518441 -0.03101190 -0.04035183 -0.02177595 -0.009979937
## 284 -0.031011902  0.98020913 -0.04915424 -0.06621095 -0.017859642
## 311 -0.040351832 -0.04915424  1.00786365  0.15276562  0.103933198
## 336 -0.021775951 -0.06621095  0.15276562  0.91574822  0.233679409
## 343 -0.009979937 -0.01785964  0.10393320  0.23367941  1.020037617

summary(diag(Gmsu))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8367  0.9552  0.9858  0.9916  1.0211  1.2203

summary(Gmsu[upper.tri(Gmsu)])

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.1634645 -0.0385499 -0.0125019 -0.0009199  0.0188589  0.8022920

summary(eigen(Gmsu)$values)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1803  0.3580  0.9916  0.8042 30.9371

save(Gmsu, file = "Gmsu.Rdata")

2.Compute M, Z standardized matrices and G matrix for Scotland population by line

\(The\ G\ matrix\ for\ Scotland\ population\ was\ builded\ by\ each\ line,and\ computed\ as\ follow:\)

\(\mathbf{G}=\frac{\mathbf{ZZ'}}{tr(\mathbf{ZZ'})/n}\\ where\ tr\ is\ the\ trace\ operator\ and\ n\ is\ the\ individuals\)

## 2.1. load genomatrix Scotland population contains NA elements for imputation
load("../../genomatrix_Scotland.Rdata")
# load phenotypes with ids by Line
load("../../../8_Phenomics_Scottish/Work_Scotland/2_Behavior_matrices/phenotypes_2_camera_number.Rdata")

# 2.2.Select individuals by line and compute Marker matrix by line

# 2.2.1. Individuals with lesion count phenotype, lines within pen
table(phenotype$Line)

## 
##  1942 28042 32742 33742 40842 
##   290   318   347   417   463

lphe<-phenotype%>%arrange(Line)%>%as.data.frame()
rownames(lphe)<-lphe$Ident
l<-unique(lphe$Line)
id.line<-list()
for (i in 1:length(l)) {
  id.line[i]<-lphe%>%filter(Line==l[i])%>%select(Ident)
  names(id.line)[i]<-l[i]}

# 2.2.2. Caculate of denominator Zstand genotypes by Line (M matrix by line)

# 2.2.2.1. list with common individuals by Line and genomatrix
genoidc.na<-genomatrix.Scot
dim(genoidc.na)

## [1]  1830 45978

idg<-list() 
for (i in 1:length(id.line)) {
  idg[[i]]<-intersect(id.line[[i]], rownames(genoidc.na))
  names(idg)[i]<-names(id.line)[i]
}

# 2.2.2.2. list with genotype matrix of individuals by Line
line.mat.na<-list()
for (i in 1:length(idg)) {
  line.mat.na[[i]]<-genoidc.na[idg[[i]],]
  names(line.mat.na)[i]<-names(idg)[i]
}
lapply(line.mat.na, function(x)dim(x))

## $`1942`
## [1]   290 45978
## 
## $`28042`
## [1]   317 45978
## 
## $`32742`
## [1]   347 45978
## 
## $`33742`
## [1]   417 45978
## 
## $`40842`
## [1]   459 45978

# 2.2.2.3. Imputation of genotypes with the expected allelic dosage of SNP and Compute M matrix By Line 
#   Imputation of the elements by the mean of each SNP to compute the matrix M

imput.gen <- function(x) {
  if(class(x)!="matrix")stop("x must be class matrix")
  for (j in 1:ncol(x)) {
    x[is.na(x[ ,j]), j] <-mean(x[ ,j], na.rm = T)
  }            
  return(x)
}

M.bl<-lapply(line.mat.na,imput.gen)
lapply(M.bl,function(x)dim(x))

## $`1942`
## [1]   290 45978
## 
## $`28042`
## [1]   317 45978
## 
## $`32742`
## [1]   347 45978
## 
## $`33742`
## [1]   417 45978
## 
## $`40842`
## [1]   459 45978

# 2.3. Construc of matrix F, where the columns of F are allele frequencies 
# expressed as 2pi(1-pi) ,  pi is the MAF of locus i, 

den.mat <- function(x) {
  
  if(class(x)!="matrix")stop("x must be class matrix")
  all.frq<-colMeans(x)/2        # allelic frequency pi
  all.f_e<-2*all.frq            # expected allelic frequency (2pi)
  sd.all_f<-all.f_e*(1-all.frq) # variance allelic frequency 2pi(1-pi) 
  es_f<-sum(sd.all_f)           # sum(2pi(1-pi))
  denom<-sqrt(es_f)             # denominator 
  
  return(list(denom=denom, all.frq=all.frq))
}

deno.by.line<-lapply(M.bl,den.mat)
lapply(deno.by.line,function(x)summary(x$all.frq))

## $`1942`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3207  0.5000  0.5005  0.6810  1.0000 
## 
## $`28042`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3107  0.5000  0.5007  0.6909  1.0000 
## 
## $`32742`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3180  0.5014  0.5014  0.6859  1.0000 
## 
## $`33742`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3285  0.5000  0.5005  0.6739  1.0000 
## 
## $`40842`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.3231  0.4989  0.4997  0.6783  1.0000

# 2.4. New Z Standardized by line
# 2.4.1. Allelic frequecy matrix (af)

af<-rbind(deno.by.line$`1942`$all.frq,
          deno.by.line$`28042`$all.frq,
          deno.by.line$`32742`$all.frq,
          deno.by.line$`33742`$all.frq,
          deno.by.line$`40842`$all.frq)
dim(af)

## [1]     5 45978

rownames(af)<-c("1942","28042","32742","33742","40842")
af[,1:5]

##        12785117  12785118  12785120  12785121  12785124
## 1942  0.5086207 0.7854671 0.6862069 0.6793103 0.7051724
## 28042 0.6987382 0.8059937 0.7744479 0.7689873 0.7428571
## 32742 0.5592486 0.8265896 0.8126801 0.7982709 0.7991202
## 33742 0.6115108 0.7949640 0.7494005 0.7218225 0.7652068
## 40842 0.5631808 0.6397380 0.6666667 0.6677560 0.7280702

# 2.4.2. Vector with rownames of genotype matrix (id individuals)
rownm.genom<-as_tibble(as.numeric(rownames(genomatrix.Scot)))

## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.

colnames(rownm.genom)<-"Ident"
# 2.4.3. Vector with Line of individual.
VL<-rownm.genom%>%left_join(lphe)%>%select(Ident,Line)

## Joining, by = "Ident"

vl<-as.data.frame(VL)
rownames(vl)<-vl$Ident
vl<-as.character(vl$Line)

# 2.4.4 Expected dosage matrix (ED)
ED<-af[vl,]
ED[1:5,1:5]

##        12785117  12785118  12785120  12785121  12785124
## 32742 0.5592486 0.8265896 0.8126801 0.7982709 0.7991202
## 32742 0.5592486 0.8265896 0.8126801 0.7982709 0.7991202
## 33742 0.6115108 0.7949640 0.7494005 0.7218225 0.7652068
## 33742 0.6115108 0.7949640 0.7494005 0.7218225 0.7652068
## 33742 0.6115108 0.7949640 0.7494005 0.7218225 0.7652068

af[,1:5]

##        12785117  12785118  12785120  12785121  12785124
## 1942  0.5086207 0.7854671 0.6862069 0.6793103 0.7051724
## 28042 0.6987382 0.8059937 0.7744479 0.7689873 0.7428571
## 32742 0.5592486 0.8265896 0.8126801 0.7982709 0.7991202
## 33742 0.6115108 0.7949640 0.7494005 0.7218225 0.7652068
## 40842 0.5631808 0.6397380 0.6666667 0.6677560 0.7280702

rownames(ED)<-VL$Ident

# 2.4.5. Marker matrix (M) and Z marker matrix
m<-rbind(M.bl[[1]],M.bl[[2]],M.bl[[3]],M.bl[[4]],M.bl[[5]])
idg<-rownames(genomatrix.Scot)
M<-m[idg,]
genomatrix.Scot[1:5,1:5]

##          12785117 12785118 12785120 12785121 12785124
## 64002915        1        2        2        2       NA
## 64015515        1        1        1        1        1
## 64032369        0        1        2        2        2
## 64032399        1        2        1        1       NA
## 64032400        1        2        1        1        2

M[1:5,1:5]

##          12785117 12785118 12785120 12785121 12785124
## 64002915        1        2        2        2 1.598240
## 64015515        1        1        1        1 1.000000
## 64032369        0        1        2        2 2.000000
## 64032399        1        2        1        1 1.530414
## 64032400        1        2        1        1 2.000000

sum(rownames(M)!=rownames(genomatrix.Scot))

## [1] 0

sum(rownames(M)!=rownames(ED))

## [1] 0

z<-M-2*ED
dim(z)

## [1]  1830 45978

z[1:5,1:5]

##            12785117   12785118   12785120   12785121   12785124
## 64002915 -0.1184971  0.3468208  0.3746398  0.4034582  0.0000000
## 64015515 -0.1184971 -0.6531792 -0.6253602 -0.5965418 -0.5982405
## 64032369 -1.2230216 -0.5899281  0.5011990  0.5563549  0.4695864
## 64032399 -0.2230216  0.4100719 -0.4988010 -0.4436451  0.0000000
## 64032400 -0.2230216  0.4100719 -0.4988010 -0.4436451  0.4695864

z[1,]%*%z[1,]

##          [,1]
## [1,] 15122.14

# 2.4.6. ZZ' matrix
Gl<-z%*%t(z)
Gl[1,1]

## [1] 15122.14

# 2.4.7. Standardization of ZZ' matrix and Make G matrix
dm<-sum(diag(Gl))/nrow(Gl) 
dm

## [1] 15580.52

G.scotl<-Gl/dm
dim(G.scotl)

## [1] 1830 1830

save(G.scotl, file = "G_scotland.Rdata")

# 2.4.7.1. Summary diagonals and off-diagonals elements and eigenvalues of G matrix
summary(diag(G.scotl))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7113  0.9570  1.0000  1.0000  1.0367  1.3782

summary(G.scotl[upper.tri(G.scotl)])

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.2782966 -0.0223671 -0.0040329 -0.0005467  0.0143230  1.0039055

summary(eigen(G.scotl)$values)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1888  0.3813  1.0000  0.8796 23.1428

# 2.4.7.2. Summary diagonals and off-diagonals elements of G matrix by line
# individuals in each Line
idx<-list()
for (i in 1:length(id.line)) {
  idx[[i]]<-intersect(id.line[[i]], colnames(G.scotl))
  names(idx)[i]<-names(id.line)[i]
}
G.line<-list()
for (i in 1:length(idx)) {
  G.line[[i]]<-G.scotl[idx[[i]],idx[[i]]]
  names(G.line)[i]<-names(idx)[i]
}

## Summary diagonals G by line
lapply(G.line, function(x)dim(x))

## $`1942`
## [1] 290 290
## 
## $`28042`
## [1] 317 317
## 
## $`32742`
## [1] 347 347
## 
## $`33742`
## [1] 417 417
## 
## $`40842`
## [1] 459 459

lapply(G.line,function(s)summary(diag(s)))

## $`1942`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9368  0.9912  1.0143  1.0161  1.0409  1.1252 
## 
## $`28042`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8272  0.8922  0.9125  0.9143  0.9346  0.9978 
## 
## $`32742`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7113  0.9458  0.9686  0.9692  0.9946  1.0781 
## 
## $`33742`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9688  1.0295  1.0597  1.0659  1.0950  1.2841 
## 
## $`40842`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.9050  0.9841  1.0041  1.0124  1.0297  1.3782

## Summary off-diagonals G by line
lapply(G.line,function(s)summary(s[upper.tri(s)]))

## $`1942`
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.161866 -0.058352 -0.032011 -0.003516  0.003896  0.633139 
## 
## $`28042`
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.138786 -0.048580 -0.024205 -0.002893  0.005411  0.587487 
## 
## $`32742`
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.155253 -0.049657 -0.025181 -0.002801  0.005855  1.003905 
## 
## $`33742`
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.161749 -0.052377 -0.023926 -0.002562  0.010772  0.873257 
## 
## $`40842`
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.159785 -0.047081 -0.022407 -0.002210  0.007936  0.865327

3. Make block diagonal matrix G with Msu and Scotland population

\(The\ block\ diagonal\ matrix\ G\ with\ both\ populations\ was\ computed\ as\ follow:\)

\[\mathbf{G} =\begin{bmatrix} \mathbf{G_{MSU}} & \mathbf{0} \\ \mathbf{0} & \mathbf{G_{Scot}} \end{bmatrix} \]

library(magic)

## Loading required package: abind

G.msu.scot<-adiag(Gmsu,G.scotl)
dim(G.msu.scot)

## [1] 2909 2909

save(G.msu.scot,file = "G_matrix_MSU_Scot.Rdata")

Make genomic relationship matrix (G) with MSU and Scotland pigs populations

1.Compute M, Z standardized matrices and G matrix for MSU population

2.Compute M, Z standardized matrices and G matrix for Scotland population by line

3. Make block diagonal matrix G with Msu and Scotland population