Load matrices

Notice that matrices are not stored in binary format and rows and columns are not properly labeled, so some manipulation is needed. It would be safer so store in R the matrices used in all computations using the ##save## command.

rm(list=ls())
setwd("C:\\Users\\steibelj\\OneDrive\\Documents\\kaitlin")
library(regress)
library(gwaR)
load(file="lession_plus.Rdata")
load("Z_and_G_matrices.Rdata")
load("Weight_Gain.RData")

tb<-read.table("C:\\Users\\steibelj\\OneDrive\\Documents\\phenomics\\matGIBD.txt", header=T)
colnames(tb)<-rownames(tb)
tb[1:5,1:5]
##                 1           10          100        1000         1001
## 1    1.0000000000 0.1695529305 0.0000000000 0.005587352 0.0007127601
## 10   0.1695529305 1.0000000000 0.0003662446 0.000000000 0.0025992569
## 100  0.0000000000 0.0003662446 1.0000000000 0.005002218 0.0155715958
## 1000 0.0055873521 0.0000000000 0.0050022178 1.000000000 0.1562584191
## 1001 0.0007127601 0.0025992569 0.0155715958 0.156258419 1.0000000000
Gibd<-as.matrix(tb[rownames(G),rownames(G)])

tb<-read.table("C:\\Users\\steibelj\\OneDrive\\Documents\\kaitlin\\matrizA.txt", header=F)
tb[1:5,1:5]
##     V1 V2 V3 V4   V5
## 1    1  1  0  0 0.00
## 2   10  0  1  0 0.00
## 3  100  0  0  1 0.00
## 4 1000  0  0  0 1.00
## 5 1001  0  0  0 0.25
rownames(tb)<-tb[,1]

dim(tb)
## [1] 1079 1080
tb<-tb[,-1]
tb[1:5,1:5]
##      V2 V3 V4   V5   V6
## 1     1  0  0 0.00 0.00
## 10    0  1  0 0.00 0.00
## 100   0  0  1 0.00 0.00
## 1000  0  0  0 1.00 0.25
## 1001  0  0  0 0.25 1.00
colnames(tb)<-rownames(tb)
A<-as.matrix(tb[rownames(G),rownames(G)])

Check matrices against each other

plot(Gibd[upper.tri(Gibd)],G[upper.tri(G)],
     xlab="G-UBA",ylab="G Van Raden",
     pch=19)
abline(0,1,col="red")

boxplot(Gibd[upper.tri(Gibd)]~A[upper.tri(A)],
     xlab="A",ylab="G-UBA")

boxplot(G[upper.tri(G)]~A[upper.tri(A)],
     xlab="A",ylab="G-Van Raden")

Check diagonals and off-diagonals

summary(diag(G))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8357  0.9560  0.9859  0.9917  1.0200  1.2200
summary(diag(Gibd))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1
summary(diag(A))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1
summary(G[upper.tri(G)])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.16350 -0.03831 -0.01247 -0.00092  0.01863  0.80390
summary(Gibd[upper.tri(Gibd)])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0003963 0.0050550 0.0032570 0.2914000
summary(A[upper.tri(A)])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.000000 0.000000 0.007228 0.000000 0.500000

check estimated molecular relationships against pedigree relationships

Notice somethig is wrong here. the molecular relationships don’t match A at all. Gibd scare for off diagonals is not correct (there are a bunch of full sibs here and no Gibd is 0.5).

by(G[upper.tri(G)],A[upper.tri(A)],summary)
## A[upper.tri(A)]: 0
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.163500 -0.038420 -0.012660 -0.001697  0.018210  0.803900 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.0625
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.127400 -0.039250 -0.012160  0.002644  0.020130  0.592800 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.125
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -0.1371000 -0.0396400 -0.0139000 -0.0007878  0.0207200  0.6664000 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.25
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.151700 -0.034200 -0.005945  0.018680  0.032240  0.712600 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.3125
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.05955 -0.02358  0.01785  0.05152  0.06547  0.56660 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.5
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.12810 -0.02419  0.01302  0.08201  0.09165  0.67220
by(Gibd[upper.tri(Gibd)],A[upper.tri(A)],summary)
## A[upper.tri(A)]: 0
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0003819 0.0048320 0.0032080 0.2914000 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.0625
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0006758 0.0062200 0.0038310 0.2166000 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.125
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0002451 0.0049710 0.0032480 0.2442000 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.25
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0000000 0.0008561 0.0102700 0.0052350 0.2598000 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.3125
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0000000 0.0001636 0.0029550 0.0164000 0.0097900 0.2161000 
## -------------------------------------------------------- 
## A[upper.tri(A)]: 0.5
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.000000 0.000000 0.001911 0.030070 0.016140 0.268700

final check

Load the pedigree and compute A matrix

load("C:\\Users\\steibelj\\OneDrive\\Documents\\phenomics\\ped_cleaned.Rdata")
library(kinship2)
## Warning: package 'kinship2' was built under R version 3.3.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.3.3
## Loading required package: quadprog
KA<-2*kinship(id=ped_cleaned$ID,dadid=ped_cleaned$Par1,momid=ped_cleaned$Par2)
dim(KA)
## [1] 1456 1456
A1<-KA[colnames(G),rownames(G)]
dim(A1)
## [1] 1079 1079
by(G[upper.tri(G)],A1[upper.tri(A1)],summary)
## A1[upper.tri(A1)]: 0
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.163500 -0.039390 -0.014390 -0.009874  0.014360  0.540700 
## -------------------------------------------------------- 
## A1[upper.tri(A1)]: 0.0625
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.10090  0.01206  0.04610  0.05155  0.08693  0.34870 
## -------------------------------------------------------- 
## A1[upper.tri(A1)]: 0.125
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.07430  0.06064  0.09625  0.09808  0.13470  0.32960 
## -------------------------------------------------------- 
## A1[upper.tri(A1)]: 0.25
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.09455  0.19720  0.23560  0.23060  0.27380  0.60270 
## -------------------------------------------------------- 
## A1[upper.tri(A1)]: 0.3125
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2381  0.2829  0.3125  0.3170  0.3577  0.4119 
## -------------------------------------------------------- 
## A1[upper.tri(A1)]: 0.5
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.06902  0.44020  0.48350  0.47890  0.52800  0.80390
boxplot(G[upper.tri(G)]~A1[upper.tri(A1)],
        xlab="A",ylab="G-Van Raden")
points(c(0,0.0625,0.125,0.25,0.3125,0.5),pch=19,cex=1.5,col="red")

I think the provided A matrix and Gibd are incorrect in some way: they don’t match the IDS and the pedigree. On the other side, the Gibs and the A matrix I just computed seem to match each other reasonably well.