# Progeny yield Deviations - Date : 2020-11-11

# An example of the animal model

# Linear Models for the Prediction of Animal Breeding Values, Raphael Mrode, 3rd Edition

# Julius Mugambe(PhD)


# Clear works pace
rm(list = ls())
# Set working directory
# Setting working directory
setwd("G:/R-MME") 
getwd()
## [1] "G:/R-MME"
# Functions

# install.packages('MASS', dependencies = TRUE)
library(MASS)
# Univariate Model with one Random Effect.
#inputting variance components
sigma_2_a = 20
sigma_2_e = 40
alpha = sigma_2_e/sigma_2_a

# load pedigree data
ped <- read.csv("pedigree.txt", sep="")

# function to make inverse of numerator relationship matrix
ainv = function(ped) {
  n = nrow(ped)
  Ainv = matrix(c(0), nrow = n, ncol = n)
  
  for (i in 1:n) {
    animal = ped[i, 1]
    sire = ped[i, 2]
    dam = ped[i, 3]
    
    if (sire == 0 & dam == 0) {
      # both parents unknown
      alpha = 1
      Ainv[animal, animal] = alpha + Ainv[animal, animal]
    } else if (sire != 0 & dam == 0) {
      # sire known
      alpha = 4/3
      Ainv[animal, animal] = alpha + Ainv[animal, animal]
      Ainv[animal, sire] = -alpha/2 + Ainv[animal, sire]
      Ainv[sire, animal] = -alpha/2 + Ainv[sire, animal]
      Ainv[sire, sire] = alpha/4 + Ainv[sire, sire]
    } else if (sire == 0 & dam != 0) {
      # dam known
      alpha = 4/3
      Ainv[animal, animal] = alpha + Ainv[animal, animal]
      Ainv[animal, dam] = -alpha/2 + Ainv[animal, dam]
      Ainv[dam, animal] = -alpha/2 + Ainv[dam, animal]
      Ainv[dam, dam] = alpha/4 + Ainv[dam, dam]
    } else {
      # both parents known
      alpha = 2
      Ainv[animal, animal] = alpha + Ainv[animal, animal]
      Ainv[animal, sire] = -alpha/2 + Ainv[animal, sire]
      Ainv[sire, animal] = -alpha/2 + Ainv[sire, animal]
      Ainv[animal, dam] = -alpha/2 + Ainv[animal, dam]
      Ainv[dam, animal] = -alpha/2 + Ainv[dam, animal]
      Ainv[sire, sire] = alpha/4 + Ainv[sire, sire]
      Ainv[sire, dam] = alpha/4 + Ainv[sire, dam]
      Ainv[dam, sire] = alpha/4 + Ainv[dam, sire]
      Ainv[dam, dam] = alpha/4 + Ainv[dam, dam]
    }
  }
  return(Ainv)
}

# Calculate the AINV from the pedigree using the ainv function
A <- ainv(ped)

# Load data
data  <- read.csv("data.txt", sep="")

# Making design matrices using the function desgn()
# make design matrix 
desgn <- function(v) {
  if (is.numeric(v)) {
    va = v
    mrow = length(va)
    mcol = max(va)
  }
  if (is.character(v)) {
    vf = factor(v)
    # Save the index value of each level
    va = as.numeric(vf)
    mrow = length(va)
    mcol = length(levels(vf))
  }
  
  # Initializing X-matrix
  X = matrix(data = c(0), nrow = mrow, ncol = mcol)
  
  for (i in 1:mrow) {
    ic = va[i]
    X[i, ic] = 1
  }
  return(X)
}
X = desgn(data$sex)
Z = desgn(data$id)
y = data$wwg


# variance component and ratio
sigma_a = 20
sigma_e = 40
alpha = sigma_e/sigma_a

# number of levels of fixed effect
no_levels_fixed = ncol(X)

# number of animals
no_animals = ncol(Z)


# LHS, RHS

# LHS construction
LHS = rbind(
  cbind(t(X) %*% X, t(X) %*% Z), 
  cbind(t(Z) %*% X, t(Z) %*% Z + A * alpha))

# print
LHS
##       [,1] [,2]      [,3] [,4] [,5]      [,6] [,7] [,8] [,9] [,10]
##  [1,]    2    0  0.000000    0    0  0.000000    1    1    0     0
##  [2,]    0    3  0.000000    0    0  1.000000    0    0    1     1
##  [3,]    0    0  3.666667    1    0 -1.333333    0   -2    0     0
##  [4,]    0    0  1.000000    4    1  0.000000   -2   -2    0     0
##  [5,]    0    0  0.000000    1    4  0.000000   -2    1    0    -2
##  [6,]    0    1 -1.333333    0    0  4.666667    1    0   -2     0
##  [7,]    1    0  0.000000   -2   -2  1.000000    6    0   -2     0
##  [8,]    1    0 -2.000000   -2    1  0.000000    0    6    0    -2
##  [9,]    0    1  0.000000    0    0 -2.000000   -2    0    5     0
## [10,]    0    1  0.000000    0   -2  0.000000    0   -2    0     5
# RHS construction
RHS = rbind(t(X) %*% y, t(Z) %*% y)

# print
RHS
##       [,1]
##  [1,]  6.8
##  [2,] 13.0
##  [3,]  0.0
##  [4,]  0.0
##  [5,]  0.0
##  [6,]  4.5
##  [7,]  2.9
##  [8,]  3.9
##  [9,]  3.5
## [10,]  5.0
# Solutions
# generalized inverse of LHS
gi_LHS = ginv(LHS)

# print
gi_LHS
##              [,1]        [,2]         [,3]         [,4]        [,5]        [,6]
##  [1,]  0.80245865  0.15730213 -0.132863260 -0.241250738 -0.11196840 -0.08730803
##  [2,]  0.15730213  0.59556097 -0.164119413 -0.083624565 -0.13059083 -0.26455749
##  [3,] -0.13286326 -0.16411941  0.471094211  0.006928037  0.03264668  0.21954371
##  [4,] -0.24125074 -0.08362457  0.006928037  0.492095721 -0.01030797  0.02039033
##  [5,] -0.11196840 -0.13059083  0.032646682 -0.010307967  0.45645878  0.04812709
##  [6,] -0.08730803 -0.26455749  0.219543709  0.020390333  0.04812709  0.42768015
##  [7,] -0.29891465 -0.14827804  0.044952254  0.237345770  0.20132326  0.04704420
##  [8,] -0.30600266 -0.16632621  0.220774267  0.245155707  0.02261354  0.12757186
##  [9,] -0.18594950 -0.28424641  0.138622268  0.119819354  0.12589831  0.24280124
## [10,] -0.19864885 -0.23787901  0.134192262  0.110664009  0.21774710  0.12319108
##              [,7]        [,8]       [,9]      [,10]
##  [1,] -0.29891465 -0.30600266 -0.1859495 -0.1986488
##  [2,] -0.14827804 -0.16632621 -0.2842464 -0.2378790
##  [3,]  0.04495225  0.22077427  0.1386223  0.1341923
##  [4,]  0.23734577  0.24515571  0.1198194  0.1106640
##  [5,]  0.20132326  0.02261354  0.1258983  0.2177471
##  [6,]  0.04704420  0.12757186  0.2428012  0.1231911
##  [7,]  0.42810675  0.16972255  0.2197160  0.1780739
##  [8,]  0.16972255  0.44228277  0.1521830  0.2192238
##  [9,]  0.21971599  0.15218301  0.4418562  0.1680818
## [10,]  0.17807393  0.21922376  0.1680818  0.4223641
# solution
sol = gi_LHS %*% RHS

# print
sol
##               [,1]
##  [1,]  3.404430006
##  [2,]  4.358502330
##  [3,]  0.098444576
##  [4,] -0.018770099
##  [5,] -0.041084203
##  [6,] -0.008663123
##  [7,] -0.185732099
##  [8,]  0.176872088
##  [9,] -0.249458555
## [10,]  0.182614688
# solutions for fixed effects and animal effects
sol_fixed = sol[1 : no_levels_fixed]
sol_animal = sol[(no_levels_fixed + 1) : (no_levels_fixed + no_animals)]


# reliability(r2), accuracy(r), standard error of prediction(SEP)

# diagonal elements of the generalized inverse of LHS for animal equation
D = diag(gi_LHS[(no_levels_fixed + 1) : (no_levels_fixed + no_animals), (no_levels_fixed + 1) : (no_levels_fixed + no_animals)])

# print
D
## [1] 0.4710942 0.4920957 0.4564588 0.4276802 0.4281067 0.4422828 0.4418562
## [8] 0.4223641
# reliability
rel = 1 - D * alpha

# print
rel
## [1] 0.05781158 0.01580856 0.08708243 0.14463969 0.14378651 0.11543447 0.11628766
## [8] 0.15527171
# accuracy
acc = sqrt(rel)

# print
acc
## [1] 0.2404404 0.1257321 0.2950973 0.3803153 0.3791919 0.3397565 0.3410098
## [8] 0.3940453
# standard error of prediction(SEP)
sep = sqrt( D * sigma_e)

#
sep
## [1] 4.340941 4.436646 4.272979 4.136086 4.138148 4.206104 4.204075 4.110300
# partitioning of breeding values
# a_i = n1(Parental Average) + n2(Yield Deviation) + n3(Progeny Contribution)

# yield deviation
# YD is the vector of yield deviations (YDs) and represents the yields of the 
# animal adjusted for all effects other than genetic merit and error.
YD1 = ginv(t(Z) %*% Z) %*% t(Z) %*% (y - X %*% sol_fixed)

# print
YD1
##            [,1]
## [1,]  0.0000000
## [2,]  0.0000000
## [3,]  0.0000000
## [4,]  0.1414977
## [5,] -0.5044300
## [6,]  0.4955700
## [7,] -0.8585023
## [8,]  0.6414977
# numerator of n2
a2 = diag(t(Z) %*% Z)

# print
a2
## [1] 0 0 0 1 1 1 1 1
# Parents average, progeny contribution

# parents avearge
PA1 = rep(0, no_animals)

# progeny contribution numerator
PC0 = rep(0, no_animals)

# numerator of n3, denominator of progeny contribution
a3 = rep(0, no_animals)

# numerator of n1
a1 = rep(0, no_animals)

# looping ped
for (i in 1 : no_animals) {
  
  sire = ped[i, 2]
  dam = ped[i, 3]
  
  if (sire == 0 & dam == 0) {
    # both parents unknown
    # PA
    a1[i] = 1 * alpha
    
  } else if (sire != 0 & dam == 0) {
    # 
    
    # PA 
    a1[i] = 4/3 * alpha
    PA1[i] = sol_animal[sire]/2
    
    # PC for sire
    a3[sire] = a3[sire] + 0.5 * alpha * (2/3)
    PC0[sire] = PC0[sire] + 0.5 * alpha * (2/3) * (2 * sol_animal[i])
    
  } else if (sire == 0 & dam != 0) {
    # dam known
    
    # PA 
    a1[i] = 4/3 * alpha
    PA1[i] = sol_animal[dam]/2
    
    # PC for dam
    a3[dam] = a3[dam] + 0.5 * alpha * (2/3)
    PC0[dam] = PC0[dam] + 0.5 * alpha * (2/3) * (2 * sol_animal[i])
    
  } else {
    # both parents known
    
    # PA 
    a1[i] = 2 * alpha
    PA1[i] = (sol_animal[sire] + sol_animal[dam])/2
    
    # PC for sire
    a3[sire] = a3[sire] + 0.5 * alpha
    PC0[sire] = PC0[sire] + 0.5 * alpha * (2 * sol_animal[i] - sol_animal[dam])
    
    # PC for dam
    a3[dam] = a3[dam] + 0.5 * alpha
    PC0[dam] = PC0[dam] + 0.5 * alpha * (2 * sol_animal[i] - sol_animal[sire])
    
  }
}

# print
a1
## [1] 2.000000 2.000000 2.000000 2.666667 4.000000 4.000000 4.000000 4.000000
PA1
## [1]  0.00000000  0.00000000  0.00000000  0.04922229 -0.02992715  0.03983724
## [7] -0.09719761  0.06789394
a3
## [1] 1.666667 2.000000 2.000000 1.000000 1.000000 1.000000 0.000000 0.000000
PC0
## [1]  0.3609634 -0.0750804 -0.1643368 -0.3131850 -0.4902540  0.4063136  0.0000000
## [8]  0.0000000
# denominator of n1, n2, n3, diagonal of animals in LHS
nd = a1 + a2 + a3

# print
nd
## [1] 3.666667 4.000000 4.000000 4.666667 6.000000 6.000000 5.000000 5.000000
# parents average fraction of breeding values
PA = PA1 * (a1 / nd)

# print
PA
## [1]  0.00000000  0.00000000  0.00000000  0.02812702 -0.01995143  0.02655816
## [7] -0.07775809  0.05431515
# yield deviation fraction of breeding values
YD = YD1 * (a2 / nd)

# print
YD
##             [,1]
## [1,]  0.00000000
## [2,]  0.00000000
## [3,]  0.00000000
## [4,]  0.03032093
## [5,] -0.08407167
## [6,]  0.08259500
## [7,] -0.17170047
## [8,]  0.12829953
# progeny contribution
PC1 = PC0 / a3
PC1[is.nan(PC1) == TRUE] = 0
PC1
## [1]  0.21657807 -0.03754020 -0.08216841 -0.31318501 -0.49025399  0.40631358
## [7]  0.00000000  0.00000000
# progeny contribution fraction of breeding values
PC =  PC1 * (a3 / nd)

# print
PC
## [1]  0.09844458 -0.01877010 -0.04108420 -0.06711107 -0.08170900  0.06771893
## [7]  0.00000000  0.00000000
# Progeny(Daughter) Yield Deviation(PYD, DYD)

# n2 of progeny
n2prog = a2 / (a1 + a2)

# print
n2prog
## [1] 0.0000000 0.0000000 0.0000000 0.2727273 0.2000000 0.2000000 0.2000000
## [8] 0.2000000
# numerator of DYD : summation of u of progeny * n2 of progeny * (2 * YD - bv of mate)
DYD_n = rep(0, no_animals)
# denominator of DYD : summation of u of progeny * n2 of progeny
DYD_d = rep(0, no_animals)

# looping ped
for (i in 1 : no_animals) {
  
  sire = ped[i, 2]
  dam = ped[i, 3]
  
  if (sire == 0 & dam == 0) {
    # both parents unknown
    
  } else if (sire != 0 & dam == 0) {
    # 
    
    # DYD_n
    DYD_n[sire] = DYD_n[sire] + n2prog[i] * 2 / 3 * 2 * YD1[i]
    # DYD_d 
    DYD_d[sire] = DYD_d[sire] + n2prog[i] * 2 / 3
    
  } else if (sire == 0 & dam != 0) {
    # dam known
    
    # DYD_n
    DYD_n[dam] = DYD_n[dam] + n2prog[i] * 2 / 3 * 2 * YD1[i]
    # DYD_d
    DYD_d[dam] = DYD_d[dam] + n2prog[i] * 2 / 3
    
  } else {
    # both parents known
    
    # DYD_n
    DYD_n[sire] = DYD_n[sire] + n2prog[i] * (2 * YD1[i] - sol_animal[dam])
    DYD_n[dam] = DYD_n[dam] + n2prog[i] * (2 * YD1[i] - sol_animal[sire])
    
    # DYD_d
    DYD_d[sire] = DYD_d[sire] + n2prog[i]
    DYD_d[dam] = DYD_d[dam] + n2prog[i]
    
  }
}

# print
DYD_n
## [1]  0.25343572 -0.01501608  0.02320667 -0.30625451 -0.34166831  0.26481591
## [7]  0.00000000  0.00000000
DYD_d
## [1] 0.3818182 0.4000000 0.4000000 0.2000000 0.2000000 0.2000000 0.0000000
## [8] 0.0000000
# DYD
DYD = DYD_n / DYD_d
DYD[is.nan(DYD) == TRUE] = 0

# print
DYD
## [1]  0.66376021 -0.03754020  0.05801667 -1.53127256 -1.70834154  1.32407954
## [7]  0.00000000  0.00000000
# breeding values and fractions
result = data.frame(animal = ped[,1], animal_bv = sol_animal, rel = rel, acc = acc, sep = sep, PA = PA, YD = YD, PC = PC, sum_of_fr = PA + YD + PC, DYD = DYD)

# print
result
##   animal    animal_bv        rel       acc      sep          PA          YD
## 1      1  0.098444576 0.05781158 0.2404404 4.340941  0.00000000  0.00000000
## 2      2 -0.018770099 0.01580856 0.1257321 4.436646  0.00000000  0.00000000
## 3      3 -0.041084203 0.08708243 0.2950973 4.272979  0.00000000  0.00000000
## 4      4 -0.008663123 0.14463969 0.3803153 4.136086  0.02812702  0.03032093
## 5      5 -0.185732099 0.14378651 0.3791919 4.138148 -0.01995143 -0.08407167
## 6      6  0.176872088 0.11543447 0.3397565 4.206104  0.02655816  0.08259500
## 7      7 -0.249458555 0.11628766 0.3410098 4.204075 -0.07775809 -0.17170047
## 8      8  0.182614688 0.15527171 0.3940453 4.110300  0.05431515  0.12829953
##            PC    sum_of_fr         DYD
## 1  0.09844458  0.098444576  0.66376021
## 2 -0.01877010 -0.018770099 -0.03754020
## 3 -0.04108420 -0.041084203  0.05801667
## 4 -0.06711107 -0.008663123 -1.53127256
## 5 -0.08170900 -0.185732099 -1.70834154
## 6  0.06771893  0.176872088  1.32407954
## 7  0.00000000 -0.249458555  0.00000000
## 8  0.00000000  0.182614688  0.00000000