# Your pedigree should be a data frame with columns: id, sire, dam
pedigree <- data.frame(
id = c("A", "B", "C", "D"),
sire = c(NA, NA, "A", "A"),
dam = c(NA, NA, "B", "B")
)
# Compute the relationship matrix
#A <- compute_nrm(pedigree)Using AI to generate function for
Prompt for claude AI
I’ll write an R program to compute the numerator relationship matrix (also known as the additive genetic relationship matrix) from pedigree data. This matrix shows the expected proportion of genes that are identical by descent between all pairs of individuals.
Response:
This R program provides a complete solution for computing the numerator relationship matrix from pedigree data. Here are the key features:
Main Functions:
compute_nrm(): Computes the relationship matrix using Henderson’s recursive algorithmcompute_nrm_matrix(): More efficient matrix-based implementation for larger pedigreessort_pedigree(): Ensures parents appear before offspring in the pedigreevalidate_pedigree(): Checks for common pedigree errors
Key Features:
Handles missing parents: Uses NA for unknown sire/dam
Proper ordering: Automatically sorts pedigree so parents come before offspring
Henderson’s rules: Implements the standard algorithm for computing additive relationships
Error checking: Validates pedigree structure
Demonstration: Includes example with interpretation
Usage:
Interpretation:
Diagonal elements = 1 + inbreeding coefficient
Off-diagonal elements = proportion of genes identical by descent
Parent-offspring relationships = 0.5
Full siblings = 0.5 + 0.25 × (sire-dam relationship)
Half siblings = 0.25
The matrix is symmetric and positive definite, making it suitable for quantitative genetics analyses and breeding value estimation.
Generated Code
# Numerator Relationship Matrix from Pedigree
# This program computes the additive genetic relationship matrix (A matrix)
# from pedigree data using Henderson's rules
compute_nrm <- function(pedigree) {
# Input: pedigree data frame with columns: id, sire, dam
# Output: numerator relationship matrix
# Check input format
if (!is.data.frame(pedigree)) {
stop("Pedigree must be a data frame")
}
required_cols <- c("id", "sire", "dam")
if (!all(required_cols %in% names(pedigree))) {
stop("Pedigree must have columns: id, sire, dam")
}
# Sort pedigree so parents appear before offspring
pedigree <- sort_pedigree(pedigree)
n <- nrow(pedigree)
ids <- pedigree$id
# Initialize relationship matrix
A <- matrix(0, nrow = n, ncol = n)
rownames(A) <- colnames(A) <- ids
# Compute relationships using Henderson's rules
for (i in 1:n) {
# Diagonal element (self-relationship)
sire_idx <- match(pedigree$sire[i], ids)
dam_idx <- match(pedigree$dam[i], ids)
if (is.na(sire_idx) && is.na(dam_idx)) {
# Both parents unknown
A[i, i] <- 1
} else if (is.na(sire_idx) || is.na(dam_idx)) {
# One parent unknown
A[i, i] <- 1
} else {
# Both parents known
A[i, i] <- 1 + 0.5 * A[sire_idx, dam_idx]
}
# Off-diagonal elements (relationships with others)
if (i > 1) {
for (j in 1:(i-1)) {
sire_idx <- match(pedigree$sire[i], ids)
dam_idx <- match(pedigree$dam[i], ids)
if (is.na(sire_idx) && is.na(dam_idx)) {
# Both parents unknown
A[i, j] <- A[j, i] <- 0
} else if (is.na(sire_idx)) {
# Sire unknown, dam known
A[i, j] <- A[j, i] <- 0.5 * A[dam_idx, j]
} else if (is.na(dam_idx)) {
# Dam unknown, sire known
A[i, j] <- A[j, i] <- 0.5 * A[sire_idx, j]
} else {
# Both parents known
A[i, j] <- A[j, i] <- 0.5 * (A[sire_idx, j] + A[dam_idx, j])
}
}
}
}
return(A)
}
sort_pedigree <- function(pedigree) {
# Sort pedigree so that parents appear before their offspring
# This is crucial for correct computation of the relationship matrix
ids <- pedigree$id
n <- length(ids)
sorted_order <- numeric(n)
used <- logical(n)
count <- 0
while (count < n) {
for (i in 1:n) {
if (!used[i]) {
sire <- pedigree$sire[i]
dam <- pedigree$dam[i]
# Check if parents are already sorted or unknown
sire_ok <- is.na(sire) || sire %in% ids[used]
dam_ok <- is.na(dam) || dam %in% ids[used]
if (sire_ok && dam_ok) {
count <- count + 1
sorted_order[count] <- i
used[i] <- TRUE
}
}
}
}
return(pedigree[sorted_order, ])
}
# Example usage and demonstration
demo_nrm <- function() {
# Create example pedigree data
# Format: id, sire, dam (NA for unknown parents)
pedigree <- data.frame(
id = c("A", "B", "C", "D", "E", "F"),
sire = c(NA, NA, "A", "A", "C", "C"),
dam = c(NA, NA, "B", "B", "D", "D"),
stringsAsFactors = FALSE
)
cat("Example Pedigree:\n")
print(pedigree)
cat("\n")
# Compute numerator relationship matrix
A <- compute_nrm(pedigree)
cat("Numerator Relationship Matrix:\n")
print(round(A, 3))
cat("\n")
# Interpretation examples
cat("Interpretation examples:\n")
cat("A[1,1] =", A[1,1], "- Individual A's relationship with itself\n")
cat("A[3,4] =", A[3,4], "- Relationship between C and D (half-sibs)\n")
cat("A[5,6] =", A[5,6], "- Relationship between E and F (full-sibs)\n")
cat("A[1,5] =", A[1,5], "- Relationship between A and E (parent-offspring)\n")
return(A)
}
# Alternative function using matrix operations (more efficient for large pedigrees)
compute_nrm_matrix <- function(pedigree) {
# More efficient implementation using matrix operations
pedigree <- sort_pedigree(pedigree)
n <- nrow(pedigree)
ids <- pedigree$id
# Create incidence matrices for sires and dams
S <- matrix(0, nrow = n, ncol = n) # Sire incidence matrix
D <- matrix(0, nrow = n, ncol = n) # Dam incidence matrix
for (i in 1:n) {
sire_idx <- match(pedigree$sire[i], ids)
dam_idx <- match(pedigree$dam[i], ids)
if (!is.na(sire_idx)) S[i, sire_idx] <- 1
if (!is.na(dam_idx)) D[i, dam_idx] <- 1
}
# Compute relationship matrix using recursive formula
# A = I + 0.5 * (S + D) %*% A
I <- diag(n)
P <- 0.5 * (S + D)
# Solve for A: A - P %*% A = I
# A %*% (I - P') = I
# A = solve(I - t(P))
A <- solve(I - t(P))
rownames(A) <- colnames(A) <- ids
return(A)
}
# Function to validate pedigree structure
validate_pedigree <- function(pedigree) {
# Check for common pedigree errors
errors <- character(0)
# Check for self-mating
self_mate <- which(pedigree$sire == pedigree$dam &
!is.na(pedigree$sire) & !is.na(pedigree$dam))
if (length(self_mate) > 0) {
errors <- c(errors, paste("Self-mating detected for ID:",
pedigree$id[self_mate]))
}
# Check for individuals as their own parents
self_sire <- which(pedigree$id == pedigree$sire)
self_dam <- which(pedigree$id == pedigree$dam)
if (length(self_sire) > 0) {
errors <- c(errors, paste("Individual is own sire:",
pedigree$id[self_sire]))
}
if (length(self_dam) > 0) {
errors <- c(errors, paste("Individual is own dam:",
pedigree$id[self_dam]))
}
# Check for duplicate IDs
if (any(duplicated(pedigree$id))) {
errors <- c(errors, "Duplicate IDs found in pedigree")
}
if (length(errors) > 0) {
cat("Pedigree validation errors:\n")
for (error in errors) {
cat("-", error, "\n")
}
return(FALSE)
} else {
cat("Pedigree validation passed\n")
return(TRUE)
}
}
# Run demonstration
cat("=== Numerator Relationship Matrix Demo ===\n\n")=== Numerator Relationship Matrix Demo ===
demo_result <- demo_nrm()Example Pedigree:
id sire dam
1 A <NA> <NA>
2 B <NA> <NA>
3 C A B
4 D A B
5 E C D
6 F C D
Numerator Relationship Matrix:
A B C D E F
A 1.0 0.0 0.50 0.50 0.50 0.50
B 0.0 1.0 0.50 0.50 0.50 0.50
C 0.5 0.5 1.00 0.50 0.75 0.75
D 0.5 0.5 0.50 1.00 0.75 0.75
E 0.5 0.5 0.75 0.75 1.25 0.75
F 0.5 0.5 0.75 0.75 0.75 1.25
Interpretation examples:
A[1,1] = 1 - Individual A's relationship with itself
A[3,4] = 0.5 - Relationship between C and D (half-sibs)
A[5,6] = 0.75 - Relationship between E and F (full-sibs)
A[1,5] = 0.5 - Relationship between A and E (parent-offspring)