Using AI to generate function for

Prompt for claude AI

I’ll write an R program to compute the numerator relationship matrix (also known as the additive genetic relationship matrix) from pedigree data. This matrix shows the expected proportion of genes that are identical by descent between all pairs of individuals.

Response:

This R program provides a complete solution for computing the numerator relationship matrix from pedigree data. Here are the key features:

Main Functions:

  • compute_nrm(): Computes the relationship matrix using Henderson’s recursive algorithm

  • compute_nrm_matrix(): More efficient matrix-based implementation for larger pedigrees

  • sort_pedigree(): Ensures parents appear before offspring in the pedigree

  • validate_pedigree(): Checks for common pedigree errors

Key Features:

  1. Handles missing parents: Uses NA for unknown sire/dam

  2. Proper ordering: Automatically sorts pedigree so parents come before offspring

  3. Henderson’s rules: Implements the standard algorithm for computing additive relationships

  4. Error checking: Validates pedigree structure

  5. Demonstration: Includes example with interpretation

Usage:

# Your pedigree should be a data frame with columns: id, sire, dam
pedigree <- data.frame(
  id = c("A", "B", "C", "D"),
  sire = c(NA, NA, "A", "A"),
  dam = c(NA, NA, "B", "B")
)

# Compute the relationship matrix
#A <- compute_nrm(pedigree)

Interpretation:

  • Diagonal elements = 1 + inbreeding coefficient

  • Off-diagonal elements = proportion of genes identical by descent

  • Parent-offspring relationships = 0.5

  • Full siblings = 0.5 + 0.25 × (sire-dam relationship)

  • Half siblings = 0.25

The matrix is symmetric and positive definite, making it suitable for quantitative genetics analyses and breeding value estimation.

Generated Code

# Numerator Relationship Matrix from Pedigree
# This program computes the additive genetic relationship matrix (A matrix)
# from pedigree data using Henderson's rules

compute_nrm <- function(pedigree) {
  # Input: pedigree data frame with columns: id, sire, dam
  # Output: numerator relationship matrix
  
  # Check input format
  if (!is.data.frame(pedigree)) {
    stop("Pedigree must be a data frame")
  }
  
  required_cols <- c("id", "sire", "dam")
  if (!all(required_cols %in% names(pedigree))) {
    stop("Pedigree must have columns: id, sire, dam")
  }
  
  # Sort pedigree so parents appear before offspring
  pedigree <- sort_pedigree(pedigree)
  
  n <- nrow(pedigree)
  ids <- pedigree$id
  
  # Initialize relationship matrix
  A <- matrix(0, nrow = n, ncol = n)
  rownames(A) <- colnames(A) <- ids
  
  # Compute relationships using Henderson's rules
  for (i in 1:n) {
    # Diagonal element (self-relationship)
    sire_idx <- match(pedigree$sire[i], ids)
    dam_idx <- match(pedigree$dam[i], ids)
    
    if (is.na(sire_idx) && is.na(dam_idx)) {
      # Both parents unknown
      A[i, i] <- 1
    } else if (is.na(sire_idx) || is.na(dam_idx)) {
      # One parent unknown
      A[i, i] <- 1
    } else {
      # Both parents known
      A[i, i] <- 1 + 0.5 * A[sire_idx, dam_idx]
    }
    
    # Off-diagonal elements (relationships with others)
    if (i > 1) {
      for (j in 1:(i-1)) {
        sire_idx <- match(pedigree$sire[i], ids)
        dam_idx <- match(pedigree$dam[i], ids)
        
        if (is.na(sire_idx) && is.na(dam_idx)) {
          # Both parents unknown
          A[i, j] <- A[j, i] <- 0
        } else if (is.na(sire_idx)) {
          # Sire unknown, dam known
          A[i, j] <- A[j, i] <- 0.5 * A[dam_idx, j]
        } else if (is.na(dam_idx)) {
          # Dam unknown, sire known
          A[i, j] <- A[j, i] <- 0.5 * A[sire_idx, j]
        } else {
          # Both parents known
          A[i, j] <- A[j, i] <- 0.5 * (A[sire_idx, j] + A[dam_idx, j])
        }
      }
    }
  }
  
  return(A)
}

sort_pedigree <- function(pedigree) {
  # Sort pedigree so that parents appear before their offspring
  # This is crucial for correct computation of the relationship matrix
  
  ids <- pedigree$id
  n <- length(ids)
  sorted_order <- numeric(n)
  used <- logical(n)
  count <- 0
  
  while (count < n) {
    for (i in 1:n) {
      if (!used[i]) {
        sire <- pedigree$sire[i]
        dam <- pedigree$dam[i]
        
        # Check if parents are already sorted or unknown
        sire_ok <- is.na(sire) || sire %in% ids[used]
        dam_ok <- is.na(dam) || dam %in% ids[used]
        
        if (sire_ok && dam_ok) {
          count <- count + 1
          sorted_order[count] <- i
          used[i] <- TRUE
        }
      }
    }
  }
  
  return(pedigree[sorted_order, ])
}

# Example usage and demonstration
demo_nrm <- function() {
  # Create example pedigree data
  # Format: id, sire, dam (NA for unknown parents)
  pedigree <- data.frame(
    id = c("A", "B", "C", "D", "E", "F"),
    sire = c(NA, NA, "A", "A", "C", "C"),
    dam = c(NA, NA, "B", "B", "D", "D"),
    stringsAsFactors = FALSE
  )
  
  cat("Example Pedigree:\n")
  print(pedigree)
  cat("\n")
  
  # Compute numerator relationship matrix
  A <- compute_nrm(pedigree)
  
  cat("Numerator Relationship Matrix:\n")
  print(round(A, 3))
  cat("\n")
  
  # Interpretation examples
  cat("Interpretation examples:\n")
  cat("A[1,1] =", A[1,1], "- Individual A's relationship with itself\n")
  cat("A[3,4] =", A[3,4], "- Relationship between C and D (half-sibs)\n")
  cat("A[5,6] =", A[5,6], "- Relationship between E and F (full-sibs)\n")
  cat("A[1,5] =", A[1,5], "- Relationship between A and E (parent-offspring)\n")
  
  return(A)
}

# Alternative function using matrix operations (more efficient for large pedigrees)
compute_nrm_matrix <- function(pedigree) {
  # More efficient implementation using matrix operations
  
  pedigree <- sort_pedigree(pedigree)
  n <- nrow(pedigree)
  ids <- pedigree$id
  
  # Create incidence matrices for sires and dams
  S <- matrix(0, nrow = n, ncol = n)  # Sire incidence matrix
  D <- matrix(0, nrow = n, ncol = n)  # Dam incidence matrix
  
  for (i in 1:n) {
    sire_idx <- match(pedigree$sire[i], ids)
    dam_idx <- match(pedigree$dam[i], ids)
    
    if (!is.na(sire_idx)) S[i, sire_idx] <- 1
    if (!is.na(dam_idx)) D[i, dam_idx] <- 1
  }
  
  # Compute relationship matrix using recursive formula
  # A = I + 0.5 * (S + D) %*% A
  I <- diag(n)
  P <- 0.5 * (S + D)
  
  # Solve for A: A - P %*% A = I
  # A %*% (I - P') = I
  # A = solve(I - t(P))
  A <- solve(I - t(P))
  
  rownames(A) <- colnames(A) <- ids
  return(A)
}

# Function to validate pedigree structure
validate_pedigree <- function(pedigree) {
  # Check for common pedigree errors
  
  errors <- character(0)
  
  # Check for self-mating
  self_mate <- which(pedigree$sire == pedigree$dam & 
                    !is.na(pedigree$sire) & !is.na(pedigree$dam))
  if (length(self_mate) > 0) {
    errors <- c(errors, paste("Self-mating detected for ID:", 
                             pedigree$id[self_mate]))
  }
  
  # Check for individuals as their own parents
  self_sire <- which(pedigree$id == pedigree$sire)
  self_dam <- which(pedigree$id == pedigree$dam)
  if (length(self_sire) > 0) {
    errors <- c(errors, paste("Individual is own sire:", 
                             pedigree$id[self_sire]))
  }
  if (length(self_dam) > 0) {
    errors <- c(errors, paste("Individual is own dam:", 
                             pedigree$id[self_dam]))
  }
  
  # Check for duplicate IDs
  if (any(duplicated(pedigree$id))) {
    errors <- c(errors, "Duplicate IDs found in pedigree")
  }
  
  if (length(errors) > 0) {
    cat("Pedigree validation errors:\n")
    for (error in errors) {
      cat("-", error, "\n")
    }
    return(FALSE)
  } else {
    cat("Pedigree validation passed\n")
    return(TRUE)
  }
}

# Run demonstration
cat("=== Numerator Relationship Matrix Demo ===\n\n")
=== Numerator Relationship Matrix Demo ===
demo_result <- demo_nrm()
Example Pedigree:
  id sire  dam
1  A <NA> <NA>
2  B <NA> <NA>
3  C    A    B
4  D    A    B
5  E    C    D
6  F    C    D

Numerator Relationship Matrix:
    A   B    C    D    E    F
A 1.0 0.0 0.50 0.50 0.50 0.50
B 0.0 1.0 0.50 0.50 0.50 0.50
C 0.5 0.5 1.00 0.50 0.75 0.75
D 0.5 0.5 0.50 1.00 0.75 0.75
E 0.5 0.5 0.75 0.75 1.25 0.75
F 0.5 0.5 0.75 0.75 0.75 1.25

Interpretation examples:
A[1,1] = 1 - Individual A's relationship with itself
A[3,4] = 0.5 - Relationship between C and D (half-sibs)
A[5,6] = 0.75 - Relationship between E and F (full-sibs)
A[1,5] = 0.5 - Relationship between A and E (parent-offspring)

What are the next steps?