Covariance of IBS between two loci

Initial setting

parameters:

p1 <- 0.25 # frequency of A of first (k) locus
q1 <- 1 - p1 # frequency of a of first (k) locus
p2 <- 0.4 # frequency of B of first (l) locus
q2 <- 1 - p2 # frequency of a of first (l) locus
c <- 0.3 # recombination rate
min(q1*p2,p1*q2) # maximum of LD in terms of D

## [1] 0.15

D <- 0.1 # LD

Standardize genotype function

corrected_value <- function(genotype, p) {
  if (genotype == "AA" | genotype == "BB") {
    (2 - 2*p) / sqrt(2 * p * (1 - p))
  } else if (genotype == "Aa" | genotype == "Bb") {
    (1 - 2*p) / sqrt(2 * p * (1 - p))
  } else if (genotype == "aa" | genotype == "bb") {
    (0 - 2*p) / sqrt(2 * p * (1 - p))
  } else {
    stop("Invalid genotype")
  }
}

All mating types and their corresponding sib types

这里我有一点疑问，按道理每个交配组合应该产生16种Sib type，但是我把相同的基因型给合并了，这样做是否合理呢？这也是后面结果种类比预期数量少的原因

mating_types <- list(
  list(parents = c("AA", "AA"), frequency = p1^4,
       sib_pairs = list(list(genotypes = c("AA", "AA"), prob = 1))),
  list(parents = c("aa", "aa"), frequency = q1^4,
       sib_pairs = list(list(genotypes = c("aa", "aa"), prob = 1))),
  list(parents = c("AA", "Aa"), frequency = 2*p1^3*q1,
       sib_pairs = list(
         list(genotypes = c("AA", "AA"), prob = 0.25),
         list(genotypes = c("AA", "Aa"), prob = 0.25),
         list(genotypes = c("Aa", "AA"), prob = 0.25),
         list(genotypes = c("Aa", "Aa"), prob = 0.25))),
  list(parents = c("Aa", "AA"), frequency = 2*p1^3*q1,
       sib_pairs = list(
         list(genotypes = c("AA", "AA"), prob = 0.25),
         list(genotypes = c("AA", "Aa"), prob = 0.25),
         list(genotypes = c("Aa", "AA"), prob = 0.25),
         list(genotypes = c("Aa", "Aa"), prob = 0.25))),
  list(parents = c("aa", "Aa"), frequency = 2*q1^3*p1,
       sib_pairs = list(
         list(genotypes = c("aa", "aa"), prob = 0.25),
         list(genotypes = c("aa", "Aa"), prob = 0.25),
         list(genotypes = c("Aa", "aa"), prob = 0.25),
         list(genotypes = c("Aa", "Aa"), prob = 0.25))),
  list(parents = c("Aa", "aa"), frequency = 2*q1^3*p1,
       sib_pairs = list(
         list(genotypes = c("aa", "aa"), prob = 0.25),
         list(genotypes = c("aa", "Aa"), prob = 0.25),
         list(genotypes = c("Aa", "aa"), prob = 0.25),
         list(genotypes = c("Aa", "Aa"), prob = 0.25))),
  list(parents = c("AA", "aa"), frequency = p1^2*q1^2,
       sib_pairs = list(list(genotypes = c("Aa", "Aa"), prob = 1))),
  list(parents = c("aa", "AA"), frequency = p1^2*q1^2,
       sib_pairs = list(list(genotypes = c("Aa", "Aa"), prob = 1))),
  list(parents = c("Aa", "Aa"), frequency = 4*p1^2*q1^2,
       sib_pairs = list(
         list(genotypes = c("AA", "AA"), prob = 0.0625),
         list(genotypes = c("AA", "Aa"), prob = 0.125),
         list(genotypes = c("Aa", "AA"), prob = 0.125),
         list(genotypes = c("Aa", "Aa"), prob = 0.25),
         list(genotypes = c("AA", "aa"), prob = 0.0625),
         list(genotypes = c("aa", "AA"), prob = 0.0625),
         list(genotypes = c("Aa", "aa"), prob = 0.125),
         list(genotypes = c("aa", "Aa"), prob = 0.125),
         list(genotypes = c("aa", "aa"), prob = 0.0625)))
)

Conditional probability between alleles of two loci under recombination and non-recombination

get_conditional_prob <- function(allele, is_recombination) {
  if (is_recombination) {
    return(c(B = p2, b = q2))
  } else {
    if (allele == "A") {
      B_prob <- p2 + D / p1
      b_prob <- q2 - D / p1
    } else if (allele == "a") {
      B_prob <- p2 - D / q1
      b_prob <- q2 + D / q1
    } else {
      stop("Invalid allele")
    }
    return(c(B = B_prob, b = b_prob))
  }
}

Calculation

# 结果保存表格
result_df <- data.frame(
  MatingType = character(),
  SibTypeK = character(),
  SibTypeL = character(),
  RecombinationType = character(),
  TotalProbability = numeric(),
  OmegaK = numeric(),
  OmegaL = numeric(),
  OmegaProduct = numeric(),
  TotalExpection = numeric(),
  stringsAsFactors = FALSE
)

# 存储Omega乘积的期望
total_expectation <- 0 

# 先是循环9种交配类型
for (mating in mating_types) {
  mating_freq <- mating$frequency
  mating_type_str <- paste(mating$parents, collapse = "x")
  
  # 再循环不同的sib-pair，正常是16种，但是有些相同的被我合并了
  for (sib_pair in mating$sib_pairs) {
    sib_type_str1 <- paste(sib_pair$genotypes, collapse = "_")
    sib1_k <- sib_pair$genotypes[1] # sib1在第一个位点的基因型
    sib2_k <- sib_pair$genotypes[2] # sib1在第二个位点的基因型
    sib_prob <- sib_pair$prob # sib组合的概率，正常是1/16，但是合并之后就有所差异
    
    # 分解基因型为父源和母源等位基因（假设格式如 "Aa" = 父A母a）
    sib1_p_allele <- substr(sib1_k, 1, 1)
    sib1_m_allele <- substr(sib1_k, 2, 2)
    sib2_p_allele <- substr(sib2_k, 1, 1)
    sib2_m_allele <- substr(sib2_k, 2, 2)
    
    # 计算k位点IBS得分乘积
    sib1_k_score <- corrected_value(sib1_k, p1)
    sib2_k_score <- corrected_value(sib2_k, p1)
    k_product <- sib1_k_score * sib2_k_score
    
    # 遍历所有重组组合（s1_p, s1_m, s2_p, s2_m）总共16种可能
    recomb <- expand.grid(s1_p = c(TRUE, FALSE), s1_m = c(TRUE, FALSE),
                          s2_p = c(TRUE, FALSE), s2_m = c(TRUE, FALSE))
    
    # 对于每种重组组合进行遍历
    for (i in 1:nrow(recomb)) {
      rc <- recomb[i, ] # 每一行的格式类似：TRUE TRUE TRUE TRUE，表示4条haplotype都重组 
      recomb_prob <- (c^sum(unlist(rc))) * ((1 - c)^(4 - sum(unlist(rc)))) # 概率
      recomb_type <- sprintf(
        "S1:%s%s|S2:%s%s",
        ifelse(rc$s1_p, "R", "N"), 
        ifelse(rc$s1_m, "R", "N"), 
        ifelse(rc$s2_p, "R", "N"), 
        ifelse(rc$s2_m, "R", "N")
      )
      sib_type_str1 <- paste(sib_pair$genotypes, collapse = "_")
      
      # 计算每个同胞的l位点等位基因概率
      # 同胞1父源
      s1_p_prob <- get_conditional_prob(sib1_p_allele, rc$s1_p)
      # 同胞1母源
      s1_m_prob <- get_conditional_prob(sib1_m_allele, rc$s1_m)
      # 同胞1的l位点基因型概率
      s1_BB <- s1_p_prob["B"] * s1_m_prob["B"]
      s1_Bb <- s1_p_prob["B"] * s1_m_prob["b"] + s1_p_prob["b"] * s1_m_prob["B"]
      s1_bb <- s1_p_prob["b"] * s1_m_prob["b"]
      
      # 同胞2父源
      s2_p_prob <- get_conditional_prob(sib2_p_allele, rc$s2_p)
      # 同胞2母源
      s2_m_prob <- get_conditional_prob(sib2_m_allele, rc$s2_m)
      # 同胞2的l位点基因型概率
      s2_BB <- s2_p_prob["B"] * s2_m_prob["B"]
      s2_Bb <- s2_p_prob["B"] * s2_m_prob["b"] + s2_p_prob["b"] * s2_m_prob["B"]
      s2_bb <- s2_p_prob["b"] * s2_m_prob["b"]
      
      # 遍历所有可能的l位点组合
      for (s1_l in c("BB", "Bb", "bb")) {
        for (s2_l in c("BB", "Bb", "bb")) {
          sib_type_str2 <- paste(c(s1_l,s2_l), collapse = "_")
          
          # 获取概率
          prob_s1_l <- switch(s1_l, BB = s1_BB, Bb = s1_Bb, bb = s1_bb) # sib1第二个位点的基因型概率
          prob_s2_l <- switch(s2_l, BB = s2_BB, Bb = s2_Bb, bb = s2_bb) # sib2第二个位点的基因型概率
          joint_prob_l <- prob_s1_l * prob_s2_l # 组合的概率
          
          # 计算l位点IBS得分乘积
          s1_l_score <- corrected_value(s1_l, p2)
          s2_l_score <- corrected_value(s2_l, p2)
          l_product <- s1_l_score * s2_l_score
          
          # 总得分乘积和概率
          total_product <- k_product * l_product
          total_prob <- mating_freq * sib_prob * recomb_prob * joint_prob_l # 经历所有历程之后总的概率
          
          # 累加到总期望
          total_expectation <- total_expectation + total_product * total_prob
          
          new_row <- data.frame(
            MatingType = mating_type_str,
            SibTypeK = sib_type_str1,
            SibTypeL=sib_type_str2,
            RecombinationType = recomb_type,
            TotalProbability = total_prob,
            OmegaK = k_product,
            OmegaL = l_product,
            OmegaProduct = total_product,
            TotalExpection = total_product * total_prob
          )
          result_df <- rbind(result_df, new_row) # 存储结果
        }
      }
    }
  }
}

cat("期望值 E(Ωk Ωl) =", total_expectation, "\n")

## 期望值 E(Ωk Ωl) = 0.1724074

Covariance of IBS between two loci

Zhe Zhang

2025-04-23

Initial setting

Calculation