parameters:
p1 <- 0.25 # frequency of A of first (k) locus
q1 <- 1 - p1 # frequency of a of first (k) locus
p2 <- 0.4 # frequency of B of first (l) locus
q2 <- 1 - p2 # frequency of a of first (l) locus
c <- 0.3 # recombination rate
min(q1*p2,p1*q2) # maximum of LD in terms of D
## [1] 0.15
D <- 0.1 # LD
Standardize genotype function
corrected_value <- function(genotype, p) {
if (genotype == "AA" | genotype == "BB") {
(2 - 2*p) / sqrt(2 * p * (1 - p))
} else if (genotype == "Aa" | genotype == "Bb") {
(1 - 2*p) / sqrt(2 * p * (1 - p))
} else if (genotype == "aa" | genotype == "bb") {
(0 - 2*p) / sqrt(2 * p * (1 - p))
} else {
stop("Invalid genotype")
}
}
All mating types and their corresponding sib types
这里我有一点疑问,按道理每个交配组合应该产生16种Sib type,但是我把相同的基因型给合并了,这样做是否合理呢?这也是后面结果种类比预期数量少的原因
mating_types <- list(
list(parents = c("AA", "AA"), frequency = p1^4,
sib_pairs = list(list(genotypes = c("AA", "AA"), prob = 1))),
list(parents = c("aa", "aa"), frequency = q1^4,
sib_pairs = list(list(genotypes = c("aa", "aa"), prob = 1))),
list(parents = c("AA", "Aa"), frequency = 2*p1^3*q1,
sib_pairs = list(
list(genotypes = c("AA", "AA"), prob = 0.25),
list(genotypes = c("AA", "Aa"), prob = 0.25),
list(genotypes = c("Aa", "AA"), prob = 0.25),
list(genotypes = c("Aa", "Aa"), prob = 0.25))),
list(parents = c("Aa", "AA"), frequency = 2*p1^3*q1,
sib_pairs = list(
list(genotypes = c("AA", "AA"), prob = 0.25),
list(genotypes = c("AA", "Aa"), prob = 0.25),
list(genotypes = c("Aa", "AA"), prob = 0.25),
list(genotypes = c("Aa", "Aa"), prob = 0.25))),
list(parents = c("aa", "Aa"), frequency = 2*q1^3*p1,
sib_pairs = list(
list(genotypes = c("aa", "aa"), prob = 0.25),
list(genotypes = c("aa", "Aa"), prob = 0.25),
list(genotypes = c("Aa", "aa"), prob = 0.25),
list(genotypes = c("Aa", "Aa"), prob = 0.25))),
list(parents = c("Aa", "aa"), frequency = 2*q1^3*p1,
sib_pairs = list(
list(genotypes = c("aa", "aa"), prob = 0.25),
list(genotypes = c("aa", "Aa"), prob = 0.25),
list(genotypes = c("Aa", "aa"), prob = 0.25),
list(genotypes = c("Aa", "Aa"), prob = 0.25))),
list(parents = c("AA", "aa"), frequency = p1^2*q1^2,
sib_pairs = list(list(genotypes = c("Aa", "Aa"), prob = 1))),
list(parents = c("aa", "AA"), frequency = p1^2*q1^2,
sib_pairs = list(list(genotypes = c("Aa", "Aa"), prob = 1))),
list(parents = c("Aa", "Aa"), frequency = 4*p1^2*q1^2,
sib_pairs = list(
list(genotypes = c("AA", "AA"), prob = 0.0625),
list(genotypes = c("AA", "Aa"), prob = 0.125),
list(genotypes = c("Aa", "AA"), prob = 0.125),
list(genotypes = c("Aa", "Aa"), prob = 0.25),
list(genotypes = c("AA", "aa"), prob = 0.0625),
list(genotypes = c("aa", "AA"), prob = 0.0625),
list(genotypes = c("Aa", "aa"), prob = 0.125),
list(genotypes = c("aa", "Aa"), prob = 0.125),
list(genotypes = c("aa", "aa"), prob = 0.0625)))
)
Conditional probability between alleles of two loci under recombination and non-recombination
get_conditional_prob <- function(allele, is_recombination) {
if (is_recombination) {
return(c(B = p2, b = q2))
} else {
if (allele == "A") {
B_prob <- p2 + D / p1
b_prob <- q2 - D / p1
} else if (allele == "a") {
B_prob <- p2 - D / q1
b_prob <- q2 + D / q1
} else {
stop("Invalid allele")
}
return(c(B = B_prob, b = b_prob))
}
}
# 结果保存表格
result_df <- data.frame(
MatingType = character(),
SibTypeK = character(),
SibTypeL = character(),
RecombinationType = character(),
TotalProbability = numeric(),
OmegaK = numeric(),
OmegaL = numeric(),
OmegaProduct = numeric(),
TotalExpection = numeric(),
stringsAsFactors = FALSE
)
# 存储Omega乘积的期望
total_expectation <- 0
# 先是循环9种交配类型
for (mating in mating_types) {
mating_freq <- mating$frequency
mating_type_str <- paste(mating$parents, collapse = "x")
# 再循环不同的sib-pair,正常是16种,但是有些相同的被我合并了
for (sib_pair in mating$sib_pairs) {
sib_type_str1 <- paste(sib_pair$genotypes, collapse = "_")
sib1_k <- sib_pair$genotypes[1] # sib1在第一个位点的基因型
sib2_k <- sib_pair$genotypes[2] # sib1在第二个位点的基因型
sib_prob <- sib_pair$prob # sib组合的概率,正常是1/16,但是合并之后就有所差异
# 分解基因型为父源和母源等位基因(假设格式如 "Aa" = 父A母a)
sib1_p_allele <- substr(sib1_k, 1, 1)
sib1_m_allele <- substr(sib1_k, 2, 2)
sib2_p_allele <- substr(sib2_k, 1, 1)
sib2_m_allele <- substr(sib2_k, 2, 2)
# 计算k位点IBS得分乘积
sib1_k_score <- corrected_value(sib1_k, p1)
sib2_k_score <- corrected_value(sib2_k, p1)
k_product <- sib1_k_score * sib2_k_score
# 遍历所有重组组合(s1_p, s1_m, s2_p, s2_m)总共16种可能
recomb <- expand.grid(s1_p = c(TRUE, FALSE), s1_m = c(TRUE, FALSE),
s2_p = c(TRUE, FALSE), s2_m = c(TRUE, FALSE))
# 对于每种重组组合进行遍历
for (i in 1:nrow(recomb)) {
rc <- recomb[i, ] # 每一行的格式类似:TRUE TRUE TRUE TRUE,表示4条haplotype都重组
recomb_prob <- (c^sum(unlist(rc))) * ((1 - c)^(4 - sum(unlist(rc)))) # 概率
recomb_type <- sprintf(
"S1:%s%s|S2:%s%s",
ifelse(rc$s1_p, "R", "N"),
ifelse(rc$s1_m, "R", "N"),
ifelse(rc$s2_p, "R", "N"),
ifelse(rc$s2_m, "R", "N")
)
sib_type_str1 <- paste(sib_pair$genotypes, collapse = "_")
# 计算每个同胞的l位点等位基因概率
# 同胞1父源
s1_p_prob <- get_conditional_prob(sib1_p_allele, rc$s1_p)
# 同胞1母源
s1_m_prob <- get_conditional_prob(sib1_m_allele, rc$s1_m)
# 同胞1的l位点基因型概率
s1_BB <- s1_p_prob["B"] * s1_m_prob["B"]
s1_Bb <- s1_p_prob["B"] * s1_m_prob["b"] + s1_p_prob["b"] * s1_m_prob["B"]
s1_bb <- s1_p_prob["b"] * s1_m_prob["b"]
# 同胞2父源
s2_p_prob <- get_conditional_prob(sib2_p_allele, rc$s2_p)
# 同胞2母源
s2_m_prob <- get_conditional_prob(sib2_m_allele, rc$s2_m)
# 同胞2的l位点基因型概率
s2_BB <- s2_p_prob["B"] * s2_m_prob["B"]
s2_Bb <- s2_p_prob["B"] * s2_m_prob["b"] + s2_p_prob["b"] * s2_m_prob["B"]
s2_bb <- s2_p_prob["b"] * s2_m_prob["b"]
# 遍历所有可能的l位点组合
for (s1_l in c("BB", "Bb", "bb")) {
for (s2_l in c("BB", "Bb", "bb")) {
sib_type_str2 <- paste(c(s1_l,s2_l), collapse = "_")
# 获取概率
prob_s1_l <- switch(s1_l, BB = s1_BB, Bb = s1_Bb, bb = s1_bb) # sib1第二个位点的基因型概率
prob_s2_l <- switch(s2_l, BB = s2_BB, Bb = s2_Bb, bb = s2_bb) # sib2第二个位点的基因型概率
joint_prob_l <- prob_s1_l * prob_s2_l # 组合的概率
# 计算l位点IBS得分乘积
s1_l_score <- corrected_value(s1_l, p2)
s2_l_score <- corrected_value(s2_l, p2)
l_product <- s1_l_score * s2_l_score
# 总得分乘积和概率
total_product <- k_product * l_product
total_prob <- mating_freq * sib_prob * recomb_prob * joint_prob_l # 经历所有历程之后总的概率
# 累加到总期望
total_expectation <- total_expectation + total_product * total_prob
new_row <- data.frame(
MatingType = mating_type_str,
SibTypeK = sib_type_str1,
SibTypeL=sib_type_str2,
RecombinationType = recomb_type,
TotalProbability = total_prob,
OmegaK = k_product,
OmegaL = l_product,
OmegaProduct = total_product,
TotalExpection = total_product * total_prob
)
result_df <- rbind(result_df, new_row) # 存储结果
}
}
}
}
}
cat("期望值 E(Ωk Ωl) =", total_expectation, "\n")
## 期望值 E(Ωk Ωl) = 0.1724074