library(ape)
count_tips <- function(t) length(t$tip.label)
pd <- function(t) sum(t$edge.length)
induced_subtree <- function(t,tip) {
drop.tip(t, setdiff(t$tip.label, tip))
}
node_length_depth_by_label <- function(t, lab) {
node.depth.edgelength(t)[which(t$tip.label == lab)]
}
# even though we reorder edges here, the output is in terms of nodes, which don't change
count_node_descendants <- function(t) {
t = reorder(t, "postorder")
ntips = count_tips(t)
res = integer(max(t$edge))
res[1:ntips] = 1L
parent = t$edge[,1]
child = t$edge[,2]
for(i in 1:nrow(t$edge)){
res[parent[i]] = res[parent[i]] + res[child[i]]
}
res
}
# note that edge order for result is whatever t's order is
count_edge_descendants <- function(t) {
node_descendants <- count_node_descendants(t)
# assuming that the right column is the node below the edge in a rooted tree
sapply(t$edge[,2], function(n) node_descendants[[n]])
}
erpdwr <- function(t,k) {
beta = count_edge_descendants(t)/(count_tips(t))
pd(t) - (t$edge.length %*% ((1-beta)^k))[[1,1]]
}
induced_subtree_pd <- function(t, keep_tips) {
if(length(keep_tips) == 1)
node_length_depth_by_label(t,keep_tips)
else
pd(induced_subtree(t, keep_tips))
}
# k is the number to keep, without replacement
random_subtree_pd_with_replace <- function(t,k) {
keep_tips <- unique(sample(t$tip.label, k, replace=TRUE))
induced_subtree_pd(t, keep_tips)
}
check_erpdwr <- function(t, k, reps) {
trials = integer(reps)
mean(sapply(trials, function(dummy) random_subtree_pd_with_replace(t,k)))
}
Test with a random tree:
set.seed(1)
y = rtree(9)
plot(y, show.tip.label=FALSE)
nodelabels();
tiplabels();
Unfortunately the simulated and the calculated values are not the same:
check_erpdwr(y,3,1000)
## [1] 3.841
erpdwr(y,3)
## [1] 4.146