library(ape)

count_tips <- function(t) length(t$tip.label)

pd <- function(t) sum(t$edge.length)

induced_subtree <- function(t,tip) {
  drop.tip(t, setdiff(t$tip.label, tip))
}

node_length_depth_by_label <- function(t, lab) {
  node.depth.edgelength(t)[which(t$tip.label == lab)]
}

# even though we reorder edges here, the output is in terms of nodes, which don't change
count_node_descendants <- function(t) {
  t = reorder(t, "postorder")
  ntips = count_tips(t)
  res = integer(max(t$edge))
  res[1:ntips] = 1L
  parent = t$edge[,1]
  child = t$edge[,2]
  for(i in 1:nrow(t$edge)){
    res[parent[i]] = res[parent[i]] + res[child[i]]
  }
  res
}

# note that edge order for result is whatever t's order is
count_edge_descendants <- function(t) {
  node_descendants <- count_node_descendants(t)
  # assuming that the right column is the node below the edge in a rooted tree
  sapply(t$edge[,2], function(n) node_descendants[[n]])
}

erpdwr <- function(t,k) {
  beta = count_edge_descendants(t)/(count_tips(t))
  pd(t) - (t$edge.length %*% ((1-beta)^k))[[1,1]]
}

induced_subtree_pd <- function(t, keep_tips) {
  if(length(keep_tips) == 1)
    node_length_depth_by_label(t,keep_tips)
  else
    pd(induced_subtree(t, keep_tips))
}

# k is the number to keep, without replacement
random_subtree_pd_with_replace <- function(t,k) {
  keep_tips <- unique(sample(t$tip.label, k, replace=TRUE))
  induced_subtree_pd(t, keep_tips)
}

check_erpdwr <- function(t, k, reps) {
  trials = integer(reps)
  mean(sapply(trials, function(dummy) random_subtree_pd_with_replace(t,k)))
}

Test with a random tree:

set.seed(1)
y = rtree(9)
plot(y, show.tip.label=FALSE)
nodelabels();
tiplabels();

plot of chunk unnamed-chunk-2

Unfortunately the simulated and the calculated values are not the same:

check_erpdwr(y,3,1000)

## [1] 3.841

erpdwr(y,3)

## [1] 4.146