For the R functions we need
Following previous notation we refer to \(\mathcal{X}_k(y)\) to the set of complete trees that are in agreement with the observed tree \(y\) and has \(k\) missing species. So,
\[ \mathcal{X}(y) = \cup_{k=0}^\infty \mathcal{X}_k(y)\]
Moreover, we know that
\[ \mathcal{X}_k(y) = \mathcal{T}_k \times \mathbb{R}^{2*d} \]
where \(\mathcal{T}_k\) is the set of possible sequences of missing events, the size of \(\mathcal{T}_k\) set is given by the catalan number
\[ c_k = \frac{1}{k+1}\binom{2k}{k} \]
To calculate the probability that the resconstructed tree \(y\) has \(k\) missing species we follow the uniform algorithm
Calculate \(\mathcal{T}_k\), count all elements on it.
sample \(s\) uniformly from \(\mathcal{T}_k\)
Sample \(2k\) uniform branching times \(u_1,...,u_{2k}\) in \([0,T]\), sort them and then assign them speciation or extinction status according to \(s\)
We then consider the probability of sampling a tree \(x_i\) on this squeme which is
\[ g_k(x_i|y) = \frac{1}{c_n}\left(\frac{1}{T}\right)^{2d} (2d)! \]
Base on that, we can approximate the probability that the reconstructed tree \(y\) has \(k\) missing species
\[ f(x \in \mathcal{X}_k|y) \approx \frac{1}{m}\sum_{x_i \sim g_k} \frac{f(x_i,y)}{g_k(x_i|y)} = \frac{c_nT^{2d}}{(2d)!m}\sum_{x_i \sim g_k} f(x_i,y)\]
We can test that:
get.k.tops <- function(d,obt,nsim=10){
#print(paste("Getting topologies for", d," missing species"))
top = get.topologies(d)
sumofllik = vector(mode="numeric",length=ncol(top))
#print(paste("Drawing uniform trees for ",ncol(top),"topologies"))
for(j in 1:ncol(top)){
topi = top[,j]
topi = replicate(nsim,topi,simplify = F)
ntree = lapply(topi,sim.unif.tree.df,obt=obt) # simulate unif random tree given topology
nlliks = lapply(ntree,weight.unif,pars=pars,d=d,ct=max(obt))
sumofllik[j] = Reduce("+", nlliks)
}
sumofllik = sum(sumofllik)
return(sumofllik/(ncol(top)*nsim))
}
weight.unif <- function(tree,pars,d,ct){
lik.tree(pars,tree,initspec = 1,topology = F)*((catalan(d)*(ct*(2*d)))/(factorial(2*d)))
}
sim.unif.mtree <- function(top,obt){
nmbt = length(top) #length of missing bt
mbt = sort(runif(nmbt,min=0,max=max(obt)))
df = data.frame(bt=c(mbt,obt),to=c(top,rep(2,length(obt))))
df = df[order(df$bt),]
return(df)
}
sim.unif.tree.df <- function(top,obt){
df = df2tree2(sim.unif.mtree(top,obt))
return(df)
}
time = proc.time()
pars = c(0.2,0.1)
### for comparison with rampal method
obt = 1
pars = c(pars,Inf)
dim = 7
int.d = vector(mode="numeric",length = 9)
print(paste("for 0 missing species the probability is",exp(-nllik.tree(pars,tree=list(wt=diff(c(0,obt)),to=rep(1,length(obt)-1)),initspec = 1,topology = F))))
## [1] "for 0 missing species the probability is 0.740818220681718"
###################
next think is to check probabilities of \(\mathcal{X}_1,\mathcal{X}_2,...,\mathcal{X}_M\)
for(i in 1:dim){
int.d[i] = get.k.tops(nsim=100,obt=obt,d = i)
}
sum(int.d)
## [1] 0.01015096
int.d
## [1] 1.008924e-02 6.157530e-05 1.440317e-07 1.814700e-10 1.427348e-13
## [6] 7.772776e-17 3.090966e-20 0.000000e+00 0.000000e+00
get.time(time)
## [1] 16.944
dd_loglik_rhsEA2009 <- function(t,x,pars)
{
la <- pars[1]
mu <- pars[2]
nx <- sqrt(length(x))
dim(x) <- c(nx,nx)
xx <- matrix(0,nx+2,nx+2)
xx[2:(nx+1),2:(nx+1)] <- x
nl <- t(array(0:(nx - 1), dim = c(nx,nx)))
dx <- mu * (nl + 1) * xx[1:nx,3:(nx+2)] + la * (nl - 1) * xx[2:(nx+1),1:(nx)] - (la + mu) * nl * xx[2:(nx+1),2:(nx+1)]
dim(dx) <- c(nx^2,1)
return(list(dx))
}
dd_loglik_EA2009 <- function(pars,lx,age)
{
p <- array(0,dim = c(lx,lx))
p[1,2] <- 1
dim(p) <- c(lx^2,1)
methode <- 'ode45'
rtol <- 1E-10
atol <- 1E-10
y <- ode(p,c(0,age),dd_loglik_rhsEA2009,parms = pars,rtol = rtol,atol = atol,method = methode)[2,2:(lx^2 + 1)]
dim(y) <- c(lx,lx)
return(y)
}
lx <- 9
age <- 1
pars <- c(0.2,0.1)
pNENL <- dd_loglik_EA2009(pars = pars,lx = lx,age = age)
pNENL
## [,1] [,2] [,3] [,4] [,5]
## [1,] 0.000000e+00 7.408182e-01 1.280044e-01 2.211760e-02 3.821653e-03
## [2,] 8.639393e-02 1.343950e-02 3.057232e-03 6.552600e-04 1.351663e-04
## [3,] 4.961060e-04 1.609181e-04 4.633716e-05 1.193801e-05 2.868076e-06
## [4,] 3.508038e-06 1.628988e-06 5.727376e-07 1.728352e-07 4.742042e-08
## [5,] 2.518233e-08 1.512068e-08 6.303184e-09 2.182868e-09 6.734557e-10
## [6,] 1.810672e-10 1.332543e-10 6.436058e-11 2.516934e-11 8.114057e-12
## [7,] 1.302228e-12 1.134702e-12 6.236493e-13 2.566033e-13 8.008002e-14
## [8,] 9.366570e-15 9.430019e-15 5.489566e-15 2.203077e-15 6.394020e-16
## [9,] 6.737713e-17 7.260250e-17 4.148291e-17 1.558860e-17 4.154739e-18
## [,6] [,7] [,8] [,9]
## [1,] 6.603353e-04 1.140979e-04 1.971473e-05 3.406463e-06
## [2,] 2.714698e-05 5.345858e-06 1.036908e-06 1.450355e-07
## [3,] 6.564956e-07 1.450035e-07 2.707409e-08 3.177024e-09
## [4,] 1.218039e-08 2.724257e-09 4.600150e-10 4.644674e-11
## [5,] 1.788531e-10 3.753164e-11 5.642514e-12 4.995628e-13
## [6,] 2.062174e-12 3.934412e-13 5.278509e-14 4.158588e-15
## [7,] 1.875478e-14 3.233062e-15 3.900663e-16 2.766767e-17
## [8,] 1.365462e-16 2.134309e-17 2.335248e-18 1.505494e-19
## [9,] 8.098929e-19 1.155020e-19 1.154933e-20 6.819864e-22
pNL <- colSums(pNENL)
ENL <- (0:(lx -1)) %*% pNL
pNL
## [1] 8.689357e-02 7.544203e-01 1.311085e-01 2.278498e-02 3.959736e-03
## [6] 6.881511e-04 1.195915e-04 2.077918e-05 3.554723e-06
print(sum(pNENL))
## [1] 0.9999992
pNENL[,1]
## [1] 0.000000e+00 8.639393e-02 4.961060e-04 3.508038e-06 2.518233e-08
## [6] 1.810672e-10 1.302228e-12 9.366570e-15 6.737713e-17
sum(pNENL[,1])
## [1] 0.08689357
comparing
int.d
## [1] 1.008924e-02 6.157530e-05 1.440317e-07 1.814700e-10 1.427348e-13
## [6] 7.772776e-17 3.090966e-20 0.000000e+00 0.000000e+00
pNENL[,2]
## [1] 7.408182e-01 1.343950e-02 1.609181e-04 1.628988e-06 1.512068e-08
## [6] 1.332543e-10 1.134702e-12 9.430019e-15 7.260250e-17
Xd.mc <- function(d,obt,nsim=10,pars){
#print(paste("Getting topologies for", d," missing species"))
top = get.topologies(d)
sumofllik = vector(mode="numeric",length=ncol(top))
#print(paste("Drawing uniform trees for ",ncol(top),"topologies"))
for(j in 1:ncol(top)){
topi = top[,j]
topi = replicate(nsim,topi,simplify = F)
ntree = lapply(topi,sim.unif.tree.df,obt=obt) # simulate unif random tree given topology
nlliks = lapply(ntree,weight.unif,pars=pars,d=d,ct=max(obt))
sumofllik[j] = Reduce("+", nlliks)
}
sumofllik = sum(sumofllik)
return(sumofllik/(ncol(top)*nsim))
}
weight.unif <- function(tree,pars,d,ct){
lik.tree(pars,tree,initspec = 1,topology = F)*((catalan(d)*(ct*(2*d)))/(factorial(2*d)))
}
sim.unif.mtree <- function(top,obt){
nmbt = length(top) #length of missing bt
mbt = sort(runif(nmbt,min=0,max=max(obt)))
df = data.frame(bt=c(mbt,obt),to=c(top,rep(2,length(obt))))
df = df[order(df$bt),]
return(df)
}
sim.unif.tree.df <- function(top,obt){
df = df2tree2(sim.unif.mtree(top,obt))
return(df)
}
pars = c(pars,Inf)
Xd.mc(d=1,obt = 1,nsim = 10,pars = pars)
## [1] 0.01011338
Xd.mc(d=1,obt = 1,nsim = 10,pars = pars)
## [1] 0.01006089
Xd.mc(d=1,obt = 1,nsim = 10,pars = pars)
## [1] 0.009924435
Xd.mc(d=1,obt = 1,nsim = 100,pars = pars)
## [1] 0.009987545
Xd.mc(d=1,obt = 1,nsim = 1000,pars = pars)
## [1] 0.009990221
Xd.mc(d=1,obt = 1,nsim = 1000,pars = pars)
## [1] 0.009889434