Questions we want to answer

Do we fairly approximate the normalization constants correctly?
What is the relationship of the estimated approximattes constant vs the empirical one?
Are the same relationships happening on the lambda-plane?
Is it this tree dependent?
how does influence to add the last probability event on the likelihood?

For the likelihood to be normalized we need to calculate the normalization constant

\[ C = \int_{x = \mathcal{X}} f(x|\theta) dx = \int_{x = \mathcal{X}} f(x|\theta) \frac{g(x|\theta)}{g(x|\theta)} = E_{x \sim g} \left[ \frac{f(x|\theta)}{g(x|\theta)} \right] \approx \frac{1}{m} \sum_{i=1}^m \frac{f(x_i|\theta)}{g(x_i|\theta)}\]

where \(g\) is described as

\[g(x|y) = \left[ N_e!\frac{(\lambda-\mu)^{N_e}e^{-(\lambda-\mu)}}{N_e!}\left(\frac{1}{T} \right)^{N_e} \prod_{i=1}^{N_e} \frac{1}{T-T_i} \prod_{j=1}^{N_e} \frac{1}{n_j}\right] \left[ N_p!\frac{(\lambda-\mu)^{N_p}e^{-(\lambda-\mu)}}{N_p!}\left(\frac{1}{T} \right)^{N_p} \prod_{j=1}^{N_p} \frac{1}{n_j}\right] \]

and the DAA algorithm is

s1. Draw \(N_e\) from poisson

s2. Draw \(N_p\) from poisson

s3. Draw \(N_e\) speciation times and extinction times from uniforms

s4. Draw \(N_p\) speciation times from uniform

s5. that’s it.

norma.const <- function(lambda,mu,CT,n,truncdim = FALSE){
  lg = NULL
  lf = NULL
  for(i in 1:n){
    df = sim.tree.g(lambda,mu,CT) # simulation of tree
    lg[i] = g_samp_missobs(df,c(lambda,mu)) # calculate sampling probability 
    # and calculate joint probability 
    to = df$to[-nrow(df)]
    to[to==2] = 1
    lf[i] = -emphasis:::nllik.tree(pars=c(lambda,mu,Inf),tree=list(wt=diff(c(0,df$bt)),to=to),truncdim = truncdim)
  }
  # finally, get the approximation
  const.approx = sum(exp(lf-lg))/n
  variance = sum((exp(lf-lg)-const.approx)^2)/n
  return(list(nconst=const.approx,var=variance))
}


sim.tree.g <- function(lambda,mu,CT){
  Ne = rpois(1,(lambda+mu)*CT) # number of extinct species
  Np = rpois(1,(lambda-mu)*CT) # number of present species
  brts = runif(Np,min=0,max=CT) # branching times of speciation of present species
  ms = runif(Ne,min=0,max=CT) # branching times of speciation of extinct species
  me = runif(Ne,min=ms,max=CT) # branching times of extinctions
  # return matrix with branching times and topology
  to = c(rep(1,Ne),rep(0,Ne))
  df = data.frame(bt = c(ms,me),to=to)
  df = rbind(df,data.frame(bt=c(brts,CT),to=c(rep(2,length(brts)),2)))
  df = df[order(df$bt),]
  return(df)
}

g_samp_missobs <- function(df,pars){
  # sampling probability 
  last = nrow(df)
  ct = df[last,1]
  Ne = sum(df$to==0)
  Np = sum(df$to==2) - 1
  subdf_e = df[df$to==1,]
  subdf_p = df[df$to==2,]
  Ts = ct-subdf_e$bt
  to = df$to[-last]
  rto = to
  to[to==2] = 1
  n = c(2,2+cumsum(to)+cumsum(to-1))
  nm = n[c(rto,2)==1]
  log_dens_miss = dpois(Ne,lambda =(pars[1]+pars[2])*ct,log=TRUE)-Ne*log(ct)-sum(log(Ts))-sum(log(nm))+lgamma(Ne+1)
  ne = n[c(rto,0)==2]
  log_dens_obs = dpois(Np,lambda = (pars[1]-pars[2])*ct,log=TRUE)-Np*log(ct)-sum(log(ne))+lgamma(Np+1)
  log_dens = log_dens_miss+log_dens_obs
  return(log_dens)
}

time=proc.time()
nsim = 15
n=100000
mu = seq(0.1,0.8,length=nsim)
lambda = 1
norm = NULL
vari = NULL
f.true <- NULL
f_hat_e <- NULL
pars = c(1,NaN,9e+9999999)
brts = CT = 2
for(i in 1:nsim){
  pars[2] = mu[i] 
  nc = norma.const(lambda,mu[i],CT,n)
  norm[i] = nc$nconst
  vari[i] = nc$var
  f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
  S = emphasis:::sim.sct(brts,pars,1000,print = FALSE)
  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 921.283

plot(mu,f.true,type="l",ylim = c(min(f.true,f_hat_e),max(f.true,f_hat_e)))
points(mu,f_hat_e,col="red")

plot(mu,f.true - f_hat_e,type="l",ylim=c(min(-log(norm),f.true- f_hat_e),max(-log(norm),f.true- f_hat_e)))
points(mu,-log(norm))

#calculating variance
vari

##  [1] 129.19902198 509.84644285  47.32715165 167.61694980 443.74660884
##  [6] 295.33508619 214.67190544   0.69854688   4.35923800   1.36775041
## [11]  15.38865500   0.57046930   2.15935322   0.13602734   0.06476124

plot(mu,f.true - f_hat_e + log(norm))

df1 = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^5")

time=proc.time()
nsim = 15
n=1000000
mu = seq(0.1,0.8,length=nsim)
lambda = 1
norm = NULL
vari = NULL
#f.true <- NULL
#f_hat_e <- NULL
pars = c(1,NaN,9e+9999999)
brts = CT = 2
for(i in 1:nsim){
  pars[2] = mu[i] 
  nc = norma.const(lambda,mu[i],CT,n)
  norm[i] = nc$nconst
  vari[i] = nc$var
 # f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
#  S = emphasis:::sim.sct(brts,pars,1000,print = FALSE)
#  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 33362.09

plot(mu,f.true,type="l",ylim = c(min(f.true,f_hat_e),max(f.true,f_hat_e)))
points(mu,f_hat_e,col="red")

plot(mu,f.true - f_hat_e,type="l",ylim=c(min(-log(norm),f.true- f_hat_e),max(-log(norm),f.true- f_hat_e)))
points(mu,-log(norm))

#calculating variance
vari

##  [1]  143.430546  530.062933 1041.773881  925.650624 3565.821124
##  [6]  288.781613  146.314666   14.180789   33.483505  389.122374
## [11]   21.658820  146.833882    8.669815  242.198508    6.127648

plot(mu,f.true - f_hat_e + log(norm))

df = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^6")
ggplot(data=df, aes(x=mu,y=f.true-f.hat+norm,size=var))+geom_point()

ggplot(data=df) + geom_line(aes(x=mu,y=f.true-f.hat))+geom_point(aes(x=mu,y=-norm, size=var))

df = rbind(df1,df)
ggplot(data=df) + geom_line(aes(x=mu,y=f.true-f.hat))+geom_point(aes(x=mu,y=-norm, size=var,col=ss)) # + geom_text()

ggplot(data=df, aes(x=mu,y=f.true-f.hat+norm,size=var,col=ss))+geom_point(aes(size=var))+ geom_text(hjust=-0.8, vjust=-0.8,aes(label=as.character(round(var,digits = 1)),size=90))

df2s=df

longer tree

time=proc.time()
nsim = 15
n=100000
mu = seq(0.1,0.8,length=nsim)
lambda = 1
norm = NULL
vari = NULL
f.true <- NULL
f_hat_e <- NULL
pars = c(1,NaN,9e+9999999)
brts = 5:1
CT = 5
for(i in 1:nsim){
  pars[2] = mu[i] 
  nc = norma.const(lambda,mu[i],CT,n)
  norm[i] = nc$nconst
  vari[i] = nc$var
  f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
  S = emphasis:::sim.sct(brts,pars,1000,print = FALSE)
  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 1028.703

plot(mu,f.true,type="l",ylim = c(min(f.true,f_hat_e),max(f.true,f_hat_e)))
points(mu,f_hat_e,col="red")

plot(mu,f.true - f_hat_e,type="l",ylim=c(min(-log(norm),f.true- f_hat_e),max(-log(norm),f.true- f_hat_e)))
points(mu,-log(norm))

#calculating variance
vari

##  [1] 1.068269e-01 1.122355e-02 1.107358e-02 2.760442e-01 6.160086e-04
##  [6] 5.350459e-03 1.383222e-03 2.959724e-03 1.535594e-04 1.610463e-01
## [11] 9.887375e-02 2.312514e-02 7.490132e-05 5.218419e-05 9.571961e-07

plot(mu,f.true - f_hat_e + log(norm))

df1 = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^5")

time=proc.time()
nsim = 15
n=1000000
mu = seq(0.1,0.8,length=nsim)
lambda = 1
norm = NULL
vari = NULL
#f.true <- NULL
#f_hat_e <- NULL
pars = c(1,NaN,9e+9999999)
brts = 5:1
CT = 5
for(i in 1:nsim){
  pars[2] = mu[i] 
  nc = norma.const(lambda,mu[i],CT,n)
  norm[i] = nc$nconst
  vari[i] = nc$var
 # f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
#  S = emphasis:::sim.sct(brts,pars,1000,print = FALSE)
#  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 8492.463

#calculating variance
vari

##  [1] 2.024753e-01 4.856171e-01 1.036236e+00 4.808018e-02 1.612979e-01
##  [6] 2.872299e-02 6.366715e-03 4.365125e-02 3.457030e-03 5.885286e-03
## [11] 3.758576e-04 6.476092e-04 1.549889e-04 7.135274e-05 3.443154e-05

plot(mu,f.true - f_hat_e + log(norm))

df = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^6")
ggplot(data=df, aes(x=mu,y=f.true-f.hat+norm,size=var))+geom_point()

ggplot(data=df) + geom_line(aes(x=mu,y=f.true-f.hat))+geom_point(aes(x=mu,y=-norm, size=var))

df = rbind(df1,df)
ggplot(data=df) + geom_line(aes(x=mu,y=f.true-f.hat))+geom_point(aes(x=mu,y=-norm, size=var,col=ss)) # + geom_text()

ggplot(data=df, aes(x=mu,y=f.true-f.hat+norm,size=var,col=ss))+geom_point(aes(size=var))+ geom_text(hjust=-0.8, vjust=-0.8,aes(label=as.character(round(var,digits = 3)),size=3))

df2=df

observing lamba

time=proc.time()
nsim = 15
n=100000
lambda = seq(0.11,1,length=nsim)
mu = 0.1
norm = NULL
vari = NULL
f.true <- NULL
f_hat_e <- NULL
pars = c(1,mu,9e+9999999)
brts = 5:1
CT = 5
for(i in 1:nsim){
  pars[1] = lambda[i] 
  nc = norma.const(lambda[i],mu,CT,n)
  norm[i] = nc$nconst
  vari[i] = nc$var
  f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
  S = emphasis:::sim.sct(brts,pars,1000,print = FALSE)
  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 878.264

plot(lambda,f.true,type="l",ylim = c(min(f.true,f_hat_e),max(f.true,f_hat_e)))
points(lambda,f_hat_e,col="red")

plot(lambda,f.true - f_hat_e,type="l",ylim=c(min(-log(norm),f.true- f_hat_e),max(-log(norm),f.true- f_hat_e)))
points(lambda,-log(norm))

#calculating variance
vari
plot(lambda,f.true - f_hat_e + log(norm))
df1 = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^5")

No topology & simpler sampling algorithm

removing the last bit

time=proc.time()
nsim = 15
n=100000
mu = seq(0.1,0.8,length=nsim)
lambda = 1
norm = NULL
vari = NULL
f.true <- NULL
f_hat_e <- NULL
pars = c(1,NaN,9e+9999999)
brts = 5:1
CT = 5
for(i in 1:nsim){
  pars[2] = mu[i] 
  nc = norma.const(lambda,mu[i],CT,n,truncdim = TRUE)
  norm[i] = nc$nconst
  vari[i] = nc$var
  f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
  S = emphasis:::sim.sct(brts,pars,1000,print = FALSE,truncdim = TRUE)
  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 932.283

plot(mu,f.true,type="l",ylim = c(min(f.true,f_hat_e),max(f.true,f_hat_e)))
points(mu,f_hat_e,col="red")

plot(mu,f.true - f_hat_e,type="l",ylim=c(min(-log(norm),f.true- f_hat_e),max(-log(norm),f.true- f_hat_e)))
points(mu,-log(norm))

#calculating variance
vari

##  [1]  239549.36 2645536.74 2133635.98   37191.70 2569712.90  156124.30
##  [7]   96951.07   74916.61  119249.18  127840.98   27307.24  908794.44
## [13]   21147.61   30575.31  269323.75

plot(mu,f.true - f_hat_e + log(norm))

df3 = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^5",lb="no")
df2$lb = "yes"
df = rbind(df2,df3)
ggplot(data=df, aes(x=mu,y=f.true-f.hat+norm,size=var,col=ss))+geom_point(aes(size=var,shape=lb))+ geom_text(hjust=-0.8, vjust=-0.8,aes(label=as.character(round(var,digits = 3)),size=1.8))

ggplot(data=df) + geom_line(aes(x=mu,y=f.true-f.hat,col=lb))+geom_point(aes(x=mu,y=-norm, size=var,col=ss,shape=lb))

time=proc.time()
nsim = 15
n=1000000
mu = seq(0.1,0.8,length=nsim)
lambda = 1
norm = NULL
vari = NULL
f.true <- NULL
f_hat_e <- NULL
pars = c(1,NaN,9e+9999999)
brts = 5:1
CT = 5
for(i in 1:nsim){
  pars[2] = mu[i] 
  nc = norma.const(lambda,mu[i],CT,n,truncdim = TRUE)
  norm[i] = nc$nconst
  vari[i] = nc$var
  f.true[i]<- DDD:::dd_loglik(pars1 = pars, pars2 =  emphasis:::pars2,brts = brts, missnumspec = 0)
  S = emphasis:::sim.sct(brts,pars,10000,print = FALSE,truncdim = TRUE)
  f_hat_e[i] <- log(mean(S$w))
}
emphasis:::get.time(time)

## [1] 8686.619

plot(mu,f.true,type="l",ylim = c(min(f.true,f_hat_e),max(f.true,f_hat_e)))
points(mu,f_hat_e,col="red")

plot(mu,f.true - f_hat_e,type="l",ylim=c(min(-log(norm),f.true- f_hat_e),max(-log(norm),f.true- f_hat_e)))
points(mu,-log(norm))

#calculating variance
vari

##  [1]  2080573.1   259115.1   570046.5  4581525.5  3242043.2  3655047.3
##  [7]  1288931.6   757914.7   193709.2   166683.2   495086.8   376502.4
## [13]   111158.1 79484637.1  1484293.7

plot(mu,f.true - f_hat_e + log(norm))

df4 = data.frame(mu=mu,f.true=f.true,f.hat=f_hat_e,norm=log(norm),var=vari,ss="10^6",lb="no")
df = rbind(df,df4)
ggplot(data=df, aes(x=mu,y=f.true-f.hat+norm,size=var,col=ss))+geom_point(aes(size=var,shape=lb))+ geom_text(hjust=-0.8, vjust=-0.8,aes(label=as.character(round(var,digits = 3)),size=1.8))

ggplot(data=df) + geom_line(aes(x=mu,y=f.true-f.hat,col=lb))+geom_point(aes(x=mu,y=-norm, size=var,col=ss,shape=lb))

14Nov2018

Questions we want to answer

longer tree

observing lamba

No topology & simpler sampling algorithm

removing the last bit