A first exercise

  1. The normal one…
p = proc.time()
n_sim = 1000
n_trees = 100
MP = matrix(nrow=n_sim, ncol=3)
RP = matrix(nrow=n_sim, ncol=3)
for(j in 1:n_sim){
  st = sim_phyl(seed=j)
  p <- subplex(par = c(8,0.175,0.9),fn = llik,n = st$n, E = st$E, t = st$t)$par
  RP[j,] = p
  sit = dmea::drop.fossil(st$newick)
  sit = phylo2p(sit)
  trees = sim_srt(tree=sit, pars=p, parallel = F, n_trees = 100)
  pars = subplex(par = c(8,0.175,0.9), fn = llik_st , setoftrees = trees, impsam = FALSE)$par
  MP[j,] = pars
}
par_est_vis(P=MP,par=1,PR=RP)
par_est_vis(P=MP,par=2,PR=RP)
par_est_vis(P=MP,par=3,PR=RP)
print(proc.time()-p)

This one took toooo much time..

  1. Now the same, but trying to run it in parallel
no_cores <- detectCores()- 1 
cl <- makeCluster(no_cores)
registerDoParallel(cl)


n_sim = 100
n_trees = 10
MP = matrix(nrow=n_sim,ncol=3)
RP = matrix(nrow=n_sim,ncol=3)
p = proc.time()
ests <- foreach(i = 1:n_sim, .combine=data.frame,.packages='dmea') %dopar% sim_est(n_trees=n_trees)
print(proc.time()-p)
##    user  system elapsed 
##   0.144   0.008  85.038
for (i in 1:n_sim){
  RP[i,] = ests[,(2*i-1)]
  MP[i,] = ests[,2*i]
}
par_est_vis(P=MP,par=1,PR=RP)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP,par=2,PR=RP)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP,par=3,PR=RP)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stopCluster(cl)
  1. Now the same, but more simulations in parallel
p = proc.time()
no_cores <- detectCores()- 1 
cl <- makeCluster(no_cores)
registerDoParallel(cl)

n_sim = 1000
n_trees = 10
MP = matrix(nrow=n_sim,ncol=3)
RP = matrix(nrow=n_sim,ncol=3)

ests <- foreach(i = 1:n_sim, .combine=data.frame,.packages='dmea') %dopar% {  
  set.seed(i)
  sim_est(n_trees=n_trees)
}
for (i in 1:n_sim){
  RP[i,] = ests[,(2*i-1)]
  MP[i,] = ests[,2*i]
}
par_est_vis(P=MP,par=1,PR=RP)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP,par=2,PR=RP)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP,par=3,PR=RP)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stopCluster(cl)
print(proc.time()-p)
##    user  system elapsed 
##   4.440   0.064 804.806
summary(MP)
##        V1               V2                V3         
##  Min.   :0.4189   Min.   :0.00000   Min.   :0.03735  
##  1st Qu.:0.7159   1st Qu.:0.01507   1st Qu.:0.07697  
##  Median :0.7954   Median :0.01717   Median :0.08777  
##  Mean   :0.7978   Mean   :0.01726   Mean   :0.08918  
##  3rd Qu.:0.8742   3rd Qu.:0.01928   3rd Qu.:0.09908  
##  Max.   :1.2042   Max.   :0.02933   Max.   :0.18025
summary(RP)
##        V1               V2                V3         
##  Min.   :0.4558   Min.   :0.00000   Min.   :0.04457  
##  1st Qu.:0.7490   1st Qu.:0.01609   1st Qu.:0.08800  
##  Median :0.8305   Median :0.01830   Median :0.09862  
##  Mean   :0.8432   Mean   :0.01853   Mean   :0.09938  
##  3rd Qu.:0.9307   3rd Qu.:0.02089   3rd Qu.:0.11072  
##  Max.   :1.3472   Max.   :0.03163   Max.   :0.17832
  1. Now more trees for each iteration
p = proc.time()
no_cores <- detectCores()- 1 
cl <- makeCluster(no_cores)
registerDoParallel(cl)

n_sim = 1000
n_trees = 100
MP2 = matrix(nrow=n_sim,ncol=3)
RP2 = matrix(nrow=n_sim,ncol=3)

ests <- foreach(i = 1:n_sim, .combine=data.frame,.packages='dmea') %dopar% {  
  set.seed(i)
  sim_est(n_trees=n_trees)
}
for (i in 1:n_sim){
  RP2[i,] = ests[,(2*i-1)]
  MP2[i,] = ests[,2*i]
}
par_est_vis(P=MP2,par=1,PR=RP2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP2,par=2,PR=RP2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP2,par=3,PR=RP2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stopCluster(cl)
print(proc.time()-p)
##     user   system  elapsed 
##    5.528    0.064 7903.931
summary(MP2)
##        V1               V2                 V3         
##  Min.   :0.4222   Min.   :0.006073   Min.   :0.04381  
##  1st Qu.:0.6826   1st Qu.:0.014050   1st Qu.:0.07752  
##  Median :0.7398   Median :0.015744   Median :0.08756  
##  Mean   :0.7446   Mean   :0.015794   Mean   :0.08908  
##  3rd Qu.:0.8054   3rd Qu.:0.017467   3rd Qu.:0.09924  
##  Max.   :1.0666   Max.   :0.026789   Max.   :0.19743
summary(RP2)
##        V1               V2                V3         
##  Min.   :0.4558   Min.   :0.00000   Min.   :0.04457  
##  1st Qu.:0.7490   1st Qu.:0.01609   1st Qu.:0.08800  
##  Median :0.8305   Median :0.01830   Median :0.09862  
##  Mean   :0.8432   Mean   :0.01853   Mean   :0.09938  
##  3rd Qu.:0.9307   3rd Qu.:0.02089   3rd Qu.:0.11072  
##  Max.   :1.3472   Max.   :0.03163   Max.   :0.17832

This is strange, with 100 trees for monte-carlo simulation we introduce some biases to lambda and beta that with 10 trees MC we did not have.

  1. Does it makes sense to run 1 tree monte carlo?
p = proc.time()
no_cores <- detectCores()- 1 
cl <- makeCluster(no_cores)
registerDoParallel(cl)

n_sim = 1000
n_trees = 1
MP2 = matrix(nrow=n_sim,ncol=3)
RP2 = matrix(nrow=n_sim,ncol=3)

ests <- foreach(i = 1:n_sim, .combine=data.frame,.packages='dmea') %dopar% {  
  set.seed(i)
  sim_est(n_trees=n_trees)
}
for (i in 1:n_sim){
  RP2[i,] = ests[,(2*i-1)]
  MP2[i,] = ests[,2*i]
}
par_est_vis(P=MP2,par=1,PR=RP2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP2,par=2,PR=RP2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

par_est_vis(P=MP2,par=3,PR=RP2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

stopCluster(cl)
print(proc.time()-p)
##    user  system elapsed 
##   5.572   0.080 244.471