time = proc.time()
seed = 3
s = sim_phyl(seed=3)
s2 = s$newick.extant.p
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80),n_trees = 10, parallel = F, impsam = T,tol=0.01)
## [1] "iteration # 1 :"
## [1] 1.8 0.6 80.0
## [1] "iteration # 2 :"
## [1] 1.4842506 0.5207339 85.0305292
## [1] "iteration # 3 :"
## [1] 1.2192968 0.5125791 84.6833348
## [1] "iteration # 4 :"
## [1] 1.1936977 0.4731993 86.3697085
## [1] "iteration # 5 :"
## [1] 1.068568 0.423197 81.356096
## [1] "iteration # 6 :"
## [1] 1.0022360 0.3461626 76.2385238
## [1] "iteration # 7 :"
## [1] 0.7566864 0.3218578 74.8233132
## [1] "iteration # 8 :"
## [1] 0.7189199 0.2733859 70.7821735
## [1] "iteration # 9 :"
## [1] 0.6959333 0.2312761 65.4365753
## [1] "iteration # 10 :"
## [1] 0.6853191 0.1933207 56.7717044
## [1] "iteration # 11 :"
## [1] 0.6580751 0.1397195 51.9871795
## [1] "iteration # 12 :"
## [1] 0.7782279 0.1049570 46.7171976
## [1] "iteration # 13 :"
## [1] 0.55532981 0.09431441 43.99874741
## [1] "iteration # 14 :"
## [1] 0.58056037 0.07290122 41.97261890
## [1] "iteration # 15 :"
## [1] 0.70374226 0.06355204 38.20742836
## [1] "iteration # 16 :"
## [1] 0.65611372 0.04316876 36.44853944
## [1] "iteration # 17 :"
## [1] 0.50578122 0.02898578 37.28834490
## [1] "iteration # 18 :"
## [1] 0.41548191 0.02073383 39.29751273
## [1] "iteration # 19 :"
## [1] 0.3985476 0.0106999 39.5556110
## [1] "iteration # 20 :"
## [1] 0.371649626 0.003713163 41.040860097
## [1] "iteration # 21 :"
## [1] 0.352464908 0.000857078 41.649117144
## [1] "iteration # 22 :"
## [1] 3.485289e-01 7.682937e-06 4.184807e+01
## [1] "iteration # 23 :"
## [1] 3.485040e-01 7.407260e-17 4.184898e+01
## [1] "iteration # 24 :"
## [1] 3.485039e-01 3.361365e-17 4.184899e+01
## [1] "iteration # 25 :"
## [1] 3.485039e-01 8.980709e-18 4.184899e+01
## [1] "iteration # 26 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 27 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 28 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 29 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
## [1] "iteration # 30 :"
## [1] 3.485038e-01 1.050390e-18 4.184899e+01
print(proc.time()-time)
## user system elapsed
## 17.352 0.004 17.358
qplot(1:30,EM$pars[,2])
EM$pars
## [,1] [,2] [,3]
## [1,] 1.6826445 5.131967e-01 84.49220
## [2,] 1.2881443 4.833367e-01 83.75733
## [3,] 1.1303295 4.215241e-01 84.25198
## [4,] 1.0615753 4.011336e-01 79.57020
## [5,] 1.0438218 3.842778e-01 75.84205
## [6,] 0.8454106 3.229822e-01 81.24672
## [7,] 0.7304166 3.019546e-01 72.99091
## [8,] 0.9039366 2.730956e-01 66.81371
## [9,] 0.8543810 2.379271e-01 64.55642
## [10,] 0.7998869 2.063764e-01 55.54143
## [11,] 0.7763548 1.729484e-01 52.05320
## [12,] 0.8177746 1.536267e-01 46.29201
## [13,] 0.8366668 1.155314e-01 44.81956
## [14,] 0.6681238 8.438512e-02 44.55862
## [15,] 0.7038666 8.490554e-02 38.69240
## [16,] 0.5882912 4.809249e-02 38.56652
## [17,] 0.5153125 4.011941e-02 37.32253
## [18,] 0.4897349 2.787306e-02 37.44175
## [19,] 0.4066526 1.277044e-02 39.18647
## [20,] 0.3748614 4.705835e-03 40.49155
## [21,] 0.3502617 4.478507e-04 41.74687
## [22,] 0.3485157 2.887696e-06 41.84835
## [23,] 0.3485038 4.594623e-17 41.84900
## [24,] 0.3485038 7.845184e-18 41.84900
## [25,] 0.3485038 7.845184e-18 41.84900
## [26,] 0.3485038 7.845184e-18 41.84900
## [27,] 0.3485038 7.845184e-18 41.84900
## [28,] 0.3485038 7.845184e-18 41.84900
## [29,] 0.3485038 7.845184e-18 41.84900
## [30,] 0.3485038 7.845184e-18 41.84900
time = proc.time()
stat=NULL
phylo1 = s$newick.extant
for(i in 1:30){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
print(proc.time()-time)
## user system elapsed
## 109.712 0.028 109.873
qplot(1:30,stat)
what if fo the expected thing with 100 trees?
time = proc.time()
stat=NULL
for(i in 1:30){
expe = expectedLTT2(pars=EM$pars[i,],n_it=100)
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,(length(expe$bt)-1)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
print(proc.time()-time)
## user system elapsed
## 1088.716 0.168 1089.881
qplot(1:30,stat)
wait, that was with importance sampling, now without that
n_it=70
time = proc.time()
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80),n_trees = 10, parallel = F, impsam = F,tol=0.01, n_it=n_it)
## [1] "iteration # 1 :"
## [1] 1.8 0.6 80.0
## [1] "iteration # 2 :"
## [1] 1.4687259 0.5109511 83.9367905
## [1] "iteration # 3 :"
## [1] 1.2354543 0.4524527 85.9787185
## [1] "iteration # 4 :"
## [1] 1.0885570 0.4125031 83.7872732
## [1] "iteration # 5 :"
## [1] 0.9940949 0.3743226 83.4062401
## [1] "iteration # 6 :"
## [1] 0.9299009 0.3437404 80.6332006
## [1] "iteration # 7 :"
## [1] 0.8770352 0.3098066 76.2143483
## [1] "iteration # 8 :"
## [1] 0.8097597 0.2776245 69.8307526
## [1] "iteration # 9 :"
## [1] 0.7881551 0.2528049 65.8456808
## [1] "iteration # 10 :"
## [1] 0.7974545 0.2343711 63.8426577
## [1] "iteration # 11 :"
## [1] 0.8096807 0.2184623 59.9496685
## [1] "iteration # 12 :"
## [1] 0.7963039 0.1963645 57.0823244
## [1] "iteration # 13 :"
## [1] 0.7389314 0.1773784 55.0472440
## [1] "iteration # 14 :"
## [1] 0.7641483 0.1617025 50.7431973
## [1] "iteration # 15 :"
## [1] 0.7741984 0.1440097 49.2333581
## [1] "iteration # 16 :"
## [1] 0.6836012 0.1295144 48.5280765
## [1] "iteration # 17 :"
## [1] 0.6978149 0.1226944 45.7937147
## [1] "iteration # 18 :"
## [1] 0.7313179 0.1095122 43.8770433
## [1] "iteration # 19 :"
## [1] 0.8724892 0.1003700 41.6371633
## [1] "iteration # 20 :"
## [1] 0.82028540 0.09371596 40.91410866
## [1] "iteration # 21 :"
## [1] 0.76927161 0.08361124 40.59825246
## [1] "iteration # 22 :"
## [1] 0.67135728 0.07663422 40.05942278
## [1] "iteration # 23 :"
## [1] 0.77832781 0.06553154 37.84457077
## [1] "iteration # 24 :"
## [1] 0.78770345 0.06545125 37.74017470
## [1] "iteration # 25 :"
## [1] 0.76422907 0.06202151 37.93118475
## [1] "iteration # 26 :"
## [1] 0.74606037 0.06077505 37.93305216
## [1] "iteration # 27 :"
## [1] 0.71416559 0.05624148 38.36826827
## [1] "iteration # 28 :"
## [1] 0.7072428 0.0582574 37.6734186
## [1] "iteration # 29 :"
## [1] 0.72305926 0.06010951 37.81696416
## [1] "iteration # 30 :"
## [1] 0.76956648 0.06043783 37.48630953
## [1] "iteration # 31 :"
## [1] 0.73362660 0.05663433 37.23176318
## [1] "iteration # 32 :"
## [1] 0.72083824 0.05583536 37.11367041
## [1] "iteration # 33 :"
## [1] 0.68886747 0.05150015 36.88106284
## [1] "iteration # 34 :"
## [1] 0.7127205 0.0506989 36.7601088
## [1] "iteration # 35 :"
## [1] 0.64434903 0.04746435 37.03155861
## [1] "iteration # 36 :"
## [1] 0.64740184 0.04224881 37.01371103
## [1] "iteration # 37 :"
## [1] 0.67716329 0.04802233 36.90074846
## [1] "iteration # 38 :"
## [1] 0.69207295 0.05004281 36.45681273
## [1] "iteration # 39 :"
## [1] 0.63378296 0.04739888 37.52700969
## [1] "iteration # 40 :"
## [1] 0.62858848 0.04502988 36.83182319
## [1] "iteration # 41 :"
## [1] 0.62849968 0.04352506 36.93523458
## [1] "iteration # 42 :"
## [1] 0.65642924 0.04711901 36.75218388
## [1] "iteration # 43 :"
## [1] 0.68779618 0.04913433 36.31085789
## [1] "iteration # 44 :"
## [1] 0.71830797 0.04843048 36.43859463
## [1] "iteration # 45 :"
## [1] 0.66533796 0.04548733 37.11412500
## [1] "iteration # 46 :"
## [1] 0.65688384 0.04301202 37.31367248
## [1] "iteration # 47 :"
## [1] 0.64390148 0.04234054 36.99772508
## [1] "iteration # 48 :"
## [1] 0.5770895 0.0412872 37.2369383
## [1] "iteration # 49 :"
## [1] 0.5353054 0.0376977 37.1807887
## [1] "iteration # 50 :"
## [1] 0.52022136 0.03166425 37.82516736
## [1] "iteration # 51 :"
## [1] 0.50451690 0.03076214 37.82028853
## [1] "iteration # 52 :"
## [1] 0.52485663 0.03251099 37.47260283
## [1] "iteration # 53 :"
## [1] 0.50059051 0.02932463 37.66703467
## [1] "iteration # 54 :"
## [1] 0.46773086 0.02303666 38.38598078
## [1] "iteration # 55 :"
## [1] 0.45769681 0.02058162 38.26449341
## [1] "iteration # 56 :"
## [1] 0.44438411 0.01941827 38.80611765
## [1] "iteration # 57 :"
## [1] 0.42232536 0.01766626 39.09161329
## [1] "iteration # 58 :"
## [1] 0.41664523 0.01661823 39.19990684
## [1] "iteration # 59 :"
## [1] 0.40629754 0.01494541 39.57194186
## [1] "iteration # 60 :"
## [1] 0.39844771 0.01135293 39.99976033
## [1] "iteration # 61 :"
## [1] 0.40288079 0.01016386 39.72117476
## [1] "iteration # 62 :"
## [1] 0.387260049 0.008882784 40.147021035
## [1] "iteration # 63 :"
## [1] 0.382414898 0.007965389 40.322196845
## [1] "iteration # 64 :"
## [1] 0.386174000 0.007500892 40.201903034
## [1] "iteration # 65 :"
## [1] 0.394299336 0.009228552 39.879275466
## [1] "iteration # 66 :"
## [1] 0.367316760 0.005084478 40.922945984
## [1] "iteration # 67 :"
## [1] 0.371033192 0.005091513 40.704360899
## [1] "iteration # 68 :"
## [1] 0.36878164 0.00538057 40.93377223
## [1] "iteration # 69 :"
## [1] 0.378192961 0.005741875 40.580185778
## [1] "iteration # 70 :"
## [1] 0.371278103 0.005439656 40.661981893
print(proc.time()-time)
## user system elapsed
## 20.684 0.004 20.699
qplot(1:n_it,EM$pars[,2])
stat=NULL
time=proc.time()
for(i in 1:n_it){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
print(proc.time()-time)
## user system elapsed
## 174.948 0.024 175.159
qplot(1:n_it,stat)
I am going to do the same but with 100 trees:
nice, I want to check for another tree
seed = 2
s = sim_phyl(seed = seed)
s2 = s$newick.extant.p
time=proc.time()
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80),n_trees = 10, parallel = T, impsam = T,tol=0.01)
## [1] "iteration # 1 :"
## [1] 1.8 0.6 80.0
## [1] "iteration # 2 :"
## [1] 1.3461830 0.5465302 81.5614811
## [1] "iteration # 3 :"
## [1] 1.2138246 0.5073566 76.1795179
## [1] "iteration # 4 :"
## [1] 1.0033049 0.4994802 78.9430244
## [1] "iteration # 5 :"
## [1] 0.8293162 0.4597334 81.6740216
## [1] "iteration # 6 :"
## [1] 0.7893348 0.4239375 69.3908200
## [1] "iteration # 7 :"
## [1] 0.7346098 0.3742524 65.9291583
## [1] "iteration # 8 :"
## [1] 0.8155829 0.3352657 55.9614470
## [1] "iteration # 9 :"
## [1] 0.8186718 0.2832650 50.2406139
## [1] "iteration # 10 :"
## [1] 0.8521416 0.2551442 46.9344381
## [1] "iteration # 11 :"
## [1] 0.7696572 0.1996186 46.3053002
## [1] "iteration # 12 :"
## [1] 0.8218108 0.1893510 43.4042169
## [1] "iteration # 13 :"
## [1] 0.7015924 0.1525135 41.6455866
## [1] "iteration # 14 :"
## [1] 0.5882046 0.1145913 38.5167894
## [1] "iteration # 15 :"
## [1] 0.46028620 0.07810912 37.74753205
## [1] "iteration # 16 :"
## [1] 0.38635507 0.06546688 38.79687303
## [1] "iteration # 17 :"
## [1] 0.31732085 0.03638115 49.25900691
## [1] "iteration # 18 :"
## [1] 0.25961305 0.01965025 64.26064281
## [1] "iteration # 19 :"
## [1] 0.232889866 0.007452604 82.856482212
## [1] "iteration # 20 :"
## [1] 0.23489907 0.00554337 77.17070133
## [1] "iteration # 21 :"
## [1] 0.235077077 0.005236152 79.031905645
## [1] "iteration # 22 :"
## [1] 2.226448e-01 4.626761e-05 9.216272e+01
## [1] "iteration # 23 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 24 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 25 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 26 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 27 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 28 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 29 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
## [1] "iteration # 30 :"
## [1] 2.225562e-01 8.803887e-18 9.225888e+01
print(proc.time()-time)
## user system elapsed
## 16.896 0.004 16.909
qplot(1:30,EM$pars[,2])
EM$pars
## [,1] [,2] [,3]
## [1,] 1.3461830 5.465302e-01 81.56148
## [2,] 1.2138246 5.073566e-01 76.17952
## [3,] 1.0033049 4.994802e-01 78.94302
## [4,] 0.8293162 4.597334e-01 81.67402
## [5,] 0.7893348 4.239375e-01 69.39082
## [6,] 0.7346098 3.742524e-01 65.92916
## [7,] 0.8155829 3.352657e-01 55.96145
## [8,] 0.8186718 2.832650e-01 50.24061
## [9,] 0.8521416 2.551442e-01 46.93444
## [10,] 0.7696572 1.996186e-01 46.30530
## [11,] 0.8218108 1.893510e-01 43.40422
## [12,] 0.7015924 1.525135e-01 41.64559
## [13,] 0.5882046 1.145913e-01 38.51679
## [14,] 0.4602862 7.810912e-02 37.74753
## [15,] 0.3863551 6.546688e-02 38.79687
## [16,] 0.3173208 3.638115e-02 49.25901
## [17,] 0.2596130 1.965025e-02 64.26064
## [18,] 0.2328899 7.452604e-03 82.85648
## [19,] 0.2348991 5.543370e-03 77.17070
## [20,] 0.2350771 5.236152e-03 79.03191
## [21,] 0.2226448 4.626761e-05 92.16272
## [22,] 0.2225562 8.803887e-18 92.25888
## [23,] 0.2225562 8.803887e-18 92.25888
## [24,] 0.2225562 8.803887e-18 92.25888
## [25,] 0.2225562 8.803887e-18 92.25888
## [26,] 0.2225562 8.803887e-18 92.25888
## [27,] 0.2225562 8.803887e-18 92.25888
## [28,] 0.2225562 8.803887e-18 92.25888
## [29,] 0.2225562 8.803887e-18 92.25888
## [30,] 0.2225562 8.803887e-18 92.25888
stat=NULL
time=proc.time()
phylo1 = s$newick.extant
for(i in 1:30){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
print(proc.time()-time)
## user system elapsed
## 88.312 0.016 88.491
qplot(1:30,stat)
Complete simulations with method 1 (with the fasted way (less accuracy))
The algorithm corresponds to
time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 30 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim)
for(j in 1:n_sim){
s <- sim_phyl(seed=j)
s2 = s$newick.extant.p
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = T, printpar = F, n_it=n_it)
stat = vector(mode='numeric',length = n_it)
phylo1 = s$newick.extant
for(i in 1:30){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
Pars[j,] = EM$pars[stat==min(stat),]
times[j] = sum(EM$times)
qplot(1:30,stat)
}
print(proc.time()-time)
## user system elapsed
## 11382.232 0.928 11386.495
summary(Pars)
## V1 V2 V3
## Min. :0.3959 Min. :0.01203 Min. :33.59
## 1st Qu.:0.5884 1st Qu.:0.05669 1st Qu.:39.58
## Median :0.6981 Median :0.09055 Median :42.65
## Mean :0.6990 Mean :0.10257 Mean :43.93
## 3rd Qu.:0.7669 3rd Qu.:0.11388 3rd Qu.:46.25
## Max. :1.4188 Max. :0.60123 Max. :83.32
qplot(Pars[,1], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(Pars[,2], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(Pars[,3], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(time)
## user system elapsed
## 2.496 0.068 2.544
qplot(times, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 60 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim)
for(j in 1:n_sim){
s <- sim_phyl(seed=j)
s2 = s$newick.extant.p
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
stat = vector(mode='numeric',length = n_it)
phylo1 = s$newick.extant
for(i in 1:n_it){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
Pars[j,] = EM$pars[stat==min(stat),]
times[j] = sum(EM$times)
print(qplot(1:n_it,stat))
}
print(proc.time()-time)
## user system elapsed
## 19525.400 1.092 19525.954
summary(Pars)
## V1 V2 V3
## Min. :0.4762 Min. :0.02977 Min. :33.49
## 1st Qu.:0.6773 1st Qu.:0.07093 1st Qu.:39.26
## Median :0.7659 Median :0.09063 Median :41.46
## Mean :0.7830 Mean :0.10130 Mean :41.85
## 3rd Qu.:0.8831 3rd Qu.:0.11238 3rd Qu.:44.00
## Max. :1.2563 Max. :0.59881 Max. :66.73
qplot(Pars[,1], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(Pars[,2], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(Pars[,3], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(time)
## user system elapsed
## 2.540 0.064 2.584
qplot(times, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 60 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim)
EMs = vector(mode='list',length = n_sim)
LTTs = vector(mode='list',length = n_sim)
for(j in 1:n_sim){
s <- sim_phyl(seed=j,mu0 = 0.2)
s2 = s$newick.extant.p
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
stat = vector(mode='numeric',length = n_it)
phylo1 = s$newick.extant
for(i in 1:n_it){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
EMs[[j]] = EM$pars
LTTs[[j]] = stat
Pars[j,] = EM$pars[stat==min(stat),]
times[j] = sum(EM$times)
# print(qplot(1:n_it,stat))
}
print(proc.time()-time)
## user system elapsed
## 23605.01 0.22 23616.50
summary(Pars)
## V1 V2 V3
## Min. :0.4647 Min. :0.05325 Min. :25.39
## 1st Qu.:0.6499 1st Qu.:0.11239 1st Qu.:35.53
## Median :0.7395 Median :0.13715 Median :39.34
## Mean :0.7631 Mean :0.14133 Mean :39.18
## 3rd Qu.:0.8427 3rd Qu.:0.16089 3rd Qu.:42.54
## Max. :1.2313 Max. :0.40700 Max. :52.53
qplot(Pars[,1], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(Pars[,2], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
qplot(Pars[,3], geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
summary(time)
## user system elapsed
## 2.588 0.076 2.647
qplot(times, geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
for(j in 1:n_sim){
EM=EMs[[j]]
df = data.frame(n=1:n_it, EM=EM[,2],ltt=LTTs[[j]])
p = ggplot(data=df,aes(x=n,y=EM,color=ltt))+geom_point()
print(paste('seed',j))
print(p)
}
## [1] "seed 1"
## [1] "seed 2"
## [1] "seed 3"
## [1] "seed 4"
## [1] "seed 5"
## [1] "seed 6"
## [1] "seed 7"
## [1] "seed 8"
## [1] "seed 9"
## [1] "seed 10"
## [1] "seed 11"
## [1] "seed 12"
## [1] "seed 13"
## [1] "seed 14"
## [1] "seed 15"
## [1] "seed 16"
## [1] "seed 17"
## [1] "seed 18"
## [1] "seed 19"
## [1] "seed 20"
## [1] "seed 21"
## [1] "seed 22"
## [1] "seed 23"
## [1] "seed 24"
## [1] "seed 25"
## [1] "seed 26"
## [1] "seed 27"
## [1] "seed 28"
## [1] "seed 29"
## [1] "seed 30"
## [1] "seed 31"
## [1] "seed 32"
## [1] "seed 33"
## [1] "seed 34"
## [1] "seed 35"
## [1] "seed 36"
## [1] "seed 37"
## [1] "seed 38"
## [1] "seed 39"
## [1] "seed 40"
## [1] "seed 41"
## [1] "seed 42"
## [1] "seed 43"
## [1] "seed 44"
## [1] "seed 45"
## [1] "seed 46"
## [1] "seed 47"
## [1] "seed 48"
## [1] "seed 49"
## [1] "seed 50"
## [1] "seed 51"
## [1] "seed 52"
## [1] "seed 53"
## [1] "seed 54"
## [1] "seed 55"
## [1] "seed 56"
## [1] "seed 57"
## [1] "seed 58"
## [1] "seed 59"
## [1] "seed 60"
## [1] "seed 61"
## [1] "seed 62"
## [1] "seed 63"
## [1] "seed 64"
## [1] "seed 65"
## [1] "seed 66"
## [1] "seed 67"
## [1] "seed 68"
## [1] "seed 69"
## [1] "seed 70"
## [1] "seed 71"
## [1] "seed 72"
## [1] "seed 73"
## [1] "seed 74"
## [1] "seed 75"
## [1] "seed 76"
## [1] "seed 77"
## [1] "seed 78"
## [1] "seed 79"
## [1] "seed 80"
## [1] "seed 81"
## [1] "seed 82"
## [1] "seed 83"
## [1] "seed 84"
## [1] "seed 85"
## [1] "seed 86"
## [1] "seed 87"
## [1] "seed 88"
## [1] "seed 89"
## [1] "seed 90"
## [1] "seed 91"
## [1] "seed 92"
## [1] "seed 93"
## [1] "seed 94"
## [1] "seed 95"
## [1] "seed 96"
## [1] "seed 97"
## [1] "seed 98"
## [1] "seed 99"
## [1] "seed 100"
time = proc.time()
n_sim = 100 #number of simulated trees
n_it = 80 # number of EM iterations
Pars = matrix(nrow=n_sim,ncol=3)
times = vector(mode='numeric',length = n_sim)
EMs = vector(mode='list',length = n_sim)
LTTs = vector(mode='list',length = n_sim)
for(j in 1:n_sim){
s <- sim_phyl(seed=j,mu0 = 0.4)
s2 = s$newick.extant.p
EM = EM_phylo(wt=s2$wt, init_par = c(1.8,0.6,80), n_trees = 10, parallel = F, impsam = F, printpar = F, n_it=n_it)
stat = vector(mode='numeric',length = n_it)
phylo1 = s$newick.extant
for(i in 1:n_it){
expe = expectedLTT2(pars=EM$pars[i,])
wt = c(expe$bt[1],diff(expe$bt))
p = list(wt=wt,E=rep(1,length(expe$bt)),n=expe$Ex)
phylo2 = p2phylo(p)
ltt = ltt_stat(phylo1,phylo2)
stat[i] = ltt
}
EMs[[j]] = EM$pars
LTTs[[j]] = stat
Pars[j,] = EM$pars[stat==min(stat),]
times[j] = sum(EM$times)
# print(qplot(1:n_it,stat))
}
print(proc.time()-time)
## user system elapsed
## 33702.216 0.304 33701.852
summary(Pars)
## V1 V2 V3
## Min. :0.2613 Min. :0.07469 Min. :1.700e+01
## 1st Qu.:0.6259 1st Qu.:0.16789 1st Qu.:3.200e+01
## Median :0.8254 Median :0.23070 Median :3.700e+01
## Mean :0.7996 Mean :0.26859 Mean :2.617e+10
## 3rd Qu.:0.9467 3rd Qu.:0.29555 3rd Qu.:4.300e+01
## Max. :1.2776 Max. :0.72551 Max. :2.617e+12
summary(times)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.53 27.92 32.03 32.04 36.20 46.89
Simulations March 2017
Given a reconstructed phylogenetic tree
phylo <- sim_phyl(seed=3)$newick.extant
The new proposed method corresponds to the following steps
pars = c(2,80)
mu=0.2
# this does not work at all:
#pars = subplex(par = mu, fn = ltt_mu, phylo=phylo, prior_pars = pars)$par
#let´s visualize the ltt_mu curve
pp = proc.time()
mu = seq(0.01, 0.8, by=0.03)
stat=NULL
for(i in 1:length(mu)){
stat[i] = ltt_mu(mu = mu[i], phylo = phylo, prior_pars = pars)
}
## [1] "DIFFERENT LENGHT IN PHYLOGENIES!"
print(pp - proc.time())
## user system elapsed
## -392.140 -0.136 -392.470
and
qplot(mu,stat)
nmu = mean(mu[stat < quantile(stat,0.05)])
nmu
## [1] 0.325
HERE SHOULD BE a 2-parameter version of the EM
Once we have the new \(\lambda\) and \(K\) we go to step 2 to re-calculate \(\mu\)
We repeat this algorithm until convergence.