On this report we aim to compare the two main reconstruction methods and set up a standar tool for comparison for other purposes.
To do that we consider the following statistics:
pp=proc.time()
s = sim_phyl(seed=2)
s2 = phylo2p(drop.fossil(s$newick))
n_trees=5000
phylos=vector('list',length=n_trees)
nLTT = vector(length=n_trees)
gam = vector(length=n_trees)
phylos2=vector('list',length=n_trees)
nLTT2 = vector(length=n_trees)
gam2 = vector(length=n_trees)
dt=0.01
for(i in 1:n_trees){
r = rec_tree(wt=s2$t)
r1 = p2phylo(r)
phylos[[i]] = read.tree(text=r1)
nLTT[i] = nLTTstat(s$newick, phylos[[i]], distance_method = "abs")
gam[i] = gammaStat(phylos[[i]])
#
r = rec_tree(wt=s2$t,rec_method=2)
r2 = p2phylo(r)
phylos2[[i]] = read.tree(text=r2)
nLTT2[i] = nLTTstat(s$newick, phylos2[[i]], distance_method = "abs")
gam2[i] = gammaStat(phylos2[[i]])
}
nltt_values <- get_nltt_values(phylos, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
dat = data.frame(nltt=c(nLTT,nLTT2),gamma = c(gam,gam2),method=c(rep('method 1',length(nLTT)),rep('method 2',length(nLTT))))
ggplot(dat, aes(x=nltt, fill=method)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat, aes(x=gamma, fill=method)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(pp-proc.time())
## user system elapsed
## -1609.680 -0.144 -1609.957
nltt_values <- get_nltt_values(phylos2, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
s = sim_phyl(seed=2)
s2 = phylo2p(drop.fossil(s$newick))
n_trees=5000
phylos=vector('list',length=n_trees)
nLTT = vector(length=n_trees)
gam = vector(length=n_trees)
phylos2=vector('list',length=n_trees)
nLTT2 = vector(length=n_trees)
gam2 = vector(length=n_trees)
dt=0.001
for(i in 1:n_trees){
r = rec_tree(wt=s2$t)
r1 = p2phylo(r)
phylos[[i]] = read.tree(text=r1)
nLTT[i] = nLTTstat(s$newick, phylos[[i]], distance_method = "abs")
gam[i] = gammaStat(phylos[[i]])
#
r = rec_tree(wt=s2$t,rec_method=2)
r2 = p2phylo(r)
phylos2[[i]] = read.tree(text=r2)
nLTT2[i] = nLTTstat(s$newick, phylos2[[i]], distance_method = "abs")
gam2[i] = gammaStat(phylos2[[i]])
}
nltt_values <- get_nltt_values(phylos, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
dat = data.frame(x=c(nLTT,nLTT2),method=c(rep('method 1',length(nLTT)),rep('method 2',length(nLTT))))
ggplot(dat, aes(x=x, fill=method)) + geom_histogram(alpha=0.2, position="identity")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
nltt_values <- get_nltt_values(phylos2, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
dat = data.frame(nltt=c(nLTT,nLTT2),gamma = c(gam,gam2),method=c(rep('method 1',length(nLTT)),rep('method 2',length(nLTT))))
ggplot(dat, aes(x=nltt, fill=method)) + geom_histogram(alpha=0.5)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat, aes(x=gamma, fill=method)) + geom_histogram(alpha=0.3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(dat, aes(x=gamma, fill=method)) + geom_histogram(alpha=0.5, position='identity')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
s = sim_phyl(seed=2)
s2 = phylo2p(drop.fossil(s$newick))
n_trees=10000
phylos=vector('list',length=n_trees)
nLTT = vector(length=n_trees)
gam = vector(length=n_trees)
phylos2=vector('list',length=n_trees)
nLTT2 = vector(length=n_trees)
gam2 = vector(length=n_trees)
dt=0.001
for(i in 1:n_trees){
r = rec_tree(wt=s2$t)
r1 = p2phylo(r)
phylos[[i]] = read.tree(text=r1)
nLTT[i] = nLTTstat(s$newick, phylos[[i]], distance_method = "abs")
gam[i] = gammaStat(phylos[[i]])
#
r = rec_tree(wt=s2$t,rec_method=2)
r2 = p2phylo(r)
phylos2[[i]] = read.tree(text=r2)
nLTT2[i] = nLTTstat(s$newick, phylos2[[i]], distance_method = "abs")
gam2[i] = gammaStat(phylos2[[i]])
}
nltt_values <- get_nltt_values(phylos, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
dat = data.frame(x=c(nLTT,nLTT2),method=c(rep('method 1',length(nLTT)),rep('method 2',length(nLTT))))
ggplot(dat, aes(x=x, fill=method)) + geom_histogram(alpha=0.2, position="identity")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
nltt_values <- get_nltt_values(phylos2, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
pp = proc.time()
s = sim_phyl(seed=2)
s2 = phylo2p(drop.fossil(s$newick))
n_trees=1000
phylos=vector('list',length=n_trees)
nLTT = vector(length=n_trees)
gam = vector(length=n_trees)
phylos2=vector('list',length=n_trees)
nLTT2 = vector(length=n_trees)
gam2 = vector(length=n_trees)
dt=0.0001
for(i in 1:n_trees){
r = rec_tree(wt=s2$t)
r1 = p2phylo(r)
phylos[[i]] = read.tree(text=r1)
nLTT[i] = nLTTstat(s$newick, phylos[[i]], distance_method = "abs")
gam[i] = gammaStat(phylos[[i]])
#
r = rec_tree(wt=s2$t,rec_method=2)
r2 = p2phylo(r)
phylos2[[i]] = read.tree(text=r2)
nLTT2[i] = nLTTstat(s$newick, phylos2[[i]], distance_method = "abs")
gam2[i] = gammaStat(phylos2[[i]])
}
nltt_values <- get_nltt_values(phylos, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)
dat = data.frame(x=c(nLTT,nLTT2),method=c(rep('method 1',length(nLTT)),rep('method 2',length(nLTT))))
ggplot(dat, aes(x=x, fill=method)) + geom_histogram(alpha=0.2, position="identity")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
print(proc.time()-pp)
## user system elapsed
## 691.264 4.200 695.495
nltt_values <- get_nltt_values(phylos2, dt = dt)
# Plot the phylognies, where the individual nLTT values are visible
qplot(t, nltt, data = nltt_values, geom = "point",
ylim = c(0,1),
main = "Average nLTT plot of phylogenies", size = I(0.1)
) + stat_summary(
fun.data = "mean_cl_boot", color = "red", geom = "smooth"
)