## vector with package names
x <- c("pbapply", "parallel", "coda", "DT", "stringr", "rwty", "viridis", "wesanderson", "tidyr", "kableExtra")
aa <- lapply(x, function(y) {
# check if installed, if not then install
if (!y %in% installed.packages()[,"Package"])
install.packages(y)
# load package
try(require(y, character.only = T), silent = T)
})
knitr::opts_knit$set(root.dir = normalizePath(".."))
# knitr::opts_chunk$set(dpi = 58, fig.width = 12, echo = TRUE)
# options(knitr.kable.NA = '')
# function to extract diagnostic stats
diag_stats <- function(x, thin = 1, age.extant = TRUE) {
mods <- grep(x, model_runs, value = TRUE)
out <- lapply(mods, function(y){
temp <- read.table(file.path("output/revbayes", y), header = TRUE)
chain <- mcmc(data = temp , start = 1, end = length(temp[,1]), thin = thin)
return(chain)
})
# make it a mcmc list object
mh_list <- mcmc.list(out)
#minimum effective sample size from combined traces
min_ess <- min(effectiveSize(mh_list)[-1])
#autocorrelations between draws in run 1
autocorr_post <- diag(autocorr(mh_list[[1]])[2, , ])["Posterior"]
autocorr_age_extant <- diag(autocorr(mh_list[[1]])[2, , ])["age_extant"]
autocorr_origin_time <- diag(autocorr(mh_list[[1]])[2, , ])["origin_time"]
autocorr_diversification <- diag(autocorr(mh_list[[1]])[2, , ])["diversification"]
autocorr_max <- max(abs(diag(autocorr(mh_list[[1]])[2, , ])[-1]))
autocorr_bad <- paste0(names(which(diag(autocorr(mh_list[[1]])[2, , ])[-1] > 0.1)),
collapse= ",")
# Gelman- Rubin diagnostic of convergence between runs
gr <- try(gelman.diag(mh_list,confidence = 0.95, transform=TRUE,
autoburnin=FALSE, multivariate = TRUE), silent = TRUE)
# catch error
if (!is(gr, "try-error")){
gr_post <- gr$psrf["Posterior", 1]
gr_age_extant <- if(age.extant) gr$psrf["age_extant", 1] else NA
gr_origin_time <- gr$psrf["origin_time",1]
gr_diversification <- gr$psrf["diversification",1]
gr_max <- max(gr$psrf[-1,1])
gr_bad <- paste0(names(which(gr$psrf[-1,1] > 1.05)), collapse = ",")
gr_mpsrf <- gr$mpsrf
} else {
gr_post <- NA
gr_age_extant <- NA
gr_origin_time <- NA
gr_diversification <- NA
gr_max <- NA
gr_bad <- NA
gr_mpsrf <- NA
}
# date it was run
updated <- substr(x = file.info(file.path("output/revbayes", mods[1]))$mtime, 0 , 10)
# put results in a data frame
out_df <- data.frame(model = x, min_ess, autocorr_post, autocorr_age_extant, autocorr_origin_time, autocorr_diversification, autocorr_max, autocorr_bad, gr_post, gr_age_extant, gr_origin_time, gr_diversification, gr_max, gr_bad, gr_mpsrf, updated)
}
# function for MCMCglmm diagnostics
# plot diagonostic stuff for mcmcglmmm models
# X == mcmc.list
plot_traces <- function(X, clms = 4, cex = 1, cols = adjustcolor(c("yellow","blue"), alpha.f = 0.6)) {
# reset par when done
opar <- par()
on.exit(par(opar))
# trace and autocorrelation
if (clms > 1) par(mfrow = c(1, clms))
for(y in colnames(X[[1]])){
Z <- lapply(X, function(x) x[, y, drop = FALSE])
traceplot(Z, col = cols, cex.lab=cex, cex.axis=cex, main = "")
title(y, cex.main = cex + 0.5)
if (grepl("age_extant", y))
autocorr.plot2(Z[[1]], col = "gray", lwd = 4, ask = FALSE, auto.layout = FALSE, cex.axis = cex, cex.lab = cex, cex = cex + 0.5, main = y)
}
}
# modified internal from coda
autocorr.plot2 <- function(x, lag.max, auto.layout = TRUE, ask, main, cex = 1, ...) {
if (missing(ask)) {
ask <- if (is.R()) {
dev.interactive()
}
else {
interactive()
}
}
oldpar <- NULL
on.exit(par(oldpar))
if (auto.layout)
oldpar <- par(mfrow = set.mfrow(Nchains = nchain(x),
Nparms = nvar(x)))
if (!is.mcmc.list(x))
x <- mcmc.list(as.mcmc(x))
for (i in 1:nchain(x)) {
xacf <- if (missing(lag.max))
acf(as.ts.mcmc2(x[[i]]), plot = FALSE)
else acf(as.ts.mcmc2(x[[i]]), lag.max = lag.max, plot = FALSE)
for (j in 1:nvar(x)) {
plot(xacf$lag[, j, j], xacf$acf[, j, j], type = "h",
ylab = "Autocorrelation", xlab = "Lag", ylim = c(-1,
1), ...)
title(main, cex.main = cex)
if (i == 1 && j == 1)
oldpar <- c(oldpar, par(ask = ask))
}
}
invisible(x)
}
# modified internal from coda
as.ts.mcmc2 <- function (x, ...)
{
x <- as.mcmc(x)
y <- ts(x, start = start(x), end = end(x), deltat = thin(x))
attr(y, "mcpar") <- NULL
return(y)
}
## RUN TO UPDATE OUTPUT ###
# get model run log file names
model_runs <- list.files("output/most_recent_revbayes_models", pattern = "run_([[:digit:]]+)\\.log$")
# get base model name (removing run_#)
base_models <- sapply(strsplit(model_runs, "_run", fixed = TRUE), "[", 1)
# extract diagnostic stats for each model
# wrapped in try() to catch errors
runs <- pblapply(unique(base_models), cl = parallel::detectCores() - 1, function(x) try(diag_stats(x, thin = if (substr(x, 0, 3) %in% c("CCE", "SUR")) 2 else 1, age.extant = if (grepl("no.fossils", x)) FALSE else TRUE), silent = TRUE))
# remove errors
runs <- runs[sapply(runs, is.data.frame)]
# put in a single data frame
model_diags <- do.call(rbind, runs)
# save
write.csv(model_diags, "output/diagnostics_revbayes_output.csv", row.names = FALSE)
rb.trees <- load.multi("~/Dropbox/Projects/lbh_cultural_evolution/output/most_recent_revbayes_models/", format = "revbayes")
saveRDS(rb.trees, "./output/revbayes_output_in_single_R_object.RDS")
model_diags <- read.csv("output/diagnostics_revbayes_output.csv", stringsAsFactors = FALSE)
# get lek name
model_diags$lek <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 1)
# substitution model
model_diags$subs <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 2)
# period
model_diags$period <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 3)
# and which fossils were used
model_diags$fossils <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 4)
model_diags$align <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 5)
# order columns
model_diags <- model_diags[, c(c("model", "lek", "subs", "period", "fossils", "align", "autocorr_bad", "gr_bad") , names(which(sapply(model_diags, is.numeric))), "updated")]
# round
model_diags[, sapply(model_diags, is.numeric)] <- round(model_diags[, sapply(model_diags, is.numeric)], 4)
#count "bad" branch rates per row
brc <- paste0("branch_rates_count:",(str_count(pattern = "branch_rates", model_diags$autocorr_bad)))
model_diags$autocorr_bad <- gsub("branch_rates\\.([[:digit:]]+)\\.|\\,branch_rates\\.([[:digit:]]+)\\.","", model_diags$autocorr_bad)
model_diags$autocorr_bad <- paste(model_diags$autocorr_bad, brc)
brc2 <- paste0("branch_rates_count:",(str_count(pattern = "branch_rates", model_diags$gr_bad)))
model_diags$gr_bad <- gsub("branch_rates\\.([[:digit:]]+)\\.|\\,branch_rates\\.([[:digit:]]+)\\.","", model_diags$gr_bad)
model_diags$gr_bad <- paste(model_diags$gr_bad, brc)
rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")
names(rb.trees) <- gsub("\\.trees", "", names(rb.trees))
model_diags$iterations <- sapply(model_diags$model, function(x) max(rb.trees[[x]]$ptable$Iteration))
# Number of models by iterations and lek
kbl <- knitr::kable(as.matrix(table(model_diags$iterations, model_diags$lek)), caption = "Number of models by iterations and lek")
kableExtra::kable_styling(kbl)
BR1 | CCE | HC1 | SUR | TR1 | |
---|---|---|---|---|---|
3001 | 18 | 0 | 24 | 6 | 18 |
6001 | 0 | 24 | 0 | 24 | 0 |
=======
The table below can be read like this: read.csv("https://raw.githubusercontent.com/maRce10/lbh_cultural_evolution/master/output/diagnostics_revbayes_output.csv")
model_diags <- read.csv("output/diagnostics_revbayes_output.csv", stringsAsFactors = FALSE)
# get lek name
model_diags$lek <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 1)
# substitution model
model_diags$subs <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 2)
# period
model_diags$period <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 3)
# and which fossils were used
model_diags$fossils <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 4)
model_diags$align <- sapply(strsplit(model_diags$model, "_", fixed = TRUE), "[", 5)
# order columns
model_diags <- model_diags[, c(c("model", "lek", "subs", "period", "fossils", "align", "autocorr_bad", "gr_bad") , names(which(sapply(model_diags, is.numeric))), "updated")]
# round
model_diags[, sapply(model_diags, is.numeric)] <- round(model_diags[, sapply(model_diags, is.numeric)], 4)
#count "bad" branch rates per row
brc <- paste0("branch_rates_count:",(str_count(pattern = "branch_rates", model_diags$autocorr_bad)))
model_diags$autocorr_bad <- gsub("branch_rates\\.([[:digit:]]+)\\.|\\,branch_rates\\.([[:digit:]]+)\\.","", model_diags$autocorr_bad)
model_diags$autocorr_bad <- paste(model_diags$autocorr_bad, brc)
brc2 <- paste0("branch_rates_count:",(str_count(pattern = "branch_rates", model_diags$gr_bad)))
model_diags$gr_bad <- gsub("branch_rates\\.([[:digit:]]+)\\.|\\,branch_rates\\.([[:digit:]]+)\\.","", model_diags$gr_bad)
model_diags$gr_bad <- paste(model_diags$gr_bad, brc)
rb.trees <- readRDS("./output/revbayes_output_in_single_R_object.RDS")
names(rb.trees) <- gsub("\\.trees", "", names(rb.trees))
model_diags$iterations <- sapply(model_diags$model, function(x) max(rb.trees[[x]]$ptable$Iteration))
# Number of models by iterations and lek
kbl <- knitr::kable(as.matrix(table(model_diags$iterations, model_diags$lek)), caption = "Number of models by iterations and lek")
kableExtra::kable_styling(kbl)
BR1 | CCE | HC1 | SUR | TR1 | |
---|---|---|---|---|---|
3001 | 18 | 0 | 24 | 6 | 18 |
6001 | 0 | 24 | 0 | 24 | 0 |
# model_diags$model[model_diags$lek == "CCE" & model_diags$iterations == 3001]
# add link to plots
plts <- list.files(path = "./output/MCMC_diagnostic_plots", pattern = "\\.jpg$")
jpgs <- lapply(model_diags$model, function(x)
grep(paste(gsub("\\_([[:digit:]]+)", "", x), collapse = "|"), plts, value = TRUE))
model_diags$diag_plots <- sapply(model_diags$model, function(x)
paste(paste0('<a href=\"https://raw.githubusercontent.com/maRce10/lbh_cultural_evolution/master/output/MCMC_diagnostic_plots/', grep(paste(paste0(gsub("\\_([[:digit:]]+)", "", x), "_0"), collapse = "|"), plts, value = TRUE),'">', grep(paste(paste0(gsub("\\_([[:digit:]]+)", "", x), "_0"), collapse = "|"), plts, value = TRUE), '</a>'), collapse = " ")
)
# print dynamic table
datatable(model_diags, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
Record.labs <- c("Recent records only", "Including oldest records")
names(Record.labs) <- c("new", "old")
model_diags$period[model_diags$period == "no.fossils"] <- "new"
# order levels
model_diags$align <- factor(model_diags$align, levels = c("all.equal", "optimal", "prank"))
model_diags$lek <- factor(model_diags$lek, levels = c("BR1", "TR1", "CCE", "HC1", "SUR"))
model_diags$fossils[model_diags$fossils == "full.process"] <- "no.fossils"
# parameters to plot
params <- c("gr_post", "gr_age_extant", "gr_origin_time", "gr_diversification", "gr_max", "gr_mpsrf")
# create graphs
ggs <- lapply(params, function(x){
dat <- model_diags[!is.na(model_diags[,x]), ]
if (x == "gr_age_extant") dat <- dat[grep("no.fossils", dat$model, invert = TRUE), ]
dat$signif <- ifelse(dat[,x] <= 1.1, "good", "bad")
dat$signif <- factor(dat$signif, levels = c("good", "bad"))
# dat$alpha <- ifelse(dat[,x] > 1.1, 1, 0.5)
ggplot(data = dat, aes(
x = fossils,
y = get(x),
colour = subs,
fill = subs,
shape = signif,
alpha = signif,
)) +
geom_jitter(size = 3,
# alpha = 0.7,
width = 0.12) +
facet_grid(period ~ lek,
scales = "fixed",
labeller = labeller(period = Record.labs)) +
theme_light(base_size = 16) +
labs(y = x, shape = "GR", colour = "Alignment", x = "Historical records per song") +
scale_shape_manual(values = c(20, 21), labels = c("< 1.1", "> 1.1")) +
scale_alpha_manual(values = c(0.5, 1), guide = FALSE) +
scale_colour_manual(
values = viridis(3),
labels = c("MAFFT-agnostic", "MAFFT-optimal", "PRANK")
) +
scale_fill_manual(
values = viridis(3),
guide = FALSE) +
scale_x_discrete(labels = c("All", "Earliest", "No fossils")) +
geom_hline(yintercept = 1.1, lty = 2, col = "gray42") +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
})
names(ggs) <- unique(params)
ggs
## $gr_post
##
## $gr_age_extant
##
## $gr_origin_time
##
## $gr_diversification
##
## $gr_max
##
## $gr_mpsrf
## RUN TO UPDATE OUTPUT ###
# these plots will be linked to in the table of this Rmarkdown's output
# get model run log file names
model_runs <- list.files("output/most_recent_revbayes_models", pattern = "run_([[:digit:]]+)\\.log$")
# get base model name (removing run_#)
base_models <- sapply(strsplit(model_runs, "_run", fixed = TRUE), "[", 1)
# loop to make plots
out <- pblapply(base_models, function(x){
# info on base model
# print(gsub("_([[:digit:]]+)", "", x))
# extract run names
mods <- grep(x, model_runs, value = TRUE)
# loop over each pair of runs
out <- lapply(mods, function(z){
temp <- read.table(file.path("output/most_recent_revbayes_models", z), header = TRUE)
temp <- temp[,c("Posterior", "Likelihood", "Prior", "age_extant", "origin_time", "num_samp_anc", "speciation_rate", "extinction_rate", "diversification", "turnover", "psi", "er.1.", "er.2.", "er.3.", "er.4.", "er.5.", "er.6.", "er.7.", "er.8.", "er.9.", "er.10.", "er.11.","er.12.", "er.13.", "er.14.", "er.15.", "sf.1.", "sf.2.", "sf.3.", "sf.4.", "sf.5.", "sf.6.","alpha_song", "rates_song.1.", "rates_song.2.", "rates_song.3.", "rates_song.4.", grep("branch_rates", colnames(temp), value = TRUE))]
chain <- mcmc(data = temp, start = 1, end = length(temp[,1]), thin = if (substr(x, 0, 3) %in% c("CCE", "SUR")) 2 else 1)
return(chain)
}
)
# create if it doesn't exist
if (!file.exists(file.path("./output/MCMC_diagnostic_plots", paste0(gsub("_([[:digit:]]+)", "", x), "_01.jpg")))){
# plot
jpeg(filename = file.path("./output/MCMC_diagnostic_plots", paste0(gsub("_([[:digit:]]+)", "", x), "_%02d.jpg")), width = 1500, height = 1200, pointsize = 6)
ncol <- ncol(out[[1]])
par(mfrow = c(10, 10))
out2 <- try(plot_traces(X = out, clms = 1, cex = 2.5), silent = TRUE)
# close device
dev.off()
}
}
)
p3_output_files <- list.files(path = "./output/revbayes/P3_Results", recursive = TRUE, pattern = "effectsizes", full.names = TRUE)
p3_results_l <- lapply(p3_output_files, function(x){
Y <- read.csv(file.path(getwd(), x))
Y$model <- sapply(strsplit(x, "/", fixed = TRUE), "[", 5)
return(Y)
})
p3_results <- do.call(rbind, p3_results_l)
p3_results$Lek <- sapply(p3_results$model, function(x)
strsplit(x, "_", fixed = TRUE, "[[", 1)[[1]][2], USE.NAMES = FALSE)
p3_results$Lek <- factor(p3_results$Lek, levels = c("BR1", "TR1", "CCE", "HC1", "SUR"))
p3_results$Align <- sapply(p3_results$model, function(x)
strsplit(x, "_", fixed = TRUE, "[[", 1)[[1]][3], USE.NAMES = FALSE)
p3_results$Align <- factor(p3_results$Align, levels = c("all.equal", "optimal", "prank"))
p3_results$Fossils <- sapply(p3_results$model, function(x)
strsplit(x, "_", fixed = TRUE, "[[", 1)[[1]][5], USE.NAMES = FALSE)
p3_results$Fossils[p3_results$Fossils == "full.process"] <- "no.fossils"
p3_results$Fossils <- factor(p3_results$Fossils, levels = c("all", "early", "no.fossils"))
p3_results$Record <- sapply(p3_results$model, function(x)
strsplit(x, "_", fixed = TRUE, "[[", 1)[[1]][4], USE.NAMES = FALSE)
p3_results$Record[p3_results$Record == "no.fossils"] <- "new"
Record.labs <- c("Recent records only", "Including oldest records")
names(Record.labs) <- c("new", "old")
ggs <- lapply(unique(p3_results$Statistic), function(x){
ggplot(data = p3_results[p3_results$Statistic == x, ], aes(
x = Fossils,
y = Effect.Size,
colour = Align,
)) +
geom_jitter(size = 3,
alpha = 0.7,
width = 0.12) +
facet_grid(Record ~ Lek,
scales = "fixed",
labeller = labeller(Record = Record.labs)) +
theme_light(base_size = 16) +
labs(y = "Effect size", colour = "Alignment", x = "Historical records per song") +
scale_shape_discrete(guide = FALSE) +
scale_colour_manual(
values = viridis(4, alpha = 0.6),
labels = c("MAFFT-agnostic", "MAFFT-optimal", "PRANK")
) +
scale_x_discrete(labels = c("All", "Earliest", "No fossils")) +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
})
names(ggs) <- unique(p3_results$Statistic)
ggs
## $`Number Invariant Sites`
##
## $`Number Invariant Sites Excluding Ambiguous`
##
## $`Max Invariant Block Length`
##
## $`Max Invariant Block Length Excluding Ambiguous`
##
## $`Max Pairwise Difference`
##
## $`Max Pairwise Difference Excluding Ambiguous`
##
## $`Max Variable Block Length`
##
## $`Max Variable Block Length Excluding Ambiguous`
##
## $`Min Pairwise Difference`
##
## $`Min Pairwise Difference Excluding Ambiguous`
##
## $`Number Invariable Block`
##
## $`Number Invariable Block Excluding Ambiguous`
##
## $`Segregating-Sites`
##
## $Theta
##
## $`Tajima-D`
##
## $`Tajima-Pi`
p3_results$signif <- ifelse(p3_results$Two.tailed < 0.05, " < 0.05", " >= 0.05")
p3_results$signif <- factor(p3_results$signif, levels = c(" >= 0.05", " < 0.05"))
p3_results$alpha <- ifelse(p3_results$Two.tailed < 0.05, 1, 0.95)
ggs2 <- lapply(unique(p3_results$Statistic), function(x){
ggplot(data = p3_results[p3_results$Statistic == x, ], aes(
x = Fossils,
y = Two.tailed,
colour = Align,
fill = Align,
shape = signif,
alpha = signif,
)) +
geom_jitter(size = 5,
# alpha = 0.7,
width = 0.12) +
facet_grid(Record ~ Lek,
scales = "fixed",
labeller = labeller(Record = Record.labs)) +
theme_light(base_size = 16) +
labs(y = "pvalue", colour = "Alignment", x = "Historical records per song", shape = "P value") +
# scale_shape_discrete(guide = FALSE) +
scale_shape_manual(values = c(20, 21)) +
scale_alpha_manual(values = c(0.44, 1), guide = FALSE) +
scale_colour_manual(
values = viridis(4),
labels = c("MAFFT-agnostic", "MAFFT-optimal", "PRANK")
) +
scale_fill_manual(
values = viridis(4), guide = FALSE,
labels = c("MAFFT-agnostic", "MAFFT-optimal", "PRANK")
) +
scale_x_discrete(labels = c("All", "Earliest", "No fossils")) +
geom_hline(yintercept = 0.05, lty = 2, col = "gray42") +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
})
names(ggs2) <- unique(p3_results$Statistic)
ggs2
## $`Number Invariant Sites`
##
## $`Number Invariant Sites Excluding Ambiguous`
##
## $`Max Invariant Block Length`
##
## $`Max Invariant Block Length Excluding Ambiguous`
##
## $`Max Pairwise Difference`
##
## $`Max Pairwise Difference Excluding Ambiguous`
##
## $`Max Variable Block Length`
##
## $`Max Variable Block Length Excluding Ambiguous`
##
## $`Min Pairwise Difference`
##
## $`Min Pairwise Difference Excluding Ambiguous`
##
## $`Number Invariable Block`
##
## $`Number Invariable Block Excluding Ambiguous`
##
## $`Segregating-Sites`
##
## $Theta
##
## $`Tajima-D`
##
## $`Tajima-Pi`
# Read model names
m <- read.table(file = "./output/revbayes/stdout/dataset.txt", header = F)
# Read two replicates of ML estimates under the stepping stone algorithm
ss.r1 <- read.table(file ="./output/revbayes/stdout/SteppingStone.txt", header = F, sep = ",")
ss.r2 <- read.table(file = "./output/revbayes/stdout/SteppingStoneR2.txt", header = F, sep = ",")
ss.r3 <- read.table(file = "./output/revbayes/stdout/SteppingStoneR3.txt", header = F, sep = ",")
# missing models to NA and failed estimations to "Repeat"
n = 3 # currently two replicates
# make df
m.ml <- m
for (i in 1:n) {
m.ml <- cbind(m.ml, eval(parse(text = paste0("ss.r", i))))
}
colnames(m.ml) <- c("Model", paste0("ss",c(1:n)))
for(i in 1:n) {
levels( m.ml[, i + 1]) <- c(levels( m.ml[, i + 1]),"Repeat")
for (j in 1:nrow(m.ml)) {
if (grepl(pattern = "This typically refers",
x = as.character(m.ml[j, i + 1]),
fixed = TRUE)) {
m.ml[j, i + 1] <- "Repeat"
}
if (grepl(pattern = "Problem processing",
x = m.ml[j, i + 1],
fixed = TRUE)) {
m.ml[j, i + 1] <- "Repeat"
}
if (grepl(pattern = "Could not open",
x = m.ml[j, i + 1],
fixed = TRUE)) {
m.ml[j, i + 1] <- NA
}
}
# m.ml[,i+1] <- droplevels(m.ml[,i+1])
}
# remove CCE that I accidentally repeated
#m.ml <- m.ml[-(49:60),]
m.rep<- read.table(file = "./output/revbayes/stdout/datasetRepeats.txt", header = F)
ss.rep <- read.table(file = "./output/revbayes/stdout/SteppingStoneRepeat.txt", header = F, sep = ",")
m.ml.rep <-cbind(m.rep, ss.rep)
colnames(m.ml.rep) <- c("Model", "ss")
# match name format
m.ml.rep$Model <- gsub(pattern = "_", replacement = ".", x = m.ml.rep$Model)
m.ml.rep$Model <- gsub(pattern = ".global", replacement = "._global", x = m.ml.rep$Model)
m.ml.rep$Model <- gsub(pattern = ".Uexp", replacement = "._Uexp", x = m.ml.rep$Model)
#Replace repeat for ML value
m.ml$ss1 <- as.character(m.ml$ss1)
m.ml$ss2 <- as.character(m.ml$ss2)
m.ml$ss3 <- as.character(m.ml$ss3)
for (i in 1:nrow(m.ml)) {
for (j in 1:nrow(m.ml.rep)) {
if (m.ml$Model[i] == m.ml.rep$Model[j] & m.ml$ss1[i] == "Repeat") {
m.ml$ss1[i] <- m.ml.rep$ss[j]
}
if (m.ml$Model[i] == m.ml.rep$Model[j] & m.ml$ss2[i] == "Repeat") {
m.ml$ss2[i] <- m.ml.rep$ss[j]
}
if (m.ml$Model[i] == m.ml.rep$Model[j] & m.ml$ss3[i] == "Repeat") {
m.ml$ss3[i] <- m.ml.rep$ss[j]
}
}
}
#m.ml$ss1 <- as.numeric(m.ml$ss1)
#m.ml$ss2 <- as.numeric(m.ml$ss2)
# remove "." from alignment name
m.ml$Model <- gsub(pattern = "all.equal", replacement = "allequal", x = m.ml$Model )
# split name into columns
m.ml <- m.ml %>% separate(Model, c("Lek", "Align", "Record", "Fossils", "Clock"))
# long to wide and rearrange
m.ml <-
pivot_wider(
data = m.ml,
names_from = Clock,
values_from = paste0("ss", c(1:n))
)
m.ml <-
m.ml[, c(
"Lek",
"Align",
"Record",
"Fossils",
paste0("ss", c(1:n), "_global"),
paste0("ss", c(1:n), "_Uexp")
)]
# m.ml %>%
# kbl() %>%
# kable_minimal()
# print dynamic table
datatable(m.ml, editable = list(
target = 'row'
), rownames = FALSE, style = "bootstrap", filter = 'top', options = list(
pageLength = 100, autoWidth = TRUE, dom = 'ft'
), autoHideNavigation = TRUE, escape = FALSE)
# while missing data make numeric
m.ml$ss1_diff <- as.numeric(m.ml$ss1_Uexp) - as.numeric(m.ml$ss1_global)
m.ml$ss2_diff <- as.numeric(m.ml$ss2_Uexp) - as.numeric(m.ml$ss2_global)
m.ml$ss3_diff <- as.numeric(m.ml$ss3_Uexp) - as.numeric(m.ml$ss3_global)
# make long data frame
ml.long <- m.ml[,c(1:4,11:13)]
ml.long <-
pivot_longer(data = ml.long,
cols = paste0("ss", c(1:n), "_diff"),
names_to = "run")
ml.long$Lek <- factor(ml.long$Lek, levels=c('BR1','TR1','CCE','HC1', 'SUR'))
# new facet label names for fossil record
Record.labs <- c("Recent records only", "Including oldest records")
names(Record.labs) <- c("new", "old")
# plot
ML.plot <-
ggplot(data = ml.long, aes(
x = Fossils,
y = value,
colour = Align,
shape = Record
)) +
geom_jitter(size = 3,
alpha = 0.7,
width = 0.2) +
facet_grid(Record ~ Lek,
scales = "fixed",
labeller = labeller(Record = Record.labs)) +
theme_light(base_size = 24) +
labs(y = "Differencce in marginal likelihood \nbetween clock models (relaxed - fixed)", colour = "Alignment", x = "Historical records per song") +
scale_shape_discrete(guide = FALSE) +
scale_colour_manual(
values = wes_palette("IsleofDogs1"),
labels = c("MAFFT-agnostic", "MAFFT-optimal", "PRANK")
) +
scale_x_discrete(labels = c("all", "earliest")) +
theme(axis.text.x = element_text(angle = 35, hjust = 1))
ML.plot
R session information
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=pt_PT.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_PT.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_PT.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] kableExtra_1.3.1 tidyr_1.1.2 wesanderson_0.3.6 viridis_0.5.1
## [5] viridisLite_0.3.0 rwty_1.0.2 ggplot2_3.3.2 ape_5.4-1
## [9] stringr_1.4.0 DT_0.16 coda_0.19-4 pbapply_1.4-3
##
## loaded via a namespace (and not attached):
## [1] phangorn_2.5.5 tidyselect_1.1.0 xfun_0.20 purrr_0.3.4
## [5] reshape2_1.4.4 lattice_0.20-41 colorspace_1.4-1 vctrs_0.3.6
## [9] generics_0.1.0 htmltools_0.5.0 yaml_2.2.1 rlang_0.4.10
## [13] pillar_1.4.6 glue_1.4.2 withr_2.3.0 RColorBrewer_1.1-2
## [17] lifecycle_0.2.0 plyr_1.8.6 munsell_0.5.0 gtable_0.3.0
## [21] rvest_0.3.6 htmlwidgets_1.5.2 evaluate_0.14 labeling_0.3
## [25] knitr_1.30 GGally_2.1.0 crosstalk_1.1.0.1 highr_0.8
## [29] Rcpp_1.0.5 scales_1.1.1 jsonlite_1.7.1 webshot_0.5.2
## [33] farver_2.0.3 gridExtra_2.3 fastmatch_1.1-0 digest_0.6.27
## [37] stringi_1.5.3 dplyr_1.0.2 grid_4.0.3 quadprog_1.5-8
## [41] tools_4.0.3 magrittr_2.0.1 tibble_3.0.4 ggdendro_0.1.22
## [45] crayon_1.3.4 pkgconfig_2.0.3 ellipsis_0.3.1 MASS_7.3-53
## [49] Matrix_1.2-18 xml2_1.3.2 httr_1.4.2 rstudioapi_0.11
## [53] rmarkdown_2.4 reshape_0.8.8 R6_2.5.0 igraph_1.2.6
## [57] nlme_3.1-149 compiler_4.0.3