---
title: Cultural evolution analysis
subtitle: LBH song type prevalence and degradation
author: <a href="https://marce10.github.io/">Marcelo Araya-Salas</a>
date: "`r Sys.Date()`"
toc: true
toc-depth: 2
toc-location: left
number-sections: true
highlight-style: pygments
format:
html:
df-print: kable
code-fold: true
code-tools: true
css: qmd.css
editor_options:
chunk_output_type: console
---
<!-- this code add line numbers to code blocks -->
<!-- only works when code folding is not used in yaml (code_folding: show) -->
```{=html}
<style>
body
{ counter-reset: source-line 0; }
pre.numberSource code
{ counter-reset: none; }
</style>
```
```{r set root directory, echo = FALSE}
knitr::opts_knit$set(root.dir = "..")
```
```{r add link to github repo, echo = FALSE, results='asis'}
# print link to github repo if any
if (file.exists("./.git/config")){
config <- readLines("./.git/config")
url <- grep("url", config, value = TRUE)
url <- gsub("\\turl = |.git$", "", url)
cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
}
```
```{r setup style, echo = FALSE, message = FALSE, warning=FALSE}
# options to customize chunk outputs
knitr::opts_chunk$set(
class.source = "numberLines lineAnchors", # for code line numbers
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE,
warning = FALSE
)
```
<!-- skyblue box -->
::: {.alert .alert-info}
# Purpose
- Depict the effet of cultural evolution on song structure variation in long-billed hermits
:::
# Load packages
```{r load packages, eval = TRUE}
# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code
# install/ load packages
sketchy::load_packages(
packages = c(
"knitr",
"formatR",
"rprojroot",
github = "maRce10/warbleR",
github = "maRce10/baRulho",
"readxl",
"Rraven",
"ggplot2",
"brms",
"viridis",
"corrplot",
"DT",
"ape",
"phangorn",
"ggtree",
"phytools",
"ratematrix",
"geomorph",
"evolvability",
"geiger",
"ggdist",
"gghalves"
)
)
```
# Extract SUR tree
```{r}
trees <- read.tree ("/home/m/Dropbox/Projects/lbh_cultural_evolution_revBayes/output/most_recent_revbayes_models/SUR_prank_old_early_global_219792.trees" )
# mcct <- mcc(x = tre)
tree_sty <- unique (unlist (lapply (trees, "[" , "tip.label" )))
ntips_trees <- sapply (trees, Ntip)
tree <- trees[[which.max (ntips_trees)]]
# #Highlight and label clade
# ggtree(tree, layout = "fan", size = 2) +
# geom_nodelab(col= "red", hjust = 2) +
# geom_tiplab(size=3, offset=0.1)
sur_dat <- read.csv ("./data/raw/allsongsSUR.csv" )
sur_dat <- sur_dat[grep ("marker|measu" , sur_dat$ song.type, invert = TRUE ),]
sur_dat$ song.type.year <- paste (sur_dat$ song.type, sur_dat$ year, sep = "-" )
# unique(sur_dat$song.type.year)
sur_dat <- sur_dat[sur_dat$ song.type.year %in% tree_sty, ]
sty <- unique (sur_dat$ song.type.year)
trees <- lapply (trees, function (x) drop.tip (x, tip = setdiff (x$ tip.label, sty)))
# unique(unlist(lapply(trees, "[", "tip.label")))
ntips_trees <- sapply (trees, Ntip)
tree <- trees[[which.max (ntips_trees)]]
ggtree (tree, layout = "fan" , size = 2 ) +
geom_nodelab (col= "red" , hjust = 2 ) +
geom_tiplab (size= 3 , offset= 0.1 )
ultra_tree <- ape:: chronoMPL (tree)
ggtree (ultra_tree, layout = "fan" , size = 2 , options (ignore.negative.edge= TRUE )) +
geom_nodelab (col= "red" , hjust = 2 ) +
geom_tiplab (size= 3 , offset= 0.1 )
```
# Measure acoustic structure
```{r}
est <- readRDS ("./data/raw/extended_selection_table_all_songtype_high_snr.RDS" )
est$ song.type.year <- paste (est$ lek.song.type, est$ year, sep = "-" )
est <- est[est$ song.type.year %in% sty,]
table (table (est$ song.type.year))
spec_features <- spectro_analysis (est, wl = 200 , ovlp = 90 )
spec_features$ song.type.year <- est$ song.type.year
spec_features$ org.sound.files <- est$ sound.files
spec_features$ SNR <- est$ SNR
# read fundamental freq contour data
# fund_contours <- read.csv("./data/raw/seltailor_output.csv")
# cs <- check_sels(fund_contours, fix.selec = T, path = "./data/raw")
# fund_contours <- fund_contours[cs$check.res == "OK",]
#
#
# contour_id <- imp_raven(path = "./data/raw/", files = "44kKz_songtype_segment_analysis_sep_2019.txt", warbler.format = TRUE, all.data = TRUE)
#
# contour_id$song.type.year <- paste(contour_id$lek.song.type, contour_id$year, sep = "-")
#
# contour_id$song.type.year.sfs <- paste(contour_id$lek.song.type, contour_id$year, contour_id$sf.selec, sep = "-")
#
# all(table(contour_id$song.type.year.sfs) == 20)
#
# spid <- song_param(contour_id[, c(1:6, ncol(contour_id))], song_colm = "song.type.year.sfs")
#
# spid$song.type.year <- sapply(spid$song.type.year.sfs, function(x) contour_id$song.type.year[contour_id$song.type.year.sfs == x][1])
#
# colms <- c("sound.files", "start", "end", "selec", "song.type.year.sfs", "song.type.year")
# fund_contours$song.type.year <- sapply(seq_len(nrow(fund_contours)), function(x){
#
# Y <- fund_contours[x, c("sound.files", "start", "end", "selec")]
# Z <- spid[, c("sound.files", "start", "end", "selec", "song.type.year.sfs", "song.type.year")]
# Y$song.type.year.sfs <- NA
# Y$song.type.year.sfs <- as.character(Y$song.type.year.sfs)
# Y$song.type.year <- NA
# Y$song.type.year <- as.character(Y$song.type.year)
# Y$source <- "contours"
# Z$source <- "IDs"
#
# X <- rbind(Y, Z)
#
# ovlp <- overlapping_sels(X, indx.row = TRUE, pb = FALSE, verbose = FALSE)
#
# sty <- ovlp$song.type.year[!is.na(ovlp$song.type.year.sfs) & !is.na(ovlp$indx.row)]
#
# return(sty)
# })
#
# spec_features$song.type <- substr(spec_features$song.type.year, 0, 6)
# fund_contours$song.type <- substr(fund_contours$song.type.year, 0, 6)
# spec_features$meanfun <- sapply(spec_features$song.type, function(x){
#
# X <- fund_contours[fund_contours$song.type == x, ]
#
# meanfun <- mean(unlist(X[,grep("ff", names(X))]))
# return(meanfun)
# })
#
# spec_features$minfun <- sapply(spec_features$song.type, function(x){
#
# X <- fund_contours[fund_contours$song.type == x, ]
#
# minfun <- min(unlist(X[,grep("ff", names(X))]))
#
# return(minfun)
# })
#
```
# Phylogenetic analyses
## Phylogenetic signal
```{r, fig.height=12, fig.width=12}
features <- c("meandom", "mindom", "maxdom", "dfrange", "sp.ent", "modindx", "duration", "skew", "kurt", "time.ent", "time.IQR", "meanpeakf")
spec_features <- spec_features[, c("sound.files", "selec", "song.type.year", "org.sound.files", "SNR", features)]
mean_spec_features <- aggregate(x = . ~ song.type.year, spec_features[, c("song.type.year", features)], mean)
# add standard error
se_spec_features <- aggregate(x = . ~ song.type.year, spec_features[, c("song.type.year", features)], function(x) sd(x) / sqrt(length(x)))
for(i in features)
se_spec_features[is.na(se_spec_features[, i]), i] <- mean(se_spec_features[, i], na.rm = TRUE)
out <- lapply(features, function(x){
trait <- mean_spec_features[, x]
# trait_se <- se_spec_features[, x]
# names(trait_se) <-
names(trait) <- mean_spec_features$song.type.year
ps <- phylosig(tree = tree, x = trait, method = "lambda", test = TRUE)
out_df <- data.frame(feature = x, as.data.frame(ps[1:4]))
return(out_df)
})
phylosig_df <- do.call(rbind, out)
phylosig_df$lambda <- round(phylosig_df$lambda, 2)
phylosig_df[order(phylosig_df$lambda), ]
```
## Phenograms by acoustic feature
```{r}
sub_features <- c (Modulation = "modindx" , ` Max dominant ` = "maxdom" , ` Frequency range ` = "dfrange" , ` Mean frequency ` = "meandom" , ` Time IQR ` = "time.IQR" ,Kurtosis = "kurt" )
cols <- viridis (10 )[c (3 , 7 )]
par (mfcol = c (3 , 2 ))
for (i in seq_along (sub_features)){
feat <- mean_spec_features[, sub_features[i]]
feat <- scale (feat)
names (feat) <- mean_spec_features$ song.type.year
phenogram (tree, x = feat, spread.labels = F, offset = 0 , ftype = "off" , colors = ifelse (i < 4 , cols[1 ], cols[2 ]), ylim = c (- 2.4 , 2.4 ))
title (names (sub_features)[i], cex.main = 2 )
}
```
## Multivariate phylogenetic signal
```{r}
library (mvMORPH)
mean_spec_features_matrix <- as.matrix (mean_spec_features[, 2 : ncol (mean_spec_features)])
rownames (mean_spec_features_matrix) <- mean_spec_features$ song.type.year
dat <- list (Y = mean_spec_features_matrix)
signal <- mvgls (Y~ 1 , data= dat, tree, model= "lambda" , penalty= "RidgeArch" )
summary (signal)
```
## Modes of evolution
```{r, eval = FALSE}
models <- c(brownian_motion = "BM",Ornstein_Uhlenbeck = "OU", early_burst = "EB", rate_trend = "rate_trend", puntctuational = "kappa", time_dependent = "delta", white = "white")
out <- warbleR:::pblapply_wrblr_int(features, cl = 10, function(x){
trait <- mean_spec_features[, x]
# trait_se <- se_spec_features[, x]
names(trait) <- mean_spec_features$song.type.year
mods <- lapply(models, function(y){
fc <- fitContinuous(phy = tree, dat = trait, model = y)
out <- data.frame(feature = x, model = y, AICc = fc$opt$aicc, alpha = ifelse(length(fc$opt$alpha) > 0, fc$opt$alpha, NA))
return(out)
})
out_df <- do.call(rbind, mods)
return(out_df)
})
evo_mods <- do.call(rbind, out)
write.csv(evo_mods, "./data/processed/evolutionary_models_by_feature.csv", row.names = FALSE)
```
```{r, fig.width=12, fig.height=12}
evo_mods <- read.csv("./data/processed/evolutionary_models_by_feature.csv")
ou_evo <- evo_mods[evo_mods$model == "OU", ]
out <- warbleR:::pblapply_wrblr_int(1:nrow(ou_evo), cl = 10, function(x){
trait <- mean_spec_features[, ou_evo$feature[x]]
# trait_se <- se_spec_features[, x]
names(trait) <- mean_spec_features$song.type.year
cou <- compar.ou(trait, tree, alpha = ou_evo$alpha[x])
out_df <- data.frame(feature = ou_evo$feature[x], ou_opt = cou$para[2, 1])
return(out_df)
})
ou_opts <- do.call(rbind, out)
spec_features$year <- as.numeric(substr(spec_features$song.type.year,8, 11))
agg_feat_by_year <- aggregate(. ~ year, spec_features[, c("year", features)], mean)
stck_feat_by_year <- stack(agg_feat_by_year)
stck_feat_by_year <- stck_feat_by_year[stck_feat_by_year$ind != "year", ]
stck_feat_by_year$year <- agg_feat_by_year$year
ggplot(stck_feat_by_year, aes(x = year, y = values)) +
geom_line() +
theme_classic(base_size = 20) +
facet_wrap(~ ind, scales = "free_y")
```
```{r}
evo_mods <- read.csv ("./data/processed/evolutionary_models_by_feature.csv" )
evo_mods <- evo_mods[order (evo_mods$ feature, evo_mods$ AICc), ]
out <- lapply (unique (evo_mods$ feature), function (x) {
X <- evo_mods[evo_mods$ feature == x, ]
X <- X[X$ AICc <= min (X$ AICc) + 4 , ]
return (X)
})
do.call (rbind, out)
```
```{r plot aic by model}
out <- lapply(unique(evo_mods$feature), function(x) {
X <- evo_mods[evo_mods$feature == x, ]
X$delta_aicc <- X$AICc - min(X$AICc)
X$delta_aicc <- X$delta_aicc
return(X)
})
evo_delta_aic <- do.call(rbind, out)
restricted <- c("meandom", "mindom", "maxdom", "dfrange", "sp.ent", "meanpeakf")
evo_delta_aic$restriction <- ifelse(evo_delta_aic$feature %in% restricted, "restricted", "unrestricted")
agg_delta_aicc <- aggregate(delta_aicc ~ model + restriction, evo_delta_aic, mean)
# agg_delta_aicc$delta_aicc <- agg_delta_aicc$delta_aicc * -1
agg_delta_aicc <- agg_delta_aicc[agg_delta_aicc$model != "white", ]
ggplot(agg_delta_aicc, aes(x = model, y = delta_aicc, fill = restriction)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_viridis_d(begin = 0.2, end = 0.8, alpha = 0.6) +
theme_classic(base_size = 20) +
labs(x = "Evolutionary model", y = "Delta AICc", fill = "Morphological\nrestriction")
```
## Coevolution
```{r, eval = TRUE}
mean_spec_features_matrix <- as.matrix(mean_spec_features[, 2:ncol(mean_spec_features)])
rownames(mean_spec_features_matrix) <- mean_spec_features$song.type.year
```
```{r, eval = FALSE}
## Run multiple MCMC chains.
handle_1 <- ratematrixMCMC(data=mean_spec_features_matrix, phy = tree, gen=30000, dir=tempdir())
handle_2 <- ratematrixMCMC(data=mean_spec_features_matrix, phy = tree, gen=30000, dir=tempdir())
## Read both chains:
posterior_1 <- readMCMC(handle_1, burn = 0.2, thin = 1)
posterior_2 <- readMCMC(handle_2, burn = 0.2, thin = 1)
## Check for convergence
# checkConvergence(posterior_1, posterior_2)
## Merge all posteriors in the list.
merged.posterior <- mergePosterior(posterior_1, posterior_2)
# length(merged.posterior$matrix)
# str(merged.posterior$matrix[[1]])
mean_cov <- merged.posterior$matrix[[1]]
for (i in 2:length(merged.posterior$matrix))
mean_cov <- mean_cov + merged.posterior$matrix[[i]]
mean_cov <- mean_cov / length(merged.posterior$matrix)
# range(mean_cov)
mean_corr <- cov2cor(mean_cov)
colnames(mean_corr) <- paste(names(mean_spec_features)[-1], c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1))
cols_corr <- colorRampPalette(c(viridis(3, direction = 1, begin = 0.2,
end = 0.5), "#BEBEBE1A", "white", "#BEBEBE1A", viridis(3, direction = 1,
begin = 0.7, end = 0.9)))(30)
cp <- corrplot.mixed(mean_corr, tl.cex = 0.7, upper.col = cols_corr,
lower.col = cols_corr, order = "hclust", lower = "number", upper = "ellipse",
tl.col = "black")
## Plot results:
# plotRatematrix(merged.posterior)
```
## Compare evolutionary rates of acoustic features
```{r}
feat_grp <- c (1 , 1 , 1 , 1 , 1 , 0 , 0 , 0 , 0 , 0 , 0 , 1 )
names (feat_grp) <- colnames (mean_spec_features_matrix)
EMR2 <- compare.multi.evol.rates (A = mean_spec_features_matrix, gp= feat_grp,
Subset= TRUE , phy= tree,iter= 10000 , print.progress = FALSE )
EMR2
```
## Jacknife evolutionary rate intervals
```{r, eval = FALSE}
# get confidence intervals
out <- warbleR:::pblapply_wrblr_int(1:50, cl = 20, function(x){
tips <- sample(tree$tip.label, 4)
sub_tree <- drop.tip(tree, tip = tips)
sub_mat <- mean_spec_features_matrix[!row.names(mean_spec_features_matrix) %in% tips, ]
jck_EMR <- compare.multi.evol.rates(A = sub_mat, gp= feat_grp,
Subset=TRUE, phy= sub_tree,iter=10000, print.progress = FALSE)
return(jck_EMR$sigma.d.gp)
})
jck_evo_rates <- as.data.frame(do.call(rbind, out))
names(jck_evo_rates) <- c("Unrestricted", "Restricted")
jck_evo_rates <- stack(jck_evo_rates)
write.csv(jck_evo_rates, "./data/processed/evolutionary_rate_jacknife.csv", row.names = FALSE)
```
```{r}
jck_evo_rates <- read.csv ("./data/processed/evolutionary_rate_jacknife.csv" )
cols <- viridis (10 )
# raincoud plot:
fill_color <- adjustcolor (cols[[6 ]], 0.4 )
ggplot (jck_evo_rates, aes (y = values, x = ind)) +
# add half-violin from {ggdist} package
stat_halfeye (
fill = fill_color,
alpha = 0.5 ,
# custom bandwidth
adjust = .5 ,
# adjust height
width = .6 ,
.width = 0 ,
# move geom to the cright
justification = - .2 ,
point_colour = NA
) +
geom_boxplot (fill = fill_color,
width = .15 ,
# remove outliers
outlier.shape = NA # `outlier.shape = NA` works as well
) +
# add justified jitter from the {gghalves} package
geom_half_point (
color = fill_color,
# draw jitter on the left
side = "l" ,
# control range of jitter
range_scale = .4 ,
# add some transparency
alpha = .5 ,
) +
# ylim(c(-30, 310)) +
labs (x = "Acoustic features" , y = "Evolutionary rate" ) + theme (axis.text.x = element_text (angle = 15 , hjust = 1 )) +
theme_classic (base_size = 20 )
```
## Evolutionary integration
```{r, eval = TRUE}
feat_grp <- c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1)
IT<- phylo.integration(A = mean_spec_features_matrix, partition.gp = feat_grp, phy= tree, iter=10000, print.progress = FALSE)
summary(IT) # Test summary
# plot(IT) # PLS plot
```
## Degradation and evolutionary rate of acoustic features
### PCA
```{r}
#Taking only numeric data for the pca
degrad_df <- read.csv ("./data/processed/degradation_measures_SUR_ALL.csv" )
degrad_params <- c ("signal.to.noise.ratio" , "tail.to.signal.ratio" , "excess.attenuation" , "envelope.correlation" , "blur.ratio" , "cross.correlation" , "spectrum.blur.ratio" , "spectrum.correlation" )
degrad_df <- degrad_df[complete.cases (degrad_df[, degrad_params]), ]
sapply (degrad_df[,degrad_params], function (x) any (is.infinite (x)))
degrad_df <- degrad_df[! is.infinite (degrad_df$ excess.attenuation), ]
pca_degrad <- prcomp (degrad_df[ , degrad_params], scale = T)
pca_var <- round (summary (pca_degrad)$ importance[2 , ] * 100 )
degrad_df$ pc1 <- pca_degrad$ x[ , 1 ]
degrad_params <- c (degrad_params, "pc1" )
# plot rotation values by PC
pca_rot <- as.data.frame (pca_degrad$ rotation[, 1 : 4 ])
pca_rot_stck <- stack (pca_rot)
pca_rot_stck$ variable <- rownames (pca_rot)
pca_rot_stck$ values[pca_rot_stck$ ind == "PC1" ] <- pca_rot_stck$ values[pca_rot_stck$ ind ==
"PC1" ]
pca_rot_stck$ Sign <- ifelse (pca_rot_stck$ values > 0 , "Positive" , "Negative" )
pca_rot_stck$ rotation <- abs (pca_rot_stck$ values)
pca_rot_stck$ ind_var <- paste0 (pca_rot_stck$ ind, " (" , sapply (pca_rot_stck$ ind, function (x) pca_var[names (pca_var)== x]), "%)" )
ggplot (pca_rot_stck, aes (x = variable, y = rotation, fill = Sign)) +
geom_col () + coord_flip () + scale_fill_viridis_d (alpha = 0.7 ,
begin = 0.2 , end = 0.8 ) + facet_wrap (~ ind_var) + theme_classic ()
```
## Estimate evolutinary rate association to degradation
```{r, eval = FALSE}
mean_spec_features$pc1 <-
sapply(mean_spec_features$song.type.year, function(x)
mean(degrad_df$pc1[degrad_df$song.type.year == x]))
# Standardize the tree to unit length
ultra_tree_standardized <- multi2di(ultra_tree)
ultra_tree_standardized <-
compute.brlen(ultra_tree_standardized, method = "cladewise")
mods <- warbleR:::pblapply_wrblr_int(2:ncol(mean_spec_features), cl = 10, function(x) {
# brownian motion
bm_mod <- rate_gls(
x = mean_spec_features$pc1,
y = mean_spec_features[, x],
species = mean_spec_features$song.type.year,
ultra_tree_standardized,
model = "predictor_BM",
silent = TRUE,
maxiter = 1000
)
bootout_bm <- rate_gls_boot(bm_mod, n = 100, silent = TRUE, maxiter = 1000)
bootout_bm <- cbind(model = "BM", feature = names(mean_spec_features)[x], parameter = rownames(bootout_bm$summary), as.data.frame(bootout_bm$summary))
# geometric brownian motion
gbm_mod <- rate_gls(
x = mean_spec_features$pc1 + 1,
y = mean_spec_features[, x],
species = mean_spec_features$song.type.year,
ultra_tree_standardized,
model = "predictor_gBM",
silent = TRUE,
maxiter = 1000
)
bootout_gbm <- rate_gls_boot(gbm_mod, n = 100, silent = TRUE, maxiter = 1000)
bootout_gbm <- cbind(model = "gBM", feature = names(mean_spec_features)[x], parameter = rownames(bootout_gbm$summary), as.data.frame(bootout_gbm$summary))
re_mod <- rate_gls(
x = mean_spec_features$pc1,
y = mean_spec_features[, x],
species = mean_spec_features$song.type.year,
ultra_tree_standardized,
model = "recent_evol",
silent = TRUE,
maxiter = 1000
)
bootout_re <- rate_gls_boot(re_mod, n = 100, silent = TRUE, maxiter = 1000)
bootout_re <- cbind(model = "recent_evol", feature = names(mean_spec_features)[x], parameter = rownames(bootout_re$summary), as.data.frame(bootout_re$summary))
bootout <- rbind(bootout_bm, bootout_gbm, bootout_re)
return(bootout)
})
gls_res <- do.call(rbind, mods)
write.csv(gls_res, "./data/processed/evolutionary_rate_by_degradation.csv", row.names = FALSE)
```
```{r, eval = TRUE}
gls_res <- read.csv("./data/processed/evolutionary_rate_by_degradation.csv")
gls_res <- gls_res[gls_res$parameter == "b" | gls_res$parameter == "Rsquared", c("model", "feature", "parameter", "Estimate", "SE", "X2.5.", "X97.5.")]
gls_res_kbl <- kableExtra::kbl(gls_res, row.names = FALSE,
escape = FALSE, format = "html", digits = 3)
gls_res_kbl <- kableExtra::row_spec(kable_input =
gls_res_kbl, row = which(gls_res$`97.5%` * gls_res$`2.5%` > 0 & gls_res$parameter == "b"), background = grDevices::adjustcolor('#6DCD59FF', alpha.f = 0.3))
gls_res_kbl
```
<!-- light green box -->
::: {.alert .alert-success}
# Takeaways {#takeaways .unnumbered .unlisted}
:::
<!-- '---' adds a gray vertical line -->
------------------------------------------------------------------------
<!-- add packages used, system details and versions -->
<font size="4">Session information</font>
```{r session info, echo=F}
sessionInfo()
```