# get phylogenetic signal froma brms model objectphylo_sig_brms <-function(model) { hyp <-"sd_phylo__Intercept^2 / (sd_phylo__Intercept^2 + sigma^2) = 0" hyp_test <-hypothesis(model, hyp, class =NULL)return(hyp_test$hypothesis[1, c("Estimate", "CI.Lower", "CI.Upper", "Post.Prob")])}# match data and tree tip labelsmatch_data_tree <-function(data, tree, index) {if (!is.null(dim(data))){ data <- data[index %in% tree$tip.label, ]rownames(data) <- index[index %in% tree$tip.label] } else { data <- data[index %in% tree$tip.label]names(data) <- index[index %in% tree$tip.label] }# prun tree tree <-drop.tip(tree, tip = tree$tip.label[!tree$tip.label %in% index])# return out <-list(data = data, tree = tree)return(out)}# compare evolutionary rates between featuresCompareRates.multTrait <-function(phy,x,TraitCov=T,ms.err=NULL,ms.cov=NULL){#Compares LLik of R-matrix vs. LLik of R-matrix with constrained diagonal#TraitCov = TRUE assumes covariation among traits (default)#ms.err allows the incorporation of within-species measurement error. Input is a matrix of species (rows) by within-species #variation for each trait (columns).#ms.cov allows the incorporation of within-species covariation between traits. Input is a matrix of species (rows) by within-species #covariation for each pair of traits (columns). These must be provided in a specific order, beginning with covariation between trait 1 #and the rest, then trait 2 and the rest, etc. For instance, for 4 traits, the columns are: cov_12, cov_13, cov_14, cov_23, cov_24 cov_34. #Some calculations adapted from 'evol.vcv' in phytools (Revell, 2012) x<-as.matrix(x) N<-nrow(x) p<-ncol(x) C<-vcv.phylo(phy) C<-C[rownames(x),rownames(x)]if (is.matrix(ms.err)){ ms.err<-as.matrix(ms.err[rownames(x),])}if (is.matrix(ms.cov)){ ms.cov<-as.matrix(ms.cov[rownames(x),])}#Cholesky decomposition function for diagonal-constrained VCV build.chol<-function(b){ c.mat<-matrix(0,nrow=p,ncol=p) c.mat[lower.tri(c.mat)] <- b[-1] c.mat[p,p]<-exp(b[1]) c.mat[1,1]<-sqrt(sum((c.mat[p,])^2))if(p>2){for (i in2:(p-1)){ c.mat[i,i]<-ifelse( (c.mat[1,1]^2-sum((c.mat[i,])^2) )>0,sqrt(c.mat[1,1]^2-sum((c.mat[i,])^2)), 0) }}return(c.mat) }#Fit Rate matrix for all traits: follows code of L. Revell (evol.vcv) a.obs<-colSums(solve(C))%*%x/sum(solve(C)) D<-matrix(0,N*p,p)for(i in1:(N*p)) for(j in1:p) if((j-1)*N<i&&i<=j*N) D[i,j]=1.0 y<-as.matrix(as.vector(x)) one<-matrix(1,N,1) R.obs<-t(x-one%*%a.obs)%*%solve(C)%*%(x-one%*%a.obs)/Nif (TraitCov==F) #for TraitCov = F { R.obs<-diag(diag(R.obs),p) }#Calculate observed likelihood with or without measurement error LLik.obs<-ifelse(is.matrix(ms.err)==TRUE, -t(y-D%*%t(a.obs))%*%ginv((kronecker(R.obs,C)+diag(as.vector(ms.err))))%*%(y-D%*%t(a.obs))/2-N*p*log(2*pi)/2-determinant((kronecker(R.obs,C)+diag(as.vector(ms.err))))$modulus[1]/2 , -t(y-D%*%t(a.obs))%*%ginv(kronecker(R.obs,C))%*%(y-D%*%t(a.obs))/2-N*p*log(2*pi)/2-determinant(kronecker(R.obs,C))$modulus[1]/2 ) #Fit common rate for all traits; search over parameter space sigma.mn<-mean(diag(R.obs)) #reasonable start value for diagonal#Within-species measurement error matrixif(is.matrix(ms.err)){m.e<-diag(as.vector(ms.err))}#Within-species measurement error and trait covariation matrixif (is.matrix(ms.err) &&is.matrix(ms.cov)){ within.spp<-cbind(ms.err,ms.cov) rc.label<-NULLfor (i in1:p){ rc.label<-rbind(rc.label,c(i,i)) }for (i in1:p){for (j in2:p){ if (i!=j && i<j){rc.label<-rbind(rc.label,c(i,j))} }} m.e<-NULLfor (i in1:p){ tmp<-NULLfor (j in1:p){for (k in1:nrow(rc.label)){if(setequal(c(i,j),rc.label[k,])==T) {tmp<-cbind(tmp,diag(within.spp[,k]))} } } m.e<-rbind(m.e,tmp) } }#likelihood optimizer for no trait covariation lik.covF<-function(sigma){ R<-matrix(0,nrow=p,ncol=p)diag(R)<-sigma LLik<-ifelse(is.matrix(ms.err)==TRUE, -t(y-D%*%t(a.obs))%*%ginv((kronecker(R,C)+ m.e))%*%(y-D%*%t(a.obs))/2-N*p*log(2*pi)/2-determinant((kronecker(R,C)+ m.e))$modulus[1]/2 , -t(y-D%*%t(a.obs))%*%ginv(kronecker(R,C))%*%(y-D%*%t(a.obs))/2-N*p*log(2*pi)/2-determinant(kronecker(R,C))$modulus[1]/2 ) if (LLik ==-Inf) { LLikk <--1e+10 }return(-LLik) }#likelihood optimizer with trait covariation lik.covT<-function(sigma){ low.chol<-build.chol(sigma) R<-low.chol%*%t(low.chol) LLik<-ifelse(is.matrix(ms.err)==TRUE, -t(y-D%*%t(a.obs))%*%ginv((kronecker(R,C)+ m.e))%*%(y-D%*%t(a.obs))/2-N*p*log(2*pi)/2-determinant((kronecker(R,C)+ m.e))$modulus[1]/2 , -t(y-D%*%t(a.obs))%*%ginv(kronecker(R,C))%*%(y-D%*%t(a.obs))/2-N*p*log(2*pi)/2-determinant(kronecker(R,C))$modulus[1]/2 ) if (LLik ==-Inf) {LLikk <--1e+10 }return(-LLik) }##Optimize for no trait covariation (methods: "Nelder-Mead" or "SANN")if (TraitCov==F) { model1<-optim(sigma.mn,fn=lik.covF,method="L-BFGS-B",lower=c(0.0))}##Optimize with trait covariation R.offd<-rep(0,(p*(p-1)/2))if (TraitCov==T) {model1<-optim(par=c(sigma.mn,R.offd),fn=lik.covT,method="L-BFGS-B")}#### Assemble R.constrainedif (TraitCov==F){R.constr<-diag(model1$par,p)}if (TraitCov==T){ chol.mat<-build.chol(model1$par) R.constr<-chol.mat%*%t(chol.mat)}if(model1$convergence==0) message<-"Optimization has converged."else message<-"Optim may not have converged. Consider changing start value or lower/upper limits." LRT<- (-2*((-model1$value-LLik.obs))) LRT.prob<-pchisq(LRT, (p-1),lower.tail=FALSE) #df = Nvar-1 AIC.obs<--2*LLik.obs+2*p+2*p #(2p twice: 1x for rates, 1x for anc. states) AIC.common<--2*(-model1$value)+2+2*p #(2*1: for 1 rate 2p for anc. states)return(list(Robs=R.obs, Rconstrained=R.constr,Lobs=LLik.obs,Lconstrained=(-model1$value),LRTest=LRT,Prob=LRT.prob,AICc.obs=AIC.obs,AICc.constrained=AIC.common,optimmessage=message)) }cols_corr <-colorRampPalette(c(mako(3, direction =1, begin =0.2,end =0.5), "#BEBEBE1A", "white", "#BEBEBE1A",mako(3, direction =1,begin =0.7, end =0.9)))(30)
5 Plot tree
Code
tree <-readRDS("./data/processed/single_combined_map_tree_prank_all_uexp_old.RDS")# #Highlight and label cladeult_tree <-chronopl(tree, lambda =0.5)groupInfo <-sapply(strsplit(ult_tree$tip.label, split ="-"), "[[",1)g <-split(ult_tree$tip.label, groupInfo)p <-ggtree(ult_tree, layout ="fan")clades <-sapply(g, function(n) MRCA(p, n))p <-groupClade(p, clades, group_name ="subtree") +aes(color = subtree) +scale_color_viridis_d(option ="G", begin =0.1, end =0.9, guide ="none")for (i inseq_along(clades)) p <- p +geom_cladelabel(clades[i], names(clades)[i],barsize =2, offset.text =0.2, fontsize =3, hjust =0.5, color =mako(5,begin =0.1, end =0.9)[i])p
6 Select sound data
Code
# edited file openest <-readRDS("~/Dropbox/Projects/lbh_songtype_prevalence_and_degradation/data/raw/EST all sels aug 2019.RDS")est$lek.song.type.id <-paste(est$lek.song.type, seq_len(nrow(est)),sep ="-")est$lek.songtype.year <-paste(est$lek.song.type, est$year, sep ="-")# remove weird selectionest <- est[grep("310.CCL.2013.3.3.7.38|43.CCL.2011.6.16.8.19", est$sound.files,invert =TRUE), ]# choose from those with SNR >3 best examples from different# years when possibleout <-lapply(tree_sty, function(x) { Y <-as.data.frame(est[est$lek.songtype.year == x, ])# year_buffer <- c(-1, 1, -2, 2) z <- 0 while(nrow(Y) == 0 &# z < 4){ z <- z + 1 year <- as.numeric(strsplit(x,# '-')[[1]][3]) e <- paste(paste(strsplit(x, '-')[[1]][-3],# collapse = '-'), year - year_buffer[z], sep = '-') Y <-# as.data.frame(est[est$lek.songtype.year == e, ]) }# Y$lek.songtype.year.target <- if (nrow(Y) > 0) x else# vector() sort by SNR Y <- Y[order(Y$SNR, decreasing =TRUE), ]# remove bad ones (below -40 SNR) Y <- Y[Y$SNR >-40, ]# keep highest 3 SNRif (nrow(Y) >4) Y <- Y[1:4, ]# if(nrow(Y) > 0) Y <- rename_est_waves(Y, new.selec =# 1:nrow(Y), new.sound.files = paste(x,# seq_len(length(attr(Y, 'wave.objects'))), sep = '_'))return(Y)})sub_dat <-do.call(rbind, out)sub_est <-fix_extended_selection_table(sub_dat, est)sub_est <-resample_est(sub_est, samp.rate =44.1, bit.depth =16,parallel =20)saveRDS(sub_est, "./data/raw/extended_selection_table_all_lek_songtype_year_high_snr.RDS")
# prune tree and datamatched <-match_data_tree(mean_spec_features_matrix, tree, index = mean_spec_features$song.type.year)signal <-mvgls(Y ~1, data =list(Y = matched$data), matched$tree,model ="lambda", penalty ="RidgeArch")summary(signal)
Call:
mvgls(formula = Y ~ 1, data = list(Y = matched$data), tree = matched$tree,
model = "lambda", penalty = "RidgeArch")
Generalized least squares fit by penalized REML
GIC logLik
-2438.203 1350.432
Parameter estimate(s):
lambda: 0.7416
Regularization parameter (gamma): 0
Covariance matrix of size: 15 by 15
for 130 observations
Coefficients (truncated):
meandom mindom maxdom dfrange sp.ent modindx duration meanpeakf
(Intercept) 4.309 2.282 7.157 4.875 0.9157 12.27 0.147 5.704
skew kurt
(Intercept) -0.1068 14.85
Use "coef" to display all the coefficients
15.4 Modes of evolution
15.4.1 Single traits
Code
models <-c(brownian_motion ="BM", Ornstein_Uhlenbeck ="OU", early_burst ="EB",rate_trend ="rate_trend", puntctuational ="kappa", time_dependent ="delta",mean_trend ="mean_trend", white ="white")out <-pblapply(features, cl =10, function(x) {# prune tree and data matched <-match_data_tree(mean_spec_features[, x], tree, index = mean_spec_features$song.type.year) mods <-lapply(models, function(y) { fc <-fitContinuous(phy = matched$tree, dat =scale(matched$data),model = y) out <-data.frame(feature = x, model = y, AICc = fc$opt$aicc,sigma2 = fc$opt$sigsq, alpha =ifelse(length(fc$opt$alpha) >0, fc$opt$alpha, NA))return(out) }) out_df <-do.call(rbind, mods) out_df$delta_AIC <- out_df$AICc -min(out_df$AICc)return(out_df)})evo_mods <-do.call(rbind, out)rownames(evo_mods) <-seq_len(nrow(evo_mods))write.csv(evo_mods, "./data/processed/evolutionary_models_by_feature_consensus_tree.csv",row.names =FALSE)
Code
evo_mods <-read.csv("./data/processed/evolutionary_models_by_feature_consensus_tree.csv")restricted <-c("meandom", "mindom", "maxdom", "dfrange", "sp.ent","meanpeakf", "duration")evo_mods$restriction <-ifelse(evo_mods$feature %in% restricted, "restricted","unrestricted")# order features by restriction (first restricted second# unrestricted)evo_mods$feature <-factor(evo_mods$feature, levels =c(restricted,setdiff(unique(evo_mods$feature), restricted)))# flip y axisggplot()
Code
## ggplot showing the delta AICc by model and feature with## facets get the mean with apoint and the range + - the sdggplot(evo_mods, aes(x = model, y = delta_AIC, color = restriction)) +# geom_errorbar(aes(ymin=neg_delta-sd, ymax=neg_delta+sd),# width=.3, linewidth = 2) +geom_point(size =4) +facet_wrap(~feature, scales ="free_y") +scale_color_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.6, guide ="none") +theme_classic(base_size =14) +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1)) +labs(x ="Evolutionary model", y ="Delta AICc", fill ="Evolutionary\nmodel") +scale_y_reverse()
Code
sub_evo_mods <- evo_mods[evo_mods$feature %in%c("duration", "dfrange","sp.ent", "meanpeakf", "modindx", "skew", "kurt", "time.ent"), ]sub_evo_mods <- sub_evo_mods[sub_evo_mods$model !="kappa", ]ggplot(sub_evo_mods, aes(x = model, y = delta_AIC, color = restriction)) +geom_point(size =4) +facet_wrap(~feature, ncol =4, scales ="free_y") +scale_color_viridis_d(option ="G", end =0.8, begin =0.2, alpha =0.7) +theme_classic(base_size =14) +theme(axis.text.x =element_text(angle =90,vjust =0.5, hjust =1)) +labs(x ="Evolutionary model", y ="Delta AICc",fill ="Evolutionary\nmodel", color ="") +scale_y_reverse()
Code
ggplot(sub_evo_mods, aes(x = model, y = delta_AIC, fill = feature,color = feature, shape = restriction)) +geom_jitter(size =2,width =0.1) +scale_shape_manual(values =c(21, 22)) +scale_fill_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.7) +scale_color_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.7) +theme_classic(base_size =20) +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1)) +labs(x ="Evolutionary model", y ="Delta AICc", fill ="", color ="",shape ="") +scale_y_reverse()
Code
ggplot(sub_evo_mods[sub_evo_mods$model %in%c("BM", "delta", "OU","rate_trend"), ], aes(x = model, y = delta_AIC, fill = feature,color = feature, shape = restriction)) +geom_jitter(size =2,width =0.1) +scale_shape_manual(values =c(21, 22)) +scale_fill_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.7) +scale_color_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.7) +theme_classic(base_size =20) +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1)) +labs(x ="Evolutionary model", y ="Delta AICc", fill ="", color ="",shape ="") +scale_y_reverse()
Code
ggplot(sub_evo_mods[sub_evo_mods$model %in%c("BM", "delta", "OU","rate_trend"), ], aes(x = model, y = delta_AIC, fill = feature,color = feature)) +geom_jitter(size =2, width =0.1) +scale_fill_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.7) +scale_color_viridis_d(option ="G",end =0.8, begin =0.2, alpha =0.7) +theme_classic(base_size =20) +theme(axis.text.x =element_text(angle =90, vjust =0.5, hjust =1)) +labs(x ="Evolutionary model", y ="Delta AICc", fill ="", color ="") +scale_y_reverse()
Code
### Multivariate trait evmatched <-match_data_tree(mean_spec_features[, features[features !="pc1.song"]], tree, index = mean_spec_features$song.type.year)combs <-combn(names(matched$data), 2)comp_rates_list <-lapply(seq_len(ncol(combs)), function(i) {# comp_rates_list <- lapply(c(1:3, 14), function(i) { W <- (matched$data[, combs[, i]]) x <-lapply(W, function(x) log(x +min(abs(x)) +1)) x <-do.call(cbind, x)rownames(x) <-rownames(W)print(i) cr <-try(CompareRates.multTrait(phy = matched$tree, x = x), silent =TRUE)if (!is(cr, "try-error")) { out <-data.frame(feat1 = combs[1, i], feat2 = combs[2, i],rate1 = cr$Robs[1, 1], rate2 = cr$Robs[2, 2], p = cr$Prob) } else { out <-data.frame(feat1 = combs[1, i], feat2 = combs[2, i],rate1 =NA, rate2 =NA, p =NA) }return(out)})comp_rates <-do.call(rbind, comp_rates_list)cr <-CompareRates.multTrait(phy = matched$tree, x =as.matrix(matched$data)[, ])mvBM(tree, data)mv_ou1 <-mvOU(tree = matched$tree, data =as.matrix(matched$data),model ="OU1")mv_oum <-mvOU(tree = matched$tree, data =as.matrix(matched$data),model ="OUM")mv_bm <-mvBM(tree = matched$tree, data =as.matrix(matched$data),model ="OUM")ou_tree <-transf.branch.lengths(phy = matched$tree, model ="OUrandomRoot",parameters =list(alpha =0.2073415))$tree# Simulated datasetset.seed(14)# Generating a random treetree2 <-pbtree(n =50)# Setting the regime states of tip speciessta <-as.vector(c(rep("Forest", 20), rep("Savannah", 30)))names(sta) <- tree$tip.label# Making the simmap tree with mapped statestree3 <-make.simmap(tree2, sta, model ="ER", nsim =1)# col<-c('blue','orange'); names(col)<-c('Forest','Savannah')# Plot of the phylogeny for illustration# plotSimmap(tree,col,fsize=0.6,node.numbers=FALSE,lwd=3,# pts=FALSE)# Simulate the traitsalpha <-matrix(c(2, 0.5, 0.5, 1), 2)sigma <-matrix(c(0.1, 0.05, 0.05, 0.1), 2)theta <-c(2, 3, 1, 1.3)data <-mvSIM(tree3, param =list(sigma = sigma, alpha = alpha, ntraits =2,theta = theta, names_traits =c("head.size", "mouth.size")), model ="OUM",nsim =1)eem_mv <-estimate.evolutionary.model(phyltree = tree2, mData = data)a <-OUwie(tree2, data, model ="OU1")tre2 =transf.branch.lengths(phy = tree2, model ="OUrandomRoot",parameters =list(alpha =0.2073415))cr <-CompareRates.multTrait(phy = tree2, x = data)cr2 <-CompareRates.multTrait(phy = tre2$tree, x = data)library(phytools)alpha <-0.5tree_ou <-ouTree(tree, alpha)plot(tree_ou)eem_mv <-estimate.evolutionary.model(phyltree = matched$tree, mData =as.matrix(matched$data[,1:2]))
16.2 Compare evolutionary rates of acoustic features
Code
# get confidence intervalsfeat_grp <- features_df$restrictionnames(feat_grp) <- features_df$featuresmatched <-match_data_tree(mean_spec_features_matrix[, colnames(mean_spec_features_matrix) !="pc1.song"], tree, index =rownames(mean_spec_features_matrix))jck_EMR <-compare.multi.evol.rates(A =scale(matched$data), gp = feat_grp,Subset =TRUE, phy = matched$tree, iter =10000, print.progress =FALSE)jck_EMR
Call:
Observed Rate Ratio: 1.19565
P-value: 0.0101
Effect Size: 2.48711
Based on 10001 random permutations
The rate for group unrestricted is 0.186899209203047
The rate for group restricted is 0.156315560419451
evo_rates <-read.csv("./data/processed/evolutionary_rates.csv")cols <-viridis(10)# raincoud plot:fill_color <-adjustcolor(cols[[6]], 0.4)ggplot(evo_rates, aes(y = values, x = ind)) +# add half-violin from {ggdist} packagestat_halfeye(fill = fill_color,alpha =0.5,# custom bandwidthadjust = .5,# adjust heightwidth = .6,.width =0,# move geom to the crightjustification =-.2,point_colour =NA ) +geom_boxplot(fill = fill_color,width = .15,# remove outliersoutlier.shape =NA# `outlier.shape = NA` works as well ) +# add justified jitter from the {gghalves} packagegeom_half_point(color = fill_color,# draw jitter on the leftside ="l",# control range of jitterrange_scale = .4,# add some transparencyalpha = .5, ) +# ylim(c(-30, 310)) +labs(x ="Acoustic features", y ="Evolutionary rate") +theme(axis.text.x =element_text(angle =15, hjust =1)) +theme_classic(base_size =20)
16.3 Evolutionary integration
Code
feat_grp <- features_df$restrictionnames(feat_grp) <- features_df$featuresout <- warbleR:::pblapply_wrblr_int(trees, cl =20, function(tree) { matched <-match_data_tree(mean_spec_features_matrix, tree, index =rownames(mean_spec_features_matrix)) IT <-phylo.integration(A = matched$data, partition.gp = feat_grp,phy = matched$tree, iter =10000, print.progress =FALSE) out <-data.frame(effect_size = IT$Z, IT$P.value)return(out)})phylo_intr <-do.call(rbind, out)saveRDS(phylo_intr, "./data/processed/phylogenetic_integration.RDS")
Code
phylo_intr <-readRDS("./data/processed/phylogenetic_integration.RDS")# make a ggplot2 density plot overlaid with a histogram of the# effect size in phylo_intrggplot(phylo_intr, aes(x = effect_size)) +geom_density(fill ="#3B2F5E99",alpha =0.5) +geom_histogram(aes(y = ..density..), fill ="#54C9AD99",alpha =0.5, bins =20) +theme_classic(base_size =20) +labs(x ="Modularity effect size",y ="Density") +geom_vline(xintercept =0, color ="red", linetype ="dashed")
17 Find modules of traits
Code
matched <-match_data_tree(mean_spec_features_matrix, tree, index =rownames(mean_spec_features_matrix))# Correlation matrix of traits based on PCA loadingscor_matrix <-cor(matched$data)cor_matrix <- mean_corr# Create a network from the correlation matrixthreshold <-0.6# Set a threshold for correlation strengthadj_matrix <-abs(cor_matrix) > threshold # Adjacency matrix# Build the graphtrait_network <-graph_from_adjacency_matrix(adj_matrix, mode ="undirected",diag =FALSE)# Plot the network change color of nodes by restricted column in# features_dfplot(trait_network, vertex.label =colnames(matched$data), main ="Trait Network from PCA Loadings",vertex.color =ifelse(features_df$restriction =="restricted","red", "blue"))# Detect modules using Louvain methodcommunities <-cluster_louvain(trait_network)communities <-cluster_walktrap(trait_network)# Assign module membership to traitsV(trait_network)$color <- communities$membership # Color nodes by module# Plot the network with detected modulesplot(trait_network, vertex.label =colnames(matched$data), vertex.color =V(trait_network)$color,main ="Detected Trait Modules")