#### Script to organize hummingbid phenotypic data and run MCMCglmm
# taxonomy
tax <- read.table("data/Tax.txt", h = T, sep = "\t", stringsAsFactors = F)
tax$Nombre_cientifico <- gsub(pattern = " ", replacement = "_", x = tax$Nombre_cientifico)
### read all data tables and match names in each
# Mcguire et al. 2014 phylogeny
tree <- read.tree("data/mcguire_correctedID.tre")
# remove a duplicated P. rupurumii
tree <- drop.tip(tree, 30)
#### dichromatism data
dichro <- read.table("data/Dichromatism_Trochilidae.txt", h = T, sep = "\t")
dichro$Species <- tax[match(dichro$Species, tax$ID_taxonomia), "Nombre_cientifico"]
## Song data v2
song2 <- read.csv("data/song_complexity_parameters_hummingbirds_feb_2021.csv")
names(song2)[1] <- "Species"
song2[grep(pattern = "Doryfera_ludovicae", x = song2$Species), "Species"] <- "Doryfera_ludoviciae"
song2[grep(pattern = "Schistes_geoffroyi", x = song2$Species), "Species"] <- "Schistes_geoffroyii"
song2[grep(pattern = "Sephanoides_sephaniodes", x = song2$Species), "Species"] <- "Sephanoides_sephanoides"
song2[grep(pattern = "Campylopterus_villaviscensio", x = song2$Species), "Species"] <- "Campylopterus_villaviscencio"
song2[grep(pattern = "Juliamyia_julie", x = song2$Species), "Species"] <- "Damophila_julie"
song2[grep(pattern = "Chlorestes_notata", x = song2$Species), "Species"] <- "Chlorostilbon_notatus"
# species-average for all metrics
prom2 <- apply(song2[, 2:dim(song2)[2]], MARGIN = 2, FUN = tapply, INDEX = song2$Species,
mean, na.rm = T)
songsp2 <- data.frame(Species = rownames(prom2), prom2, row.names = NULL)
## acoustic PCA pca with all variables except btwn.song.var because of NAs but
## using btwn.song.var.IMP
pcaAc <- prcomp(songsp2[, c(2:4, 7)], center = T, scale. = T)
# loadings pcaAc$rotation importance of components summary(pcaAc)
acous <- data.frame(Species = songsp2$Species, PC1_variation = pcaAc$x[, 1], PC2_complexity = pcaAc$x[,
2])
##### Morphology data
gary <- read.table("data/morphology_humm.txt", h = T, sep = "\t", stringsAsFactors = FALSE)
gary[grep(pattern = "Amazlia_franciae", x = gary$Species), "Species"] <- "Amazilia_franciae"
gary[grep(pattern = "Chalybura_buffoni", x = gary$Species), "Species"] <- "Chalybura_buffonii"
gary[grep(pattern = "Chlorestes_notata", x = gary$Species), "Species"] <- "Chlorostilbon_notatus"
gary[grep(pattern = "Doryfera_ludovicae", x = gary$Species), "Species"] <- "Doryfera_ludoviciae"
gary[grep(pattern = "Eriocnemis_aline", x = gary$Species), "Species"] <- "Eriocnemis_alinae"
gary[grep(pattern = "Glaucis_hirsuta", x = gary$Species), "Species"] <- "Glaucis_hirsutus"
gary[grep(pattern = "Schistes_geoffroyi", x = gary$Species), "Species"] <- "Schistes_geoffroyii"
copia <- gary
# create column with sp and spp combination and clade column
copia$Comb <- paste(copia$Species, copia$Subsp, sep = "_")
copia$Clade <- tax[match(copia$Species, tax$Nombre_cientifico), "Clado"]
# split SexDim, males and females
sexdim <- copia[which(copia$Sex == "SexDim"), ]
males <- copia[which(copia$Sex == "Males"), ]
females <- copia[which(copia$Sex == "Females"), ]
# keep only those species with data in sexdim (species with data for both
# sexes)
males <- males[-which(is.na(match(males$Comb, sexdim$Comb))), ]
females <- females[-which(is.na(match(females$Comb, sexdim$Comb))), ]
both <- rbind(males, females)
# re-order
gary2 <- both[order(both$Comb), 1:24]
# correct WL values for E. macroura that were multiplied by 2
gary2[which(gary2$Species == "Eupetomena_macroura"), "WingLoad"] <- gary2[which(gary2$Species ==
"Eupetomena_macroura"), "WingLoad"]/2
# average among subspecies
avg <- apply(gary2[, c(6, 7, 9:dim(gary2)[2])], MARGIN = 2, FUN = tapply, INDEX = list(gary2$Sex,
gary2$Species), mean, na.rm = T)
avg2 <- data.frame(Species = rep(unique(gary2$Species), each = 2), Sex = rep(c("Females",
"Males"), dim(avg)[1]/2), avg, row.names = NULL)
# PCA using wing and bill variables with body mass for both sexes + TAIL
pcaMT <- prcomp(avg2[, c(4:17)], center = T, scale. = T)
## loadings and importance of components pcaMT$rotation summary(pcaMT)
morphspace2 <- data.frame(Species = avg2$Species, Sex = avg2$Sex, pcaMT$x)
# dimorphism as the euclidean distance between males and females of each
# species using all axes from the PCA unweighted by proportion of variation
# explained
dimo2 <- data.frame(matrix(NA, nrow = length(unique(morphspace2$Species)), ncol = 2))
names(dimo2) <- c("Species", "Distance")
for (i in 1:length(unique(morphspace2$Species))) {
dimo2$Species[i] <- unique(morphspace2$Species)[i]
mat <- morphspace2[which(morphspace2$Species == unique(morphspace2$Species)[i]),
-c(1:2)]
dimo2[i, "Distance"] <- dist(mat, method = "euclidean")
}
# remove species not in tree
dimoS <- dimo2[-which(is.na(match(dimo2$Species, tree$tip.label))), ]
#### elevation and habitat data
habitat <- read.table("data/Habitat_Data.txt", h = T, sep = "\t", stringsAsFactors = F)
habitat$SPECIES_NAME <- gsub(pattern = " ", replacement = "_", x = habitat$SPECIES_NAME)
habitat[grep(pattern = "Doryfera_ludovicae", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Doryfera_ludoviciae"
habitat[grep(pattern = "Eriocnemis_aline", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Eriocnemis_alinae"
habitat[grep(pattern = "Chlorestes_notata", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Chlorostilbon_notatus"
habitat[grep(pattern = "Schistes_geoffroyi", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Schistes_geoffroyii"
habitat[grep(pattern = "Sephanoides_sephaniodes", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Sephanoides_sephanoides"
habitat[grep(pattern = "Campylopterus_villaviscensio", x = habitat$SPECIES_NAME),
"SPECIES_NAME"] <- "Campylopterus_villaviscencio"
names(habitat)[2] <- "Species"
# Elevation from Rangel et al. 2015
Relev <- read.table("data/Hummingbirds - 304spp ElevRange.txt", h = T, sep = "\t",
stringsAsFactors = FALSE)
Relev[grep(pattern = "Doryfera_ludovicae", x = Relev$SppName), "SppName"] <- "Doryfera_ludoviciae"
Relev[grep(pattern = "Schistes_geoffroyi", x = Relev$SppName), "SppName"] <- "Schistes_geoffroyii"
Relev[grep(pattern = "Sephanoides_sephaniodes", x = Relev$SppName), "SppName"] <- "Sephanoides_sephanoides"
Relev[grep(pattern = "Aglaiocercus_kingi", x = Relev$SppName), "SppName"] <- "Aglaiocercus_kingii"
Relev[grep(pattern = "Sappho_sparganura", x = Relev$SppName), "SppName"] <- "Sappho_sparganurus"
Relev[grep(pattern = "Eriocnemis_nigriventris", x = Relev$SppName), "SppName"] <- "Eriocnemis_nigrivestis"
Relev[grep(pattern = "Chlorestes_notata", x = Relev$SppName), "SppName"] <- "Chlorostilbon_notatus"
Relev[grep(pattern = "Campylopterus_villaviscensio", x = Relev$SppName), "SppName"] <- "Campylopterus_villaviscencio"
Relev[grep(pattern = "Leucippus_chionogaster", x = Relev$SppName), "SppName"] <- "Amazilia_chionogaster"
Relev[grep(pattern = "Leucippus_viridicauda", x = Relev$SppName), "SppName"] <- "Amazilia_viridicauda"
Relev[grep(pattern = "Amazilia_saucerrottei", x = Relev$SppName), "SppName"] <- "Amazilia_saucerottei"
Relev[grep(pattern = "Stellula_calliope", x = Relev$SppName), "SppName"] <- "Selasphorus_calliope"
names(Relev)[1] <- "Species"
# remove species with no data
Relev <- Relev[-301, ]
Relev <- Relev[-104, ]
# merge all databases into one
df <- merge(dichro, acous)
df2 <- merge(df, dimoS)
df3 <- merge(df2, habitat)
df4 <- merge(df3, Relev)
# prune tree
tree2 <- drop.tip(tree, tree$tip.label[which(is.na(match(tree$tip.label, df4$Species)))])
# re-order data frame to match order in tree
df4 <- df4[match(tree2$tip.label, df4$Species), ]
row.names(df4) <- df4$Species
# select only the variables of interest
humm <- df4[, c(1, 6, 8:10, 12, 13, 15, 17, 19)]
# dichromatism standardization
humm2 <- cbind(Species = as.factor(humm[, 1]), Dichromatism_chro = scale(humm[, 2]),
humm[, 3:dim(humm)[2]])
names(humm2)[1] <- "animal"
# dichromatism and distance standardization
humm2 <- cbind(Species = as.factor(humm[, 1]), Dichromatism_chro = scale(humm[, 2]),
humm[, 3:4], Distance_sc = scale(humm[, 5]), humm[, 6:dim(humm)[2]])
names(humm2)[1] <- "animal"
humm2$Habitat <- as.factor(humm2$Habitat)
humm2$MaxAlt <- as.numeric(humm2$MaxAlt)
humm2$MinAlt <- as.numeric(humm2$MinAlt)
humm2$RangeAlt <- as.numeric(humm2$RangeAlt)
# write.table(humm2,file='output/humm2.txt',sep='\t')
# write.tree(tree2,file='output/tree2.tre')
iter <- 5e+05
thin <- 50
burnin <- 0.2
colors <- viridis(10)
total_dim <- read.table("./data/data_total_dim.txt", h = T, sep = "\t", stringsAsFactors = FALSE)
tree_total <- read.tree("./data/tree_total.tre")
rownames(total_dim) <- total_dim$Species
total_dim <- total_dim[, -1]
colnames(total_dim) <- c("dichromatism", "song_complexity", "total_dimorphism")
## Run multiple MCMC chains.
handle_1 <- ratematrixMCMC(data = total_dim, phy = tree_total, gen = iter, dir = "./data",
thin = thin, burn = 0.3)
handle_2 <- ratematrixMCMC(data = total_dim, phy = tree_total, gen = iter, dir = "./data",
thin = thin, burn = 0.3)
handle_3 <- ratematrixMCMC(data = total_dim, phy = tree_total, gen = iter, dir = "./data",
thin = thin, burn = 0.3)
total_dim_handles <- list(handle_1 = handle_1, handle_2 = handle_2, handle_3 = handle_3)
saveRDS(total_dim_handles, "./data/3_chains_1_regime_ratematrix_total_dimorphism.RDS")
Results:
total_dim_handles <- readRDS("./data/3_chains_1_regime_ratematrix_total_dimorphism.RDS")
## Read both chains:
posterior_1 <- readMCMC(total_dim_handles$handle_1, burn = 0, thin = 1)
posterior_2 <- readMCMC(total_dim_handles$handle_2, burn = 0, thin = 1)
posterior_3 <- readMCMC(total_dim_handles$handle_3, burn = 0, thin = 1)
## Merge all posteriors in the list.
merged.posterior <- mergePosterior(posterior_1, posterior_2, posterior_3)
ex_cor <- extractCorrelation(merged.posterior)
# estimates:
post_df <- describe_posterior(as.data.frame(ex_cor), ci_method = "HDI")
df1 <- knitr::kable(post_df[, 1:5], row.names = TRUE, escape = FALSE, format = "html",
digits = 3)
df1 <- row_spec(df1, which(post_df[, "CI_low"] * post_df[, "CI_high"] > 0), background = adjustcolor(colors[9],
alpha.f = 0.3))
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = FALSE, font_size = 12)
Parameter | Median | CI | CI_low | CI_high | |
---|---|---|---|---|---|
1 | song_complexity_x_dichromatism | 0.232 | 0.95 | 0.025 | 0.443 |
2 | total_dimorphism_x_dichromatism | -0.072 | 0.95 | -0.286 | 0.144 |
3 | total_dimorphism_x_song_complexity | -0.062 | 0.95 | -0.278 | 0.153 |
## Plot results:
plotRatematrix(merged.posterior, colors = colors[4])
## Plotting a single regime.
ex_cor_df <- stack(as.data.frame(ex_cor[[1]]))
ggplot(data = ex_cor_df, aes(y = ind, x = values, fill = stat(quantile))) + geom_density_ridges_gradient(quantile_lines = TRUE,
quantile_fun = HDInterval::hdi, vline_linetype = 2) + theme_classic(base_size = 20) +
xlim(-0.5, 0.5) + labs(x = "Correlation", y = "Variable dyad") + geom_vline(xintercept = 0,
col = "red") + scale_fill_manual(values = c("transparent", colors[7], "transparent"),
guide = "none") + ggtitle("Posterior distributions (95% HDI)")
Diagnostics:
## Check for convergence
checkConvergence(posterior_1, posterior_2, posterior_3)
## $gelman
## $gelman$root
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## dichromatism 1.00 1.00
## song_complexity 1.01 1.04
## total_dimorphism 1.00 1.00
##
##
## $gelman$ratematrix
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## 1,1 1.01 1.02
## 1,2 1.00 1.01
## 1,3 1.06 1.06
## 2,1 1.00 1.01
## 2,2 1.00 1.00
## 2,3 1.01 1.01
## 3,1 1.06 1.06
## 3,2 1.01 1.01
## 3,3 1.03 1.03
##
##
##
## $ess
## dichromatism song_complexity total_dimorphism matrix_cel_1,1
## 641.2068 203.3259 1312.5864 7387.1616
## matrix_cel_1,2 matrix_cel_1,3 matrix_cel_2,1 matrix_cel_2,2
## 14207.3184 27141.7973 14207.3184 6512.2019
## matrix_cel_2,3 matrix_cel_3,1 matrix_cel_3,2 matrix_cel_3,3
## 26220.0767 27141.7973 26220.0767 10383.4396
tail_dim <- read.table("./data/data_tail_dim.txt", h = T, sep = "\t", stringsAsFactors = FALSE)
tree_tail <- read.tree("./data/tree_tail.tre")
rownames(tail_dim) <- tail_dim$Species
tail_dim <- tail_dim[, -1]
colnames(tail_dim) <- c("dichromatism", "song_complexity", "tail_dimorphism")
## Run multiple MCMC chains.
handle_1_tail <- ratematrixMCMC(data = tail_dim, phy = tree_tail, gen = iter, dir = "./data",
thin = thin, burn = burnin)
handle_2_tail <- ratematrixMCMC(data = tail_dim, phy = tree_tail, gen = iter, dir = "./data",
thin = thin, burn = burnin)
handle_3_tail <- ratematrixMCMC(data = tail_dim, phy = tree_tail, gen = iter, dir = "./data",
thin = thin, burn = burnin)
tail_dim_handles <- list(handle_1 = handle_1, handle_2 = handle_2, handle_3 = handle_3)
saveRDS(tail_dim_handles, "./data/3_chains_1_regime_ratematrix_tail_dimorphism.RDS")
tail_dim_handles <- readRDS("./data/3_chains_1_regime_ratematrix_tail_dimorphism.RDS")
## Read both chains:
posterior_1 <- readMCMC(tail_dim_handles$handle_1, burn = burnin, thin = thin)
posterior_2 <- readMCMC(tail_dim_handles$handle_2, burn = burnin, thin = thin)
posterior_3 <- readMCMC(tail_dim_handles$handle_3, burn = burnin, thin = thin)
## Merge all posteriors in the list.
merged.posterior <- mergePosterior(posterior_1, posterior_2, posterior_3)
ex_cor <- extractCorrelation(merged.posterior)
# estimates:
post_df <- describe_posterior(as.data.frame(ex_cor), ci_method = "HDI")
df1 <- knitr::kable(post_df[, 1:5], row.names = TRUE, escape = FALSE, format = "html",
digits = 3)
df1 <- row_spec(df1, which(post_df[, "CI_low"] * post_df[, "CI_high"] > 0), background = adjustcolor(colors[9],
alpha.f = 0.3))
kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"),
full_width = FALSE, font_size = 12)
Parameter | Median | CI | CI_low | CI_high | |
---|---|---|---|---|---|
1 | song_complexity_x_dichromatism | 0.231 | 0.95 | 0.026 | 0.443 |
2 | total_dimorphism_x_dichromatism | -0.072 | 0.95 | -0.290 | 0.138 |
3 | total_dimorphism_x_song_complexity | -0.060 | 0.95 | -0.266 | 0.161 |
## Plot results:
plotRatematrix(merged.posterior, colors = colors[4])
## Plotting a single regime.
ex_cor_df <- stack(as.data.frame(ex_cor[[1]]))
ggplot(data = ex_cor_df, aes(y = ind, x = values, fill = stat(quantile))) + geom_density_ridges_gradient(quantile_lines = TRUE,
quantile_fun = HDInterval::hdi, vline_linetype = 2) + theme_classic(base_size = 20) +
xlim(-0.5, 0.5) + labs(x = "Correlation", y = "Variable dyad") + geom_vline(xintercept = 0,
col = "red") + scale_fill_manual(values = c("transparent", colors[7], "transparent"),
guide = "none") + ggtitle("Posterior distributions (95% HDI)")
Diagnostics:
## Check for convergence
checkConvergence(posterior_1, posterior_2, posterior_3)
## $gelman
## $gelman$root
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## dichromatism 1.01 1.02
## song_complexity 1.03 1.10
## total_dimorphism 1.00 1.01
##
##
## $gelman$ratematrix
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## 1,1 1 1
## 1,2 1 1
## 1,3 1 1
## 2,1 1 1
## 2,2 1 1
## 2,3 1 1
## 3,1 1 1
## 3,2 1 1
## 3,3 1 1
##
##
##
## $ess
## dichromatism song_complexity total_dimorphism matrix_cel_1,1
## 512.8690 165.2514 1057.1066 4471.4172
## matrix_cel_1,2 matrix_cel_1,3 matrix_cel_2,1 matrix_cel_2,2
## 8030.2900 18211.8121 8030.2900 4630.7591
## matrix_cel_2,3 matrix_cel_3,1 matrix_cel_3,2 matrix_cel_3,3
## 16837.8287 18211.8121 16837.8287 4719.2413
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bayestestR_0.11.5 HDInterval_0.2.2 ggridges_0.5.3 nlme_3.1-152
## [5] viridis_0.6.1 viridisLite_0.4.0 ratematrix_1.2.3 MCMCglmm_2.32
## [9] ape_5.5 coda_0.19-4 Matrix_1.3-4 kableExtra_1.3.4
## [13] knitr_1.36 ggplot2_3.3.5
##
## loaded via a namespace (and not attached):
## [1] subplex_1.6 insight_0.14.5 webshot_0.5.2
## [4] httr_1.4.2 numDeriv_2016.8-1.1 tensorA_0.36.2
## [7] tools_4.1.0 bslib_0.2.5.1 utf8_1.2.2
## [10] R6_2.5.1 DBI_1.1.1 colorspace_2.0-2
## [13] withr_2.4.2 tidyselect_1.1.1 gridExtra_2.3
## [16] mnormt_2.0.2 phangorn_2.7.1 compiler_4.1.0
## [19] rvest_1.0.0 formatR_1.11 expm_0.999-6
## [22] xml2_1.3.2 labeling_0.4.2 sass_0.4.0
## [25] scales_1.1.1 mvtnorm_1.1-2 readr_1.4.0
## [28] quadprog_1.5-8 systemfonts_1.0.2 stringr_1.4.0
## [31] digest_0.6.28 rmarkdown_2.9 svglite_2.0.0
## [34] pkgconfig_2.0.3 htmltools_0.5.2 parallelly_1.26.1
## [37] plotrix_3.8-1 geiger_2.0.7 highr_0.9
## [40] fastmap_1.1.0 maps_3.3.0 rlang_0.4.11
## [43] rstudioapi_0.13 phylolm_2.6.2 farver_2.1.0
## [46] jquerylib_0.1.4 generics_0.1.0 combinat_0.0-8
## [49] jsonlite_1.7.2 dplyr_1.0.7 magrittr_2.0.1
## [52] dotCall64_1.0-1 Rcpp_1.0.7 munsell_0.5.0
## [55] fansi_0.5.0 lifecycle_1.0.1 scatterplot3d_0.3-41
## [58] stringi_1.7.5 yaml_2.2.1 clusterGeneration_1.3.7
## [61] MASS_7.3-54 plyr_1.8.6 grid_4.1.0
## [64] parallel_4.1.0 listenv_0.8.0 crayon_1.4.1
## [67] lattice_0.20-44 hms_1.1.0 tmvnsim_1.0-2
## [70] pillar_1.6.3 igraph_1.2.6 cubature_2.0.4.2
## [73] corpcor_1.6.9 future.apply_1.7.0 codetools_0.2-18
## [76] fastmatch_1.1-0 glue_1.4.2 evaluate_0.14
## [79] deSolve_1.28 vctrs_0.3.8 spam_2.7-0
## [82] gtable_0.3.0 purrr_0.3.4 datawizard_0.2.1
## [85] future_1.21.0 assertthat_0.2.1 xfun_0.26
## [88] mvMORPH_1.1.4 glassoFast_1.0 phytools_0.7-80
## [91] tibble_3.1.5 pbmcapply_1.5.0 ellipse_0.4.2
## [94] globals_0.14.0 ellipsis_0.3.2