#### Script to organize hummingbid phenotypic data and run MCMCglmm

# taxonomy
tax <- read.table("data/Tax.txt", h = T, sep = "\t", stringsAsFactors = F)
tax$Nombre_cientifico <- gsub(pattern = " ", replacement = "_", x = tax$Nombre_cientifico)

### read all data tables and match names in each

# Mcguire et al. 2014 phylogeny
tree <- read.tree("data/mcguire_correctedID.tre")
# remove a duplicated P. rupurumii
tree <- drop.tip(tree, 30)

#### dichromatism data
dichro <- read.table("data/Dichromatism_Trochilidae.txt", h = T, sep = "\t")
dichro$Species <- tax[match(dichro$Species, tax$ID_taxonomia), "Nombre_cientifico"]

## Song data v2
song2 <- read.csv("data/song_complexity_parameters_hummingbirds_feb_2021.csv")
names(song2)[1] <- "Species"

song2[grep(pattern = "Doryfera_ludovicae", x = song2$Species), "Species"] <- "Doryfera_ludoviciae"
song2[grep(pattern = "Schistes_geoffroyi", x = song2$Species), "Species"] <- "Schistes_geoffroyii"
song2[grep(pattern = "Sephanoides_sephaniodes", x = song2$Species), "Species"] <- "Sephanoides_sephanoides"
song2[grep(pattern = "Campylopterus_villaviscensio", x = song2$Species), "Species"] <- "Campylopterus_villaviscencio"
song2[grep(pattern = "Juliamyia_julie", x = song2$Species), "Species"] <- "Damophila_julie"
song2[grep(pattern = "Chlorestes_notata", x = song2$Species), "Species"] <- "Chlorostilbon_notatus"

# species-average for all metrics
prom2 <- apply(song2[, 2:dim(song2)[2]], MARGIN = 2, FUN = tapply, INDEX = song2$Species,
    mean, na.rm = T)
songsp2 <- data.frame(Species = rownames(prom2), prom2, row.names = NULL)

## acoustic PCA pca with all variables except btwn.song.var because of NAs but
## using btwn.song.var.IMP
pcaAc <- prcomp(songsp2[, c(2:4, 7)], center = T, scale. = T)

# loadings pcaAc$rotation importance of components summary(pcaAc)

acous <- data.frame(Species = songsp2$Species, PC1_variation = pcaAc$x[, 1], PC2_complexity = pcaAc$x[,
    2])


##### Morphology data
gary <- read.table("data/morphology_humm.txt", h = T, sep = "\t", stringsAsFactors = FALSE)

gary[grep(pattern = "Amazlia_franciae", x = gary$Species), "Species"] <- "Amazilia_franciae"
gary[grep(pattern = "Chalybura_buffoni", x = gary$Species), "Species"] <- "Chalybura_buffonii"
gary[grep(pattern = "Chlorestes_notata", x = gary$Species), "Species"] <- "Chlorostilbon_notatus"
gary[grep(pattern = "Doryfera_ludovicae", x = gary$Species), "Species"] <- "Doryfera_ludoviciae"
gary[grep(pattern = "Eriocnemis_aline", x = gary$Species), "Species"] <- "Eriocnemis_alinae"
gary[grep(pattern = "Glaucis_hirsuta", x = gary$Species), "Species"] <- "Glaucis_hirsutus"
gary[grep(pattern = "Schistes_geoffroyi", x = gary$Species), "Species"] <- "Schistes_geoffroyii"

copia <- gary
# create column with sp and spp combination and clade column
copia$Comb <- paste(copia$Species, copia$Subsp, sep = "_")
copia$Clade <- tax[match(copia$Species, tax$Nombre_cientifico), "Clado"]

# split SexDim, males and females
sexdim <- copia[which(copia$Sex == "SexDim"), ]
males <- copia[which(copia$Sex == "Males"), ]
females <- copia[which(copia$Sex == "Females"), ]

# keep only those species with data in sexdim (species with data for both
# sexes)
males <- males[-which(is.na(match(males$Comb, sexdim$Comb))), ]
females <- females[-which(is.na(match(females$Comb, sexdim$Comb))), ]
both <- rbind(males, females)

# re-order
gary2 <- both[order(both$Comb), 1:24]

# correct WL values for E. macroura that were multiplied by 2
gary2[which(gary2$Species == "Eupetomena_macroura"), "WingLoad"] <- gary2[which(gary2$Species ==
    "Eupetomena_macroura"), "WingLoad"]/2

# average among subspecies
avg <- apply(gary2[, c(6, 7, 9:dim(gary2)[2])], MARGIN = 2, FUN = tapply, INDEX = list(gary2$Sex,
    gary2$Species), mean, na.rm = T)
avg2 <- data.frame(Species = rep(unique(gary2$Species), each = 2), Sex = rep(c("Females",
    "Males"), dim(avg)[1]/2), avg, row.names = NULL)

# PCA using wing and bill variables with body mass for both sexes + TAIL
pcaMT <- prcomp(avg2[, c(4:17)], center = T, scale. = T)

## loadings and importance of components pcaMT$rotation summary(pcaMT)

morphspace2 <- data.frame(Species = avg2$Species, Sex = avg2$Sex, pcaMT$x)

# dimorphism as the euclidean distance between males and females of each
# species using all axes from the PCA unweighted by proportion of variation
# explained
dimo2 <- data.frame(matrix(NA, nrow = length(unique(morphspace2$Species)), ncol = 2))
names(dimo2) <- c("Species", "Distance")

for (i in 1:length(unique(morphspace2$Species))) {

    dimo2$Species[i] <- unique(morphspace2$Species)[i]
    mat <- morphspace2[which(morphspace2$Species == unique(morphspace2$Species)[i]),
        -c(1:2)]
    dimo2[i, "Distance"] <- dist(mat, method = "euclidean")

}

# remove species not in tree
dimoS <- dimo2[-which(is.na(match(dimo2$Species, tree$tip.label))), ]


#### elevation and habitat data
habitat <- read.table("data/Habitat_Data.txt", h = T, sep = "\t", stringsAsFactors = F)
habitat$SPECIES_NAME <- gsub(pattern = " ", replacement = "_", x = habitat$SPECIES_NAME)

habitat[grep(pattern = "Doryfera_ludovicae", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Doryfera_ludoviciae"
habitat[grep(pattern = "Eriocnemis_aline", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Eriocnemis_alinae"
habitat[grep(pattern = "Chlorestes_notata", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Chlorostilbon_notatus"
habitat[grep(pattern = "Schistes_geoffroyi", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Schistes_geoffroyii"
habitat[grep(pattern = "Sephanoides_sephaniodes", x = habitat$SPECIES_NAME), "SPECIES_NAME"] <- "Sephanoides_sephanoides"
habitat[grep(pattern = "Campylopterus_villaviscensio", x = habitat$SPECIES_NAME),
    "SPECIES_NAME"] <- "Campylopterus_villaviscencio"

names(habitat)[2] <- "Species"

# Elevation from Rangel et al. 2015
Relev <- read.table("data/Hummingbirds - 304spp ElevRange.txt", h = T, sep = "\t",
    stringsAsFactors = FALSE)
Relev[grep(pattern = "Doryfera_ludovicae", x = Relev$SppName), "SppName"] <- "Doryfera_ludoviciae"
Relev[grep(pattern = "Schistes_geoffroyi", x = Relev$SppName), "SppName"] <- "Schistes_geoffroyii"
Relev[grep(pattern = "Sephanoides_sephaniodes", x = Relev$SppName), "SppName"] <- "Sephanoides_sephanoides"
Relev[grep(pattern = "Aglaiocercus_kingi", x = Relev$SppName), "SppName"] <- "Aglaiocercus_kingii"
Relev[grep(pattern = "Sappho_sparganura", x = Relev$SppName), "SppName"] <- "Sappho_sparganurus"
Relev[grep(pattern = "Eriocnemis_nigriventris", x = Relev$SppName), "SppName"] <- "Eriocnemis_nigrivestis"
Relev[grep(pattern = "Chlorestes_notata", x = Relev$SppName), "SppName"] <- "Chlorostilbon_notatus"
Relev[grep(pattern = "Campylopterus_villaviscensio", x = Relev$SppName), "SppName"] <- "Campylopterus_villaviscencio"
Relev[grep(pattern = "Leucippus_chionogaster", x = Relev$SppName), "SppName"] <- "Amazilia_chionogaster"
Relev[grep(pattern = "Leucippus_viridicauda", x = Relev$SppName), "SppName"] <- "Amazilia_viridicauda"
Relev[grep(pattern = "Amazilia_saucerrottei", x = Relev$SppName), "SppName"] <- "Amazilia_saucerottei"
Relev[grep(pattern = "Stellula_calliope", x = Relev$SppName), "SppName"] <- "Selasphorus_calliope"

names(Relev)[1] <- "Species"

# remove species with no data
Relev <- Relev[-301, ]
Relev <- Relev[-104, ]

# merge all databases into one
df <- merge(dichro, acous)
df2 <- merge(df, dimoS)
df3 <- merge(df2, habitat)
df4 <- merge(df3, Relev)


# prune tree
tree2 <- drop.tip(tree, tree$tip.label[which(is.na(match(tree$tip.label, df4$Species)))])
# re-order data frame to match order in tree
df4 <- df4[match(tree2$tip.label, df4$Species), ]
row.names(df4) <- df4$Species

# select only the variables of interest
humm <- df4[, c(1, 6, 8:10, 12, 13, 15, 17, 19)]

# dichromatism standardization
humm2 <- cbind(Species = as.factor(humm[, 1]), Dichromatism_chro = scale(humm[, 2]),
    humm[, 3:dim(humm)[2]])
names(humm2)[1] <- "animal"

# dichromatism and distance standardization
humm2 <- cbind(Species = as.factor(humm[, 1]), Dichromatism_chro = scale(humm[, 2]),
    humm[, 3:4], Distance_sc = scale(humm[, 5]), humm[, 6:dim(humm)[2]])
names(humm2)[1] <- "animal"

humm2$Habitat <- as.factor(humm2$Habitat)
humm2$MaxAlt <- as.numeric(humm2$MaxAlt)
humm2$MinAlt <- as.numeric(humm2$MinAlt)
humm2$RangeAlt <- as.numeric(humm2$RangeAlt)

# write.table(humm2,file='output/humm2.txt',sep='\t')
# write.tree(tree2,file='output/tree2.tre')

Ratematrix

MCMC parameters

iter <- 5e+05
thin <- 50
burnin <- 0.2
colors <- viridis(10)

Overall dimorphism

total_dim <- read.table("./data/data_total_dim.txt", h = T, sep = "\t", stringsAsFactors = FALSE)

tree_total <- read.tree("./data/tree_total.tre")

rownames(total_dim) <- total_dim$Species

total_dim <- total_dim[, -1]
colnames(total_dim) <- c("dichromatism", "song_complexity", "total_dimorphism")

## Run multiple MCMC chains.
handle_1 <- ratematrixMCMC(data = total_dim, phy = tree_total, gen = iter, dir = "./data",
    thin = thin, burn = 0.3)

handle_2 <- ratematrixMCMC(data = total_dim, phy = tree_total, gen = iter, dir = "./data",
    thin = thin, burn = 0.3)

handle_3 <- ratematrixMCMC(data = total_dim, phy = tree_total, gen = iter, dir = "./data",
    thin = thin, burn = 0.3)

total_dim_handles <- list(handle_1 = handle_1, handle_2 = handle_2, handle_3 = handle_3)

saveRDS(total_dim_handles, "./data/3_chains_1_regime_ratematrix_total_dimorphism.RDS")

Results:

total_dim_handles <- readRDS("./data/3_chains_1_regime_ratematrix_total_dimorphism.RDS")

## Read both chains:
posterior_1 <- readMCMC(total_dim_handles$handle_1, burn = 0, thin = 1)
posterior_2 <- readMCMC(total_dim_handles$handle_2, burn = 0, thin = 1)
posterior_3 <- readMCMC(total_dim_handles$handle_3, burn = 0, thin = 1)

## Merge all posteriors in the list.
merged.posterior <- mergePosterior(posterior_1, posterior_2, posterior_3)

ex_cor <- extractCorrelation(merged.posterior)

# estimates:
post_df <- describe_posterior(as.data.frame(ex_cor), ci_method = "HDI")

df1 <- knitr::kable(post_df[, 1:5], row.names = TRUE, escape = FALSE, format = "html",
    digits = 3)

df1 <- row_spec(df1, which(post_df[, "CI_low"] * post_df[, "CI_high"] > 0), background = adjustcolor(colors[9],
    alpha.f = 0.3))

kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

	Parameter	Median	CI	CI_low	CI_high
1	song_complexity_x_dichromatism	0.232	0.95	0.025	0.443
2	total_dimorphism_x_dichromatism	-0.072	0.95	-0.286	0.144
3	total_dimorphism_x_song_complexity	-0.062	0.95	-0.278	0.153

## Plot results:
plotRatematrix(merged.posterior, colors = colors[4])

## Plotting a single regime.

ex_cor_df <- stack(as.data.frame(ex_cor[[1]]))

ggplot(data = ex_cor_df, aes(y = ind, x = values, fill = stat(quantile))) + geom_density_ridges_gradient(quantile_lines = TRUE,
    quantile_fun = HDInterval::hdi, vline_linetype = 2) + theme_classic(base_size = 20) +
    xlim(-0.5, 0.5) + labs(x = "Correlation", y = "Variable dyad") + geom_vline(xintercept = 0,
    col = "red") + scale_fill_manual(values = c("transparent", colors[7], "transparent"),
    guide = "none") + ggtitle("Posterior distributions (95% HDI)")

Diagnostics:

## Check for convergence
checkConvergence(posterior_1, posterior_2, posterior_3)

## $gelman
## $gelman$root
## Potential scale reduction factors:
## 
##                  Point est. Upper C.I.
## dichromatism           1.00       1.00
## song_complexity        1.01       1.04
## total_dimorphism       1.00       1.00
## 
## 
## $gelman$ratematrix
## Potential scale reduction factors:
## 
##     Point est. Upper C.I.
## 1,1       1.01       1.02
## 1,2       1.00       1.01
## 1,3       1.06       1.06
## 2,1       1.00       1.01
## 2,2       1.00       1.00
## 2,3       1.01       1.01
## 3,1       1.06       1.06
## 3,2       1.01       1.01
## 3,3       1.03       1.03
## 
## 
## 
## $ess
##     dichromatism  song_complexity total_dimorphism   matrix_cel_1,1 
##         641.2068         203.3259        1312.5864        7387.1616 
##   matrix_cel_1,2   matrix_cel_1,3   matrix_cel_2,1   matrix_cel_2,2 
##       14207.3184       27141.7973       14207.3184        6512.2019 
##   matrix_cel_2,3   matrix_cel_3,1   matrix_cel_3,2   matrix_cel_3,3 
##       26220.0767       27141.7973       26220.0767       10383.4396

Tail dimorphism

tail_dim <- read.table("./data/data_tail_dim.txt", h = T, sep = "\t", stringsAsFactors = FALSE)

tree_tail <- read.tree("./data/tree_tail.tre")

rownames(tail_dim) <- tail_dim$Species

tail_dim <- tail_dim[, -1]
colnames(tail_dim) <- c("dichromatism", "song_complexity", "tail_dimorphism")

## Run multiple MCMC chains.
handle_1_tail <- ratematrixMCMC(data = tail_dim, phy = tree_tail, gen = iter, dir = "./data",
    thin = thin, burn = burnin)

handle_2_tail <- ratematrixMCMC(data = tail_dim, phy = tree_tail, gen = iter, dir = "./data",
    thin = thin, burn = burnin)

handle_3_tail <- ratematrixMCMC(data = tail_dim, phy = tree_tail, gen = iter, dir = "./data",
    thin = thin, burn = burnin)

tail_dim_handles <- list(handle_1 = handle_1, handle_2 = handle_2, handle_3 = handle_3)

saveRDS(tail_dim_handles, "./data/3_chains_1_regime_ratematrix_tail_dimorphism.RDS")

tail_dim_handles <- readRDS("./data/3_chains_1_regime_ratematrix_tail_dimorphism.RDS")

## Read both chains:
posterior_1 <- readMCMC(tail_dim_handles$handle_1, burn = burnin, thin = thin)
posterior_2 <- readMCMC(tail_dim_handles$handle_2, burn = burnin, thin = thin)
posterior_3 <- readMCMC(tail_dim_handles$handle_3, burn = burnin, thin = thin)

## Merge all posteriors in the list.
merged.posterior <- mergePosterior(posterior_1, posterior_2, posterior_3)

ex_cor <- extractCorrelation(merged.posterior)

# estimates:
post_df <- describe_posterior(as.data.frame(ex_cor), ci_method = "HDI")

df1 <- knitr::kable(post_df[, 1:5], row.names = TRUE, escape = FALSE, format = "html",
    digits = 3)

df1 <- row_spec(df1, which(post_df[, "CI_low"] * post_df[, "CI_high"] > 0), background = adjustcolor(colors[9],
    alpha.f = 0.3))

kable_styling(df1, bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    full_width = FALSE, font_size = 12)

	Parameter	Median	CI	CI_low	CI_high
1	song_complexity_x_dichromatism	0.231	0.95	0.026	0.443
2	total_dimorphism_x_dichromatism	-0.072	0.95	-0.290	0.138
3	total_dimorphism_x_song_complexity	-0.060	0.95	-0.266	0.161

## Plot results:
plotRatematrix(merged.posterior, colors = colors[4])

## Plotting a single regime.

ex_cor_df <- stack(as.data.frame(ex_cor[[1]]))

ggplot(data = ex_cor_df, aes(y = ind, x = values, fill = stat(quantile))) + geom_density_ridges_gradient(quantile_lines = TRUE,
    quantile_fun = HDInterval::hdi, vline_linetype = 2) + theme_classic(base_size = 20) +
    xlim(-0.5, 0.5) + labs(x = "Correlation", y = "Variable dyad") + geom_vline(xintercept = 0,
    col = "red") + scale_fill_manual(values = c("transparent", colors[7], "transparent"),
    guide = "none") + ggtitle("Posterior distributions (95% HDI)")

Diagnostics:

## Check for convergence
checkConvergence(posterior_1, posterior_2, posterior_3)

## $gelman
## $gelman$root
## Potential scale reduction factors:
## 
##                  Point est. Upper C.I.
## dichromatism           1.01       1.02
## song_complexity        1.03       1.10
## total_dimorphism       1.00       1.01
## 
## 
## $gelman$ratematrix
## Potential scale reduction factors:
## 
##     Point est. Upper C.I.
## 1,1          1          1
## 1,2          1          1
## 1,3          1          1
## 2,1          1          1
## 2,2          1          1
## 2,3          1          1
## 3,1          1          1
## 3,2          1          1
## 3,3          1          1
## 
## 
## 
## $ess
##     dichromatism  song_complexity total_dimorphism   matrix_cel_1,1 
##         512.8690         165.2514        1057.1066        4471.4172 
##   matrix_cel_1,2   matrix_cel_1,3   matrix_cel_2,1   matrix_cel_2,2 
##        8030.2900       18211.8121        8030.2900        4630.7591 
##   matrix_cel_2,3   matrix_cel_3,1   matrix_cel_3,2   matrix_cel_3,3 
##       16837.8287       18211.8121       16837.8287        4719.2413

Session info

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
## LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3
## 
## locale:
##  [1] LC_CTYPE=pt_BR.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=pt_BR.UTF-8    
##  [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=pt_BR.UTF-8   
##  [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bayestestR_0.11.5 HDInterval_0.2.2  ggridges_0.5.3    nlme_3.1-152     
##  [5] viridis_0.6.1     viridisLite_0.4.0 ratematrix_1.2.3  MCMCglmm_2.32    
##  [9] ape_5.5           coda_0.19-4       Matrix_1.3-4      kableExtra_1.3.4 
## [13] knitr_1.36        ggplot2_3.3.5    
## 
## loaded via a namespace (and not attached):
##  [1] subplex_1.6             insight_0.14.5          webshot_0.5.2          
##  [4] httr_1.4.2              numDeriv_2016.8-1.1     tensorA_0.36.2         
##  [7] tools_4.1.0             bslib_0.2.5.1           utf8_1.2.2             
## [10] R6_2.5.1                DBI_1.1.1               colorspace_2.0-2       
## [13] withr_2.4.2             tidyselect_1.1.1        gridExtra_2.3          
## [16] mnormt_2.0.2            phangorn_2.7.1          compiler_4.1.0         
## [19] rvest_1.0.0             formatR_1.11            expm_0.999-6           
## [22] xml2_1.3.2              labeling_0.4.2          sass_0.4.0             
## [25] scales_1.1.1            mvtnorm_1.1-2           readr_1.4.0            
## [28] quadprog_1.5-8          systemfonts_1.0.2       stringr_1.4.0          
## [31] digest_0.6.28           rmarkdown_2.9           svglite_2.0.0          
## [34] pkgconfig_2.0.3         htmltools_0.5.2         parallelly_1.26.1      
## [37] plotrix_3.8-1           geiger_2.0.7            highr_0.9              
## [40] fastmap_1.1.0           maps_3.3.0              rlang_0.4.11           
## [43] rstudioapi_0.13         phylolm_2.6.2           farver_2.1.0           
## [46] jquerylib_0.1.4         generics_0.1.0          combinat_0.0-8         
## [49] jsonlite_1.7.2          dplyr_1.0.7             magrittr_2.0.1         
## [52] dotCall64_1.0-1         Rcpp_1.0.7              munsell_0.5.0          
## [55] fansi_0.5.0             lifecycle_1.0.1         scatterplot3d_0.3-41   
## [58] stringi_1.7.5           yaml_2.2.1              clusterGeneration_1.3.7
## [61] MASS_7.3-54             plyr_1.8.6              grid_4.1.0             
## [64] parallel_4.1.0          listenv_0.8.0           crayon_1.4.1           
## [67] lattice_0.20-44         hms_1.1.0               tmvnsim_1.0-2          
## [70] pillar_1.6.3            igraph_1.2.6            cubature_2.0.4.2       
## [73] corpcor_1.6.9           future.apply_1.7.0      codetools_0.2-18       
## [76] fastmatch_1.1-0         glue_1.4.2              evaluate_0.14          
## [79] deSolve_1.28            vctrs_0.3.8             spam_2.7-0             
## [82] gtable_0.3.0            purrr_0.3.4             datawizard_0.2.1       
## [85] future_1.21.0           assertthat_0.2.1        xfun_0.26              
## [88] mvMORPH_1.1.4           glassoFast_1.0          phytools_0.7-80        
## [91] tibble_3.1.5            pbmcapply_1.5.0         ellipse_0.4.2          
## [94] globals_0.14.0          ellipsis_0.3.2

Dimorphism and mating trait evolution in hummingbird

Marcelo Araya-Salas, PhD

“2021-11-29”

Ratematrix

MCMC parameters

Overall dimorphism

Tail dimorphism

Session info