---
title: Acoustic analysis
subtitle: Cape parrot vocal dialects
author: <a href="https://marce10.github.io/">Marcelo Araya-Salas</a>
date: "`r Sys.Date()`"
toc: true
toc-depth: 2
toc-location: left
number-sections: true
highlight-style: pygments
format:
html:
df-print: kable
code-fold: true
code-tools: true
css: qmd.css
editor_options:
chunk_output_type: console
---
<!-- this code adds line numbers to code blocks -->
<!-- only works when code folding is not used in yaml (code_folding: show) -->
```{=html}
<style>
body
{ counter-reset: source-line 0; }
pre.numberSource code
{ counter-reset: none; }
</style>
```
```{r set root directory, echo = FALSE}
# set working directory as project directory or one directory above,
knitr::opts_knit$set(root.dir = "..")
```
```{r add link to github repo, echo = FALSE, results='asis'}
# print link to github repo if any
if (file.exists("./.git/config")){
config <- readLines("./.git/config")
url <- grep("url", config, value = TRUE)
url <- gsub("\\turl = |.git$", "", url)
cat("\nSource code and data found at [", url, "](", url, ")", sep = "")
}
```
```{r setup style, echo = FALSE, message = FALSE, warning=FALSE}
# options to customize chunk outputs
knitr::opts_chunk$set(
class.source = "numberLines lineAnchors", # for code line numbers
tidy.opts = list(width.cutoff = 65),
tidy = TRUE,
message = FALSE
)
```
<!-- skyblue box -->
<div class="alert alert-info">
# Purpose
- Measure the acoustic structure of Cape parrot contact calls
- Compare acoustic dissimilarity between individuals from different localities
</div>
<!-- light brown box -->
<div class="alert alert-warning">
# Report overview
- [ Acoustic analysis ](#acoustic-analysis)
- [ Statistical analysis ](#statistical-analysis)
</div>
# Load packages {.unnumbered .unlisted}
```{r load packages}
# knitr is required for creating html/pdf/word reports
# formatR is used for soft-wrapping code
# install/ load packages
sketchy::load_packages(
packages = c(
"knitr",
"formatR",
"viridis",
"warbleR",
github = "maRce10/PhenotypeSpace",
"ggplot2",
"randomForest",
"mlbench",
"caret",
"pbapply",
"vegan",
"umap",
"ecodist",
"maRce10/ohun"
)
)
source("~/Dropbox/Projects/geographic_call_variation_yellow-naped_amazon/scripts/MRM2.R")
```
# Acoustic analysis
## Format data
```{r 1, eval = FALSE}
# read consolidated call metadata
dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_UPDATED_dec-2023.csv")

# keep only Cape parrot records and simplify region column names
dat <- dat[grep("cape", dat$Species, ignore.case = TRUE), ]
names(dat)[grep("Regions..4.", names(dat))] <- "Regions4"
names(dat)[grep("Regions..3.", names(dat))] <- "Regions3"

table(dat$Species)
nrow(dat)

# all(dat$New_Name %in% st$sound.files)
# table(dat$Sorted)
# dat <- dat[dat$Sorted != "delete", ]

unique(dat$New_Name)

# summarize acoustic file properties and set warbleR working directory
ohun::feature_acoustic_data(path = "./data/raw/consolidated_files")
warbleR_options(path = "./data/raw/consolidated_files")

# selection table of whole recordings, keeping only files present in the metadata
st <- selection_table(whole.recs = TRUE)
st <- st[st$sound.files %in% dat$New_Name, ]
nrow(st)
nrow(dat)

# add sorting status from the metadata
st$sorted <- sapply(st$sound.files, function(x) dat$Sorted[dat$New_Name == x][1])
table(st$sorted)
# spectrograms(st, wl = 512, flim = c(0, 10), dest.path = "./data/processed/spectrograms", pal = viridis, collevels = seq(-100, 0, 5))
#
# spectrograms(st[st$sorted == "unsorted", ], wl = 512, flim = c(0, 10), dest.path = "./data/processed/unsorted_spectrograms", pal = viridis, collevels = seq(-100, 0, 5))
#
#
# tailor_sels(st, auto.next = TRUE, flim = c(0, 8), collevels = seq(-100, 0, 5))
```
## Make selection table
```{r 2, eval = FALSE}
# selection table with whole recordings
sel_tab <- selection_table(path = "./data/raw/consolidated_files/", whole.recs = TRUE)

# read manually tailored selections and keep those marked as tailored
tailored <- read.csv("./data/raw/consolidated_files/seltailor_output.csv")
tailored <- tailored[tailored$tailored == "y", ]

# selections without manual tailoring
non_tailored <- sel_tab[!sel_tab$sound.files %in% tailored$sound.files, ]
non_tailored$tailored <- "n"

# fill in missing frequency limits with the range observed in the tailored selections
tailored$bottom.freq[is.na(tailored$bottom.freq)] <- non_tailored$bottom.freq <- min(tailored$bottom.freq, na.rm = TRUE)
tailored$top.freq[is.na(tailored$top.freq)] <- non_tailored$top.freq <- max(tailored$top.freq, na.rm = TRUE)

# combine tailored and non-tailored selections and save
comm_names <- intersect(names(tailored), names(non_tailored))
all_sels <- rbind(tailored[, comm_names], non_tailored[, comm_names])
write.csv(all_sels, "./data/processed/selection_table_entire_sound_files.csv", row.names = FALSE)
```
## Run cross-correlation
```{r 3, eval = FALSE}
sel_tab <- read.csv("./data/processed/selection_table_entire_sound_files.csv")
xcorr <- cross_correlation(X = sel_tab, path = "./data/raw/consolidated_files/", method = 2, parallel = 1)
rownames(xcorr) <- gsub("-1$", "", rownames(xcorr))
colnames(xcorr) <- gsub("-1$", "", colnames(xcorr))
saveRDS(xcorr, "./data/processed/cross_correlation_matrix.RDS")
# less than 0.1% were undefined
sum(is.infinite(xcorr))/length(xcorr)
# convert infinite to mean xcorr
xcorr[is.infinite(xcorr)] <- mean(xcorr[!is.infinite(xcorr) & xcorr < 1])
xcorr_mds <- cmdscale(d = as.dist(xcorr), k = 2)
rownames(xcorr_mds) <- gsub("-1$", "", rownames(xcorr_mds))
saveRDS(xcorr_mds, "./data/processed/cross_correlation_MDS.RDS")
```
# Data description
```{r 4}
# dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_CURATED.csv")
# add data from second location
dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_UPDATED_dec-2023.csv")
dat <- dat[grep("cape", dat$Species, ignore.case = T),]
names(dat)[grep("Regions..4.", names(dat))] <- "Regions4"
names(dat)[grep("Region..3.", names(dat))] <- "Regions3"
names(dat) <- gsub("..cluster.", ".for.cluster", names(dat))
dat <- dat[grepl("cape", dat$Species, ignore.case = T) & !is.na(dat$Location.for.cluster) & !is.na(dat$Longitude.for.cluster) & !is.na(dat$Latitude.for.cluster),]
# dat$region3 <- sapply(dat$New_Name, function(x) dat_loc2$region3s[dat_loc2$New_Name == x])
# dat$region <- dat$site
dat$Species <- "Cape parrot"
```
- `r nrow(dat)` calls
- `r length(unique(dat$Location.for.cluster))` localities
::: {.panel-tabset}
## 3 Regions
- `r length(unique(dat$Regions3))` regions
- Localities and number of calls per region:
```{r 5}
agg <- aggregate(Location.for.cluster ~ Regions3, dat, function(x) length(unique(x)))
names(agg) <- c("region", "localities")
agg$calls <- aggregate(Location.for.cluster ~ Regions3, dat, length)[,2]
agg$localities <- aggregate(Location.for.cluster ~ Regions3, dat, function(x) paste(unique(x), collapse = "-"))[, 2]
agg
```
- Region of each locality:
```{r}
agg <- aggregate(Regions3 ~ Location.for.cluster, dat, function(x) paste(unique(x), collapse = "-"))
names(agg) <- c("localities", "region")
agg[order(agg$region), 2:1]
```
## 4 Regions
- `r length(unique(dat$Regions4))` regions
- Localities and number of calls per region:
```{r}
agg <- aggregate(Location.for.cluster ~ Regions4, dat, function(x) length(unique(x)))
names(agg) <- c("region", "localities")
agg$calls <- aggregate(Location.for.cluster ~ Regions4, dat, length)[, 2]
agg$localities <- aggregate(Location.for.cluster ~ Regions4, dat, function(x) paste(unique(x), collapse = "-"))[, 2]
agg
```
- Region of each locality:
```{r}
agg <- aggregate(Regions4 ~ Location.for.cluster, dat, function(x) paste(unique(x), collapse = "-"))
names(agg) <- c("localities", "region")
agg[order(agg$region), 2:1]
```
:::
# Statistical analysis
Two approaches:

- [Multiple Regression on distance Matrices (MRM)](https://search.r-project.org/CRAN/refmans/ecodist/html/MRM.html)
- Partial Mantel test (a minimal sketch of this approach is shown below)
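A minimal sketch of the partial Mantel approach is shown below, on toy distance matrices and using `vegan::mantel.partial()`; in practice the acoustic, locality-membership and geographic distance matrices built in the following chunks would take the place of the toy objects.

```{r partial mantel sketch, eval = FALSE}
# minimal sketch of a partial Mantel test on toy data (not part of the
# empirical analysis): correlation between acoustic distance and locality
# membership while controlling for geographic distance
library(vegan)

set.seed(123)
n <- 20

# stand-in distance matrices
toy_acoustic <- dist(rnorm(n))  # e.g. 1 - spectrographic cross-correlation
toy_locality <- as.dist(outer(rep(1:4, each = 5), rep(1:4, each = 5), "!=") * 1)  # 0 = same locality
toy_geo <- dist(cbind(runif(n), runif(n)))  # pairwise geographic distance

mantel.partial(toy_acoustic, toy_locality, toy_geo, method = "pearson", permutations = 999)
```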
## Multiple Regression on distance Matrices
- Model:
\begin{align*}
Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance
\end{align*}
- Response values were scaled to make effect sizes comparable across models
- Locality was coded as a pairwise binary matrix in which 0 means that the calls in a dyad belong to the same locality and 1 means they belong to different localities (a base R sketch of this coding is shown below)
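As an illustration, here is a minimal base R version of this binary coding; the analysis itself uses `PhenotypeSpace::binary_triangular_matrix()`.

```{r binary coding sketch, eval = FALSE}
# minimal sketch (illustration only): pairwise binary coding of locality
# membership; the analysis uses PhenotypeSpace::binary_triangular_matrix()
locality <- c("A", "A", "B", "C")

# 0 = calls in a dyad come from the same locality, 1 = different localities
bin_mat <- outer(locality, locality, FUN = "!=") * 1
dimnames(bin_mat) <- list(locality, locality)
bin_mat

# lower triangle as a 'dist' object, as used in the MRM models
as.dist(bin_mat)
```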
::: {.panel-tabset}
### 3 Regions
```{r, eval = TRUE}
# read cross-correlation matrix and MDS projection
xcorr <- readRDS("./data/processed/cross_correlation_matrix.RDS")
xcorr_mds <- readRDS("./data/processed/cross_correlation_MDS.RDS")

# keep only calls present in the curated metadata
sub_xcorr <- xcorr[rownames(xcorr) %in% dat$New_Name, colnames(xcorr) %in% dat$New_Name]
sub_xcorr_mds <- xcorr_mds[rownames(xcorr_mds) %in% dat$New_Name, ]

# match metadata order to the cross-correlation matrix
sub_dat <- dat[dat$New_Name %in% rownames(sub_xcorr_mds), ]
sub_dat <- sub_dat[match(rownames(sub_xcorr), sub_dat$New_Name), ]

# group membership vectors in the same order as the matrix rows
location <- sapply(rownames(sub_xcorr), function(x) sub_dat$Location.for.cluster[sub_dat$New_Name == x])
region3 <- sapply(rownames(sub_xcorr), function(x) sub_dat$Regions3[sub_dat$New_Name == x])
region4 <- sapply(rownames(sub_xcorr), function(x) sub_dat$Regions4[sub_dat$New_Name == x])
```
```{r, eval = FALSE}
loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))
samp_bi_tri <- as.dist(binary_triangular_matrix(group = region3))
geo_dist <- dist(sub_dat[ , c("Latitude.for.cluster","Longitude.for.cluster")])
rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)
colnames(rect_var) <- c("fourier_xc", "geo_distance", "location", "region3")
rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]
xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + region3, nperm = 10000, mat = rect_var)
saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation_3_regions.RDS")
```
```{r}
readRDS ("./data/processed/matrix_correlation_fourier_cross_correlation_3_regions.RDS" )
```
### 4 Regions
```{r, eval = FALSE}
loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))
samp_bi_tri <- as.dist(binary_triangular_matrix(group = region4))
geo_dist <- dist(sub_dat[ , c("Latitude.for.cluster","Longitude.for.cluster")])
rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)
colnames(rect_var) <- c("fourier_xc", "geo_distance", "location", "region4")
rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]
xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + region4, nperm = 10000, mat = rect_var)
saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation_4_regions.RDS")
```
```{r}
readRDS ("./data/processed/matrix_correlation_fourier_cross_correlation_4_regions.RDS" )
```
:::
# Simulation
## Simulate data with different patterns of geographic variation in call structure
- 26 localities (10 individuals each) along a longitudinal range
- Simulated patterns following [ Podos & Warren (2007) ](https://d1wqtxts1xzle7.cloudfront.net/53993414/The_Evolution_of_Geographic_Variation_in20170727-7403-7je2az-libre.pdf?1501183774=&response-content-disposition=inline%3B+filename%3DThe_Evolution_of_Geographic_Variation_in.pdf&Expires=1689718560&Signature=c9HxG2TqaqonkjDKlotzx1jrLJl25sX1V2Jj0DpDFTtDNsSi7ykNv~Ii4XFFe-yEh4fUF6HX0MYzT9ImbUJykmwIJXumf8VELyHp5tj5QYjR9FmpCftHXf1JdwBh-vCm2zaQ2VOh~iaHcMqTqjnDQR1W3FMQDU1dVeduLpkSma~8eLQ15e-SAHPYVwvSu-R1a2Q9KDibQyq84MbEFbLgbz9T3eBxZpsUAnxrGUrlHWo9CtbBEMtORbh9id4KG-RaAVyA38v4FtS8qofXX9zpcZNQILVGkhIwXeCbYkRVBbC9w5OieTVvIUQBXxEwZltgQ~ZWzbkzI1~wmHRnkbhYzQ__&Key-Pair-Id=APKAJLOHF5GGSLRBV4ZA)

```{r, fig.cap="Geographic location of individuals/localities in the simulated data set. Letters highlight locality ids", out.width="%100", eval = TRUE}
# seed to make it reproducible
set.seed(123)
# number of groups
n_locality <- length(unique(sub_dat$Location.for.cluster))
# number of individuals per group
n_indiv <- table(sub_dat$Location.for.cluster)
# create locality labels
localities <- sample(LETTERS[1:n_locality])
# simulated individuals per group
locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x], each = n_indiv[x])))
# simulate geographic coordinates along a gradient
lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) + rnorm(n = length(locality_label), sd = 0.1)
lat <- rnorm(n = length(locality_label), sd = 0.1)
# put all together in a data frame
data <- data.frame(locality = locality_label, lat, lon, color = viridis(n_locality)[as.numeric(as.factor(locality_label))])
# add sampling site (broader geographic mosaic)
data <- data[order(data$lon), ]
data$site_num <- 2
for(i in 2:nrow(data)){
data$site_num[i] <- if (data$locality[i-1] == data$locality[i]) data$site_num[i - 1] else data$site_num[i - 1] + 1
}
# make last location the same as the previous one
data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] - 1
data$region3 <- sample(letters)[floor(data$site_num / 2)]
# plot localities along longitude
plot(data[, c("lon", "lat")], col = data$color, cex = 1.8, ylim = range(data$lat) + c(-.2, .2), pch = as.numeric(as.factor(data$region3)))
lon_loc <- tapply(data$lon, data$locality, mean)
text(x = lon_loc, y = rep(0.3, n_locality), labels = names(lon_loc), cex = 2.5)
abline(v= 1:30 - 0.5)
```
## Simulate acoustic variation:
- Clinal: acoustic structure vector increases with longitude
- Dialect-type: acoustic structure vector similar within a locality but varies randomly between neighboring localities
- 2 levels: locality and region
- Regions created by grouping 2 adjacent localities under the same label
- Random: acoustic structure vector varies randomly between individuals regardless of locality or longitude
```{r, fig.cap="Types of vocal geographic variation that were simulated", out.height="100%", eval = TRUE}
# seed to make it reproducible
set.seed(123)
# simulate acoustic structure vector
# clinal variation
data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)
# dialect type variation
data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data), sd = 0.2)
data$dialect_site <- as.numeric(as.factor(data$region3)) + rnorm(n = nrow(data), sd = 0.2)
# random variation
data$random <- rnorm(n = nrow(data), sd = 0.2)
# sort so lines look good in plot
data <- data[order(data$lon), ]
lng_data <- stack(data[, c("clinal", "dialect", "dialect_site", "random")])
lng_data$locality <- data$locality
lng_data$lat <- data$lat
lng_data$lon <- data$lon
lng_data$lon_seq <- 1:nrow(data)
ggplot(lng_data, aes(x = lon_seq, y = values)) +
geom_line(color = "#3E4A89FF", linewidth = 1.6) +
facet_wrap(~ ind, scales = "free_y", nrow = 3) +
theme_classic(base_size = 25) +
labs(x = "Longitude", y = "Acoustic feature")
```
## Stats on simulated data
- Model:
\begin{align*}
Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance
\end{align*}
Acoustic structure is predicted from geographic distance and locality membership using Multiple Regression on distance Matrices.
The distance matrices are z-transformed so that all predictors have similar variance and effect sizes are comparable.
Variance for each predictor:
```{r, eval = TRUE}
# create distance matrices
loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$region3))
clinal_dist <- dist(data$clinal)
dialect_loc_dist <- dist(data$dialect)
dialect_site_dist <- dist(data$dialect_site)
random_dist <- dist(data$random)
geo_dist <- dist(data[, c("lat", "lon")])
# regression models
# set data format
rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)), dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)), geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary, site_member_binary = site_member_binary)
apply(rect_var,2, var)
```
### Dialect-type variation model at locality level
```{r, eval = TRUE}
nperm <- 100
# model predicting dialect variation
(mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))
```
### Dialect-type variation model at region level
```{r, eval = TRUE}
nperm <- 100
# model predicting dialect variation
(mod_dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist + loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))
```
### Clinal variation model
```{r, eval = TRUE}
# model predicting clinal variation
(mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))
```
### Random variation model
```{r, eval = TRUE}
# model predicting random variation
(mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))
```
### Combined results
```{r, eval = TRUE}
mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect, mod_dialect_site = mod_dialect_site, mod_random = mod_random)
names(mods) <- c("Clinal", "Dialect locality", "Dialect site", "Random")
estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
Y <- data.frame(mods[[x]]$coef[-1, ])
Y$rel_coef <- Y[, 1]/max(Y[, 1])
Y$mod <- names(mods)[x]
Y$predictor <- rownames(Y)
names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
return(Y)
}))
estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")
ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
high = viridis(10)[7], guide = "none") + geom_text(aes(label = round(coef,
3), color = signif)) + scale_color_manual(values = c("black",
"gray")) + labs(x = "", y = "", color = "P value") + theme_classic() +
theme(axis.text.x = element_text(color = "black", size = 11, angle = 30,
vjust = 0.8, hjust = 0.8))
```
## Replicate simulation 100 times
```{r, eval = FALSE}
nperm <- 100
reps <- 100
rep_models <- pbreplicate(reps, cl = 1, expr = {
# number of groups
n_locality <- length(unique(sub_dat$Location.for.cluster))
# number of individuals per group
n_indiv <- table(sub_dat$Location.for.cluster)
# create locality labels
localities <- sample(LETTERS[1:n_locality])
# simulated individuals per group
locality_label <-
unlist(sapply(1:length(localities), function(x)
rep(localities[x], each = n_indiv[x])))
# simulate geographic coordinates along a gradient
lon <-
unlist(sapply(1:n_locality, function(x)
rep(x, each = n_indiv[x]))) + rnorm(n = length(locality_label), sd = 0.1)
lat <- rnorm(n = length(locality_label), sd = 0.1)
# locality_label <- localities[n_indivs]
# put all together in a data frame
data <- data.frame(locality = locality_label, lat, lon)
# add region (broader geographic mosaic)
data <- data[order(data$lon), ]
data$site_num <- 2
for(i in 2:nrow(data)){
data$site_num[i] <- if (data$locality[i-1] == data$locality[i]) data$site_num[i - 1] else data$site_num[i - 1] + 1
}
# make last location the same as the previous one
data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] - 1
data$region3 <- sample(letters)[floor(data$site_num / 2)]
# simulate acoustic structure vector
# clinal variation
data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)
# dialect type variation locality level
data$dialect <-
as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data), sd = 0.2)
# dialect type variation
data$dialect_site <-
as.numeric(as.factor(data$region3)) + rnorm(n = nrow(data), sd = 0.2)
# random variation
data$random <- rnorm(n = nrow(data), sd = 0.2)
loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$region3))
clinal_dist <- dist(data$clinal)
dialect_loc_dist <- dist(data$dialect)
dialect_site_dist <- dist(data$dialect_site)
random_dist <- dist(data$random)
geo_dist <- dist(data[, c("lat", "lon")])
# regression models
# set data format
rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)), dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)), geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary, site_member_binary = site_member_binary)
# model predicting dialect variation
mod_dialect <-
MRM2(
formula = dialect_loc_dist ~ geo_dist + loc_member_binary + site_member_binary,
nperm = nperm,
mat = rect_var[, c("dialect_loc_dist", "geo_dist", "loc_member_binary", "site_member_binary")]
)
# model predicting clinal variation
mod_clinal <-
MRM2(
formula = clinal_dist ~ geo_dist + loc_member_binary + site_member_binary,
nperm = nperm,
mat = rect_var[, c("clinal_dist", "geo_dist", "loc_member_binary", "site_member_binary")]
)
# model predicting dialect site variation
dialect_site <-
MRM2(
formula = dialect_site_dist ~ geo_dist + loc_member_binary + site_member_binary,
nperm = nperm,
mat = rect_var[, c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")]
)
# model predicting random variation
mod_random <-
MRM2(
formula = random_dist ~ geo_dist + loc_member_binary + site_member_binary,
nperm = nperm,
mat = rect_var[, c("random_dist", "geo_dist", "loc_member_binary", "site_member_binary")]
)
mods <-
list(
mod_clinal = mod_clinal,
mod_dialect = mod_dialect,
dialect_site = dialect_site,
mod_random = mod_random
)
names(mods) <-
c("Clinal", "Dialect locality", "Dialect site", "Random")
estimates <-
do.call(rbind, lapply(seq_along(mods), function(x) {
Y <- data.frame(mods[[x]]$coef[-1, ])
Y$rel_coef <- Y[, 1] / max(Y[, 1])
Y$mod <- names(mods)[x]
Y$predictor <- rownames(Y)
names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
return(Y)
}))
estimates
}, simplify = FALSE)
coeffs <- do.call(cbind, lapply(rep_models, function(x)
x[, 1]))
ps <- do.call(cbind, lapply(rep_models, function(x)
x[, 2]))
estimates <- rep_models[[1]]
estimates$coef <- rowMeans(coeffs)
estimates$sd <- apply(coeffs, 1, sd)
estimates$p <- 1 - (apply(ps, 1, function(x) sum(x < 0.05)) / reps)
estimates$rel_coef <- estimates[, 1] / max(estimates[, 1])
estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
0)
estimates$signif <-
ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")
estimates$coef_sd <- paste0(round(estimates$coef, 3), "\n(", round(estimates$sd, 4), ")")
saveRDS(estimates,
"./data/processed/estimates_for_replicated_simulation_dialects.RDS")
```
```{r, fig.cap="Mean estimates (and SD) of replicated simulation regression results. P values "}
estimates <- readRDS("./data/processed/estimates_for_replicated_simulation_dialects.RDS")
ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
high = viridis(10)[7], guide = "none") + geom_text(aes(label = coef_sd, color = signif)) + scale_color_manual(values = c("black",
"gray")) + labs(x = "", y = "", color = "P value") + theme_classic() +
theme(axis.text.x = element_text(color = "black", size = 11, angle = 30,
vjust = 0.8, hjust = 0.8))
```
## Mantel correlogram at different distances
```{r, eval = FALSE}
geo_vect <- geo_dist[lower.tri(geo_dist)]
geo_vect <- geo_vect[!is.na(geo_vect)]
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)
dists <- 1:10
mantelcorrlg <- function(i) {
classes <- seq(0, max(geo_vect), i)
# length(classes)
# Run a mantel correlation on the data
correl_SPCC <- vegan::mantel.correlog(D.eco = xc_dist, D.geo = geo_dist,
break.pts = classes, cutoff = FALSE, r.type = "pearson", nperm = 1,
mult = "holm", progressive = FALSE)
mantel.res <- correl_SPCC$mantel.res[, 1:3]
mantel.res <- cbind(mantel.res, break.size = i)
return(mantel.res)
}
mantel_list <- warbleR:::pblapply_wrblr_int(dists, cl = 1, function(x) try(mantelcorrlg(x), silent = TRUE))
mantel_list <- mantel_list[sapply(mantel_list, class) != "try-error"]
mantel_list <- lapply(mantel_list, as.data.frame)
# # Save the correlation as an .RDS file so you don't have to
# run it multiple times in the future
saveRDS(mantel_list, paste0("./data/processed/correl_SPCC_several_distances.RDS"))
```
```{r, warning=FALSE}
mantel_list <- readRDS(paste0("./data/processed/correl_SPCC_several_distances.RDS"))
mantels_df <- as.data.frame(do.call(rbind, mantel_list))
combined_dists <- sort(unique(mantels_df$class.index))
# interpolate
interpol_mantel_list <- lapply(mantel_list, function(x) {
appx <- approx(x = x$class.index[x$n.dist > 0], y = x$Mantel.cor[x$n.dist >
0], xout = combined_dists, method = "linear")
return(appx$y)
})
interpol_mantel_mat <- do.call(cbind, interpol_mantel_list)
interpol_mantel_df <- data.frame(combined_dists, mean.cor = apply(interpol_mantel_mat,
1, mean, na.rm = TRUE), sd.cor = apply(interpol_mantel_mat, 1,
sd, na.rm = TRUE))
ggplot(data = interpol_mantel_df, mapping = aes(x = combined_dists,
y = mean.cor)) + geom_ribbon(data = interpol_mantel_df, aes(ymin = mean.cor -
sd.cor, ymax = mean.cor + sd.cor), fill = "gray", alpha = 0.3) +
geom_point(col = viridis(10, alpha = 0.5)[7], size = 2.5) + geom_line(col = viridis(10,
alpha = 0.5)[7], size = 2) + xlim(c(0, 4)) + ylim(c(-0.025,
0.2)) + geom_point(size = 3, color = "transparent") + theme_classic(base_size = 20) +
labs(x = "Pairwise geographic distance (Degrees)", y = "Correlation coefficient")
```
## Dialect discrimination with Random Forest
- Data split into training (75%) and testing (25%) subsets
- Accuracy measured on the test subset
### Discriminating localities
```{r, eval = FALSE, fig.width=10}
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <-
mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)
xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)
xcorr_mds <-
data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$location <-
sapply(xcorr_mds$sound.files, function(x)
sub_dat$Location.for.cluster[sub_dat$New_Name == x])
xcorr_mds$location <- as.factor(make.names(xcorr_mds$location,
unique = FALSE, allow_ = TRUE))
# Create data subsets
partition <- createDataPartition(
y = xcorr_mds$location,
times = 1,
p = 0.75,
list = FALSE
)
trainset <- xcorr_mds[partition,-1]
testset <- xcorr_mds[-partition,-1]
trcontrol <-
trainControl(
method = "repeatedcv",
number = 20,
savePredictions = TRUE,
sampling = "down",
classProbs = TRUE,
returnResamp = "all"
)
pred_model <- train(
location ~ .,
data = trainset,
method = "rf",
trControl = trcontrol,
metric = "Accuracy",
preProcess = "BoxCox",
proximity = TRUE
)
# save confusion matrix
conf_mat <-
confusionMatrix(predict(pred_model, testset), testset$location)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <-
sapply(conf_df$Reference, function(x)
sum(testset$location ==
x))
conf_df$proportion <- conf_df$Freq / conf_df$total
# fit model on complete data set
best_rf_model <- pred_model$finalModel
all_rf_model <- randomForest(
location ~ .,
data = xcorr_mds[,-1], # Your entire dataset
proximity = TRUE, # Include proximity matrix
ntree = best_rf_model$ntree, # Number of trees
mtry = best_rf_model$mtry, # Number of variables tried for splitting at each node
nodesize = best_rf_model$nodesize, # Minimum size of terminal nodes
maxnodes = best_rf_model$maxnodes # Maximum number of terminal nodes
)
saveRDS(
list(
pred_model_bb = pred_model,
conf_mat_bb = conf_mat,
confusion_df_bb = conf_df,
testset_bb = testset,
all_rf_model = all_rf_model
),
"./data/processed/random_forest_model_results_locations.RDS"
)
```
```{r}
rf_model_results <- readRDS("./data/processed/random_forest_model_results_locations.RDS")

# print confusion matrix results
rf_model_results$conf_mat_bb$overall

confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
  geom_tile() +
  coord_equal() +
  scale_fill_distiller(palette = "Greens", direction = 1) +
  geom_text(aes(label = round(proportion, 2)), color = "black", size = 3) +
  theme_classic() +
  theme(axis.text.x = element_text(color = "black", size = 11, angle = 30, vjust = 0.8, hjust = 0.8))
```
### Projecting random forest proximity with UMAP
```{r, warning=FALSE, eval = FALSE}
umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15, n_components = 2)
# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[,1], UMAP2 = umap_result$layout[,2], location = xcorr_mds$location, sound.files = xcorr_mds$sound.files)
umap_df$pred.locality <- predict(object = rf_model_results$all_rf_model, xcorr_mds[,grep("X", names(xcorr_mds))])
write.csv(umap_df, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv", row.names = FALSE)
```
```{r, warning=FALSE}
umap_loc_df <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv")
# Create a scatterplot
gg_umap_loc <- ggplot(umap_loc_df, aes(x = UMAP1, y = UMAP2, color = location, fill = location)) +
geom_point(size = 4) +
ylim(c(-7, 7)) +
scale_color_viridis_d(alpha = 0.3, begin = 0.1, end = 0.8) +
scale_fill_viridis_d(alpha = 0.2, begin = 0.1, end = 0.8) +
theme_classic(base_size = 20) +
labs(x ="UMAP1", y = "UMAP2", color = "Locality", fill = "Locality")
gg_umap_loc
```
## Discriminating regions
::: {.panel-tabset}
### 3 regions
```{r, eval = TRUE, fig.width=10}
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <-
mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)
xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)
xcorr_mds <-
data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$region3 <-
sapply(xcorr_mds$sound.files, function(x)
sub_dat$Regions3[sub_dat$New_Name == x])
xcorr_mds$region3 <-
as.factor(make.names(
xcorr_mds$region3,
unique = FALSE,
allow_ = TRUE
))
```
```{r, eval = FALSE, fig.width=10}
# Create data subsets
partition <- createDataPartition(
y = xcorr_mds$region3,
times = 1,
p = 0.75,
list = FALSE
)
trainset <- xcorr_mds[partition, -1]
testset <- xcorr_mds[-partition, -1]
trcontrol <-
trainControl(
method = "repeatedcv",
number = 20,
savePredictions = TRUE,
classProbs = TRUE,
returnResamp = "all",
sampling = "down"
)
pred_model <-
train(
region3 ~ .,
data = trainset,
method = "rf",
trControl = trcontrol,
metric = "Accuracy",
preProcess = "BoxCox",
proximity = TRUE
)
# save confusion matrix
conf_mat <-
confusionMatrix(predict(pred_model, testset), testset$region3)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <-
sapply(conf_df$Reference, function(x)
sum(testset$region3 ==
x))
conf_df$proportion <- conf_df$Freq / conf_df$total
# fit model on complete data set
best_rf_model <- pred_model$finalModel
all_rf_model <- randomForest(
region3 ~ .,
data = xcorr_mds[,-1], # Your entire dataset
proximity = TRUE, # Include proximity matrix
ntree = best_rf_model$ntree, # Number of trees
mtry = best_rf_model$mtry, # Number of variables tried for splitting at each node
nodesize = best_rf_model$nodesize, # Minimum size of terminal nodes
maxnodes = best_rf_model$maxnodes # Maximum number of terminal nodes
)
saveRDS(
list(
pred_model_bb = pred_model,
conf_mat_bb = conf_mat,
confusion_df_bb = conf_df,
testset_bb = testset,
all_rf_model = all_rf_model
),
"./data/processed/random_forest_model_regions_results_3_regions.RDS"
)
```
```{r}
rf_model_results_3 <- readRDS("./data/processed/random_forest_model_regions_results_3_regions.RDS")

# print confusion matrix results
rf_model_results_3$conf_mat_bb$overall

confusion_df <- rf_model_results_3$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
  geom_tile() +
  coord_equal() +
  scale_fill_distiller(palette = "Greens", direction = 1) +
  geom_text(aes(label = round(proportion, 2)), color = "black", size = 3) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(color = "black", angle = 30, vjust = 0.8, hjust = 0.8)) +
  scale_x_discrete(labels = c(central = "Central", northern = "Northern", southern = "Southern")) +
  scale_y_discrete(labels = c(central = "Central", northern = "Northern", southern = "Southern")) +
  labs(x = "Region", y = "Predicted region", fill = "Accuracy")
```
#### Projecting random forest proximity with UMAP
```{r, warning=FALSE, eval = FALSE}
umap_result <- umap(rf_model_results_3$all_rf_model$proximity, n_neighbors = 15, n_components = 2)
# Create a data frame with the UMAP results
umap_df_3 <- data.frame(UMAP1 = umap_result$layout[,1], UMAP2 = umap_result$layout[,2], region3 = xcorr_mds$region3, sound.files = xcorr_mds$sound.files)
umap_df_3$pred.site <- predict(object = rf_model_results_3$all_rf_model, xcorr_mds[,grep("X", names(xcorr_mds))])
write.csv(umap_df_3, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_3_regions.csv", row.names = FALSE)
```
```{r, warning=FALSE}
umap_df_3 <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_3_regions.csv")
umap_df_3$region3 <- factor(umap_df_3$region3, levels = c("central", "northern", "southern"))
# Create a scatterplot
gg_umap_site <- ggplot(umap_df_3, aes(x = UMAP1, y = UMAP2, color = region3, fill = region3, shape = region3)) +
geom_point(size = 4) +
ylim(c(-7, 7)) +
scale_color_viridis_d(alpha = 0, begin = 0.1, end = 0.8, labels = c(central= "Central", northern = "Northern", southern = "Southern")) +
scale_fill_viridis_d(alpha = 0.25, begin = 0.1, end = 0.8, labels = c(central= "Central", northern = "Northern", southern = "Southern")) +
scale_shape_manual(values = 21:24, labels = c(central= "Central", northern = "Northern", southern = "Southern")) +
guides(shape = guide_legend(override.aes = list(size = 5))) +
theme_classic(base_size = 20) +
labs(x ="UMAP1", y = "UMAP2", color = "Region", shape = "Region", fill = "Region")
gg_umap_site
```
#### Combined plots
```{r, warning=FALSE}
# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1, rel_widths = c(1.1, 1))
#
# cowplot::ggsave2("./output/UMAP_scatterplots.png", dpi = 300, width = 22, height = 7)
cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)
cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17, height = 17)
```
### 4 regions
```{r, eval = TRUE, fig.width=10}
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <-
mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)
xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)
xcorr_mds <-
data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$region4 <-
sapply(xcorr_mds$sound.files, function(x)
sub_dat$Regions4[sub_dat$New_Name == x])
xcorr_mds$region4 <-
as.factor(make.names(
xcorr_mds$region4,
unique = FALSE,
allow_ = TRUE
))
```
```{r, eval = FALSE, fig.width=10}
# Create data subsets
partition <- createDataPartition(
y = xcorr_mds$region4,
times = 1,
p = 0.75,
list = FALSE
)
trainset <- xcorr_mds[partition, -1]
testset <- xcorr_mds[-partition, -1]
trcontrol <-
trainControl(
method = "repeatedcv",
number = 20,
savePredictions = TRUE,
classProbs = TRUE,
returnResamp = "all",
sampling = "down"
)
pred_model <-
train(
region4 ~ .,
data = trainset,
method = "rf",
trControl = trcontrol,
metric = "Accuracy",
preProcess = "BoxCox",
proximity = TRUE
)
# save confusion matrix
conf_mat <-
confusionMatrix(predict(pred_model, testset), testset$region4)
conf_df <- as.data.frame(conf_mat$table)
conf_df$total <-
sapply(conf_df$Reference, function(x)
sum(testset$region4 ==
x))
conf_df$proportion <- conf_df$Freq / conf_df$total
# fit model on complete data set
best_rf_model <- pred_model$finalModel
all_rf_model <- randomForest(
region4 ~ .,
data = xcorr_mds[,-1], # Your entire dataset
proximity = TRUE, # Include proximity matrix
ntree = best_rf_model$ntree, # Number of trees
mtry = best_rf_model$mtry, # Number of variables tried for splitting at each node
nodesize = best_rf_model$nodesize, # Minimum size of terminal nodes
maxnodes = best_rf_model$maxnodes # Maximum number of terminal nodes
)
saveRDS(
list(
pred_model_bb = pred_model,
conf_mat_bb = conf_mat,
confusion_df_bb = conf_df,
testset_bb = testset,
all_rf_model = all_rf_model
),
"./data/processed/random_forest_model_regions_results_4_regions.RDS"
)
```
```{r}
rf_model_results_4 <- readRDS("./data/processed/random_forest_model_regions_results_4_regions.RDS")

# print confusion matrix results
rf_model_results_4$conf_mat_bb$overall

confusion_df <- rf_model_results_4$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
  geom_tile() +
  coord_equal() +
  scale_fill_distiller(palette = "Greens", direction = 1) +
  geom_text(aes(label = round(proportion, 2)), color = "black", size = 3) +
  theme_classic(base_size = 20) +
  theme(axis.text.x = element_text(color = "black", angle = 30, vjust = 0.8, hjust = 0.8)) +
  scale_x_discrete(labels = c(central.coast = "Central coast", central.inland = "Central inland", northern = "Northern", southern = "Southern")) +
  scale_y_discrete(labels = c(central.coast = "Central coast", central.inland = "Central inland", northern = "Northern", southern = "Southern")) +
  labs(x = "Region", y = "Predicted region", fill = "Accuracy")
```
#### Projecting random forest proximity with UMAP
```{r, warning=FALSE, eval = FALSE}
umap_result <- umap(rf_model_results_4$all_rf_model$proximity, n_neighbors = 15, n_components = 2)
# Create a data frame with the UMAP results
umap_df_4 <- data.frame(UMAP1 = umap_result$layout[,1], UMAP2 = umap_result$layout[, 2], region4 = xcorr_mds$region4, sound.files = xcorr_mds$sound.files)
umap_df_4$pred.site <- predict(object = rf_model_results_4$all_rf_model, xcorr_mds[,grep("X", names(xcorr_mds))])
write.csv(umap_df_4, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_4_regions.csv", row.names = FALSE)
```
```{r, warning=FALSE}
umap_df_4 <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_4_regions.csv")
umap_df_4$region4 <- factor(umap_df_4$region4, levels = c("central.coast", "central.inland", "northern", "southern"))
# Create a scatterplot
gg_umap_site <- ggplot(umap_df_4, aes(x = UMAP1, y = UMAP2, color = region4, fill = region4, shape = region4)) +
geom_point(size = 4) +
ylim(c(-7, 7)) +
scale_color_viridis_d(alpha = 0, begin = 0.1, end = 0.8, labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
scale_fill_viridis_d(alpha = 0.25, begin = 0.1, end = 0.8, labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
scale_shape_manual(values = 21:24, labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
guides(shape = guide_legend(override.aes = list(size = 5))) +
theme_classic(base_size = 20) +
labs(x ="UMAP1", y = "UMAP2", color = "Region", shape = "Region", fill = "Region")
gg_umap_site
```
#### Combined plots
```{r, warning=FALSE}
# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1, rel_widths = c(1.1, 1))
#
# cowplot::ggsave2("./output/UMAP_scatterplots.png", dpi = 300, width = 22, height = 7)
cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)
cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17, height = 17)
```
:::
# Spectrograms of close-to-centroid calls for each region
```{r, eval = TRUE, out.width="100%", fig.height=8, fig.width=6, fig.dpi=100}
# keep only those correctly predicted
# umap_df_4 <- umap_df_4[umap_df_4$region4 == umap_df_4$pred.site, ]
# find representative calls (closest to the UMAP centroid) for each region
y <- 96 #37, 50, 57, 47
# for ( i in 1:100){
# y <- y + 1
repren_calls_list <- lapply(unique(umap_df_4$region4), function(x){
X <- umap_df_4[umap_df_4$region4 == x, ]
centroid <- sapply(X[,c("UMAP1", "UMAP2")], mean)
X$centroid_dist <- sapply(seq_len(nrow(X)), function(y) dist(rbind(centroid, X[y, c("UMAP1", "UMAP2")])))
set.seed(y)
out <- X[order(X$centroid_dist, decreasing = FALSE), ][sample(1:30, 2), ]
})
repren_calls <- do.call(rbind, repren_calls_list)
repren_calls$selec <- 1
repren_calls$start <- rep(0, nrow(repren_calls))
repren_calls$end <- rep(0.5, nrow(repren_calls))
repren_calls <- repren_calls[c(1, 3, 5, 7, 2, 4, 6, 8), ]
lf <- rep(c(0.06, 0.5), each = 4)
rg <- rep(c(0.5, 0.95), each = 4)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- rep(horiz[-1], 2)
tp <- rep(horiz[-length(horiz)], 2)
m <- cbind(lf, rg, btm, tp)
m <- m[(1:8),]
lf <- c(rep(0.95, each = 4), 0.06, 0.5, 0, 0)
rg <- c(rep(1, each = 4), 0.5, 0.95, 0.05, 1)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- c(horiz[-1], 0.95, 0.95, 0.075, 0)
tp <- c(horiz[-length(horiz)], 1, 1, 0.95, 0.075)
m2 <- cbind(lf, rg, btm, tp)
m <- rbind(m, m2)
# png(filename = paste0("./output/spectrograms_by_site", y, ".png"), res = 300, width = 4000, height = 3000)
ss <- split.screen(figs = m)
# # testing layout screens
# for(i in 1:nrow(m))
# {screen(i)
# par( mar = rep(0, 4))
# plot(0.5, xlim = c(0,1), ylim = c(0,1), type = "n", axes = FALSE, xlab = "", ylab = "", xaxt = "n", yaxt = "n")
# box()
# text(x = 0.5, y = 0.5, labels = i)
# }
# close.screen(all.screens = T)
ovlp <- 95
colls <- seq(-110, 0, 5)
wl <- 200
lab_bg <- viridis(10, alpha = 0.25)[8]
lab_bg <- "#3A3B39"
# frequency label
screen(15)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(
1,
frame.plot = FALSE,
type = "n",
yaxt = "n",
xaxt = "n"
)
text(
x = 0.9,
y = 1,
"Frequency (kHz)",
srt = 90,
cex = 1.6
)
# time label
screen(16)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(
1,
frame.plot = FALSE,
type = "n",
yaxt = "n",
xaxt = "n"
)
text(x = 1,
y = 0.75,
"Time (s)",
cex = 1.6)
for (i in seq_len(nrow(repren_calls))) {
# print(i)
screen(i)
par(mar = c(0, 0, 0, 0))
warbleR:::spectro_wrblr_int2(
wave = read_wave(X = repren_calls,
index = i,
path = "./data/raw/consolidated_files/"
),
collevels = colls,
ovlp = ovlp,
wl = wl,
flim = c(1, 9),
palette = viridis,
axisX = FALSE,
axisY = FALSE,
grid = FALSE
)
# add frequency axis
if (i %in% 1:4)
axis(2, at = c(seq(2, 10, 2)))
# add time axis
if (i %in% c(4, 8))
axis(1)
}
vlabs <- c("Central\nMountains", "Southern", "Northern", "Central\nCoast")
par(mar = c(0, 0, 0, 0),
bg = lab_bg,
new = TRUE)
# add vertical labels
for (i in 9:12) {
screen(i)
# par(mar = c(0, 0, 0, 0))
par(mar = c(0, 0, 0, 0),
bg = lab_bg,
new = TRUE)
plot(
1,
frame.plot = FALSE,
type = "n",
yaxt = "n",
xaxt = "n"
)
text(
x = 1,
y = 1,
vlabs[i - 8],
srt = 270,
cex = 1.6,
col = "white",
font = 1
)
box()
}
# dev.off()
# }
```
<div class="alert alert-success">
# Takeaways {.unnumbered .unlisted}
- Acoustic similarity is higher among calls from the same region, but not among calls from the same locality
- Acoustic similarity decreases with geographic distance (although this may be an artifact of the analysis)
- Acoustic similarity drops sharply beyond ~2 degrees of geographic distance (?)
- Calls are poorly discriminated by locality (accuracy = 0.45)
- Calls are discriminated by region with good accuracy (0.77)
</div>
<!-- '---' adds a gray horizontal line -->
# To do list
- rename localities to "region-#" in UMAP scatterplot
---
<!-- add packages used, system details and versions -->
# Session information {.unnumbered .unlisted}
```{r session info, echo=F}
sessionInfo()
```