Acoustic analysis

Cape parrot vocal dialects

Author

Marcelo Araya-Salas

Published

June 13, 2024

Source code and data found at https://github.com/maRce10/cape_parrot_dialects

1 Purpose

Measure acoustic structure of cape parrot contact calls
Compare acoustic dissimilarity between individuals from different localities

2 Report overview

Acoustic analysis
Statistical analysis

Load packages

Code

# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code

# install/ load packages
sketchy::load_packages(packages = c("knitr", "formatR", "viridis",
    "warbleR", github = "maRce10/PhenotypeSpace", "ggplot2", "randomForest",
    "mlbench", "caret", "pbapply", "vegan", "umap", "ecodist", "maRce10/ohun"))

Warning: replacing previous import 'proxy::as.dist' by 'stats::as.dist' when
loading 'PhenotypeSpace'

Warning: replacing previous import 'proxy::dist' by 'stats::dist' when loading
'PhenotypeSpace'

Code

source("~/Dropbox/Projects/geographic_call_variation_yellow-naped_amazon/scripts/MRM2.R")

3 Acoustic analysis

3.1 Format data

Code

dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_UPDATED_dec-2023.csv")

dat <- dat[grep("cape", dat$Species, ignore.case = T), ]

names(dat)[grep("Regions..4.", names(dat))] <- "Regions4"
names(dat)[grep("Regions..3.", names(dat))] <- "Regions3"

table(dat$Species)

nrow(dat)

# all(dat$New_Name %in% st$sound.files) table(dat$Sorted)

# dat <- dat[dat$Sorted != 'delete', ]

unique(dat$New_Name)
ohun::feature_acoustic_data(path = "./data/raw/consolidated_files")

warbleR_options(path = "./data/raw/consolidated_files")
st <- selection_table(whole.recs = TRUE)

st <- st[st$sound.files %in% dat$New_Name, ]
nrow(st)
nrow(dat)

st$sorted <- sapply(st$sound.files, function(x) dat$Sorted[dat$New_Name ==
    x][1])

table(st$sorted)

# spectrograms(st, wl = 512, flim = c(0, 10), dest.path =
# './data/processed/spectrograms', pal = viridis, collevels =
# seq(-100, 0, 5)) spectrograms(st[st$sorted == 'unsorted', ],
# wl = 512, flim = c(0, 10), dest.path =
# './data/processed/unsorted_spectrograms', pal = viridis,
# collevels = seq(-100, 0, 5)) tailor_sels(st, auto.next = TRUE,
# flim = c(0, 8), collevels = seq(-100, 0, 5))

3.2 Make selection table

Code

sel_tab <- selection_table(path = "./data/raw/consolidated_files/",
    whole.recs = TRUE)

tailored <- read.csv("./data/raw/consolidated_files/seltailor_output.csv")

tailored <- tailored[tailored$tailored == "y", ]


non_tailored <- sel_tab[!sel_tab$sound.files %in% tailored$sound.files,
    ]
non_tailored$tailored <- "n"


tailored$top.freq[is.na(tailored$bottom.freq)] <- non_tailored$bottom.freq <- min(tailored$bottom.freq,
    na.rm = TRUE)
tailored$top.freq[is.na(tailored$top.freq)] <- non_tailored$top.freq <- max(tailored$top.freq,
    na.rm = TRUE)

comm_names <- intersect(names(tailored), names(non_tailored))

all_sels <- rbind(tailored[, comm_names], non_tailored[, comm_names])

write.csv(all_sels, "./data/processed/selection_table_entire_sound_files.csv",
    row.names = FALSE)

3.3 Run cross-correlation

Code

sel_tab <- read.csv("./data/processed/selection_table_entire_sound_files.csv")

xcorr <- cross_correlation(X = sel_tab, path = "./data/raw/consolidated_files/",
    method = 2, parallel = 1)

rownames(xcorr) <- gsub("-1$", "", rownames(xcorr))

colnames(xcorr) <- gsub("-1$", "", colnames(xcorr))

saveRDS(xcorr, "./data/processed/cross_correlation_matrix.RDS")

# less than 0.1% were undefined
sum(is.infinite(xcorr))/length(xcorr)

# convert infinite to mean xcorr
xcorr[is.infinite(xcorr)] <- mean(xcorr[!is.infinite(xcorr) & xcorr <
    1])

xcorr_mds <- cmdscale(d = as.dist(xcorr), k = 2)

rownames(xcorr_mds) <- gsub("-1$", "", rownames(xcorr_mds))

saveRDS(xcorr_mds, "./data/processed/cross_correlation_MDS.RDS")

4 Data description

Code

# dat <-
# read.csv('./data/raw/consolidated_sound_files_CPV_contact_calls_CURATED.csv')

# add data from second location
dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_UPDATED_dec-2023.csv")

dat <- dat[grep("cape", dat$Species, ignore.case = T), ]

names(dat)[grep("Regions..4.", names(dat))] <- "Regions4"
names(dat)[grep("Region..3.", names(dat))] <- "Regions3"

names(dat) <- gsub("..cluster.", ".for.cluster", names(dat))

dat <- dat[grepl("cape", dat$Species, ignore.case = T) & !is.na(dat$Location.for.cluster) &
    !is.na(dat$Longitude.for.cluster) & !is.na(dat$Latitude.for.cluster),
    ]

# dat$region3 <- sapply(dat$New_Name, function(x)
# dat_loc2$region3s[dat_loc2$New_Name == x]) dat$region <-
# dat$site

dat$Species <- "Cape parrot"

6287 calls
13 localities

3 regions
Number of localities per region:

Code

agg <- aggregate(Location.for.cluster ~ Regions3, dat, function(x) length(unique(x)))

names(agg) <- c("region", "localities")

agg$calls <- aggregate(Location.for.cluster ~ Regions3, dat, length)[,
    2]

agg$localities <- aggregate(Location.for.cluster ~ Regions3, dat,
    function(x) paste(unique(x), collapse = "-"))[, 2]

agg

region	localities	calls
central	Port St John’s-Polela Sawmill-iGxalingenwa Nature Reserve-Marutswa Forest-Salt Spring Farm-Hoha Forest	1769
northern	Amorentia	717
southern	Hogsback-Schwarzwald Forest-Alice Pecan Orchard-Stutterheim-King William’s Town-Baddaford Farm	3801

Region of each locality:

Code

agg <- aggregate(Regions3 ~ Location.for.cluster, dat, function(x) paste(unique(x),
    collapse = "-"))

names(agg) <- c("localities", "region")

agg[order(agg$region), 2:1]

	region	localities
5	central	Hoha Forest
6	central	iGxalingenwa Nature Reserve
8	central	Marutswa Forest
9	central	Polela Sawmill
10	central	Port St John’s
11	central	Salt Spring Farm
2	northern	Amorentia
1	southern	Alice Pecan Orchard
3	southern	Baddaford Farm
4	southern	Hogsback
7	southern	King William’s Town
12	southern	Schwarzwald Forest
13	southern	Stutterheim

4 regions
Number of localities per region:

Code

agg <- aggregate(Location.for.cluster ~ Regions4, dat, function(x) length(unique(x)))

names(agg) <- c("region", "localities")

agg$calls <- aggregate(Location.for.cluster ~ Regions4, dat, length)[,
    2]

agg$localities <- aggregate(Location.for.cluster ~ Regions4, dat,
    function(x) paste(unique(x), collapse = "-"))[, 2]

agg

region	localities	calls
central coast	Port St John’s	687
central inland	Polela Sawmill-iGxalingenwa Nature Reserve-Marutswa Forest-Salt Spring Farm-Hoha Forest	1082
northern	Amorentia	717
southern	Hogsback-Schwarzwald Forest-Alice Pecan Orchard-Stutterheim-King William’s Town-Baddaford Farm	3801

Region of each locality:

Code

agg <- aggregate(Regions4 ~ Location.for.cluster, dat, function(x) paste(unique(x),
    collapse = "-"))

names(agg) <- c("localities", "region")

agg[order(agg$region), 2:1]

	region	localities
10	central coast	Port St John’s
5	central inland	Hoha Forest
6	central inland	iGxalingenwa Nature Reserve
8	central inland	Marutswa Forest
9	central inland	Polela Sawmill
11	central inland	Salt Spring Farm
2	northern	Amorentia
1	southern	Alice Pecan Orchard
3	southern	Baddaford Farm
4	southern	Hogsback
7	southern	King William’s Town
12	southern	Schwarzwald Forest
13	southern	Stutterheim

5 Statistical analysis

Two approaches:

Multiple Regression on distance Matrices
Partial Mantel test

5.1 Multiple Regression on distance Matrices

Model:
\[\begin{align*} Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance \end{align*}\]
Response values scaled to make effect sizes comparable across models
Locality was coded as pairwise binary matrix in which 0 means that calls in a dyad belong to the same locality and 1 means calls belong to different locality

Code

xcorr <- readRDS("./data/processed/cross_correlation_matrix.RDS")

xcorr_mds <- readRDS("./data/processed/cross_correlation_MDS.RDS")

sub_xcorr <- xcorr[rownames(xcorr) %in% dat$New_Name, colnames(xcorr) %in%
    dat$New_Name]
sub_xcorr_mds <- xcorr_mds[rownames(xcorr_mds) %in% dat$New_Name,
    ]

sub_dat <- dat[dat$New_Name %in% rownames(sub_xcorr_mds), ]
sub_dat <- sub_dat[match(rownames(sub_xcorr), sub_dat$New_Name), ]

location <- sapply(rownames(sub_xcorr), function(x) sub_dat$Location.for.cluster[sub_dat$New_Name ==
    x])
region3 <- sapply(rownames(sub_xcorr), function(x) sub_dat$Regions3[sub_dat$New_Name ==
    x])
region4 <- sapply(rownames(sub_xcorr), function(x) sub_dat$Regions4[sub_dat$New_Name ==
    x])

Code

loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))

samp_bi_tri <- as.dist(binary_triangular_matrix(group = region3))

geo_dist <- dist(sub_dat[, c("Latitude.for.cluster", "Longitude.for.cluster")])

rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)

colnames(rect_var) <- c("fourier_xc", "geo_distance", "location",
    "region3")

rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]

xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + region3,
    nperm = 10000, mat = rect_var)

saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation_3_regions.RDS")

Code

readRDS("./data/processed/matrix_correlation_fourier_cross_correlation_3_regions.RDS")

$coef
              fourier_xc  pval
Int          0.508817365 1e+00
geo_distance 0.006329931 1e-04
location     0.015584051 1e-04
region3      0.021221681 1e-04

$r.squared
        R2       pval 
0.06189599 0.00010000 

$F.test
          F      F.pval 
433482.8893      0.0001 

$n
[1] 6279

Code

loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))

samp_bi_tri <- as.dist(binary_triangular_matrix(group = region4))

geo_dist <- dist(sub_dat[, c("Latitude.for.cluster", "Longitude.for.cluster")])

rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)

colnames(rect_var) <- c("fourier_xc", "geo_distance", "location",
    "region4")

rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]

xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + region4,
    nperm = 10000, mat = rect_var)

saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation_4_regions.RDS")

Code

readRDS("./data/processed/matrix_correlation_fourier_cross_correlation_4_regions.RDS")

$coef
              fourier_xc  pval
Int          0.508817365 1e+00
geo_distance 0.005126500 1e-04
location     0.007441299 3e-04
region4      0.037044395 1e-04

$r.squared
        R2       pval 
0.06624306 0.00010000 

$F.test
          F      F.pval 
466086.9749      0.0001 

$n
[1] 6279

6 Simulation

6.1 Simulate data with different patterns of geographic variation in call structure

26 localities (10 individuals each) along a longitudinal range
Simulated patterns following Podos & Warren (2007)

Code

# seed to make it reproducible
set.seed(123)

# number of groups
n_locality <- length(unique(sub_dat$Location.for.cluster))

# number of individuals per group
n_indiv <- table(sub_dat$Location.for.cluster)

# create locality labels
localities <- sample(LETTERS[1:n_locality])

# simulated individuals per group
locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x],
    each = n_indiv[x])))

# simulate geographic coordinates along a gradient
lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) +
    rnorm(n = length(locality_label), sd = 0.1)
lat <- rnorm(n = length(locality_label), sd = 0.1)


# put all together in a data frame
data <- data.frame(locality = locality_label, lat, lon, color = viridis(n_locality)[as.numeric(as.factor(locality_label))])

# add sampling site (broader geographic mozaic)
data <- data[order(data$lon), ]

data$site_num <- 2

for (i in 2:nrow(data)) {

    data$site_num[i] <- if (data$locality[i - 1] == data$locality[i])
        data$site_num[i - 1] else data$site_num[i - 1] + 1

}

# make last location the same as th previous one
data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] -
    1

data$region3 <- sample(letters)[floor(data$site_num/2)]

# plot localities along longitude
plot(data[, c("lon", "lat")], col = data$color, cex = 1.8, ylim = range(data$lat) +
    c(-0.2, 0.2), pch = as.numeric(as.factor(data$region3)))

lon_loc <- tapply(data$lon, data$locality, mean)
text(x = lon_loc, y = rep(0.3, n_locality), labels = names(lon_loc),
    cex = 2.5)

abline(v = 1:30 - 0.5)

Geographic location of individuals/localities in the simulated data set. Letters highlight locality ids

6.2 Simulate acoustic variation:

Clinal: acoustic structure vector increases with longitude
Dialect-type: acoustic structure vector similar within a locality but varies randomly between neighnoring localities
- 2 levels: locality and region
- Region create by grouping 2 adjacent localities with the same label
Random: acoustic structure vector varies randomly between individuals regardless of locality or longitude

Code

# seed to make it reproducible
set.seed(123)

# simulate acoustic structure vector clinal variation
data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)

# dialect type variation
data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data),
    sd = 0.2)

data$dialect_site <- as.numeric(as.factor(data$region3)) + rnorm(n = nrow(data),
    sd = 0.2)

# random variation
data$random <- rnorm(n = nrow(data), sd = 0.2)

# sort so lines look good in plot
data <- data[order(data$lon), ]

lng_data <- stack(data[, c("clinal", "dialect", "dialect_site", "random")])

lng_data$locality <- data$locality
lng_data$lat <- data$lat
lng_data$lat <- data$lon
lng_data$lon_seq <- 1:nrow(data)

ggplot(lng_data, aes(x = lon_seq, y = values)) + geom_line(color = "#3E4A89FF",
    linewidth = 1.6) + facet_wrap(~ind, scales = "free_y", nrow = 3) +
    theme_classic(base_size = 25) + labs(x = "Longitude", y = "Acoustic feature")

Types of vocal geographic variation that were simulated

6.3 Stats on simulated data

Model:
\[\begin{align*} Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance \end{align*}\]

Predict acoustic structure based on geographic distance and locality membership using multiple Regression on distance matrices

Data is z-transformed so all predictors have similar variance and effect sizes are comparable.

Variance for each predictor:

Code

# create distance matrices
loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$region3))
clinal_dist <- dist(data$clinal)
dialect_loc_dist <- dist(data$dialect)
dialect_site_dist <- dist(data$dialect_site)
random_dist <- dist(data$random)
geo_dist <- dist(data[, c("lat", "lon")])

# regression models set data format
rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)),
    dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)),
    geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary,
    site_member_binary = site_member_binary)

apply(rect_var, 2, var)

       clinal_dist   dialect_loc_dist  dialect_site_dist        random_dist 
         1.1978350          0.8737510          1.0052258          1.0051923 
          geo_dist  loc_member_binary site_member_binary 
         1.1976810          0.1721933          0.1930169

6.3.1 Dialect-type variation model at locality level

Code

nperm <- 100

# model predicting dialect variation
(mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))

$coef
                   dialect_loc_dist pval
Int                      -0.9761392 0.01
geo_dist                  0.0374462 0.01
loc_member_binary         1.8678865 0.01
site_member_binary       -0.6915435 0.01

$r.squared
       R2      pval 
0.3358995 0.0100000 

$F.test
         F     F.pval 
3331516.48       0.01 

$n
[1] 6287

6.3.2 Dialect-type variation model at region level

Code

nperm <- 100

# model predicting dialect variation
(mod_dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist +
    loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[,
    c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))

$coef
                   dialect_site_dist pval
Int                       -1.3674760 0.01
geo_dist                  -0.3500067 0.01
loc_member_binary         -0.2026910 0.01
site_member_binary         2.0974315 0.01

$r.squared
       R2      pval 
0.4373079 0.0100000 

$F.test
         F     F.pval 
5118974.83       0.01 

$n
[1] 6287

6.3.3 Clinal variation model

Code

# model predicting clinal variation
(mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))

$coef
                    clinal_dist pval
Int                 0.016170807 0.01
geo_dist            0.999862672 0.01
loc_member_binary  -0.017742887 0.01
site_member_binary -0.002608847 0.07

$r.squared
       R2      pval 
0.9904731 0.0100000 

$F.test
          F      F.pval 
6.84793e+08 1.00000e-02 

$n
[1] 6287

6.3.4 Random variation model

Code

# model predicting random variation
(mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))

$coef
                    random_dist pval
Int                -0.006341367 0.47
geo_dist            0.001651007 0.75
loc_member_binary   0.025101261 0.11
site_member_binary -0.016276746 0.23

$r.squared
         R2        pval 
3.42318e-05 2.50000e-01 

$F.test
       F   F.pval 
225.4816   0.2500 

$n
[1] 6287

6.3.5 Combined results

Code

mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect, mod_dialect_site = mod_dialect_site,
    mod_random = mod_random)

names(mods) <- c("Clinal", "Dialect locality", "Dialect site", "Random")

estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
    Y <- data.frame(mods[[x]]$coef[-1, ])
    Y$rel_coef <- Y[, 1]/max(Y[, 1])
    Y$mod <- names(mods)[x]
    Y$predictor <- rownames(Y)
    names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
    return(Y)
}))

estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
    0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")

ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
    geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
    high = viridis(10)[7], guide = "none") + geom_text(aes(label = round(coef,
    3), color = signif)) + scale_color_manual(values = c("black",
    "gray")) + labs(x = "", y = "", color = "P value") + theme_classic() +
    theme(axis.text.x = element_text(color = "black", size = 11, angle = 30,
        vjust = 0.8, hjust = 0.8))

6.4 Replicate simulation 100 times

Code

nperm <- 100
reps <- 100
rep_models <- pbreplicate(reps, cl = 1, expr = {
    # number of groups
    n_locality <- length(unique(sub_dat$Location.for.cluster))

    # number of individuals per group
    n_indiv <- table(sub_dat$Location.for.cluster)

    # create locality labels
    localities <- sample(LETTERS[1:n_locality])

    # simulated individuals per group
    locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x],
        each = n_indiv[x])))

    # simulate geographic coordinates along a gradient
    lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) +
        rnorm(n = length(locality_label), sd = 0.1)
    lat <- rnorm(n = length(locality_label), sd = 0.1)

    # locality_label <- localities[n_indivs]

    # put all together in a data frame
    data <- data.frame(locality = locality_label, lat, lon)

    # add region (broader geographic mozaic)
    data <- data[order(data$lon), ]

    data$site_num <- 2

    for (i in 2:nrow(data)) {

        data$site_num[i] <- if (data$locality[i - 1] == data$locality[i])
            data$site_num[i - 1] else data$site_num[i - 1] + 1

    }

    # make last location the same as the previous one
    data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] -
        1

    data$region3 <- sample(letters)[floor(data$site_num/2)]

    # simulate acoustic structure vector clinal variation
    data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)

    # dialect type variation locality level
    data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data),
        sd = 0.2)

    # dialect type variation
    data$dialect_site <- as.numeric(as.factor(data$region3)) + rnorm(n = nrow(data),
        sd = 0.2)

    # random variation
    data$random <- rnorm(n = nrow(data), sd = 0.2)

    loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
    site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$region3))
    clinal_dist <- dist(data$clinal)
    dialect_loc_dist <- dist(data$dialect)
    dialect_site_dist <- dist(data$dialect_site)
    random_dist <- dist(data$random)
    geo_dist <- dist(data[, c("lat", "lon")])

    # regression models set data format
    rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)),
        dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)),
        geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary,
        site_member_binary = site_member_binary)

    # model predicting dialect variation
    mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    # model predicting clinal variation
    mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    # # model predicting dialect site variation
    dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist +
        loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[,
        c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")])

    # model predicting random variation
    mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect,
        dialect_site = dialect_site, mod_random = mod_random)

    names(mods) <- c("Clinal", "Dialect locality", "Dialect site",
        "Random")

    estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
        Y <- data.frame(mods[[x]]$coef[-1, ])
        Y$rel_coef <- Y[, 1]/max(Y[, 1])
        Y$mod <- names(mods)[x]
        Y$predictor <- rownames(Y)
        names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
        return(Y)
    }))

    estimates
}, simplify = FALSE)

coeffs <- do.call(cbind, lapply(rep_models, function(x) x[, 1]))
ps <- do.call(cbind, lapply(rep_models, function(x) x[, 2]))

estimates <- rep_models[[1]]
estimates$coef <- rowMeans(coeffs)
estimates$sd <- apply(coeffs, 1, sd)
estimates$p <- 1 - (apply(ps, 1, function(x) sum(x < 0.05))/reps)
estimates$rel_coef <- estimates[, 1]/max(estimates[, 1])
estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
    0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")

estimates$coef_sd <- paste0(round(estimates$coef, 3), "\n(", round(estimates$sd,
    4), ")")

saveRDS(estimates, "./data/processed/estimates_for_replicated_simulation_dialects.RDS")

Code

estimates <- readRDS("./data/processed/estimates_for_replicated_simulation_dialects.RDS")

ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
    geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
    high = viridis(10)[7], guide = "none") + geom_text(aes(label = coef_sd,
    color = signif)) + scale_color_manual(values = c("black", "gray")) +
    labs(x = "", y = "", color = "P value") + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

Mean estimates (and SD) of replicated simulation regression results. P values

6.5 Mantel correlogram at different distances

Code

geo_vect <- geo_dist[lower.tri(geo_dist)]
geo_vect <- geo_vect[!is.na(geo_vect)]

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)


dists <- 1:10

mantelcorrlg <- function(i) {

    classes <- seq(0, max(geo_vect), i)
    # length(classes)

    # Run a mantel correlation on the data
    correl_SPCC <- vegan::mantel.correlog(D.eco = xc_dist, D.geo = geo_dist,
        break.pts = classes, cutoff = FALSE, r.type = "pearson", nperm = 1,
        mult = "holm", progressive = FALSE)

    mantel.res <- correl_SPCC$mantel.res[, 1:3]
    mantel.res <- cbind(mantel.res, break.size = i)

    return(mantel.res)

}

mantel_list <- warbleR:::pblapply_wrblr_int(dists, cl = 1, function(x) try(mantelcorrlg(x),
    silent = TRUE))

mantel_list <- mantel_list[sapply(mantel_list, class) != "try-error"]


mantel_list <- lapply(mantel_list, as.data.frame)

# # Save the correlation as an .RDS file so you don't have to
# run it multiple times in the future
saveRDS(mantel_list, paste0("./data/processed/correl_SPCC_several_distances.RDS"))

Code

mantel_list <- readRDS(paste0("./data/processed/correl_SPCC_several_distances.RDS"))

mantels_df <- as.data.frame(do.call(rbind, mantel_list))

combined_dists <- sort(unique(mantels_df$class.index))

# interpolate
interpol_mantel_list <- lapply(mantel_list, function(x) {

    appx <- approx(x = x$class.index[x$n.dist > 0], y = x$Mantel.cor[x$n.dist >
        0], xout = combined_dists, method = "linear")

    return(appx$y)
})


interpol_mantel_mat <- do.call(cbind, interpol_mantel_list)


interpol_mantel_df <- data.frame(combined_dists, mean.cor = apply(interpol_mantel_mat,
    1, mean, na.rm = TRUE), sd.cor = apply(interpol_mantel_mat, 1,
    sd, na.rm = TRUE))


ggplot(data = interpol_mantel_df, mapping = aes(x = combined_dists,
    y = mean.cor)) + geom_ribbon(data = interpol_mantel_df, aes(ymin = mean.cor -
    sd.cor, ymax = mean.cor + sd.cor), fill = "gray", alpha = 0.3) +
    geom_point(col = viridis(10, alpha = 0.5)[7], size = 2.5) + geom_line(col = viridis(10,
    alpha = 0.5)[7], size = 2) + xlim(c(0, 4)) + ylim(c(-0.025, 0.2)) +
    geom_point(size = 3, color = "transparent") + theme_classic(base_size = 20) +
    labs(x = "Pairwise geographic distance (Degrees)", y = "Correlation coefficient")

6.6 Dialect discrimination with Random Forest

Data split in training and testing subsets (75% and 25% respectively)
Accuracy measured on test subset

6.6.1 Discriminating localities

Code

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <-
    mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <-
    data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$location <-
    sapply(xcorr_mds$sound.files, function(x)
        sub_dat$Location.for.cluster[sub_dat$New_Name == x])

xcorr_mds$location <- as.factor(make.names(xcorr_mds$location,
                                           unique = FALSE, allow_ = TRUE))


# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$location,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition,-1]
testset <- xcorr_mds[-partition,-1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    location ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "BoxCox",
    proximity = TRUE    
)

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$location)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$location ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  location ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)

saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_results_locations.RDS"
)

Code

rf_model_results <- readRDS("./data/processed/random_forest_model_results_locations.RDS")

# print confusion matrix results
rf_model_results$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
     0.4205488      0.3538397      0.3959564      0.4454395      0.4135290 
AccuracyPValue  McnemarPValue 
     0.2947211            NaN

Code

confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

6.6.2 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], location = xcorr_mds$location, sound.files = xcorr_mds$sound.files)

umap_df$pred.locality <- predict(object = rf_model_results$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv",
    row.names = FALSE)

Code

umap_loc_df <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv")

# Create a scatterplot
gg_umap_loc <- ggplot(umap_loc_df, aes(x = UMAP1, y = UMAP2, color = location,
    fill = location)) + geom_point(size = 4) + ylim(c(-7, 7)) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "UMAP1",
    y = "UMAP2", color = "Locality", fill = "Locality")

gg_umap_loc

6.7 Discriminating regions

Code

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <- data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$region3 <- sapply(xcorr_mds$sound.files, function(x) sub_dat$Regions3[sub_dat$New_Name ==
    x])

xcorr_mds$region3 <- as.factor(make.names(xcorr_mds$region3, unique = FALSE,
    allow_ = TRUE))

Code

# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$region3,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition, -1]
testset <- xcorr_mds[-partition, -1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        classProbs = TRUE,
        returnResamp = "all",
        sampling = "down"
    )

pred_model <-
    train(
        region3 ~ .,
        data = trainset,
        method = "rf",
        trControl = trcontrol,
        metric = "Accuracy",
        preProcess = "BoxCox", 
        proximity = TRUE
    )

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$region3)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$region3 ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  region3 ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)


saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_regions_results_3_regions.RDS"
)

Code

rf_model_results_3 <- readRDS("./data/processed/random_forest_model_regions_results_3_regions.RDS")

# print confusion matrix results
rf_model_results_3$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  8.236792e-01   6.927952e-01   8.039166e-01   8.422237e-01   6.047104e-01 
AccuracyPValue  McnemarPValue 
  1.107149e-78   2.174519e-15

Code

confusion_df <- rf_model_results_3$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic(base_size = 20) + theme(axis.text.x = element_text(color = "black",
    angle = 30, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels = c(central = "Central",
    northern = "Northern", southern = "Southern")) + scale_y_discrete(labels = c(central = "Central",
    northern = "Northern", southern = "Southern")) + labs(x = "Region",
    y = "Predicted region", fill = "Accuracy")

6.7.0.1 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results_3$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)


# Create a data frame with the UMAP results
umap_df_3 <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], region3 = xcorr_mds$region3, sound.files = xcorr_mds$sound.files)

umap_df_3$pred.site <- predict(object = rf_model_results_3$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df_3, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_3_regions.csv",
    row.names = FALSE)

Code

umap_df_3 <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_3_regions.csv")


umap_df_3$region3 <- factor(umap_df_3$region3, levels = c("central",
    "northern", "southern"))
# Create a scatterplot

gg_umap_site <- ggplot(umap_df_3, aes(x = UMAP1, y = UMAP2, color = region3,
    fill = region3, shape = region3)) + geom_point(size = 4) + ylim(c(-7,
    7)) + scale_color_viridis_d(alpha = 0, begin = 0.1, end = 0.8,
    labels = c(central = "Central", northern = "Northern", southern = "Southern")) +
    scale_fill_viridis_d(alpha = 0.25, begin = 0.1, end = 0.8, labels = c(central = "Central",
        northern = "Northern", southern = "Southern")) + scale_shape_manual(values = 21:24,
    labels = c(central = "Central", northern = "Northern", southern = "Southern")) +
    guides(shape = guide_legend(override.aes = list(size = 5))) +
    theme_classic(base_size = 20) + labs(x = "UMAP1", y = "UMAP2",
    color = "Region", shape = "Region", fill = "Region")


gg_umap_site

6.7.0.2 Combined plots

Code

# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1,
# rel_widths = c(1.1, 1))
# cowplot::ggsave2('./output/UMAP_scatterplots.png', dpi = 300,
# width = 22, height = 7)

cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)

Code

cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17,
    height = 17)

Code

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <- data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$region4 <- sapply(xcorr_mds$sound.files, function(x) sub_dat$Regions4[sub_dat$New_Name ==
    x])

xcorr_mds$region4 <- as.factor(make.names(xcorr_mds$region4, unique = FALSE,
    allow_ = TRUE))

Code

# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$region4,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition, -1]
testset <- xcorr_mds[-partition, -1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        classProbs = TRUE,
        returnResamp = "all",
        sampling = "down"
    )

pred_model <-
    train(
        region4 ~ .,
        data = trainset,
        method = "rf",
        trControl = trcontrol,
        metric = "Accuracy",
        preProcess = "BoxCox", 
        proximity = TRUE
    )

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$region4)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$region4 ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  region4 ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)


saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_regions_results_4_regions.RDS"
)

Code

rf_model_results_4 <- readRDS("./data/processed/random_forest_model_regions_results_4_regions.RDS")

# print confusion matrix results
rf_model_results_4$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  8.019108e-01   6.823047e-01   7.813208e-01   8.213651e-01   6.050955e-01 
AccuracyPValue  McnemarPValue 
  6.531545e-63   7.342280e-17

Code

confusion_df <- rf_model_results_4$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic(base_size = 20) + theme(axis.text.x = element_text(color = "black",
    angle = 30, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels = c(central.coast = "Central coast",
    central.inland = "Central inland", northern = "Northern", southern = "Southern")) +
    scale_y_discrete(labels = c(central.coast = "Central coast", central.inland = "Central inland",
        northern = "Northern", southern = "Southern")) + labs(x = "Region",
    y = "Predicted region", fill = "Accuracy")

6.7.0.3 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results_4$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df_4 <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], region4 = xcorr_mds$region4, sound.files = xcorr_mds$sound.files)

umap_df_4$pred.site <- predict(object = rf_model_results_4$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df_4, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_4_regions.csv",
    row.names = FALSE)

Code

umap_df_4 <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_4_regions.csv")


umap_df_4$region4 <- factor(umap_df_4$region4, levels = c("central.coast",
    "central.inland", "northern", "southern"))
# Create a scatterplot

gg_umap_site <- ggplot(umap_df_4, aes(x = UMAP1, y = UMAP2, color = region4,
    fill = region4, shape = region4)) + geom_point(size = 4) + ylim(c(-7,
    7)) + scale_color_viridis_d(alpha = 0, begin = 0.1, end = 0.8,
    labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
    scale_fill_viridis_d(alpha = 0.25, begin = 0.1, end = 0.8, labels = c("Central Coast",
        "Central Mountains", "Northern", "Southern")) + scale_shape_manual(values = 21:24,
    labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
    guides(shape = guide_legend(override.aes = list(size = 5))) +
    theme_classic(base_size = 20) + labs(x = "UMAP1", y = "UMAP2",
    color = "Region", shape = "Region", fill = "Region")

gg_umap_site

6.7.0.4 Combined plots

Code

# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1,
# rel_widths = c(1.1, 1))
# cowplot::ggsave2('./output/UMAP_scatterplots.png', dpi = 300,
# width = 22, height = 7)

cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)

Code

cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17,
    height = 17)

7 Spectrograms of close-to-centroid calls for each region

Code

# keep only those correctly predicted umap_df_4 <-
# umap_df_4[umap_df_4$region4 == umap_df_4$pred.site, ]

# find prepresentative
y <- 96  #37, 50, 57, 47

# for ( i in 1:100){ y <- y + 1
repren_calls_list <- lapply(unique(umap_df_4$region4), function(x) {
    X <- umap_df_4[umap_df_4$region4 == x, ]
    centroid <- sapply(X[, c("UMAP1", "UMAP2")], mean)
    X$centroid_dist <- sapply(seq_len(nrow(X)), function(y) dist(rbind(centroid,
        X[y, c("UMAP1", "UMAP2")])))

    set.seed(y)
    out <- X[order(X$centroid_dist, decreasing = FALSE), ][sample(1:30,
        2), ]
})


repren_calls <- do.call(rbind, repren_calls_list)
repren_calls$selec <- 1
repren_calls$start <- rep(0, nrow(repren_calls))
repren_calls$end <- rep(0.5, nrow(repren_calls))

repren_calls <- repren_calls[c(1, 3, 5, 7, 2, 4, 6, 8), ]

lf <- rep(c(0.06, 0.5), each = 4)
rg <- rep(c(0.5, 0.95), each = 4)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- rep(horiz[-1], 2)
tp <- rep(horiz[-length(horiz)], 2)

m <- cbind(lf, rg, btm, tp)
m <- m[(1:8), ]
lf <- c(rep(0.95, each = 4), 0.06, 0.5, 0, 0)
rg <- c(rep(1, each = 4), 0.5, 0.95, 0.05, 1)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- c(horiz[-1], 0.95, 0.95, 0.075, 0)
tp <- c(horiz[-length(horiz)], 1, 1, 0.95, 0.075)

m2 <- cbind(lf, rg, btm, tp)
m <- rbind(m, m2)



# png(filename = paste0('./output/spectrograms_by_site', y,
# '.png'), res = 300, width = 4000, height = 3000)

ss <- split.screen(figs = m)

# # testing layout screens for(i in 1:nrow(m)) {screen(i) par(
# mar = rep(0, 4)) plot(0.5, xlim = c(0,1), ylim = c(0,1), type
# = 'n', axes = FALSE, xlab = '', ylab = '', xaxt = 'n', yaxt =
# 'n') box() text(x = 0.5, y = 0.5, labels = i) }
# close.screen(all.screens = T)


ovlp <- 95
colls <- seq(-110, 0, 5)
wl <- 200

lab_bg <- viridis(10, alpha = 0.25)[8]

lab_bg <- "#3A3B39"



# frequency label
screen(15)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
text(x = 0.9, y = 1, "Frequency (kHz)", srt = 90, cex = 1.6)

# time label
screen(16)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
text(x = 1, y = 0.75, "Time (s)", cex = 1.6)

for (i in seq_len(nrow(repren_calls))) {
    # print(i)
    screen(i)
    par(mar = c(0, 0, 0, 0))

    warbleR:::spectro_wrblr_int2(wave = read_wave(X = repren_calls,
        index = i, path = "./data/raw/consolidated_files/"), collevels = colls,
        ovlp = ovlp, wl = wl, flim = c(1, 9), palette = viridis, axisX = FALSE,
        axisY = FALSE, grid = FALSE)

    # add frequency axis
    if (i %in% 1:4)
        axis(2, at = c(seq(2, 10, 2)))

    # add time axis
    if (i %in% c(4, 8))
        axis(1)
}

vlabs <- c("Central\nMountains", "Southern", "Northern", "Central\nCoast")

par(mar = c(0, 0, 0, 0), bg = lab_bg, new = TRUE)
# add vertical labels
for (i in 9:12) {
    screen(i)
    # par(mar = c(0, 0, 0, 0))
    par(mar = c(0, 0, 0, 0), bg = lab_bg, new = TRUE)
    plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
    text(x = 1, y = 1, vlabs[i - 8], srt = 270, cex = 1.6, col = "white",
        font = 1)
    box()
}

Code

# dev.off() }

Takeaways

Acoustic similarity is higher between calls from the same region, but lower between calls from the same location
Acoustic similarity decreases with distances (although it may be an artifact of the analysis)
Acoustic similarity decreases sharply after 2 units of distance (?)
Calls are poorly discriminated based on their locality (0.45)
Calls can be discriminated based on their region with good precision (0.77)

8 To do list

rename localities to “region-#” in UMAP scatterplot

Session information

R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ohun_1.0.2           ecodist_2.1.3        umap_0.2.10.0       
 [4] vegan_2.6-6.1        permute_0.9-7        pbapply_1.7-2       
 [7] caret_6.0-90         lattice_0.20-45      mlbench_2.1-3       
[10] randomForest_4.7-1.1 ggplot2_3.5.1        PhenotypeSpace_0.1.0
[13] warbleR_1.1.30       NatureSounds_1.0.4   seewave_2.2.3       
[16] tuneR_1.4.6          viridis_0.6.5        viridisLite_0.4.2   
[19] formatR_1.14         knitr_1.46          

loaded via a namespace (and not attached):
  [1] backports_1.4.1       systemfonts_1.0.6     plyr_1.8.9           
  [4] igraph_2.0.3          sp_2.1-4              splines_4.1.2        
  [7] listenv_0.9.1         digest_0.6.35         foreach_1.5.2        
 [10] htmltools_0.5.8.1     fansi_1.0.6           magrittr_2.0.3       
 [13] checkmate_2.3.1       tensor_1.5            cluster_2.1.2        
 [16] remotes_2.5.0         recipes_0.1.17        globals_0.16.3       
 [19] gower_1.0.0           askpass_1.1           timechange_0.3.0     
 [22] spatstat.sparse_2.1-0 colorspace_2.1-0      signal_1.8-0         
 [25] textshaping_0.3.6     xfun_0.43             dplyr_1.1.4          
 [28] crayon_1.5.2          RCurl_1.98-1.14       jsonlite_1.8.8       
 [31] spatstat.data_3.0-4   survival_3.2-13       iterators_1.0.14     
 [34] glue_1.7.0            polyclip_1.10-6       gtable_0.3.5         
 [37] ipred_0.9-12          future.apply_1.11.1   abind_1.4-5          
 [40] scales_1.3.0          DBI_1.2.2             xaringanExtra_0.7.0  
 [43] Rcpp_1.0.12           dtw_1.23-1            units_0.8-5          
 [46] reticulate_1.35.0     spatstat.core_2.3-2   proxy_0.4-27         
 [49] stats4_4.1.2          lava_1.6.10           prodlim_2019.11.13   
 [52] htmlwidgets_1.6.4     RColorBrewer_1.1-3    pkgconfig_2.0.3      
 [55] farver_2.1.2          nnet_7.3-17           deldir_2.0-4         
 [58] utf8_1.2.4            tidyselect_1.2.1      labeling_0.4.3       
 [61] rlang_1.1.4           reshape2_1.4.4        munsell_0.5.1        
 [64] tools_4.1.2           cli_3.6.2             generics_0.1.3       
 [67] evaluate_0.23         stringr_1.5.1         fastmap_1.1.1        
 [70] ragg_1.2.1            yaml_2.3.8            goftest_1.2-3        
 [73] ModelMetrics_1.2.2.2  purrr_1.0.2           packrat_0.9.2        
 [76] future_1.33.1         nlme_3.1-155          brio_1.1.4           
 [79] compiler_4.1.2        rstudioapi_0.15.0     png_0.1-7            
 [82] e1071_1.7-14          testthat_3.2.1        sketchy_1.0.3        
 [85] spatstat.utils_3.0-4  tibble_3.2.1          stringi_1.8.4        
 [88] RSpectra_0.16-0       rgeos_0.6-4           Matrix_1.6-5         
 [91] classInt_0.4-10       fftw_1.0-8            vctrs_0.6.5          
 [94] pillar_1.9.0          lifecycle_1.0.4       spatstat.geom_3.2-9  
 [97] cowplot_1.1.3         data.table_1.14.2     bitops_1.0-7         
[100] raster_3.5-15         R6_2.5.1              KernSmooth_2.23-20   
[103] gridExtra_2.3         parallelly_1.37.1     codetools_0.2-18     
[106] MASS_7.3-55           openssl_2.1.1         rjson_0.2.21         
[109] withr_3.0.0           mgcv_1.8-39           parallel_4.1.2       
[112] terra_1.5-21          grid_4.1.2            rpart_4.1.16         
[115] timeDate_3043.102     class_7.3-20          rmarkdown_2.26       
[118] sf_1.0-6              pROC_1.18.0           lubridate_1.9.3