Cape parrot vocal dialects

Author
Published

June 13, 2024

Source code and data found at https://github.com/maRce10/cape_parrot_dialects

 

1 Purpose

  • Measure acoustic structure of cape parrot contact calls

  • Compare acoustic dissimilarity between individuals from different localities

 

2 Report overview

 

Load packages

Code
# knitr is require for creating html/pdf/word reports formatR is
# used for soft-wrapping code

# install/ load packages
sketchy::load_packages(packages = c("knitr", "formatR", "viridis",
    "warbleR", github = "maRce10/PhenotypeSpace", "ggplot2", "randomForest",
    "mlbench", "caret", "pbapply", "vegan", "umap", "ecodist", "maRce10/ohun"))
Warning: replacing previous import 'proxy::as.dist' by 'stats::as.dist' when
loading 'PhenotypeSpace'
Warning: replacing previous import 'proxy::dist' by 'stats::dist' when loading
'PhenotypeSpace'
Code
source("~/Dropbox/Projects/geographic_call_variation_yellow-naped_amazon/scripts/MRM2.R")

3 Acoustic analysis

3.1 Format data

Code
dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_UPDATED_dec-2023.csv")

dat <- dat[grep("cape", dat$Species, ignore.case = T), ]

names(dat)[grep("Regions..4.", names(dat))] <- "Regions4"
names(dat)[grep("Regions..3.", names(dat))] <- "Regions3"

table(dat$Species)

nrow(dat)

# all(dat$New_Name %in% st$sound.files) table(dat$Sorted)

# dat <- dat[dat$Sorted != 'delete', ]

unique(dat$New_Name)
ohun::feature_acoustic_data(path = "./data/raw/consolidated_files")

warbleR_options(path = "./data/raw/consolidated_files")
st <- selection_table(whole.recs = TRUE)

st <- st[st$sound.files %in% dat$New_Name, ]
nrow(st)
nrow(dat)

st$sorted <- sapply(st$sound.files, function(x) dat$Sorted[dat$New_Name ==
    x][1])

table(st$sorted)

# spectrograms(st, wl = 512, flim = c(0, 10), dest.path =
# './data/processed/spectrograms', pal = viridis, collevels =
# seq(-100, 0, 5)) spectrograms(st[st$sorted == 'unsorted', ],
# wl = 512, flim = c(0, 10), dest.path =
# './data/processed/unsorted_spectrograms', pal = viridis,
# collevels = seq(-100, 0, 5)) tailor_sels(st, auto.next = TRUE,
# flim = c(0, 8), collevels = seq(-100, 0, 5))

3.2 Make selection table

Code
sel_tab <- selection_table(path = "./data/raw/consolidated_files/",
    whole.recs = TRUE)

tailored <- read.csv("./data/raw/consolidated_files/seltailor_output.csv")

tailored <- tailored[tailored$tailored == "y", ]


non_tailored <- sel_tab[!sel_tab$sound.files %in% tailored$sound.files,
    ]
non_tailored$tailored <- "n"


tailored$top.freq[is.na(tailored$bottom.freq)] <- non_tailored$bottom.freq <- min(tailored$bottom.freq,
    na.rm = TRUE)
tailored$top.freq[is.na(tailored$top.freq)] <- non_tailored$top.freq <- max(tailored$top.freq,
    na.rm = TRUE)

comm_names <- intersect(names(tailored), names(non_tailored))

all_sels <- rbind(tailored[, comm_names], non_tailored[, comm_names])

write.csv(all_sels, "./data/processed/selection_table_entire_sound_files.csv",
    row.names = FALSE)

3.3 Run cross-correlation

Code
sel_tab <- read.csv("./data/processed/selection_table_entire_sound_files.csv")

xcorr <- cross_correlation(X = sel_tab, path = "./data/raw/consolidated_files/",
    method = 2, parallel = 1)

rownames(xcorr) <- gsub("-1$", "", rownames(xcorr))

colnames(xcorr) <- gsub("-1$", "", colnames(xcorr))

saveRDS(xcorr, "./data/processed/cross_correlation_matrix.RDS")

# less than 0.1% were undefined
sum(is.infinite(xcorr))/length(xcorr)

# convert infinite to mean xcorr
xcorr[is.infinite(xcorr)] <- mean(xcorr[!is.infinite(xcorr) & xcorr <
    1])

xcorr_mds <- cmdscale(d = as.dist(xcorr), k = 2)

rownames(xcorr_mds) <- gsub("-1$", "", rownames(xcorr_mds))

saveRDS(xcorr_mds, "./data/processed/cross_correlation_MDS.RDS")

4 Data description

Code
# dat <-
# read.csv('./data/raw/consolidated_sound_files_CPV_contact_calls_CURATED.csv')

# add data from second location
dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_UPDATED_dec-2023.csv")

dat <- dat[grep("cape", dat$Species, ignore.case = T), ]

names(dat)[grep("Regions..4.", names(dat))] <- "Regions4"
names(dat)[grep("Region..3.", names(dat))] <- "Regions3"

names(dat) <- gsub("..cluster.", ".for.cluster", names(dat))

dat <- dat[grepl("cape", dat$Species, ignore.case = T) & !is.na(dat$Location.for.cluster) &
    !is.na(dat$Longitude.for.cluster) & !is.na(dat$Latitude.for.cluster),
    ]

# dat$region3 <- sapply(dat$New_Name, function(x)
# dat_loc2$region3s[dat_loc2$New_Name == x]) dat$region <-
# dat$site

dat$Species <- "Cape parrot"
  • 6287 calls
  • 13 localities
  • 3 regions
  • Number of localities per region:
Code
agg <- aggregate(Location.for.cluster ~ Regions3, dat, function(x) length(unique(x)))

names(agg) <- c("region", "localities")

agg$calls <- aggregate(Location.for.cluster ~ Regions3, dat, length)[,
    2]

agg$localities <- aggregate(Location.for.cluster ~ Regions3, dat,
    function(x) paste(unique(x), collapse = "-"))[, 2]

agg
region localities calls
central Port St John’s-Polela Sawmill-iGxalingenwa Nature Reserve-Marutswa Forest-Salt Spring Farm-Hoha Forest 1769
northern Amorentia 717
southern Hogsback-Schwarzwald Forest-Alice Pecan Orchard-Stutterheim-King William’s Town-Baddaford Farm 3801
  • Region of each locality:
Code
agg <- aggregate(Regions3 ~ Location.for.cluster, dat, function(x) paste(unique(x),
    collapse = "-"))

names(agg) <- c("localities", "region")

agg[order(agg$region), 2:1]
region localities
5 central Hoha Forest
6 central iGxalingenwa Nature Reserve
8 central Marutswa Forest
9 central Polela Sawmill
10 central Port St John’s
11 central Salt Spring Farm
2 northern Amorentia
1 southern Alice Pecan Orchard
3 southern Baddaford Farm
4 southern Hogsback
7 southern King William’s Town
12 southern Schwarzwald Forest
13 southern Stutterheim
  • 4 regions
  • Number of localities per region:
Code
agg <- aggregate(Location.for.cluster ~ Regions4, dat, function(x) length(unique(x)))

names(agg) <- c("region", "localities")

agg$calls <- aggregate(Location.for.cluster ~ Regions4, dat, length)[,
    2]

agg$localities <- aggregate(Location.for.cluster ~ Regions4, dat,
    function(x) paste(unique(x), collapse = "-"))[, 2]

agg
region localities calls
central coast Port St John’s 687
central inland Polela Sawmill-iGxalingenwa Nature Reserve-Marutswa Forest-Salt Spring Farm-Hoha Forest 1082
northern Amorentia 717
southern Hogsback-Schwarzwald Forest-Alice Pecan Orchard-Stutterheim-King William’s Town-Baddaford Farm 3801
  • Region of each locality:
Code
agg <- aggregate(Regions4 ~ Location.for.cluster, dat, function(x) paste(unique(x),
    collapse = "-"))

names(agg) <- c("localities", "region")

agg[order(agg$region), 2:1]
region localities
10 central coast Port St John’s
5 central inland Hoha Forest
6 central inland iGxalingenwa Nature Reserve
8 central inland Marutswa Forest
9 central inland Polela Sawmill
11 central inland Salt Spring Farm
2 northern Amorentia
1 southern Alice Pecan Orchard
3 southern Baddaford Farm
4 southern Hogsback
7 southern King William’s Town
12 southern Schwarzwald Forest
13 southern Stutterheim

5 Statistical analysis

Two approaches:

5.1 Multiple Regression on distance Matrices

  • Model:
    \[\begin{align*} Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance \end{align*}\]
  • Response values scaled to make effect sizes comparable across models
  • Locality was coded as pairwise binary matrix in which 0 means that calls in a dyad belong to the same locality and 1 means calls belong to different locality
Code
xcorr <- readRDS("./data/processed/cross_correlation_matrix.RDS")

xcorr_mds <- readRDS("./data/processed/cross_correlation_MDS.RDS")

sub_xcorr <- xcorr[rownames(xcorr) %in% dat$New_Name, colnames(xcorr) %in%
    dat$New_Name]
sub_xcorr_mds <- xcorr_mds[rownames(xcorr_mds) %in% dat$New_Name,
    ]

sub_dat <- dat[dat$New_Name %in% rownames(sub_xcorr_mds), ]
sub_dat <- sub_dat[match(rownames(sub_xcorr), sub_dat$New_Name), ]

location <- sapply(rownames(sub_xcorr), function(x) sub_dat$Location.for.cluster[sub_dat$New_Name ==
    x])
region3 <- sapply(rownames(sub_xcorr), function(x) sub_dat$Regions3[sub_dat$New_Name ==
    x])
region4 <- sapply(rownames(sub_xcorr), function(x) sub_dat$Regions4[sub_dat$New_Name ==
    x])
Code
loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))

samp_bi_tri <- as.dist(binary_triangular_matrix(group = region3))

geo_dist <- dist(sub_dat[, c("Latitude.for.cluster", "Longitude.for.cluster")])

rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)

colnames(rect_var) <- c("fourier_xc", "geo_distance", "location",
    "region3")

rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]

xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + region3,
    nperm = 10000, mat = rect_var)

saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation_3_regions.RDS")
Code
readRDS("./data/processed/matrix_correlation_fourier_cross_correlation_3_regions.RDS")
$coef
              fourier_xc  pval
Int          0.508817365 1e+00
geo_distance 0.006329931 1e-04
location     0.015584051 1e-04
region3      0.021221681 1e-04

$r.squared
        R2       pval 
0.06189599 0.00010000 

$F.test
          F      F.pval 
433482.8893      0.0001 

$n
[1] 6279
Code
loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))

samp_bi_tri <- as.dist(binary_triangular_matrix(group = region4))

geo_dist <- dist(sub_dat[, c("Latitude.for.cluster", "Longitude.for.cluster")])

rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)

colnames(rect_var) <- c("fourier_xc", "geo_distance", "location",
    "region4")

rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]

xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + region4,
    nperm = 10000, mat = rect_var)

saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation_4_regions.RDS")
Code
readRDS("./data/processed/matrix_correlation_fourier_cross_correlation_4_regions.RDS")
$coef
              fourier_xc  pval
Int          0.508817365 1e+00
geo_distance 0.005126500 1e-04
location     0.007441299 3e-04
region4      0.037044395 1e-04

$r.squared
        R2       pval 
0.06624306 0.00010000 

$F.test
          F      F.pval 
466086.9749      0.0001 

$n
[1] 6279

6 Simulation

6.1 Simulate data with different patterns of geographic variation in call structure

  • 26 localities (10 individuals each) along a longitudinal range
  • Simulated patterns following Podos & Warren (2007)

Figure from Podos & Warren (2007)
Code
# seed to make it reproducible
set.seed(123)

# number of groups
n_locality <- length(unique(sub_dat$Location.for.cluster))

# number of individuals per group
n_indiv <- table(sub_dat$Location.for.cluster)

# create locality labels
localities <- sample(LETTERS[1:n_locality])

# simulated individuals per group
locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x],
    each = n_indiv[x])))

# simulate geographic coordinates along a gradient
lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) +
    rnorm(n = length(locality_label), sd = 0.1)
lat <- rnorm(n = length(locality_label), sd = 0.1)


# put all together in a data frame
data <- data.frame(locality = locality_label, lat, lon, color = viridis(n_locality)[as.numeric(as.factor(locality_label))])

# add sampling site (broader geographic mozaic)
data <- data[order(data$lon), ]

data$site_num <- 2

for (i in 2:nrow(data)) {

    data$site_num[i] <- if (data$locality[i - 1] == data$locality[i])
        data$site_num[i - 1] else data$site_num[i - 1] + 1

}

# make last location the same as th previous one
data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] -
    1

data$region3 <- sample(letters)[floor(data$site_num/2)]

# plot localities along longitude
plot(data[, c("lon", "lat")], col = data$color, cex = 1.8, ylim = range(data$lat) +
    c(-0.2, 0.2), pch = as.numeric(as.factor(data$region3)))

lon_loc <- tapply(data$lon, data$locality, mean)
text(x = lon_loc, y = rep(0.3, n_locality), labels = names(lon_loc),
    cex = 2.5)

abline(v = 1:30 - 0.5)

Geographic location of individuals/localities in the simulated data set. Letters highlight locality ids

6.2 Simulate acoustic variation:

  • Clinal: acoustic structure vector increases with longitude
  • Dialect-type: acoustic structure vector similar within a locality but varies randomly between neighnoring localities
    • 2 levels: locality and region
    • Region create by grouping 2 adjacent localities with the same label
  • Random: acoustic structure vector varies randomly between individuals regardless of locality or longitude
Code
# seed to make it reproducible
set.seed(123)

# simulate acoustic structure vector clinal variation
data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)

# dialect type variation
data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data),
    sd = 0.2)

data$dialect_site <- as.numeric(as.factor(data$region3)) + rnorm(n = nrow(data),
    sd = 0.2)

# random variation
data$random <- rnorm(n = nrow(data), sd = 0.2)

# sort so lines look good in plot
data <- data[order(data$lon), ]

lng_data <- stack(data[, c("clinal", "dialect", "dialect_site", "random")])

lng_data$locality <- data$locality
lng_data$lat <- data$lat
lng_data$lat <- data$lon
lng_data$lon_seq <- 1:nrow(data)

ggplot(lng_data, aes(x = lon_seq, y = values)) + geom_line(color = "#3E4A89FF",
    linewidth = 1.6) + facet_wrap(~ind, scales = "free_y", nrow = 3) +
    theme_classic(base_size = 25) + labs(x = "Longitude", y = "Acoustic feature")

Types of vocal geographic variation that were simulated

6.3 Stats on simulated data

  • Model:
    \[\begin{align*} Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance \end{align*}\]

Predict acoustic structure based on geographic distance and locality membership using multiple Regression on distance matrices

Data is z-transformed so all predictors have similar variance and effect sizes are comparable.

Variance for each predictor:

Code
# create distance matrices
loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$region3))
clinal_dist <- dist(data$clinal)
dialect_loc_dist <- dist(data$dialect)
dialect_site_dist <- dist(data$dialect_site)
random_dist <- dist(data$random)
geo_dist <- dist(data[, c("lat", "lon")])

# regression models set data format
rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)),
    dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)),
    geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary,
    site_member_binary = site_member_binary)

apply(rect_var, 2, var)
       clinal_dist   dialect_loc_dist  dialect_site_dist        random_dist 
         1.1978350          0.8737510          1.0052258          1.0051923 
          geo_dist  loc_member_binary site_member_binary 
         1.1976810          0.1721933          0.1930169 

6.3.1 Dialect-type variation model at locality level

Code
nperm <- 100

# model predicting dialect variation
(mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))
$coef
                   dialect_loc_dist pval
Int                      -0.9761392 0.01
geo_dist                  0.0374462 0.01
loc_member_binary         1.8678865 0.01
site_member_binary       -0.6915435 0.01

$r.squared
       R2      pval 
0.3358995 0.0100000 

$F.test
         F     F.pval 
3331516.48       0.01 

$n
[1] 6287

6.3.2 Dialect-type variation model at region level

Code
nperm <- 100

# model predicting dialect variation
(mod_dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist +
    loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[,
    c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))
$coef
                   dialect_site_dist pval
Int                       -1.3674760 0.01
geo_dist                  -0.3500067 0.01
loc_member_binary         -0.2026910 0.01
site_member_binary         2.0974315 0.01

$r.squared
       R2      pval 
0.4373079 0.0100000 

$F.test
         F     F.pval 
5118974.83       0.01 

$n
[1] 6287

6.3.3 Clinal variation model

Code
# model predicting clinal variation
(mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))
$coef
                    clinal_dist pval
Int                 0.016170807 0.01
geo_dist            0.999862672 0.01
loc_member_binary  -0.017742887 0.01
site_member_binary -0.002608847 0.07

$r.squared
       R2      pval 
0.9904731 0.0100000 

$F.test
          F      F.pval 
6.84793e+08 1.00000e-02 

$n
[1] 6287

6.3.4 Random variation model

Code
# model predicting random variation
(mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))
$coef
                    random_dist pval
Int                -0.006341367 0.47
geo_dist            0.001651007 0.75
loc_member_binary   0.025101261 0.11
site_member_binary -0.016276746 0.23

$r.squared
         R2        pval 
3.42318e-05 2.50000e-01 

$F.test
       F   F.pval 
225.4816   0.2500 

$n
[1] 6287

6.3.5 Combined results

Code
mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect, mod_dialect_site = mod_dialect_site,
    mod_random = mod_random)

names(mods) <- c("Clinal", "Dialect locality", "Dialect site", "Random")

estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
    Y <- data.frame(mods[[x]]$coef[-1, ])
    Y$rel_coef <- Y[, 1]/max(Y[, 1])
    Y$mod <- names(mods)[x]
    Y$predictor <- rownames(Y)
    names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
    return(Y)
}))

estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
    0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")

ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
    geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
    high = viridis(10)[7], guide = "none") + geom_text(aes(label = round(coef,
    3), color = signif)) + scale_color_manual(values = c("black",
    "gray")) + labs(x = "", y = "", color = "P value") + theme_classic() +
    theme(axis.text.x = element_text(color = "black", size = 11, angle = 30,
        vjust = 0.8, hjust = 0.8))

6.4 Replicate simulation 100 times

Code
nperm <- 100
reps <- 100
rep_models <- pbreplicate(reps, cl = 1, expr = {
    # number of groups
    n_locality <- length(unique(sub_dat$Location.for.cluster))

    # number of individuals per group
    n_indiv <- table(sub_dat$Location.for.cluster)

    # create locality labels
    localities <- sample(LETTERS[1:n_locality])

    # simulated individuals per group
    locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x],
        each = n_indiv[x])))

    # simulate geographic coordinates along a gradient
    lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) +
        rnorm(n = length(locality_label), sd = 0.1)
    lat <- rnorm(n = length(locality_label), sd = 0.1)

    # locality_label <- localities[n_indivs]

    # put all together in a data frame
    data <- data.frame(locality = locality_label, lat, lon)

    # add region (broader geographic mozaic)
    data <- data[order(data$lon), ]

    data$site_num <- 2

    for (i in 2:nrow(data)) {

        data$site_num[i] <- if (data$locality[i - 1] == data$locality[i])
            data$site_num[i - 1] else data$site_num[i - 1] + 1

    }

    # make last location the same as the previous one
    data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] -
        1

    data$region3 <- sample(letters)[floor(data$site_num/2)]

    # simulate acoustic structure vector clinal variation
    data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)

    # dialect type variation locality level
    data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data),
        sd = 0.2)

    # dialect type variation
    data$dialect_site <- as.numeric(as.factor(data$region3)) + rnorm(n = nrow(data),
        sd = 0.2)

    # random variation
    data$random <- rnorm(n = nrow(data), sd = 0.2)

    loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
    site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$region3))
    clinal_dist <- dist(data$clinal)
    dialect_loc_dist <- dist(data$dialect)
    dialect_site_dist <- dist(data$dialect_site)
    random_dist <- dist(data$random)
    geo_dist <- dist(data[, c("lat", "lon")])

    # regression models set data format
    rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)),
        dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)),
        geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary,
        site_member_binary = site_member_binary)

    # model predicting dialect variation
    mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    # model predicting clinal variation
    mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    # # model predicting dialect site variation
    dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist +
        loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[,
        c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")])

    # model predicting random variation
    mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect,
        dialect_site = dialect_site, mod_random = mod_random)

    names(mods) <- c("Clinal", "Dialect locality", "Dialect site",
        "Random")

    estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
        Y <- data.frame(mods[[x]]$coef[-1, ])
        Y$rel_coef <- Y[, 1]/max(Y[, 1])
        Y$mod <- names(mods)[x]
        Y$predictor <- rownames(Y)
        names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
        return(Y)
    }))

    estimates
}, simplify = FALSE)

coeffs <- do.call(cbind, lapply(rep_models, function(x) x[, 1]))
ps <- do.call(cbind, lapply(rep_models, function(x) x[, 2]))

estimates <- rep_models[[1]]
estimates$coef <- rowMeans(coeffs)
estimates$sd <- apply(coeffs, 1, sd)
estimates$p <- 1 - (apply(ps, 1, function(x) sum(x < 0.05))/reps)
estimates$rel_coef <- estimates[, 1]/max(estimates[, 1])
estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
    0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")

estimates$coef_sd <- paste0(round(estimates$coef, 3), "\n(", round(estimates$sd,
    4), ")")

saveRDS(estimates, "./data/processed/estimates_for_replicated_simulation_dialects.RDS")
Code
estimates <- readRDS("./data/processed/estimates_for_replicated_simulation_dialects.RDS")

ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
    geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
    high = viridis(10)[7], guide = "none") + geom_text(aes(label = coef_sd,
    color = signif)) + scale_color_manual(values = c("black", "gray")) +
    labs(x = "", y = "", color = "P value") + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

Mean estimates (and SD) of replicated simulation regression results. P values

6.5 Mantel correlogram at different distances

Code
geo_vect <- geo_dist[lower.tri(geo_dist)]
geo_vect <- geo_vect[!is.na(geo_vect)]

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)


dists <- 1:10

mantelcorrlg <- function(i) {

    classes <- seq(0, max(geo_vect), i)
    # length(classes)

    # Run a mantel correlation on the data
    correl_SPCC <- vegan::mantel.correlog(D.eco = xc_dist, D.geo = geo_dist,
        break.pts = classes, cutoff = FALSE, r.type = "pearson", nperm = 1,
        mult = "holm", progressive = FALSE)

    mantel.res <- correl_SPCC$mantel.res[, 1:3]
    mantel.res <- cbind(mantel.res, break.size = i)

    return(mantel.res)

}

mantel_list <- warbleR:::pblapply_wrblr_int(dists, cl = 1, function(x) try(mantelcorrlg(x),
    silent = TRUE))

mantel_list <- mantel_list[sapply(mantel_list, class) != "try-error"]


mantel_list <- lapply(mantel_list, as.data.frame)

# # Save the correlation as an .RDS file so you don't have to
# run it multiple times in the future
saveRDS(mantel_list, paste0("./data/processed/correl_SPCC_several_distances.RDS"))
Code
mantel_list <- readRDS(paste0("./data/processed/correl_SPCC_several_distances.RDS"))

mantels_df <- as.data.frame(do.call(rbind, mantel_list))

combined_dists <- sort(unique(mantels_df$class.index))

# interpolate
interpol_mantel_list <- lapply(mantel_list, function(x) {

    appx <- approx(x = x$class.index[x$n.dist > 0], y = x$Mantel.cor[x$n.dist >
        0], xout = combined_dists, method = "linear")

    return(appx$y)
})


interpol_mantel_mat <- do.call(cbind, interpol_mantel_list)


interpol_mantel_df <- data.frame(combined_dists, mean.cor = apply(interpol_mantel_mat,
    1, mean, na.rm = TRUE), sd.cor = apply(interpol_mantel_mat, 1,
    sd, na.rm = TRUE))


ggplot(data = interpol_mantel_df, mapping = aes(x = combined_dists,
    y = mean.cor)) + geom_ribbon(data = interpol_mantel_df, aes(ymin = mean.cor -
    sd.cor, ymax = mean.cor + sd.cor), fill = "gray", alpha = 0.3) +
    geom_point(col = viridis(10, alpha = 0.5)[7], size = 2.5) + geom_line(col = viridis(10,
    alpha = 0.5)[7], size = 2) + xlim(c(0, 4)) + ylim(c(-0.025, 0.2)) +
    geom_point(size = 3, color = "transparent") + theme_classic(base_size = 20) +
    labs(x = "Pairwise geographic distance (Degrees)", y = "Correlation coefficient")

6.6 Dialect discrimination with Random Forest

  • Data split in training and testing subsets (75% and 25% respectively)
  • Accuracy measured on test subset

6.6.1 Discriminating localities

Code
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <-
    mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <-
    data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$location <-
    sapply(xcorr_mds$sound.files, function(x)
        sub_dat$Location.for.cluster[sub_dat$New_Name == x])

xcorr_mds$location <- as.factor(make.names(xcorr_mds$location,
                                           unique = FALSE, allow_ = TRUE))


# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$location,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition,-1]
testset <- xcorr_mds[-partition,-1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    location ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "BoxCox",
    proximity = TRUE    
)

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$location)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$location ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  location ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)

saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_results_locations.RDS"
)
Code
rf_model_results <- readRDS("./data/processed/random_forest_model_results_locations.RDS")

# print confusion matrix results
rf_model_results$conf_mat_bb$overall
      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
     0.4205488      0.3538397      0.3959564      0.4454395      0.4135290 
AccuracyPValue  McnemarPValue 
     0.2947211            NaN 
Code
confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

6.6.2 Projecting random forest proximity with UMAP

Code
umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], location = xcorr_mds$location, sound.files = xcorr_mds$sound.files)

umap_df$pred.locality <- predict(object = rf_model_results$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv",
    row.names = FALSE)
Code
umap_loc_df <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv")

# Create a scatterplot
gg_umap_loc <- ggplot(umap_loc_df, aes(x = UMAP1, y = UMAP2, color = location,
    fill = location)) + geom_point(size = 4) + ylim(c(-7, 7)) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "UMAP1",
    y = "UMAP2", color = "Locality", fill = "Locality")

gg_umap_loc

6.7 Discriminating regions

Code
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <- data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$region3 <- sapply(xcorr_mds$sound.files, function(x) sub_dat$Regions3[sub_dat$New_Name ==
    x])

xcorr_mds$region3 <- as.factor(make.names(xcorr_mds$region3, unique = FALSE,
    allow_ = TRUE))
Code
# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$region3,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition, -1]
testset <- xcorr_mds[-partition, -1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        classProbs = TRUE,
        returnResamp = "all",
        sampling = "down"
    )

pred_model <-
    train(
        region3 ~ .,
        data = trainset,
        method = "rf",
        trControl = trcontrol,
        metric = "Accuracy",
        preProcess = "BoxCox", 
        proximity = TRUE
    )

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$region3)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$region3 ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  region3 ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)


saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_regions_results_3_regions.RDS"
)
Code
rf_model_results_3 <- readRDS("./data/processed/random_forest_model_regions_results_3_regions.RDS")

# print confusion matrix results
rf_model_results_3$conf_mat_bb$overall
      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  8.236792e-01   6.927952e-01   8.039166e-01   8.422237e-01   6.047104e-01 
AccuracyPValue  McnemarPValue 
  1.107149e-78   2.174519e-15 
Code
confusion_df <- rf_model_results_3$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic(base_size = 20) + theme(axis.text.x = element_text(color = "black",
    angle = 30, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels = c(central = "Central",
    northern = "Northern", southern = "Southern")) + scale_y_discrete(labels = c(central = "Central",
    northern = "Northern", southern = "Southern")) + labs(x = "Region",
    y = "Predicted region", fill = "Accuracy")

6.7.0.1 Projecting random forest proximity with UMAP

Code
umap_result <- umap(rf_model_results_3$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)


# Create a data frame with the UMAP results
umap_df_3 <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], region3 = xcorr_mds$region3, sound.files = xcorr_mds$sound.files)

umap_df_3$pred.site <- predict(object = rf_model_results_3$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df_3, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_3_regions.csv",
    row.names = FALSE)
Code
umap_df_3 <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_3_regions.csv")


umap_df_3$region3 <- factor(umap_df_3$region3, levels = c("central",
    "northern", "southern"))
# Create a scatterplot

gg_umap_site <- ggplot(umap_df_3, aes(x = UMAP1, y = UMAP2, color = region3,
    fill = region3, shape = region3)) + geom_point(size = 4) + ylim(c(-7,
    7)) + scale_color_viridis_d(alpha = 0, begin = 0.1, end = 0.8,
    labels = c(central = "Central", northern = "Northern", southern = "Southern")) +
    scale_fill_viridis_d(alpha = 0.25, begin = 0.1, end = 0.8, labels = c(central = "Central",
        northern = "Northern", southern = "Southern")) + scale_shape_manual(values = 21:24,
    labels = c(central = "Central", northern = "Northern", southern = "Southern")) +
    guides(shape = guide_legend(override.aes = list(size = 5))) +
    theme_classic(base_size = 20) + labs(x = "UMAP1", y = "UMAP2",
    color = "Region", shape = "Region", fill = "Region")


gg_umap_site

6.7.0.2 Combined plots

Code
# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1,
# rel_widths = c(1.1, 1))
# cowplot::ggsave2('./output/UMAP_scatterplots.png', dpi = 300,
# width = 22, height = 7)

cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)

Code
cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17,
    height = 17)
Code
xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <- data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$region4 <- sapply(xcorr_mds$sound.files, function(x) sub_dat$Regions4[sub_dat$New_Name ==
    x])

xcorr_mds$region4 <- as.factor(make.names(xcorr_mds$region4, unique = FALSE,
    allow_ = TRUE))
Code
# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$region4,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition, -1]
testset <- xcorr_mds[-partition, -1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        classProbs = TRUE,
        returnResamp = "all",
        sampling = "down"
    )

pred_model <-
    train(
        region4 ~ .,
        data = trainset,
        method = "rf",
        trControl = trcontrol,
        metric = "Accuracy",
        preProcess = "BoxCox", 
        proximity = TRUE
    )

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$region4)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$region4 ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  region4 ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)


saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_regions_results_4_regions.RDS"
)
Code
rf_model_results_4 <- readRDS("./data/processed/random_forest_model_regions_results_4_regions.RDS")

# print confusion matrix results
rf_model_results_4$conf_mat_bb$overall
      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
  8.019108e-01   6.823047e-01   7.813208e-01   8.213651e-01   6.050955e-01 
AccuracyPValue  McnemarPValue 
  6.531545e-63   7.342280e-17 
Code
confusion_df <- rf_model_results_4$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic(base_size = 20) + theme(axis.text.x = element_text(color = "black",
    angle = 30, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels = c(central.coast = "Central coast",
    central.inland = "Central inland", northern = "Northern", southern = "Southern")) +
    scale_y_discrete(labels = c(central.coast = "Central coast", central.inland = "Central inland",
        northern = "Northern", southern = "Southern")) + labs(x = "Region",
    y = "Predicted region", fill = "Accuracy")

6.7.0.3 Projecting random forest proximity with UMAP

Code
umap_result <- umap(rf_model_results_4$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df_4 <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], region4 = xcorr_mds$region4, sound.files = xcorr_mds$sound.files)

umap_df_4$pred.site <- predict(object = rf_model_results_4$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df_4, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_4_regions.csv",
    row.names = FALSE)
Code
umap_df_4 <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_4_regions.csv")


umap_df_4$region4 <- factor(umap_df_4$region4, levels = c("central.coast",
    "central.inland", "northern", "southern"))
# Create a scatterplot

gg_umap_site <- ggplot(umap_df_4, aes(x = UMAP1, y = UMAP2, color = region4,
    fill = region4, shape = region4)) + geom_point(size = 4) + ylim(c(-7,
    7)) + scale_color_viridis_d(alpha = 0, begin = 0.1, end = 0.8,
    labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
    scale_fill_viridis_d(alpha = 0.25, begin = 0.1, end = 0.8, labels = c("Central Coast",
        "Central Mountains", "Northern", "Southern")) + scale_shape_manual(values = 21:24,
    labels = c("Central Coast", "Central Mountains", "Northern", "Southern")) +
    guides(shape = guide_legend(override.aes = list(size = 5))) +
    theme_classic(base_size = 20) + labs(x = "UMAP1", y = "UMAP2",
    color = "Region", shape = "Region", fill = "Region")

gg_umap_site

6.7.0.4 Combined plots

Code
# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1,
# rel_widths = c(1.1, 1))
# cowplot::ggsave2('./output/UMAP_scatterplots.png', dpi = 300,
# width = 22, height = 7)

cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)

Code
cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17,
    height = 17)

7 Spectrograms of close-to-centroid calls for each region

Code
# keep only those correctly predicted umap_df_4 <-
# umap_df_4[umap_df_4$region4 == umap_df_4$pred.site, ]

# find prepresentative
y <- 96  #37, 50, 57, 47

# for ( i in 1:100){ y <- y + 1
repren_calls_list <- lapply(unique(umap_df_4$region4), function(x) {
    X <- umap_df_4[umap_df_4$region4 == x, ]
    centroid <- sapply(X[, c("UMAP1", "UMAP2")], mean)
    X$centroid_dist <- sapply(seq_len(nrow(X)), function(y) dist(rbind(centroid,
        X[y, c("UMAP1", "UMAP2")])))

    set.seed(y)
    out <- X[order(X$centroid_dist, decreasing = FALSE), ][sample(1:30,
        2), ]
})


repren_calls <- do.call(rbind, repren_calls_list)
repren_calls$selec <- 1
repren_calls$start <- rep(0, nrow(repren_calls))
repren_calls$end <- rep(0.5, nrow(repren_calls))

repren_calls <- repren_calls[c(1, 3, 5, 7, 2, 4, 6, 8), ]

lf <- rep(c(0.06, 0.5), each = 4)
rg <- rep(c(0.5, 0.95), each = 4)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- rep(horiz[-1], 2)
tp <- rep(horiz[-length(horiz)], 2)

m <- cbind(lf, rg, btm, tp)
m <- m[(1:8), ]
lf <- c(rep(0.95, each = 4), 0.06, 0.5, 0, 0)
rg <- c(rep(1, each = 4), 0.5, 0.95, 0.05, 1)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- c(horiz[-1], 0.95, 0.95, 0.075, 0)
tp <- c(horiz[-length(horiz)], 1, 1, 0.95, 0.075)

m2 <- cbind(lf, rg, btm, tp)
m <- rbind(m, m2)



# png(filename = paste0('./output/spectrograms_by_site', y,
# '.png'), res = 300, width = 4000, height = 3000)

ss <- split.screen(figs = m)

# # testing layout screens for(i in 1:nrow(m)) {screen(i) par(
# mar = rep(0, 4)) plot(0.5, xlim = c(0,1), ylim = c(0,1), type
# = 'n', axes = FALSE, xlab = '', ylab = '', xaxt = 'n', yaxt =
# 'n') box() text(x = 0.5, y = 0.5, labels = i) }
# close.screen(all.screens = T)


ovlp <- 95
colls <- seq(-110, 0, 5)
wl <- 200

lab_bg <- viridis(10, alpha = 0.25)[8]

lab_bg <- "#3A3B39"



# frequency label
screen(15)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
text(x = 0.9, y = 1, "Frequency (kHz)", srt = 90, cex = 1.6)

# time label
screen(16)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
text(x = 1, y = 0.75, "Time (s)", cex = 1.6)

for (i in seq_len(nrow(repren_calls))) {
    # print(i)
    screen(i)
    par(mar = c(0, 0, 0, 0))

    warbleR:::spectro_wrblr_int2(wave = read_wave(X = repren_calls,
        index = i, path = "./data/raw/consolidated_files/"), collevels = colls,
        ovlp = ovlp, wl = wl, flim = c(1, 9), palette = viridis, axisX = FALSE,
        axisY = FALSE, grid = FALSE)

    # add frequency axis
    if (i %in% 1:4)
        axis(2, at = c(seq(2, 10, 2)))

    # add time axis
    if (i %in% c(4, 8))
        axis(1)
}

vlabs <- c("Central\nMountains", "Southern", "Northern", "Central\nCoast")

par(mar = c(0, 0, 0, 0), bg = lab_bg, new = TRUE)
# add vertical labels
for (i in 9:12) {
    screen(i)
    # par(mar = c(0, 0, 0, 0))
    par(mar = c(0, 0, 0, 0), bg = lab_bg, new = TRUE)
    plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
    text(x = 1, y = 1, vlabs[i - 8], srt = 270, cex = 1.6, col = "white",
        font = 1)
    box()
}

Code
# dev.off() }

Takeaways

  • Acoustic similarity is higher between calls from the same region, but lower between calls from the same location
  • Acoustic similarity decreases with distances (although it may be an artifact of the analysis)
  • Acoustic similarity decreases sharply after 2 units of distance (?)
  • Calls are poorly discriminated based on their locality (0.45)
  • Calls can be discriminated based on their region with good precision (0.77)

 

8 To do list

  • rename localities to “region-#” in UMAP scatterplot

 

Session information

R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ohun_1.0.2           ecodist_2.1.3        umap_0.2.10.0       
 [4] vegan_2.6-6.1        permute_0.9-7        pbapply_1.7-2       
 [7] caret_6.0-90         lattice_0.20-45      mlbench_2.1-3       
[10] randomForest_4.7-1.1 ggplot2_3.5.1        PhenotypeSpace_0.1.0
[13] warbleR_1.1.30       NatureSounds_1.0.4   seewave_2.2.3       
[16] tuneR_1.4.6          viridis_0.6.5        viridisLite_0.4.2   
[19] formatR_1.14         knitr_1.46          

loaded via a namespace (and not attached):
  [1] backports_1.4.1       systemfonts_1.0.6     plyr_1.8.9           
  [4] igraph_2.0.3          sp_2.1-4              splines_4.1.2        
  [7] listenv_0.9.1         digest_0.6.35         foreach_1.5.2        
 [10] htmltools_0.5.8.1     fansi_1.0.6           magrittr_2.0.3       
 [13] checkmate_2.3.1       tensor_1.5            cluster_2.1.2        
 [16] remotes_2.5.0         recipes_0.1.17        globals_0.16.3       
 [19] gower_1.0.0           askpass_1.1           timechange_0.3.0     
 [22] spatstat.sparse_2.1-0 colorspace_2.1-0      signal_1.8-0         
 [25] textshaping_0.3.6     xfun_0.43             dplyr_1.1.4          
 [28] crayon_1.5.2          RCurl_1.98-1.14       jsonlite_1.8.8       
 [31] spatstat.data_3.0-4   survival_3.2-13       iterators_1.0.14     
 [34] glue_1.7.0            polyclip_1.10-6       gtable_0.3.5         
 [37] ipred_0.9-12          future.apply_1.11.1   abind_1.4-5          
 [40] scales_1.3.0          DBI_1.2.2             xaringanExtra_0.7.0  
 [43] Rcpp_1.0.12           dtw_1.23-1            units_0.8-5          
 [46] reticulate_1.35.0     spatstat.core_2.3-2   proxy_0.4-27         
 [49] stats4_4.1.2          lava_1.6.10           prodlim_2019.11.13   
 [52] htmlwidgets_1.6.4     RColorBrewer_1.1-3    pkgconfig_2.0.3      
 [55] farver_2.1.2          nnet_7.3-17           deldir_2.0-4         
 [58] utf8_1.2.4            tidyselect_1.2.1      labeling_0.4.3       
 [61] rlang_1.1.4           reshape2_1.4.4        munsell_0.5.1        
 [64] tools_4.1.2           cli_3.6.2             generics_0.1.3       
 [67] evaluate_0.23         stringr_1.5.1         fastmap_1.1.1        
 [70] ragg_1.2.1            yaml_2.3.8            goftest_1.2-3        
 [73] ModelMetrics_1.2.2.2  purrr_1.0.2           packrat_0.9.2        
 [76] future_1.33.1         nlme_3.1-155          brio_1.1.4           
 [79] compiler_4.1.2        rstudioapi_0.15.0     png_0.1-7            
 [82] e1071_1.7-14          testthat_3.2.1        sketchy_1.0.3        
 [85] spatstat.utils_3.0-4  tibble_3.2.1          stringi_1.8.4        
 [88] RSpectra_0.16-0       rgeos_0.6-4           Matrix_1.6-5         
 [91] classInt_0.4-10       fftw_1.0-8            vctrs_0.6.5          
 [94] pillar_1.9.0          lifecycle_1.0.4       spatstat.geom_3.2-9  
 [97] cowplot_1.1.3         data.table_1.14.2     bitops_1.0-7         
[100] raster_3.5-15         R6_2.5.1              KernSmooth_2.23-20   
[103] gridExtra_2.3         parallelly_1.37.1     codetools_0.2-18     
[106] MASS_7.3-55           openssl_2.1.1         rjson_0.2.21         
[109] withr_3.0.0           mgcv_1.8-39           parallel_4.1.2       
[112] terra_1.5-21          grid_4.1.2            rpart_4.1.16         
[115] timeDate_3043.102     class_7.3-20          rmarkdown_2.26       
[118] sf_1.0-6              pROC_1.18.0           lubridate_1.9.3