Acoustic analysis

Cryptic species in Pseudasthenes

Author

Marcelo Araya-Salas

Published

October 30, 2024

Next steps

Add new recordings to EST (“Pc_monticola7.wav” “Pc_monticola8.wav”)
Fix importance plot for songs
Redo stats

1 Purpose

Measure acoustic structure of cape parrot contact calls
Compare acoustic dissimilarity between individuals from different localities

2 Report overview

Acoustic analysis
Statistical analysis

Load packages

Code

# knitr is require for creating html/pdf/word reports
# formatR is used for soft-wrapping code

# install/ load packages
sketchy::load_packages(
  packages = c(
    "knitr",
    "formatR",
    "viridis",
    "warbleR",
    # github = "maRce10/PhenotypeSpace",
    "ggplot2",
    "randomForest",
    "mlbench",
    "caret",
    "pbapply",
    "vegan",
    "umap",
    "maRce10/ohun",
    "corrplot",
    "Rraven"
  )
)

source("./scripts/MRM2.R")

2.1 Incorporate new data

Code

new_anns <- imp_raven(path = "./data/raw/", name.from.file = TRUE,
    ext.case = "lower", warbler.format = TRUE, all.data = TRUE)

check_sels(new_anns, path = "./data/raw/")


new_est <- selection_table(new_anns, path = "./data/raw/", extended = TRUE,
    by.song = "Senal")
new_est$`Begin File` <- new_est$`Begin Path` <- new_est$`End Path` <- new_est$`End File` <- new_est$`Delta Time (s)` <- new_est$`Peak Freq (Hz)` <- new_est$org.file <- NA

new_est$Poblacion <- "Pseudasthenes cactorum monticola"
new_est$Localidad <- "Arequipa"
new_est$`End File` <- new_est$Individuo <- substr(new_est$sound.files,
    0, 13)
new_est$`Subespecie correct` <- "Pseudasthenes cactorum monticola"

2.2 Format data

Code

coors_dat <- readxl::read_xlsx("./data/raw/Base_datos_Pseudasthenes_coordenadas.xlsx")

# dat <-
# readRDS('./data/processed/pool_params_Pseudasthenes.RDS')

est <- readRDS("./data/processed/tabla_extendida_por_Senal_Pseudasthenes_UnidoExcel_v02.RDS")


est$`Subespecie correct`[est$`Subespecie correct` == "Pseudasthenes cactorum cactorum / lachayensis"] <- "Pseudasthenes cactorum lachayensis"

est <- rbind(est, new_est)

est$subspecies <- gsub("Pseudasthenes cactorum ", "", est$`Subespecie correct`)

call_est <- est[grep("Canto", est$Senal, invert = TRUE), ]

song_est <- est[grep("Canto", est$Senal), ]

nrow(call_est)

[1] 276

Code

nrow(song_est)

[1] 100

Code

nrow(est)

[1] 376

Code

# dat <- dat[dat$Localidad != 'Ica', c('Localidad', 'M.peakf',
# 'M.mindom', 'M.maxdom', 'M.elm.duration', 'M.duration',
# 'I.mindom', 'I.maxdom', 'I.peakf', 'I.elm.duration',
# 'F.peakf', 'F.mindom', 'F.maxdom', 'F.elm.duration',
# 'F.duration', 'COMPLETO.peakf', 'COMPLETO.dfrange',
# 'COMPLETO.mindom', 'COMPLETO.maxdom', 'COMPLETO.elm.duration',
# 'COMPLETO.duration' , 'Nnotas')] dat$Localidad <-
# factor(dat$Localidad)

3 Descriptive stats

3.1 Calls

Recordings per subspecies

Code

as.data.frame(table(call_est$subspecies[!duplicated(call_est$`End File`)]))

Var1	Freq
cactorum	3
lachayensis	15
monticola	4

Calls per subspecies

Code

as.data.frame(table(call_est$subspecies[call_est$Nota == "COMPLETO"]))

Var1	Freq
cactorum	6
lachayensis	45
monticola	18

Recordings per locality

Code

as.data.frame(table(call_est$Localidad[!duplicated(call_est$`End File`)]))

Var1	Freq
Arequipa	4
Ica	3
Lima	15

Calls per localities

Code

as.data.frame(table(call_est$Localidad[call_est$Nota == "COMPLETO"]))

Var1	Freq
Arequipa	18
Ica	6
Lima	45

3.2 Songs

Recordings per subspecies

Code

as.data.frame(table(song_est$subspecies[!duplicated(song_est$`End File`)]))

Var1	Freq
cactorum	5
lachayensis	4
monticola	5

Songs per subspecies

Code

as.data.frame(table(song_est$subspecies[song_est$Nota == "COMPLETO"]))

Var1	Freq
cactorum	7
lachayensis	12
monticola	6

Songs per localities

Code

as.data.frame(table(song_est$Localidad[song_est$Nota == "COMPLETO"]))

Var1	Freq
Arequipa	8
Ica	5
Lima	12

4 Acoustic and statistical analyses

4.0.1 Measure acoustic structure

Code

call_acous_feat <- lapply(unique(call_est$Nota), function(x) {

    X <- spectro_analysis(call_est[call_est$Nota == x, ], wl = 200,
        ovlp = 70, parallel = 2)
    if (x == "I")
        names(X)[-1:-2] <- paste(names(X)[-1:-2], x, sep = "-") else {

        X <- X[, -1:-2]
        names(X) <- paste(names(X), x, sep = "-")
    }

    return(X)
})



call_acous_feat_df <- do.call(cbind, call_acous_feat)

call_acous_feat_df$locality <- call_est$Localidad[call_est$Nota ==
    "I"]
call_acous_feat_df$subspecies <- call_est$subspecies[call_est$Nota ==
    "I"]


head(call_acous_feat_df)

saveRDS(call_acous_feat_df, "./data/processed/acoustic_features_calls.RDS")

4.0.2 Random forest

Code

call_acous_feat_df <- readRDS("./data/processed/acoustic_features_calls.RDS")

# remove collinear
cor_dat <- call_acous_feat_df[, !names(call_acous_feat_df) %in% c("sound.files",
    "selec", "locality", "subspecies")]
cor_dat <- cor_dat[, !sapply(cor_dat, anyNA)]
cormat <- cor(cor_dat, use = "pairwise.complete.obs")
hc <- findCorrelation(cormat, cutoff = 0.9)  # putt any value as a 'cutoff' 
hc <- sort(hc)

cor_dat <- cor_dat[, -c(hc)]

names(cor_dat) <- gsub("-", ".", names(cor_dat))

cor_dat$subspecies <- as.factor(call_acous_feat_df$subspecies)

# Create data subsets
partition <- createDataPartition(y = cor_dat$subspecies, times = 1,
    p = 0.75, list = FALSE)

trainset <- cor_dat[partition, ]
testset <- cor_dat[-partition, ]

Code

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 2000,
        repeats = 100,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    subspecies ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "scale"
)

# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), factor(testset$subspecies))

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$subspecies ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  as.factor(subspecies) ~ .,
  data = cor_dat,  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry    # Number of variables tried for splitting at each node
  # nodesize = best_rf_model$,  # Minimum size of terminal nodes
  # maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)

rf_model_results <-
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        cor_dat = cor_dat,
        all_rf_model = all_rf_model
    )


saveRDS(
    rf_model_results,
    "./data/processed/random_forest_model_results_calls.RDS"
)

4.0.3 Checking performance on test data

Code

rf_model_results <- readRDS("./data/processed/random_forest_model_results_calls.RDS")


# print confusion matrix results
rf_model_results$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
      0.687500       0.432624       0.413379       0.889830       0.687500 
AccuracyPValue  McnemarPValue 
      0.618019       0.391625

Code

confusion_df <- rf_model_results$confusion_df_bb
# confusion_df$Prediction <- gsub('.model|P.tricarunculatus.',
# '', confusion_df$Prediction) confusion_df$Reference <-
# gsub('.model|P.tricarunculatus.', '', confusion_df$Reference)

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

4.0.4 Checking performance on the entire data

Code

cor_dat <- rf_model_results$cor_dat
pred_model <- rf_model_results$pred_model_bb

# USING ALL DATA
conf_mat <- confusionMatrix(predict(pred_model, cor_dat), cor_dat$subspecies)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) sum(cor_dat$subspecies ==
    x))

conf_df$proportion <- conf_df$Freq/conf_df$total

# saveRDS(list(pred_model_bb = pred_model, conf_mat_bb =
# conf_mat, confusion_df_bb = conf_df, testset_bb = testset),
# './data/processed/random_forest_model_results.RDS')

rf_model_results_test <- list(pred_model_bb = pred_model, conf_mat_bb = conf_mat,
    confusion_df_bb = conf_df, testset_bb = testset)

# print confusion matrix results
rf_model_results_test$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
    0.72463768     0.54855372     0.60384919     0.82537405     0.65217391 
AccuracyPValue  McnemarPValue 
    0.12672396     0.00153085

Code

confusion_df <- rf_model_results_test$confusion_df_bb
# confusion_df$Prediction <- gsub('.model|P.tricarunculatus.',
# '', confusion_df$Prediction) confusion_df$Reference <-
# gsub('.model|P.tricarunculatus.', '', confusion_df$Reference)

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

4.0.5 Variable importance

Code

# importance
gbmImp <- varImp(pred_model, scale = FALSE)
gbmImp

rf variable importance

  only 20 most important variables shown (out of 83)

                     Overall
meanfreq.COMPLETO      0.683
time.median.F          0.655
duration.M             0.485
time.ent.M             0.447
modindx.M              0.424
duration.F             0.391
entropy.M              0.387
freq.Q25.COMPLETO      0.354
modindx.COMPLETO       0.297
freq.median.COMPLETO   0.290
freq.Q75.F             0.273
mindom.COMPLETO        0.263
time.Q75.F             0.224
time.Q75.M             0.210
meanpeakf.COMPLETO     0.203
time.median.M          0.190
startdom.M             0.166
freq.IQR.M             0.162
dfrange.COMPLETO       0.156
skew.M                 0.152

Code

plot(gbmImp, top = 20)

4.0.6 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], location = cor_dat$subspecies)

umap_df$pred.subspecies <- predict(object = rf_model_results$all_rf_model,
    cor_dat)

write.csv(umap_df, "./data/processed/umap_on_rf_proximity_localities_calls.csv",
    row.names = FALSE)

Code

umap_loc_df <- read.csv("./data/processed/umap_on_rf_proximity_localities_calls.csv")

# Create a scatterplot
gg_umap_loc <- ggplot(umap_loc_df, aes(x = UMAP1, y = UMAP2, color = location,
    fill = location)) + geom_point(size = 4) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "UMAP1",
    y = "UMAP2", color = "subspecies", fill = "subspecies")

gg_umap_loc

4.0.6.1 Projecting random forest proximity with PCA

Code

pca_result <- prcomp(rf_model_results$all_rf_model$proximity)

# Create a data frame with the UMAP results
pca_df <- data.frame(PC1 = pca_result$x[, 1], PC2 = pca_result$x[,
    2], PC3 = pca_result$x[, 3], location = cor_dat$subspecies)

pca_df$pred.subspecies <- predict(object = rf_model_results$all_rf_model,
    cor_dat)

# Create a scatterplot
gg_pca_loc <- ggplot(pca_df, aes(x = PC1, y = PC2, color = location,
    fill = location)) + geom_point(size = 4) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "PC1", y = "PC2",
    color = "subspecies", fill = "subspecies")

gg_pca_loc

Code

gg_pca_loc2 <- ggplot(pca_df, aes(x = PC1, y = PC3, color = location,
    fill = location)) + geom_point(size = 4) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "PC1", y = "PC3",
    color = "subspecies", fill = "subspecies")

gg_pca_loc2

4.0.7 Measure acoustic structure

Code

song_acous_feat <- lapply(unique(song_est$Nota), function(x) {

    X <- spectro_analysis(song_est[song_est$Nota == x, ], wl = 200,
        ovlp = 70, parallel = 2)
    if (x == "I")
        names(X)[-1:-2] <- paste(names(X)[-1:-2], x, sep = "-") else {

        X <- X[, -1:-2]
        names(X) <- paste(names(X), x, sep = "-")
    }

    return(X)
})

song_acous_feat_df <- do.call(cbind, song_acous_feat)

song_acous_feat_df$locality <- song_est$Localidad[song_est$Nota ==
    "I"]
song_acous_feat_df$subspecies <- song_est$subspecies[song_est$Nota ==
    "I"]


head(song_acous_feat_df)

saveRDS(song_acous_feat_df, "./data/processed/acoustic_features_songs.RDS")

4.0.8 Random forest

Code

song_acous_feat_df <- readRDS("./data/processed/acoustic_features_songs.RDS")

# remove collinear
cor_dat <- song_acous_feat_df[, !names(song_acous_feat_df) %in% c("sound.files",
    "selec", "locality", "subspecies")]

names(cor_dat) <- gsub("-", ".", names(cor_dat))


cor_dat <- cor_dat[, !sapply(cor_dat, anyNA)]
cormat <- cor(cor_dat, use = "pairwise.complete.obs")
hc <- findCorrelation(cormat, cutoff = 0.9)  # putt any value as a 'cutoff' 
hc <- sort(hc)

cor_dat <- cor_dat[, -c(hc)]

head(cor_dat)

sd.I	freq.median.I	freq.Q25.I	freq.Q75.I	freq.IQR.I	time.Q25.I	kurt.I	time.ent.I	sfm.I	meandom.I	mindom.I	maxdom.I	dfrange.I	startdom.I	enddom.I	dfslope.I	meanpeakf.I	duration.M	freq.median.M	freq.Q25.M	freq.IQR.M	time.median.M	time.Q25.M	skew.M	time.ent.M	sfm.M	mindom.M	maxdom.M	dfrange.M	modindx.M	startdom.M	enddom.M	dfslope.M	meanpeakf.M	duration.F	freq.median.F	freq.Q25.F	freq.IQR.F	kurt.F	sp.ent.F	time.ent.F	entropy.F	sfm.F	meandom.F	mindom.F	maxdom.F	dfrange.F	modindx.F	startdom.F	enddom.F	dfslope.F	meanpeakf.F	meanfreq.COMPLETO	freq.Q25.COMPLETO	freq.IQR.COMPLETO	time.IQR.COMPLETO	kurt.COMPLETO	time.ent.COMPLETO	sfm.COMPLETO	meandom.COMPLETO	mindom.COMPLETO	maxdom.COMPLETO	modindx.COMPLETO	startdom.COMPLETO	enddom.COMPLETO	dfslope.COMPLETO	meanpeakf.COMPLETO
1.451843	5.33136	4.71238	6.38362	1.671240	0.006071	4.69996	0.762849	0.503807	5.29200	4.74075	6.06375	1.323	6.06375	4.74075	-81.6997	5.40225	0.012651	4.35755	4.19976	0.473352	0.007592	0.005061	3.176594	0.740863	0.379431	4.07925	5.40225	1.323	1.00000	5.40225	4.07925	-104.5756	4.29975	0.020242	4.41674	3.82382	1.581118	14.39336	0.894639	0.857600	0.767242	0.511963	4.70400	4.29975	5.84325	1.5435	2.71429	5.18175	4.52025	-32.6799	4.52025	4.61947	3.96704	1.14713	2.51661	25.87454	0.901155	0.445954	4.36940	1.43325	8.93025	91.0882	6.06375	4.52025	-0.291744	4.29975
1.413290	4.97235	3.98066	6.34546	2.364797	0.003762	2.39394	0.780811	0.797688	4.95000	3.72000	5.88000	2.160	5.64000	5.40000	-18.2410	5.64000	0.018607	4.67104	3.97249	1.773230	0.010148	0.006765	1.006360	0.805626	0.597710	4.20000	6.36000	2.160	2.33333	4.44000	4.20000	-12.8983	4.44000	0.020754	4.15965	3.67831	1.203341	6.32277	0.928581	0.827942	0.768811	0.658997	4.12000	3.00000	4.68000	1.6800	2.71429	4.68000	3.96000	-34.6921	4.20000	4.75432	3.98602	1.61328	2.57086	9.69421	0.902993	0.547594	4.44784	2.28000	7.08000	216.4000	5.16000	2.76000	-0.415964	4.44000
1.232368	5.25811	4.87599	6.08602	1.210026	0.005243	6.68887	0.815834	0.553800	5.18400	4.68000	6.12000	1.440	4.92000	4.92000	0.0000	4.92000	0.014671	4.85142	3.82598	2.050883	0.009167	0.003667	0.388957	0.819423	0.708228	3.24000	5.88000	2.640	2.09091	4.44000	4.20000	-16.3589	4.44000	0.026479	6.11108	5.20403	1.738503	5.82817	0.926448	0.916417	0.849013	0.613309	6.10667	5.16000	7.32000	2.1600	3.00000	5.88000	5.16000	-27.1911	6.36000	5.14902	4.23968	1.83649	4.81603	7.87507	0.908760	0.521551	4.76462	1.56000	9.72000	224.3529	4.92000	5.40000	0.046501	4.44000
0.356876	4.42406	4.30945	4.65329	0.343844	0.001771	6.58879	0.805504	0.249210	4.64727	4.56000	5.04000	0.480	5.04000	5.04000	0.0000	4.56000	0.014169	3.96940	3.26115	1.274850	0.006715	0.002985	1.898133	0.892998	0.612120	2.64000	4.56000	1.920	3.25000	4.08000	4.56000	33.8761	4.08000	0.018607	4.43378	3.78647	1.564330	11.87179	0.937028	0.925921	0.867615	0.720422	4.72000	4.08000	5.52000	1.4400	2.66667	5.52000	4.56000	-51.5933	4.56000	4.19800	3.21353	1.76863	2.76715	13.98839	0.910768	0.603869	3.45788	1.68000	7.92000	659.3846	4.56000	4.56000	0.000000	4.56000
0.298200	4.73430	4.56956	4.89905	0.329483	0.004551	4.15210	0.835565	0.237394	5.09647	4.56000	6.00000	1.440	6.00000	5.04000	-79.0443	5.04000	0.016193	4.31981	3.94475	0.562589	0.005151	0.002207	2.358714	0.889456	0.549513	3.12000	5.52000	2.400	3.00000	5.04000	4.56000	-29.6416	4.56000	0.034411	3.30212	2.89317	0.701056	4.33306	0.876227	0.969471	0.849477	0.376832	3.25846	1.68000	3.60000	1.9200	4.25000	3.12000	3.60000	13.9490	3.60000	3.86812	3.31257	1.35822	4.32634	9.69180	0.912347	0.568584	2.44741	1.20000	6.00000	941.8000	1.20000	4.08000	0.375037	4.56000
1.190522	5.45789	4.75876	6.23928	1.480520	0.013676	2.92154	0.851060	0.712503	5.06118	2.52000	6.84000	4.320	5.64000	5.88000	9.8706	5.64000	0.020424	4.90725	4.56354	0.982018	0.009423	0.006282	2.282513	0.890094	0.645359	4.68000	7.32000	2.640	2.27273	4.68000	4.92000	11.7507	4.92000	0.017507	5.06532	4.94967	2.024016	10.72274	0.852082	0.862406	0.734841	0.526528	4.89818	4.44000	5.16000	0.7200	1.33333	4.92000	4.44000	-27.4183	4.92000	4.93364	4.27040	1.37461	1.15550	19.08989	0.897020	0.627395	3.77114	1.80000	8.04000	295.5385	1.80000	2.76000	0.285813	4.92000

Code

cor_dat$subspecies <- as.factor(song_acous_feat_df$subspecies)

table(cor_dat$subspecies)


   cactorum lachayensis   monticola 
          7          12           6

Code

# Create data subsets
partition <- createDataPartition(y = cor_dat$subspecies, times = 1,
    p = 0.75, list = FALSE)

trainset <- cor_dat[partition, ]
testset <- cor_dat[-partition, ]

Code

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 2000,
        repeats = 100,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    subspecies ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "scale"
)

# save confusion matrix
conf_mat <- confusionMatrix(predict(pred_model, testset), factor(testset$subspecies, levels = unique(testset$subspecies)))

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$subspecies ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  as.factor(subspecies) ~ .,
  data = cor_dat,  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry    # Number of variables tried for splitting at each node
  # nodesize = best_rf_model$,  # Minimum size of terminal nodes
  # maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)

rf_model_results <-
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    )


saveRDS(
    rf_model_results,
    "./data/processed/random_forest_model_results_songs.RDS"
)

4.0.9 Checking performance on test data

Code

rf_model_results <- readRDS("./data/processed/random_forest_model_results_songs.RDS")


# print confusion matrix results
rf_model_results$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
      1.000000       1.000000       0.478176       1.000000       0.600000 
AccuracyPValue  McnemarPValue 
      0.077760            NaN

Code

confusion_df <- rf_model_results$confusion_df_bb
# confusion_df$Prediction <- gsub('.model|P.tricarunculatus.',
# '', confusion_df$Prediction) confusion_df$Reference <-
# gsub('.model|P.tricarunculatus.', '', confusion_df$Reference)

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

4.0.10 Checking performance on the entire data

Code

pred_model <- rf_model_results$all_rf_model

# USING ALL DATA
conf_mat <- confusionMatrix(predict(pred_model, cor_dat), cor_dat$subspecies)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <- sapply(conf_df$Reference, function(x) sum(cor_dat$subspecies ==
    x))

conf_df$proportion <- conf_df$Freq/conf_df$total

# saveRDS(list(pred_model_bb = pred_model, conf_mat_bb =
# conf_mat, confusion_df_bb = conf_df, testset_bb = testset),
# './data/processed/random_forest_model_results.RDS')

rf_model_results_test <- list(pred_model_bb = pred_model, conf_mat_bb = conf_mat,
    confusion_df_bb = conf_df, testset_bb = testset)

# print confusion matrix results
rf_model_results_test$conf_mat_bb$overall

      Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
   1.00000e+00    1.00000e+00    8.62815e-01    1.00000e+00    4.80000e-01 
AccuracyPValue  McnemarPValue 
   1.07407e-08            NaN

Code

confusion_df <- rf_model_results_test$confusion_df_bb
# confusion_df$Prediction <- gsub('.model|P.tricarunculatus.',
# '', confusion_df$Prediction) confusion_df$Reference <-
# gsub('.model|P.tricarunculatus.', '', confusion_df$Reference)

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

4.0.11 Variable importance

Code

# importance
gbmImp <- varImp(pred_model, scale = FALSE)
gbmImp

	Overall
sd.I	0.183035
freq.median.I	0.092123
freq.Q25.I	0.266699
freq.Q75.I	0.172164
freq.IQR.I	0.262791
time.Q25.I	0.176199
kurt.I	0.166116
time.ent.I	0.236473
sfm.I	0.141004
meandom.I	0.114293
mindom.I	0.197284
maxdom.I	0.161212
dfrange.I	0.222982
startdom.I	0.154037
enddom.I	0.140063
dfslope.I	0.166555
meanpeakf.I	0.134893
duration.M	0.281443
freq.median.M	0.331528
freq.Q25.M	0.356906
freq.IQR.M	0.265460
time.median.M	0.317432
time.Q25.M	0.137375
skew.M	0.095538
time.ent.M	0.429136
sfm.M	0.264533
mindom.M	0.197773
maxdom.M	0.550380
dfrange.M	0.365828
modindx.M	0.401957
startdom.M	0.371078
enddom.M	0.265095
dfslope.M	0.459403
meanpeakf.M	0.242443
duration.F	0.188369
freq.median.F	0.241376
freq.Q25.F	0.165176
freq.IQR.F	0.259426
kurt.F	0.196289
sp.ent.F	0.156933
time.ent.F	0.206783
entropy.F	0.112629
sfm.F	0.131789
meandom.F	0.403378
mindom.F	0.106897
maxdom.F	0.168016
dfrange.F	0.266894
modindx.F	0.121617
startdom.F	0.213325
enddom.F	0.127532
dfslope.F	0.154502
meanpeakf.F	0.358626
meanfreq.COMPLETO	0.315679
freq.Q25.COMPLETO	0.249872
freq.IQR.COMPLETO	0.268306
time.IQR.COMPLETO	0.297259
kurt.COMPLETO	0.097717
time.ent.COMPLETO	0.229971
sfm.COMPLETO	0.221108
meandom.COMPLETO	0.238710
mindom.COMPLETO	0.152013
maxdom.COMPLETO	0.272782
modindx.COMPLETO	0.346063
startdom.COMPLETO	0.131592
enddom.COMPLETO	0.118977
dfslope.COMPLETO	0.121959
meanpeakf.COMPLETO	0.263124

Code

plot(gbmImp, top = 20)

Warning in plot.window(xlim, ylim, log, ...): "top" is not a graphical
parameter

Warning in axis(side = side, at = at, labels = labels, ...): "top" is not a
graphical parameter

Warning in title(xlab = xlab, ylab = ylab, ...): "top" is not a graphical
parameter

Warning in plot.xy(xy.coords(x, y), type = type, ...): "top" is not a graphical
parameter

4.0.12 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], location = cor_dat$subspecies)

umap_df$pred.subspecies <- predict(object = rf_model_results$all_rf_model,
    cor_dat)

write.csv(umap_df, "./data/processed/umap_on_rf_proximity_localities_songs.csv",
    row.names = FALSE)

Code

umap_loc_df <- read.csv("./data/processed/umap_on_rf_proximity_localities_songs.csv")

# Create a scatterplot
gg_umap_loc <- ggplot(umap_loc_df, aes(x = UMAP1, y = UMAP2, color = location,
    fill = location)) + geom_point(size = 4) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "UMAP1",
    y = "UMAP2", color = "subspecies", fill = "subspecies")

gg_umap_loc

4.0.13 Projecting random forest proximity with PCA

Code

pca_result <- prcomp(rf_model_results$all_rf_model$proximity)

# Create a data frame with the UMAP results
pca_df <- data.frame(PC1 = pca_result$x[, 1], PC2 = pca_result$x[,
    2], location = cor_dat$subspecies)

pca_df$pred.subspecies <- predict(object = rf_model_results$all_rf_model,
    cor_dat)

# Create a scatterplot
gg_pca_loc <- ggplot(pca_df, aes(x = PC1, y = PC2, color = location,
    fill = location)) + geom_point(size = 4) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "PC1", y = "PC2",
    color = "subspecies", fill = "subspecies")

gg_pca_loc

Code

est <- readRDS("./data/processed/tabla_extendida_por_Senal_Pseudasthenes_UnidoExcel.RDS")

est <- resample_est(est, pb = FALSE)
call_est <- est[grep("llamado", est$Senal, ignore.case = TRUE), ]

# spectrograms(call_est, ovlp = 90, flim = c(1, 10), collevels =
# seq(-110, 0, 5), pal = reverse.gray.colors.1, dest.path =
# './data/processed/spectrograms/calls', by.song =
# 'sound.files', xl = 3, wl = 200, parallel = 20, sel.labels =
# 'Nota')

song_est <- est[grep("Canto", est$Senal, ignore.case = TRUE), ]

# spectrograms(song_est, ovlp = 90, flim = c(1, 18), collevels =
# seq(-110, 0, 5), pal = reverse.gray.colors.1, dest.path =
# './data/processed/spectrograms/songs', by.song =
# 'sound.files', xl = 5, wl = 200, parallel = 20, sel.labels =
# 'Nota')

# sample size call
table(call_est$Localidad[!duplicated(call_est$org.file)])

# calls per individual and site
table(call_est$Localidad[!duplicated(call_est$sound.files)], call_est$org.file[!duplicated(call_est$sound.files)])

# song
table(song_est$Localidad[!duplicated(song_est$org.file)])


# songs per individual and site
table(song_est$Localidad[!duplicated(song_est$sound.files)], song_est$org.file[!duplicated(song_est$sound.files)])

# get med element
m_call_est <- call_est[call_est$Nota == "M", ]


xc_m_call <- cross_correlation(m_call_est, wl = 200, ovlp = 70)

mds_m_call <- cmdscale(1 - xc_m_call, k = 2)


# medidas - cross-correlation between M notes, F notes and I
# notes, - difference in peak frequency between first and last
# note - number of notes per call/song - duration - mean element
# values for acoustic features divided by note type (I, M, F)

Code

# coordinates
coors <- readxl::read_excel("./data/processed/Base_datos_Pseudasthenes_coordenadas.xlsx")

cll_elms_est <- call_est[call_est$Nota != "COMPLETO", ]
cll_i_est <- call_est[call_est$Nota == "I", ]


out <- lapply(c("I", "M", "F", "COMPLETO"), function(x) {

    cll_elms_sp <- spectro_analysis(call_est[call_est$Nota == x, ])
    names(cll_elms_sp)[-c(1, 2)] <- paste0(names(cll_elms_sp)[-c(1,
        2)], "-", x)
    return(cll_elms_sp)
})


cll_sp <- cbind(out[[1]], out[[2]][, -c(1, 2)], out[[3]][, -c(1, 2)],
    dur_complete = out[[4]]$`duration-COMPLETO`)
cll_sp$diff_IF <- cll_sp$`meanpeakf-I` - cll_sp$`meanpeakf-F`

# remove 0 var parameters
cll_sp <- cll_sp[, !names(cll_sp) %in% names(which(sapply(cll_sp[,
    -c(1, 2)], sd) == 0))]

pca <- prcomp(cll_sp[, -c(1, 2)], scale. = TRUE)
cll_sp$pc1 <- pca$x[, 1]
cll_sp$pc2 <- pca$x[, 2]

cll_sp$org.sound.file <- sapply(strsplit(cll_sp$sound.files, ".wav"),
    "[[", 1)


coors$file <- gsub(".wav$", "", coors$`Begin File`)


cll_sp$lat <- sapply(cll_sp$org.sound.file, function(x) coors$Latitud[coors$file ==
    x][1])
cll_sp$lon <- sapply(cll_sp$org.sound.file, function(x) coors$Longitud[coors$file ==
    x][1])

cll_sp$pob <- sapply(cll_sp$org.sound.file, function(x) coors$Poblacion[coors$file ==
    x][1])
cll_sp$locality <- sapply(cll_sp$org.sound.file, function(x) coors$Localidad[coors$file ==
    x][1])


subsp_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = cll_sp$pob))

loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = cll_sp$locality))
acu_dist <- dist(cll_sp[, c("pc1", "pc2")])
geo_dist <- dist(cll_sp[, c("lat", "lon")])


# regression models set data format
rect_var <- cbind(acu_dist = as.dist(scale(acu_dist)), geo_dist = as.dist(scale(geo_dist)),
    loc_member_binary = loc_member_binary, subsp_member_binary = subsp_member_binary)

rect_var <- cbind(rect_var, res = residuals(lm(rect_var[, "loc_member_binary"] ~
    rect_var[, "geo_dist"])))

head(rect_var)
apply(rect_var, 2, var)


nperm <- 1000

# (mod_dialect <- MRM2(formula = acu_dist ~ loc_member_binary +
# geo_dist + subsp_member_binary, nperm = nperm, mat =
# rect_var[, c('acu_dist','loc_member_binary', 'geo_dist',
# 'subsp_member_binary')]))

# (mod_dialect <- MRM2(formula = acu_dist ~ loc_member_binary +
# geo_dist, nperm = nperm, mat = rect_var[,
# c('acu_dist','loc_member_binary', 'geo_dist')]))


(mod_dialect <- MRM2(formula = acu_dist ~ loc_member_binary, nperm = nperm,
    mat = rect_var[, c("acu_dist", "loc_member_binary")]))

# (mod_dialect <- MRM2(formula = acu_dist ~ subsp_member_binary
# , nperm = nperm, mat = rect_var[, c('acu_dist',
# 'subsp_member_binary')]))


# (mod_dialect <- MRM2(formula = acu_dist ~ geo_dist, nperm =
# nperm, mat = rect_var[, c('acu_dist', 'geo_dist')]))

# cor(rect_var)

# corrplot.mixed(cor(rect_var), upper= 'ellipse')

# (mod_dialect <- MRM2(formula = acu_dist ~ geo_dist, nperm =
# nperm, mat = rect_var[, c('acu_dist', 'geo_dist')]))

# (mod_dialect <- MRM2(formula = acu_dist ~ res, nperm = nperm,
# mat = rect_var[, c('acu_dist', 'res')]))

5 Statistical analysis

Two approaches:

Multiple Regression on distance Matrices
Partial Mantel test

5.1 Multiple Regression on distance Matrices

Model:
\[\begin{align*} Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance \end{align*}\]
Response values scaled to make effect sizes comparable across models
Locality was coded as pairwise binary matrix in which 0 means that calls in a dyad belong to the same locality and 1 means calls belong to different locality

Code

xcorr <- readRDS("./data/processed/cross_correlation_matrix.RDS")

xcorr_mds <- readRDS("./data/processed/cross_correlation_MDS.RDS")


dat <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls_CURATED.csv")

# add data from second location
dat_loc2 <- read.csv("./data/raw/consolidated_sound_files_CPV_contact_calls _UPDATED.csv")

dat <- dat[grepl("cape", dat$species, ignore.case = T) & !is.na(dat$Location.for.cluster) &
    !is.na(dat$Longitude.for.cluster) & !is.na(dat$Latittude.for.cluster),
    ]


dat$sampling.site <- sapply(dat$new_name, function(x) dat_loc2$Sampling.sites[dat_loc2$New_Name ==
    x])
dat$species <- "Cape parrot"

sub_xcorr <- xcorr[rownames(xcorr) %in% dat$new_name, colnames(xcorr) %in%
    dat$new_name]
sub_xcorr_mds <- xcorr_mds[rownames(xcorr_mds) %in% dat$new_name,
    ]

sub_dat <- dat[dat$new_name %in% rownames(sub_xcorr_mds), ]
sub_dat <- sub_dat[match(rownames(sub_xcorr), sub_dat$new_name), ]

location <- sapply(rownames(sub_xcorr), function(x) sub_dat$Location.for.cluster[sub_dat$new_name ==
    x])
sampling.site <- sapply(rownames(sub_xcorr), function(x) sub_dat$sampling.site[sub_dat$new_name ==
    x])

Code

loc_bi_tri <- as.dist(binary_triangular_matrix(group = location))

samp_bi_tri <- as.dist(binary_triangular_matrix(group = sampling.site))

geo_dist <- dist(sub_dat[, c("Latittude.for.cluster", "Longitude.for.cluster")])

rect_var <- cbind(as.dist(1 - sub_xcorr), geo_dist, loc_bi_tri, samp_bi_tri)

colnames(rect_var) <- c("fourier_xc", "geo_distance", "location",
    "sampling.site")

rect_var <- rect_var[!is.infinite(rect_var[, 1]), ]

xc_mod <- MRM2(formula = fourier_xc ~ geo_distance + location + sampling.site,
    nperm = 10000, mat = rect_var)

saveRDS(xc_mod, "./data/processed/matrix_correlation_fourier_cross_correlation.RDS")

Code

readRDS("./data/processed/matrix_correlation_fourier_cross_correlation.RDS")

6 Simulation

6.1 Simulate data with different patterns of geographic variation in call structure

26 localities (10 individuals each) along a longitudinal range
Simulated patterns following Podos & Warren (2007)

Code

# seed to make it reproducible
set.seed(123)

# number of groups
n_locality <- length(unique(sub_dat$Location.for.cluster))

# number of individuals per group
n_indiv <- table(sub_dat$Location.for.cluster)

# create locality labels
localities <- sample(LETTERS[1:n_locality])

# simulated individuals per group
locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x],
    each = n_indiv[x])))

# simulate geographic coordinates along a gradient
lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) +
    rnorm(n = length(locality_label), sd = 0.1)
lat <- rnorm(n = length(locality_label), sd = 0.1)


# put all together in a data frame
data <- data.frame(locality = locality_label, lat, lon, color = viridis(n_locality)[as.numeric(as.factor(locality_label))])

# add sampling site (broader geographic mozaic)
data <- data[order(data$lon), ]

data$site_num <- 2

for (i in 2:nrow(data)) {

    data$site_num[i] <- if (data$locality[i - 1] == data$locality[i])
        data$site_num[i - 1] else data$site_num[i - 1] + 1

}

# make last location the same as th previous one
data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] -
    1

data$sampling.site <- sample(letters)[floor(data$site_num/2)]

# plot localities along longitude
plot(data[, c("lon", "lat")], col = data$color, cex = 1.8, ylim = range(data$lat) +
    c(-0.2, 0.2), pch = as.numeric(as.factor(data$sampling.site)))

lon_loc <- tapply(data$lon, data$locality, mean)
text(x = lon_loc, y = rep(0.3, n_locality), labels = names(lon_loc),
    cex = 2.5)

abline(v = 1:30 - 0.5)

6.2 Simulate acoustic variation:

Clinal: acoustic structure vector increases with longitude
Dialect-type: acoustic structure vector similar within a locality but varies randomly between neighnoring localities
- 2 levels: locality and sampling site
- Sampling site create by grouping 2 adjacent localities with the same label
Random: acoustic structure vector varies randomly between individuals regardless of locality or longitude

Code

# seed to make it reproducible
set.seed(123)

# simulate acoustic structure vector clinal variation
data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)

# dialect type variation
data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data),
    sd = 0.2)

data$dialect_site <- as.numeric(as.factor(data$sampling.site)) + rnorm(n = nrow(data),
    sd = 0.2)

# random variation
data$random <- rnorm(n = nrow(data), sd = 0.2)

# sort so lines look good in plot
data <- data[order(data$lon), ]

lng_data <- stack(data[, c("clinal", "dialect", "dialect_site", "random")])

lng_data$locality <- data$locality
lng_data$lat <- data$lat
lng_data$lat <- data$lon
lng_data$lon_seq <- 1:nrow(data)

ggplot(lng_data, aes(x = lon_seq, y = values)) + geom_line(color = "#3E4A89FF",
    linewidth = 1.6) + facet_wrap(~ind, scales = "free_y", nrow = 3) +
    theme_classic(base_size = 25) + labs(x = "Longitude", y = "Acoustic feature")

6.3 Stats on simulated data

Model:
\[\begin{align*} Acoustic\ dissimilarity &\sim locality + sampling\ site + geographic\ distance \end{align*}\]

Predict acoustic structure based on geographic distance and locality membership using multiple Regression on distance matrices

Data is z-transformed so all predictors have similar variance and effect sizes are comparable.

Variance for each predictor:

Code

# create distance matrices
loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$sampling.site))
clinal_dist <- dist(data$clinal)
dialect_loc_dist <- dist(data$dialect)
dialect_site_dist <- dist(data$dialect_site)
random_dist <- dist(data$random)
geo_dist <- dist(data[, c("lat", "lon")])

# regression models set data format
rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)),
    dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)),
    geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary,
    site_member_binary = site_member_binary)

apply(rect_var, 2, var)

6.3.1 Dialect-type variation model at locality level

Code

nperm <- 100

# model predicting dialect variation
(mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))

6.3.2 Dialect-type variation model at sampling site level

Code

nperm <- 100

# model predicting dialect variation
(mod_dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist +
    loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[,
    c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")]))

6.3.3 Clinal variation model

Code

# model predicting clinal variation
(mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))

6.3.4 Random variation model

Code

# model predicting random variation
(mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary +
    site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist",
    "geo_dist", "loc_member_binary", "site_member_binary")]))

6.3.5 Combined results

Code

mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect, mod_dialect_site = mod_dialect_site,
    mod_random = mod_random)

names(mods) <- c("Clinal", "Dialect locality", "Dialect site", "Random")

estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
    Y <- data.frame(mods[[x]]$coef[-1, ])
    Y$rel_coef <- Y[, 1]/max(Y[, 1])
    Y$mod <- names(mods)[x]
    Y$predictor <- rownames(Y)
    names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
    return(Y)
}))

estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
    0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")

ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
    geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
    high = viridis(10)[7], guide = "none") + geom_text(aes(label = round(coef,
    3), color = signif)) + scale_color_manual(values = c("black",
    "gray")) + labs(x = "", y = "", color = "P value") + theme_classic() +
    theme(axis.text.x = element_text(color = "black", size = 11, angle = 30,
        vjust = 0.8, hjust = 0.8))

6.4 Replicate simulation 100 times

Code

nperm <- 100
reps <- 100
rep_models <- pbreplicate(reps, cl = 1, expr = {
    # number of groups
    n_locality <- length(unique(sub_dat$Location.for.cluster))

    # number of individuals per group
    n_indiv <- table(sub_dat$Location.for.cluster)

    # create locality labels
    localities <- sample(LETTERS[1:n_locality])

    # simulated individuals per group
    locality_label <- unlist(sapply(1:length(localities), function(x) rep(localities[x],
        each = n_indiv[x])))

    # simulate geographic coordinates along a gradient
    lon <- unlist(sapply(1:n_locality, function(x) rep(x, each = n_indiv[x]))) +
        rnorm(n = length(locality_label), sd = 0.1)
    lat <- rnorm(n = length(locality_label), sd = 0.1)

    # locality_label <- localities[n_indivs]

    # put all together in a data frame
    data <- data.frame(locality = locality_label, lat, lon)

    # add sampling site (broader geographic mozaic)
    data <- data[order(data$lon), ]

    data$site_num <- 2

    for (i in 2:nrow(data)) {

        data$site_num[i] <- if (data$locality[i - 1] == data$locality[i])
            data$site_num[i - 1] else data$site_num[i - 1] + 1

    }

    # make last location the same as the previous one
    data$site_num[data$locality == data$locality[which.max(data$lon)]] <- data$site_num[which.max(data$lon)] -
        1

    data$sampling.site <- sample(letters)[floor(data$site_num/2)]

    # simulate acoustic structure vector clinal variation
    data$clinal <- data$lon + rnorm(n = nrow(data), sd = 0.2)

    # dialect type variation locality level
    data$dialect <- as.numeric(as.factor(data$locality)) + rnorm(n = nrow(data),
        sd = 0.2)

    # dialect type variation
    data$dialect_site <- as.numeric(as.factor(data$sampling.site)) +
        rnorm(n = nrow(data), sd = 0.2)

    # random variation
    data$random <- rnorm(n = nrow(data), sd = 0.2)

    loc_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$locality))
    site_member_binary <- as.dist(PhenotypeSpace::binary_triangular_matrix(group = data$sampling.site))
    clinal_dist <- dist(data$clinal)
    dialect_loc_dist <- dist(data$dialect)
    dialect_site_dist <- dist(data$dialect_site)
    random_dist <- dist(data$random)
    geo_dist <- dist(data[, c("lat", "lon")])

    # regression models set data format
    rect_var <- cbind(clinal_dist = as.dist(scale(clinal_dist)), dialect_loc_dist = as.dist(scale(dialect_loc_dist)),
        dialect_site_dist = as.dist(scale(dialect_site_dist)), random_dist = as.dist(scale(random_dist)),
        geo_dist = as.dist(scale(geo_dist)), loc_member_binary = loc_member_binary,
        site_member_binary = site_member_binary)

    # model predicting dialect variation
    mod_dialect <- MRM2(formula = dialect_loc_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("dialect_loc_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    # model predicting clinal variation
    mod_clinal <- MRM2(formula = clinal_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("clinal_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    # # model predicting dialect site variation
    dialect_site <- MRM2(formula = dialect_site_dist ~ geo_dist +
        loc_member_binary + site_member_binary, nperm = nperm, mat = rect_var[,
        c("dialect_site_dist", "geo_dist", "loc_member_binary", "site_member_binary")])

    # model predicting random variation
    mod_random <- MRM2(formula = random_dist ~ geo_dist + loc_member_binary +
        site_member_binary, nperm = nperm, mat = rect_var[, c("random_dist",
        "geo_dist", "loc_member_binary", "site_member_binary")])

    mods <- list(mod_clinal = mod_clinal, mod_dialect = mod_dialect,
        dialect_site = dialect_site, mod_random = mod_random)

    names(mods) <- c("Clinal", "Dialect locality", "Dialect site",
        "Random")

    estimates <- do.call(rbind, lapply(seq_along(mods), function(x) {
        Y <- data.frame(mods[[x]]$coef[-1, ])
        Y$rel_coef <- Y[, 1]/max(Y[, 1])
        Y$mod <- names(mods)[x]
        Y$predictor <- rownames(Y)
        names(Y) <- c("coef", "p", "rel_coef", "model", "predictor")
        return(Y)
    }))

    estimates
}, simplify = FALSE)

coeffs <- do.call(cbind, lapply(rep_models, function(x) x[, 1]))
ps <- do.call(cbind, lapply(rep_models, function(x) x[, 2]))

estimates <- rep_models[[1]]
estimates$coef <- rowMeans(coeffs)
estimates$sd <- apply(coeffs, 1, sd)
estimates$p <- 1 - (apply(ps, 1, function(x) sum(x < 0.05))/reps)
estimates$rel_coef <- estimates[, 1]/max(estimates[, 1])
estimates$rel_coef <- ifelse(estimates$p < 0.05, estimates$rel_coef,
    0)
estimates$signif <- ifelse(estimates$p < 0.05, "p < 0.05", "p >= 0.05")

estimates$coef_sd <- paste0(round(estimates$coef, 3), "\n(", round(estimates$sd,
    4), ")")

saveRDS(estimates, "./data/processed/estimates_for_replicated_simulation_dialects.RDS")

Code

estimates <- readRDS("./data/processed/estimates_for_replicated_simulation_dialects.RDS")

ggplot(estimates, aes(x = predictor, y = model, fill = rel_coef)) +
    geom_tile() + coord_equal() + scale_fill_gradient2(low = viridis(10)[3],
    high = viridis(10)[7], guide = "none") + geom_text(aes(label = coef_sd,
    color = signif)) + scale_color_manual(values = c("black", "gray")) +
    labs(x = "", y = "", color = "P value") + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

6.5 Partial Mantel test

Evaluate association between acoustic dissimilarity and locality membership accounting for the effect of geographic distance

6.5.1 By location

Code

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)

mantel_part <- mantel.partial(ydis = xc_dist, xdis = loc_bi_tri, zdis = geo_dist)

saveRDS(mantel_part, "./data/processed/partial_mantel_correlation_cross_correlation.RDS")

Code

readRDS("./data/processed/partial_mantel_correlation_cross_correlation.RDS")

6.5.2 By sampling site

Code

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)

mantel_part <- mantel.partial(ydis = xc_dist, xdis = samp_bi_tri,
    zdis = geo_dist)

saveRDS(mantel_part, "./data/processed/partial_mantel_correlation_cross_correlation_by_sampling_site.RDS")

Code

readRDS("./data/processed/partial_mantel_correlation_cross_correlation_by_sampling_site.RDS")

6.6 Mantel correlogram at different distances

Code

geo_vect <- geo_dist[lower.tri(geo_dist)]
geo_vect <- geo_vect[!is.na(geo_vect)]

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <- mean(xc_dist[-which(is.infinite(xc_dist))],
    na.rm = TRUE)


dists <- 1:10

mantelcorrlg <- function(i) {

    classes <- seq(0, max(geo_vect), i)
    # length(classes)

    # Run a mantel correlation on the data
    correl_SPCC <- vegan::mantel.correlog(D.eco = xc_dist, D.geo = geo_dist,
        break.pts = classes, cutoff = FALSE, r.type = "pearson", nperm = 1,
        mult = "holm", progressive = FALSE)

    mantel.res <- correl_SPCC$mantel.res[, 1:3]
    mantel.res <- cbind(mantel.res, break.size = i)

    return(mantel.res)

}

mantel_list <- warbleR:::pblapply_wrblr_int(dists, cl = 1, function(x) try(mantelcorrlg(x),
    silent = TRUE))

mantel_list <- mantel_list[sapply(mantel_list, class) != "try-error"]


mantel_list <- lapply(mantel_list, as.data.frame)

# # Save the correlation as an .RDS file so you don't have to
# run it multiple times in the future
saveRDS(mantel_list, paste0("./data/processed/correl_SPCC_several_distances.RDS"))

Code

mantel_list <- readRDS(paste0("./data/processed/correl_SPCC_several_distances.RDS"))

mantels_df <- as.data.frame(do.call(rbind, mantel_list))

combined_dists <- sort(unique(mantels_df$class.index))

# interpolate
interpol_mantel_list <- lapply(mantel_list, function(x) {

    appx <- approx(x = x$class.index[x$n.dist > 0], y = x$Mantel.cor[x$n.dist >
        0], xout = combined_dists, method = "linear")

    return(appx$y)
})


interpol_mantel_mat <- do.call(cbind, interpol_mantel_list)


interpol_mantel_df <- data.frame(combined_dists, mean.cor = apply(interpol_mantel_mat,
    1, mean, na.rm = TRUE), sd.cor = apply(interpol_mantel_mat, 1,
    sd, na.rm = TRUE))


ggplot(data = interpol_mantel_df, mapping = aes(x = combined_dists,
    y = mean.cor)) + geom_ribbon(data = interpol_mantel_df, aes(ymin = mean.cor -
    sd.cor, ymax = mean.cor + sd.cor), fill = "gray", alpha = 0.3) +
    geom_point(col = viridis(10, alpha = 0.5)[7], size = 2.5) + geom_line(col = viridis(10,
    alpha = 0.5)[7], size = 2) + xlim(c(0, 4)) + ylim(c(-0.025, 0.2)) +
    geom_point(size = 3, color = "transparent") + theme_classic(base_size = 20) +
    labs(x = "Pairwise geographic distance (Degrees)", y = "Correlation coefficient")

6.7 Dialect discrimination with Random Forest

Data split in training and testing subsets (75% and 25% respectively)
Accuracy measured on test subset

6.7.1 Discriminating locations

Code

xc_dist <- 1 - sub_xcorr
xc_dist[which(is.infinite(xc_dist))] <-
    mean(xc_dist[-which(is.infinite(xc_dist))], na.rm = TRUE)

xcorr_mds <- cmdscale(d = as.dist(xc_dist), k = 10)

xcorr_mds <-
    data.frame(sound.files = rownames(xcorr_mds), xcorr_mds)
xcorr_mds$location <-
    sapply(xcorr_mds$sound.files, function(x)
        sub_dat$Location.for.cluster[sub_dat$new_name == x])

xcorr_mds$location <- as.factor(make.names(xcorr_mds$location,
                                           unique = FALSE, allow_ = TRUE))


# Create data subsets
partition <- createDataPartition(
    y = xcorr_mds$location,
    times = 1,
    p = 0.75,
    list = FALSE
)

trainset <- xcorr_mds[partition,-1]
testset <- xcorr_mds[-partition,-1]

trcontrol <-
    trainControl(
        method = "repeatedcv",
        number = 20,
        savePredictions = TRUE,
        sampling = "down",
        classProbs = TRUE,
        returnResamp = "all"
    )

pred_model <- train(
    location ~ .,
    data = trainset,
    method = "rf",
    trControl = trcontrol,
    metric = "Accuracy",
    preProcess = "BoxCox",
    proximity = TRUE    
)

# save confusion matrix
conf_mat <-
    confusionMatrix(predict(pred_model, testset), testset$location)

conf_df <- as.data.frame(conf_mat$table)

conf_df$total <-
    sapply(conf_df$Reference, function(x)
        sum(testset$location ==
                x))

conf_df$proportion <- conf_df$Freq / conf_df$total

# fit model on complete data set
best_rf_model <- pred_model$finalModel

all_rf_model <- randomForest(
  location ~ .,
  data = xcorr_mds[,-1],  # Your entire dataset
  proximity = TRUE,  # Include proximity matrix
  ntree = best_rf_model$ntree,  # Number of trees
  mtry = best_rf_model$mtry,    # Number of variables tried for splitting at each node
  nodesize = best_rf_model$nodesize,  # Minimum size of terminal nodes
  maxnodes = best_rf_model$maxnodes  # Maximum number of terminal nodes
)

saveRDS(
    list(
        pred_model_bb = pred_model,
        conf_mat_bb = conf_mat,
        confusion_df_bb = conf_df,
        testset_bb = testset,
        all_rf_model = all_rf_model
    ),
    "./data/processed/random_forest_model_results_locations.RDS"
)

Code

rf_model_results <- readRDS("./data/processed/random_forest_model_results_locations.RDS")

# print confusion matrix results
rf_model_results$conf_mat_bb$overall

confusion_df <- rf_model_results$confusion_df_bb

ggplot(confusion_df, aes(x = Reference, y = Prediction, fill = proportion)) +
    geom_tile() + theme_bw() + coord_equal() + scale_fill_distiller(palette = "Greens",
    direction = 1) + geom_text(aes(label = round(proportion, 2)),
    color = "black", size = 3) + theme_classic() + theme(axis.text.x = element_text(color = "black",
    size = 11, angle = 30, vjust = 0.8, hjust = 0.8))

6.7.2 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)

# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], location = xcorr_mds$location, sound.files = xcorr_mds$sound.files)

umap_df$pred.locality <- predict(object = rf_model_results$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df, "./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv",
    row.names = FALSE)

Code

umap_loc_df <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists_localities.csv")

# Create a scatterplot
gg_umap_loc <- ggplot(umap_loc_df, aes(x = UMAP1, y = UMAP2, color = location,
    fill = location)) + geom_point(size = 4) + ylim(c(-7, 7)) + scale_color_viridis_d(alpha = 0.3,
    begin = 0.1, end = 0.8) + scale_fill_viridis_d(alpha = 0.2, begin = 0.1,
    end = 0.8) + theme_classic(base_size = 20) + labs(x = "UMAP1",
    y = "UMAP2", color = "Locality", fill = "Locality")

gg_umap_loc

6.7.3 Projecting random forest proximity with UMAP

Code

umap_result <- umap(rf_model_results$all_rf_model$proximity, n_neighbors = 15,
    n_components = 2)


# Create a data frame with the UMAP results
umap_df <- data.frame(UMAP1 = umap_result$layout[, 1], UMAP2 = umap_result$layout[,
    2], sampling.site = xcorr_mds$sampling.site, sound.files = xcorr_mds$sound.files)

umap_df$pred.site <- predict(object = rf_model_results$all_rf_model,
    xcorr_mds[, grep("X", names(xcorr_mds))])

write.csv(umap_df, "./data/processed/umap_on_rf_proximity_on_xcorr_dists.csv",
    row.names = FALSE)

Code

umap_df <- read.csv("./data/processed/umap_on_rf_proximity_on_xcorr_dists.csv")


umap_df$sampling.site <- factor(umap_df$sampling.site, levels = c("central.coast",
    "southern", "northern", "central.mountains"))
# Create a scatterplot
gg_umap_site <- ggplot(umap_df, aes(x = UMAP1, y = UMAP2, color = sampling.site,
    fill = sampling.site, shape = sampling.site)) + geom_point(size = 4) +
    ylim(c(-7, 7)) + scale_color_viridis_d(alpha = 0, begin = 0.1,
    end = 0.8, labels = c("Central Coast", "Central Mountains", "Northern",
        "Southern")) + scale_fill_viridis_d(alpha = 0.25, begin = 0.1,
    end = 0.8, labels = c("Central Coast", "Central Mountains", "Northern",
        "Southern")) + scale_shape_manual(values = 21:24, labels = c("Central Coast",
    "Central Mountains", "Northern", "Southern")) + guides(shape = guide_legend(override.aes = list(size = 5))) +
    theme_classic(base_size = 20) + labs(x = "UMAP1", y = "UMAP2",
    color = "Region", shape = "Region", fill = "Region")


gg_umap_site

6.8 Combined plots

Code

# cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 1,
# rel_widths = c(1.1, 1))
# cowplot::ggsave2('./output/UMAP_scatterplots.png', dpi = 300,
# width = 22, height = 7)

cowplot::plot_grid(gg_umap_loc, gg_umap_site, nrow = 2)

cowplot::ggsave2("./output/UMAP_scatterplots2.png", dpi = 300, width = 17,
    height = 17)

7 Spectrograms of close-to-centroid calls for each sample site

Code

# keep only those correctly predicted
umap_df <- umap_df[umap_df$sampling.site == umap_df$pred.site, ]

# find prepresentative
y <- 96  #37, 50, 57, 47

# for ( i in 1:100){ y <- y + 1
repren_calls_list <- lapply(unique(umap_df$sampling.site), function(x) {
    X <- umap_df[umap_df$sampling.site == x, ]
    centroid <- sapply(X[, c("UMAP1", "UMAP2")], mean)
    X$centroid_dist <- sapply(seq_len(nrow(X)), function(y) dist(rbind(centroid,
        X[y, c("UMAP1", "UMAP2")])))

    set.seed(y)
    out <- X[order(X$centroid_dist, decreasing = FALSE), ][sample(1:30,
        2), ]
})


repren_calls <- do.call(rbind, repren_calls_list)
repren_calls$selec <- 1
repren_calls$start <- rep(0, nrow(repren_calls))
repren_calls$end <- rep(0.5, nrow(repren_calls))

repren_calls <- repren_calls[c(1, 3, 5, 7, 2, 4, 6, 8), ]

lf <- rep(c(0.06, 0.5), each = 4)
rg <- rep(c(0.5, 0.95), each = 4)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- rep(horiz[-1], 2)
tp <- rep(horiz[-length(horiz)], 2)

m <- cbind(lf, rg, btm, tp)
m <- m[(1:8), ]
lf <- c(rep(0.95, each = 4), 0.06, 0.5, 0, 0)
rg <- c(rep(1, each = 4), 0.5, 0.95, 0.05, 1)
horiz <- seq(0.95, 0.075, length.out = 5)
btm <- c(horiz[-1], 0.95, 0.95, 0.075, 0)
tp <- c(horiz[-length(horiz)], 1, 1, 0.95, 0.075)

m2 <- cbind(lf, rg, btm, tp)
m <- rbind(m, m2)



# png(filename = paste0('./output/spectrograms_by_site', y,
# '.png'), res = 300, width = 4000, height = 3000)

ss <- split.screen(figs = m)

# # testing layout screens for(i in 1:nrow(m)) {screen(i) par(
# mar = rep(0, 4)) plot(0.5, xlim = c(0,1), ylim = c(0,1), type
# = 'n', axes = FALSE, xlab = '', ylab = '', xaxt = 'n', yaxt =
# 'n') box() text(x = 0.5, y = 0.5, labels = i) }
# close.screen(all.screens = T)


ovlp <- 95
colls <- seq(-110, 0, 5)
wl <- 200

lab_bg <- viridis(10, alpha = 0.25)[8]

lab_bg <- "#3A3B39"



# frequency label
screen(15)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
text(x = 0.9, y = 1, "Frequency (kHz)", srt = 90, cex = 1.6)

# time label
screen(16)
par(mar = c(0, 0, 0, 0), new = TRUE)
plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
text(x = 1, y = 0.75, "Time (s)", cex = 1.6)

for (i in seq_len(nrow(repren_calls))) {
    # print(i)
    screen(i)
    par(mar = c(0, 0, 0, 0))

    warbleR:::spectro_wrblr_int2(wave = read_wave(X = repren_calls,
        index = i, path = "./data/raw/consolidated_files/"), collevels = colls,
        ovlp = ovlp, wl = wl, flim = c(1, 9), palette = viridis, axisX = FALSE,
        axisY = FALSE, grid = FALSE)

    # add frequency axis
    if (i %in% 1:4)
        axis(2, at = c(seq(2, 10, 2)))

    # add time axis
    if (i %in% c(4, 8))
        axis(1)
}

vlabs <- c("Central\nMountains", "Southern", "Northern", "Central\nCoast")

par(mar = c(0, 0, 0, 0), bg = lab_bg, new = TRUE)
# add vertical labels
for (i in 9:12) {
    screen(i)
    # par(mar = c(0, 0, 0, 0))
    par(mar = c(0, 0, 0, 0), bg = lab_bg, new = TRUE)
    plot(1, frame.plot = FALSE, type = "n", yaxt = "n", xaxt = "n")
    text(x = 1, y = 1, vlabs[i - 8], srt = 270, cex = 1.6, col = "white",
        font = 1)
    box()
}

# dev.off() }

Takeaways

Session information

R version 4.4.1 (2024-06-14)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=es_CR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=es_CR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=es_CR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C       

time zone: America/Costa_Rica
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] Rraven_1.0.13        corrplot_0.92        ohun_1.0.2          
 [4] umap_0.2.10.0        vegan_2.6-8          permute_0.9-7       
 [7] pbapply_1.7-2        caret_6.0-94         lattice_0.22-6      
[10] mlbench_2.1-5        randomForest_4.7-1.1 ggplot2_3.5.1       
[13] warbleR_1.1.32       NatureSounds_1.0.4   seewave_2.2.3       
[16] tuneR_1.4.7          viridis_0.6.5        viridisLite_0.4.2   
[19] formatR_1.14         knitr_1.48          

loaded via a namespace (and not attached):
  [1] DBI_1.2.3            bitops_1.0-9         pROC_1.18.5         
  [4] gridExtra_2.3        remotes_2.5.0        readxl_1.4.3        
  [7] testthat_3.2.1.1     rlang_1.1.4          magrittr_2.0.3      
 [10] e1071_1.7-16         compiler_4.4.1       mgcv_1.9-1          
 [13] png_0.1-8            vctrs_0.6.5          reshape2_1.4.4      
 [16] stringr_1.5.1        pkgconfig_2.0.3      crayon_1.5.3        
 [19] fastmap_1.2.0        backports_1.5.0      labeling_0.4.3      
 [22] utf8_1.2.4           rmarkdown_2.28       prodlim_2024.06.25  
 [25] purrr_1.0.2          xfun_0.48            jsonlite_1.8.9      
 [28] recipes_1.0.10       parallel_4.4.1       cluster_2.1.6       
 [31] R6_2.5.1             RColorBrewer_1.1-3   stringi_1.8.4       
 [34] reticulate_1.39.0    parallelly_1.38.0    rpart_4.1.23        
 [37] cellranger_1.1.0     brio_1.1.5           lubridate_1.9.3     
 [40] Rcpp_1.0.13          iterators_1.0.14     future.apply_1.11.2 
 [43] igraph_2.0.3         Matrix_1.7-0         splines_4.4.1       
 [46] nnet_7.3-19          timechange_0.3.0     tidyselect_1.2.1    
 [49] rstudioapi_0.16.0    yaml_2.3.10          timeDate_4032.109   
 [52] codetools_0.2-20     dtw_1.23-1           listenv_0.9.1       
 [55] tibble_3.2.1         plyr_1.8.9           withr_3.0.1         
 [58] askpass_1.2.1        evaluate_1.0.1       signal_1.8-1        
 [61] sf_1.0-17            future_1.34.0        survival_3.7-0      
 [64] units_0.8-5          sketchy_1.0.3        proxy_0.4-27        
 [67] pillar_1.9.0         KernSmooth_2.23-24   packrat_0.9.2       
 [70] checkmate_2.3.2      foreach_1.5.2        stats4_4.4.1        
 [73] generics_0.1.3       RCurl_1.98-1.16      munsell_0.5.1       
 [76] scales_1.3.0         globals_0.16.3       class_7.3-22        
 [79] glue_1.8.0           tools_4.4.1          xaringanExtra_0.8.0 
 [82] data.table_1.15.4    RSpectra_0.16-1      ModelMetrics_1.2.2.2
 [85] gower_1.0.1          grid_4.4.1           ipred_0.9-14        
 [88] colorspace_2.1-1     nlme_3.1-165         cli_3.6.3           
 [91] fansi_1.0.6          lava_1.8.0           dplyr_1.1.4         
 [94] gtable_0.3.5         fftw_1.0-9           digest_0.6.37       
 [97] classInt_0.4-10      farver_2.1.2         rjson_0.2.23        
[100] htmlwidgets_1.6.4    htmltools_0.5.8.1    lifecycle_1.0.4     
[103] hardhat_1.4.0        openssl_2.2.2        MASS_7.3-61