Acoustic space function

This function calculates the acoustic space area for different individuals with varying repertoire size. The function takes the following arguments:

X: the proximity matrix
labels: the element type ID for each row in X
n.lements: is the different sizes of the repertoires to be simulated (number of elements). Default is seq(5, 40, 5) which means that there will be 8 repertoire sizes = 5, 10, 15, 20, 25, 30, 35 and 40.
cl: number of cores for using parallelization (if cl > 1). Might not work in windows.
kernel: determines if kernel density is used (but its not working great with few elements per song). Default is FALSE which means than the minimum convex polygon area is used instead.
rarefaction: whether to rarefact data for kernel area estimation (warning, kernel doesn’t work great)
pb: progress bar

The function randomizes the input data so each run is a different simulation. The 1000 simulated elements (100 element types each one with 10 copies) are distributed among the the simulated individuals so most elements are used on each iteration (but the bigger the repertoire sizes the smaller the number of individuals). The output is a data frame with the repertoire size and acoustic space area for the simulated individuals (# of rows = # individuals). It also includes de stress for the multidimensional scaling (isoMDS() function, same value for all rows in a simulation):

# need to load these packages first
library(MASS)  
library(fossil)  
library(pbapply)  

acoustic_spaces <- function(X, labels, n.elements = seq(5, 40, 5), cl = 1, kernel = FALSE, rarefaction = FALSE, pb = TRUE) {
  
  # reset progress bar when exiting
  on.exit(pbapply::pboptions(type = .Options$pboptions$type))
  
  # set progress bar
  pbapply::pboptions(type = ifelse(pb, "timer", "none"))
  
  # convert to distance
  dst_mt <- sqrt(1- X)
  
  # shuflle data position
  shff <- sample(1:nrow(dst_mt))
  dst_mt <- dst_mt[shff, shff] 
  labels <- labels[shff]
  
  # mds for 2 dimensions
  mds <- isoMDS(dst_mt, y = cmdscale(dst_mt, k = 2), k = 2, maxit = 5, trace = FALSE, tol = 1e-3, p = 2)
   
  # standardize to 0-1
   mds$points[, 1] <- mds$points[, 1] / max(mds$points[, 1])
   mds$points[, 2] <- mds$points[, 2] / max(mds$points[, 2])

  
  # repertoires
  rps <- rep(n.elements, 10000)
  cs <- cumsum(rps)
  rps <- rps[cs < nrow(X)]
  
  rps <- sort(rps, decreasing = TRUE)
  
  df <- data.frame(labels, id = NA, mds$points)
  
  df$row <- 1:nrow(df)
    
  for(i in 1:length(rps)){

    # number of element types for each individual
    n <- rps[i]
    
    # element replicates that are available
    to.fill <- df$row[is.na(df$id)]
    df2 <- df[df$row %in% to.fill, ]
    to.fill <- df2$row[!duplicated(df2$labels)]

    # select the ones needed for this individual
    to.fill <- to.fill[1:n]  
  
    # add id of individual to data frame
    df$id[df$row %in% to.fill] <- i     
    }  

  # put indiv id, element label and MDS vectors together
  Y <- data.frame(id = df$id, labels, X1 = df$X1, X2 = df$X2)
  
  # remove not assigned rows
  Y <- Y[!is.na(Y$id), ]
  
  # if kernel area was selected
  if (kernel){
  raref_fun <- function(min.n, W) {
    Z <- W[sample(1:nrow(W), min.n), ]
    coordinates(Z) <-  ~ X1 + X2
    
    kernel.area(kernelUD(Z, extent = 1.5), percent = 95)[[1]]
    
  }
  
  # calculate acoustic area for each individual 
   krnl.area <- pblapply(unique(Y$id), cl = cl, function(y) {
    
  # subset for each individual 
  W <- Y[Y$id == y, c("X1", "X2")]
  
  if (rarefaction)
  ka <- replicate(30, raref_fun(min.n = min(n.elements), W)) else
  ka <- raref_fun(min.n = min(n.elements), W)

  return(data.frame(id = y, repertoire = nrow(W), area = mean(ka), stress = mds$stress))

  })
   
   acous.area <- do.call(rbind, krnl.area)
  } else  { # else use minimum convex polygon
    mcp.area <- pblapply(unique(Y$id), cl = cl, function(y) {
    
      
        
    # subset for each individual 
    W <- Y[Y$id == y, c("X1", "X2")]
    
    area <- try(as.numeric(earth.poly(W)$area), silent = TRUE)

    # coordinates(W) <-  ~ X1 + X2
    # 
    # area <- mcp(xy = W)$area
    
    return(data.frame(id = y, repertoire = nrow(W), area, stress = mds$stress))
      })
    acous.area <- do.call(rbind, mcp.area)
    }
 
  print(paste(nrow(acous.area), "individuals simulated"))
     
 return(acous.area)
}

Testing the function

This is a test using the budgie simulated data (#21):

load("budgie syth data ap.trans.urf 21.rda")

dat <- read.csv("transformed acoustic parms synthetic budgie 21.csv")

aps <- acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE)

## [1] "45 individuals simulated"

summary(reg <- lm(aps$area ~ aps$repertoire))

## 
## Call:
## lm(formula = aps$area ~ aps$repertoire)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -10853  -2804   1002   3537   7341 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     8932.29    1468.41   6.083 2.77e-07 ***
## aps$repertoire   553.58      60.09   9.213 9.78e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4556 on 43 degrees of freedom
## Multiple R-squared:  0.6637, Adjusted R-squared:  0.6559 
## F-statistic: 84.88 on 1 and 43 DF,  p-value: 9.779e-12

plot(aps$repertoire, aps$area, xlab = "Repertoire size",ylab =  "Acoustic space area", col = adjustcolor("blue", alpha.f = 0.5), pch = 20, cex = 2)

abline(reg = reg, lwd = 2)

If you repeat it you will get slightly different results:

aps <- acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE)

## [1] "45 individuals simulated"

summary(reg <- lm(aps$area ~ aps$repertoire))

## 
## Call:
## lm(formula = aps$area ~ aps$repertoire)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12786.8  -2735.9   -187.7   2678.3   9687.2 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     11029.4     1490.6   7.399 3.42e-09 ***
## aps$repertoire    474.3       61.0   7.776 9.86e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4625 on 43 degrees of freedom
## Multiple R-squared:  0.5844, Adjusted R-squared:  0.5748 
## F-statistic: 60.47 on 1 and 43 DF,  p-value: 9.856e-10

plot(aps$repertoire, aps$area, xlab = "Repertoire size",ylab =  "Acoustic space area", col = adjustcolor("blue", alpha.f = 0.5), pch = 20, cex = 2)

abline(reg = reg, lwd = 2)

Consider using log scale for repertoire size:

summary(reg <- lm(aps$area ~ log(aps$repertoire)))

## 
## Call:
## lm(formula = aps$area ~ log(aps$repertoire))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9518.6 -2923.4   385.4  2730.3  9298.3 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -3865.6     2778.0  -1.391    0.171    
## log(aps$repertoire)   8697.7      935.9   9.293 7.62e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4137 on 43 degrees of freedom
## Multiple R-squared:  0.6676, Adjusted R-squared:  0.6599 
## F-statistic: 86.36 on 1 and 43 DF,  p-value: 7.617e-12

plot(log(aps$repertoire), aps$area, xlab = "Log(repertoire size)",ylab =  "Acoustic space area", col = adjustcolor("blue", alpha.f = 0.5), pch = 20, cex = 2)

abline(reg = reg, lwd = 2)

Power of acoustic space approach

Budgie simulation

load("budgie syth data ap.trans.urf 21.rda")

dat <- read.csv("transformed acoustic parms synthetic budgie 21.csv")

# repertoire sizes
target.sizes <- c(5, 10, 15, 20, 50)

# number of individuals
n <- seq(5, 100, 5)


# simulation parameter grid
grd <- expand.grid(target.sizes = target.sizes, n = n)

# repeat each combination 30 times
grd <- do.call(rbind, lapply(1:30, function(x) grd))


# for testing
# grd <- grd[sample(1:nrow(grd), 6), ]

pboptions(type = "timer")

output.l <- pblapply(1:nrow(grd), cl = parallel::detectCores() - 2, function(x){
  
  # variance around mean repertoire size
  n.elements <- round(seq(grd$target.sizes[x], grd$target.sizes[x] * 1.9, length.out = grd$n[x]))
  
  # start empty list to save acoustic spaces
  aspc <- list()
  
  # run fuction to get acoustics spaces til having n (sample size) 
  while(if (length(aspc) > 0) sum(sapply(aspc, nrow)) < grd$n[x] else TRUE){
  
    aspc[[length(aspc) + 1]] <- try(acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE, n.elements = n.elements))

  if(class(aspc[[length(aspc)]]) == "try-error") aspc <- aspc[-length(apsc)]
    }
  
  # put in a data frame
  if (length(aspc) > 1)
  aspc <- do.call(rbind,  aspc) else aspc <- aspc[[1]]
  
  # add repetition id to choose different repertoires
  aspc <- aspc[order(aspc$repertoire), ]
  aspc$rep.id <- 1
  
  for(i in 2:nrow(aspc))
  aspc$rep.id[i] <- if (aspc$repertoire[i -1] != aspc$repertoire[i]) 1 else aspc$rep.id[i -1] + 1 
  
  # order by rep.id
  aspc <- aspc[order(aspc$rep.id), ]
   
  # subset to correct n
  aspc <- aspc[1:grd$n[x], ]

  # get regression on log
  sm.reg.log <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
  sm.reg <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
  
  #put in a data frame
  out <- try(data.frame(grd[x, , drop = FALSE], low.size = min(n.elements), hi.size = max(n.elements), log.pvalue = sm.reg.log$coefficients[2, 4], log.effect.size = sm.reg.log$coefficients[2, 1], log.pvalue = sm.reg$coefficients[2, 4]), silent = TRUE)
  
  return(out)
  
})

# remove errors
output.l <- output.l[sapply(output.l, class) != "try-error"]
                                                                                                                              
# put in a dataframe
budgie_pwr_ac_sp <- do.call(rbind, output.l)

saveRDS(budgie_pwr_ac_sp, "budgie_power_analysis_acoustic_space_approach.RDS")

Long-billed hermit simulation

load("LBH syth data ap.trans.urf 21.rda")

dat <- read.csv("transformed acoustic parms synthetic LBH 21.csv")

# repertoire sizes
target.sizes <- c(5, 10, 15, 20, 50)

# number of individuals
n <- seq(5, 100, 5)


# simulation parameter grid
grd <- expand.grid(target.sizes = target.sizes, n = n)

# repeat each combination 30 times
grd <- do.call(rbind, lapply(1:30, function(x) grd))


# for testing
# grd <- grd[sample(1:nrow(grd), 6), ]

pboptions(type = "timer")

output.l <- pblapply(1:nrow(grd), cl = parallel::detectCores() - 2, function(x){
  
  # variance around mean repertoire size
  n.elements <- round(seq(grd$target.sizes[x], grd$target.sizes[x] * 1.9, length.out = grd$n[x]))
  
  # start empty list to save acoustic spaces
  aspc <- list()
  
  # run fuction to get acoustics spaces til having n (sample size) 
  while(if (length(aspc) > 0) sum(sapply(aspc, nrow)) < grd$n[x] else TRUE){
  
    aspc[[length(aspc) + 1]] <- try(acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE, n.elements = n.elements))

  if(class(aspc[[length(aspc)]]) == "try-error") aspc <- aspc[-length(apsc)]
    }
  
  # put in a data frame
  if (length(aspc) > 1)
  aspc <- do.call(rbind,  aspc) else aspc <- aspc[[1]]
  
  # add repetition id to choose different repertoires
  aspc <- aspc[order(aspc$repertoire), ]
  aspc$rep.id <- 1
  
  for(i in 2:nrow(aspc))
  aspc$rep.id[i] <- if (aspc$repertoire[i -1] != aspc$repertoire[i]) 1 else aspc$rep.id[i -1] + 1 
  
  # order by rep.id
  aspc <- aspc[order(aspc$rep.id), ]
   
  # subset to correct n
  aspc <- aspc[1:grd$n[x], ]

  # get regression on log
  sm.reg.log <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
  sm.reg <- summary(reg <- lm(aspc$area ~ aspc$repertoire))
  
  #put in a data frame
  out <- try(data.frame(grd[x, , drop = FALSE], low.size = min(n.elements), hi.size = max(n.elements), pvalue = sm.reg.log$coefficients[2, 4], effect.size = sm.reg.log$coefficients[2, 1], pvalue2 = sm.reg$coefficients[2, 4]), silent = TRUE)
  
  return(out)
})

# remove errors
output.l <- output.l[sapply(output.l, class) != "try-error"]

# put in a dataframe
lbh_pwr_ac_sp <- do.call(rbind, output.l)

saveRDS(lbh_pwr_ac_sp, "lbh_power_analysis_acoustic_space_approach.RDS")

Long-billed hermit real data

## Power vs sample size plot


load("real.lbh.ap.trans.urf.rda")

dat <- read.csv("Acoustic parameters on realLBH data.csv")

# repertoire sizes
target.sizes <- c(5, 10, 15, 20, 50)

# number of individuals
n <- seq(5, 100, 5)


# simulation parameter grid
grd <- expand.grid(target.sizes = target.sizes, n = n)

# repeat each combination 30 times
grd <- do.call(rbind, lapply(1:30, function(x) grd))


# for testing
# grd <- grd[sample(1:nrow(grd), 6), ]

pboptions(type = "timer")

output.l <- pblapply(1:nrow(grd), cl = parallel::detectCores() - 2, function(x){
  
  # variance around mean repertoire size
  n.elements <- round(seq(grd$target.sizes[x], grd$target.sizes[x] * 1.9, length.out = grd$n[x]))
  
  # start empty list to save acoustic spaces
  aspc <- list()
  
  # run fuction to get acoustics spaces til having n (sample size) 
  while(if (length(aspc) > 0) sum(sapply(aspc, nrow)) < grd$n[x] else TRUE){
  
    aspc[[length(aspc) + 1]] <- try(acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE, n.elements = n.elements))

  if(class(aspc[[length(aspc)]]) == "try-error") aspc <- aspc[-length(apsc)]
    }
  
  # put in a data frame
  if (length(aspc) > 1)
  aspc <- do.call(rbind,  aspc) else aspc <- aspc[[1]]
  
  # add repetition id to choose different repertoires
  aspc <- aspc[order(aspc$repertoire), ]
  aspc$rep.id <- 1
  
  for(i in 2:nrow(aspc))
  aspc$rep.id[i] <- if (aspc$repertoire[i -1] != aspc$repertoire[i]) 1 else aspc$rep.id[i -1] + 1 
  
  # order by rep.id
  aspc <- aspc[order(aspc$rep.id), ]
   
  # subset to correct n
  aspc <- aspc[1:grd$n[x], ]

  # get regression on log
  sm.reg.log <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
  sm.reg <- summary(reg <- lm(aspc$area ~ aspc$repertoire))
  
  #put in a data frame
  out <- try(data.frame(grd[x, , drop = FALSE], low.size = min(n.elements), hi.size = max(n.elements), log.pvalue = sm.reg.log$coefficients[2, 4], effect.size = sm.reg.log$coefficients[2, 1], pvalue = sm.reg$coefficients[2, 4]), silent = TRUE)
  
  return(out)
})

# remove errors
output.l <- output.l[sapply(output.l, class) != "try-error"]

# put in a dataframe
lbh_pwr_ac_sp <- do.call(rbind, output.l)

saveRDS(lbh_pwr_ac_sp, "lbh_real_power_analysis_acoustic_space_approach.RDS")

Power is defined as the proportion of tests that detect a significant pattern.
Points show observed values, lines are loess function interpolation
Repertoire size (colors) is the the minimum repertoire size of the individuals in a simulation
Repertoirse size was allowed to vary 1.9 times the orignal size (rep.size + rep.size * 1.9; so almost twice the orignal size)
Vertical dotted line shows 0.8 power threshold (what is regarded as OK)

Budgies synthetic data

pwr_ac_sp <- readRDS("budgie_power_analysis_acoustic_space_approach.RDS")

library(ggplot2)

pwr_ac_sp$target.sizes <- factor(as.character(pwr_ac_sp$target.sizes), levels = sort(unique(pwr_ac_sp$target.sizes)))

pwr_fun <- function(x) sum(x < 0.05) / length(x)

agg_pwr_ac_sp <- aggregate(log.pvalue ~ target.sizes + n, pwr_ac_sp, pwr_fun)

ggplot(data = agg_pwr_ac_sp, aes(x = n, y = log.pvalue, color = target.sizes)) + 
   geom_hline(yintercept = 0.8, lty = 2, col = "gray", size = 1.2) +
  geom_smooth(size = 1.6, alpha = 0.1) +
    scale_color_viridis_d(alpha = 0.5, name = "Repertoire\n size") +
  scale_fill_viridis_d(guide = FALSE) +
 geom_point(size = 3) +
  labs(x = "Number of individuals", y = "Power") +
  theme_classic(base_size = 16)

LBH synthetic data

pwr_ac_sp <- readRDS("lbh_power_analysis_acoustic_space_approach.RDS")

library(ggplot2)

pwr_ac_sp$target.sizes <- factor(as.character(pwr_ac_sp$target.sizes), levels = sort(unique(pwr_ac_sp$target.sizes)))

pwr_fun <- function(x) sum(x < 0.05) / length(x)

agg_pwr_ac_sp <- aggregate(pvalue ~ target.sizes + n, pwr_ac_sp, pwr_fun)

ggplot(data = agg_pwr_ac_sp, aes(x = n, y = pvalue, color = target.sizes, fill = target.sizes)) + 
   geom_hline(yintercept = 0.8, lty = 2, col = "gray", size = 1.2) +
  geom_smooth(size = 1.6, alpha = 0.1) +
    scale_color_viridis_d(alpha = 0.5, name = "Repertoire\n size") +
  scale_fill_viridis_d(guide = FALSE) +
 geom_point(size = 3) +
  labs(x = "Number of individuals", y = "Power") +
  theme_classic(base_size = 16)

LBH real data

pwr_ac_sp <- readRDS("lbh_real_power_analysis_acoustic_space_approach.RDS")

library(ggplot2)

pwr_ac_sp$target.sizes <- factor(as.character(pwr_ac_sp$target.sizes), levels = sort(unique(pwr_ac_sp$target.sizes)))

pwr_fun <- function(x) sum(x < 0.05) / length(x)

agg_pwr_ac_sp <- aggregate(pvalue ~ target.sizes + n, pwr_ac_sp, pwr_fun)

ggplot(data = agg_pwr_ac_sp, aes(x = n, y = pvalue, color = target.sizes, fill = target.sizes)) + 
  geom_hline(yintercept = 0.8, lty = 2, col = "gray", size = 1.2) +
  geom_smooth(size = 1.6, alpha = 0.1) +
    scale_color_viridis_d(alpha = 0.5, name = "Repertoire\n size") +
  scale_fill_viridis_d(guide = FALSE) +
  geom_point(size = 3) +
  labs(x = "Number of individuals", y = "Power") +
  theme_classic(base_size = 16)

“Measuring acoustic space”

Random forest on acoustic data

Marcelo Araya-Salas, PhD

23-04-2020