This function calculates the acoustic space area for different individuals with varying repertoire size. The function takes the following arguments:
seq(5, 40, 5)
which means that there will be 8 repertoire sizes = 5, 10, 15, 20, 25, 30, 35 and 40.The function randomizes the input data so each run is a different simulation. The 1000 simulated elements (100 element types each one with 10 copies) are distributed among the the simulated individuals so most elements are used on each iteration (but the bigger the repertoire sizes the smaller the number of individuals). The output is a data frame with the repertoire size and acoustic space area for the simulated individuals (# of rows = # individuals). It also includes de stress for the multidimensional scaling (isoMDS()
function, same value for all rows in a simulation):
# need to load these packages first
library(MASS)
library(fossil)
library(pbapply)
acoustic_spaces <- function(X, labels, n.elements = seq(5, 40, 5), cl = 1, kernel = FALSE, rarefaction = FALSE, pb = TRUE) {
# reset progress bar when exiting
on.exit(pbapply::pboptions(type = .Options$pboptions$type))
# set progress bar
pbapply::pboptions(type = ifelse(pb, "timer", "none"))
# convert to distance
dst_mt <- sqrt(1- X)
# shuflle data position
shff <- sample(1:nrow(dst_mt))
dst_mt <- dst_mt[shff, shff]
labels <- labels[shff]
# mds for 2 dimensions
mds <- isoMDS(dst_mt, y = cmdscale(dst_mt, k = 2), k = 2, maxit = 5, trace = FALSE, tol = 1e-3, p = 2)
# standardize to 0-1
mds$points[, 1] <- mds$points[, 1] / max(mds$points[, 1])
mds$points[, 2] <- mds$points[, 2] / max(mds$points[, 2])
# repertoires
rps <- rep(n.elements, 10000)
cs <- cumsum(rps)
rps <- rps[cs < nrow(X)]
rps <- sort(rps, decreasing = TRUE)
df <- data.frame(labels, id = NA, mds$points)
df$row <- 1:nrow(df)
for(i in 1:length(rps)){
# number of element types for each individual
n <- rps[i]
# element replicates that are available
to.fill <- df$row[is.na(df$id)]
df2 <- df[df$row %in% to.fill, ]
to.fill <- df2$row[!duplicated(df2$labels)]
# select the ones needed for this individual
to.fill <- to.fill[1:n]
# add id of individual to data frame
df$id[df$row %in% to.fill] <- i
}
# put indiv id, element label and MDS vectors together
Y <- data.frame(id = df$id, labels, X1 = df$X1, X2 = df$X2)
# remove not assigned rows
Y <- Y[!is.na(Y$id), ]
# if kernel area was selected
if (kernel){
raref_fun <- function(min.n, W) {
Z <- W[sample(1:nrow(W), min.n), ]
coordinates(Z) <- ~ X1 + X2
kernel.area(kernelUD(Z, extent = 1.5), percent = 95)[[1]]
}
# calculate acoustic area for each individual
krnl.area <- pblapply(unique(Y$id), cl = cl, function(y) {
# subset for each individual
W <- Y[Y$id == y, c("X1", "X2")]
if (rarefaction)
ka <- replicate(30, raref_fun(min.n = min(n.elements), W)) else
ka <- raref_fun(min.n = min(n.elements), W)
return(data.frame(id = y, repertoire = nrow(W), area = mean(ka), stress = mds$stress))
})
acous.area <- do.call(rbind, krnl.area)
} else { # else use minimum convex polygon
mcp.area <- pblapply(unique(Y$id), cl = cl, function(y) {
# subset for each individual
W <- Y[Y$id == y, c("X1", "X2")]
area <- try(as.numeric(earth.poly(W)$area), silent = TRUE)
# coordinates(W) <- ~ X1 + X2
#
# area <- mcp(xy = W)$area
return(data.frame(id = y, repertoire = nrow(W), area, stress = mds$stress))
})
acous.area <- do.call(rbind, mcp.area)
}
print(paste(nrow(acous.area), "individuals simulated"))
return(acous.area)
}
This is a test using the budgie simulated data (#21):
load("budgie syth data ap.trans.urf 21.rda")
dat <- read.csv("transformed acoustic parms synthetic budgie 21.csv")
aps <- acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE)
## [1] "45 individuals simulated"
summary(reg <- lm(aps$area ~ aps$repertoire))
##
## Call:
## lm(formula = aps$area ~ aps$repertoire)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10853 -2804 1002 3537 7341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8932.29 1468.41 6.083 2.77e-07 ***
## aps$repertoire 553.58 60.09 9.213 9.78e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4556 on 43 degrees of freedom
## Multiple R-squared: 0.6637, Adjusted R-squared: 0.6559
## F-statistic: 84.88 on 1 and 43 DF, p-value: 9.779e-12
plot(aps$repertoire, aps$area, xlab = "Repertoire size",ylab = "Acoustic space area", col = adjustcolor("blue", alpha.f = 0.5), pch = 20, cex = 2)
abline(reg = reg, lwd = 2)
If you repeat it you will get slightly different results:
aps <- acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE)
## [1] "45 individuals simulated"
summary(reg <- lm(aps$area ~ aps$repertoire))
##
## Call:
## lm(formula = aps$area ~ aps$repertoire)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12786.8 -2735.9 -187.7 2678.3 9687.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11029.4 1490.6 7.399 3.42e-09 ***
## aps$repertoire 474.3 61.0 7.776 9.86e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4625 on 43 degrees of freedom
## Multiple R-squared: 0.5844, Adjusted R-squared: 0.5748
## F-statistic: 60.47 on 1 and 43 DF, p-value: 9.856e-10
plot(aps$repertoire, aps$area, xlab = "Repertoire size",ylab = "Acoustic space area", col = adjustcolor("blue", alpha.f = 0.5), pch = 20, cex = 2)
abline(reg = reg, lwd = 2)
Consider using log scale for repertoire size:
summary(reg <- lm(aps$area ~ log(aps$repertoire)))
##
## Call:
## lm(formula = aps$area ~ log(aps$repertoire))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9518.6 -2923.4 385.4 2730.3 9298.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3865.6 2778.0 -1.391 0.171
## log(aps$repertoire) 8697.7 935.9 9.293 7.62e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4137 on 43 degrees of freedom
## Multiple R-squared: 0.6676, Adjusted R-squared: 0.6599
## F-statistic: 86.36 on 1 and 43 DF, p-value: 7.617e-12
plot(log(aps$repertoire), aps$area, xlab = "Log(repertoire size)",ylab = "Acoustic space area", col = adjustcolor("blue", alpha.f = 0.5), pch = 20, cex = 2)
abline(reg = reg, lwd = 2)
load("budgie syth data ap.trans.urf 21.rda")
dat <- read.csv("transformed acoustic parms synthetic budgie 21.csv")
# repertoire sizes
target.sizes <- c(5, 10, 15, 20, 50)
# number of individuals
n <- seq(5, 100, 5)
# simulation parameter grid
grd <- expand.grid(target.sizes = target.sizes, n = n)
# repeat each combination 30 times
grd <- do.call(rbind, lapply(1:30, function(x) grd))
# for testing
# grd <- grd[sample(1:nrow(grd), 6), ]
pboptions(type = "timer")
output.l <- pblapply(1:nrow(grd), cl = parallel::detectCores() - 2, function(x){
# variance around mean repertoire size
n.elements <- round(seq(grd$target.sizes[x], grd$target.sizes[x] * 1.9, length.out = grd$n[x]))
# start empty list to save acoustic spaces
aspc <- list()
# run fuction to get acoustics spaces til having n (sample size)
while(if (length(aspc) > 0) sum(sapply(aspc, nrow)) < grd$n[x] else TRUE){
aspc[[length(aspc) + 1]] <- try(acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE, n.elements = n.elements))
if(class(aspc[[length(aspc)]]) == "try-error") aspc <- aspc[-length(apsc)]
}
# put in a data frame
if (length(aspc) > 1)
aspc <- do.call(rbind, aspc) else aspc <- aspc[[1]]
# add repetition id to choose different repertoires
aspc <- aspc[order(aspc$repertoire), ]
aspc$rep.id <- 1
for(i in 2:nrow(aspc))
aspc$rep.id[i] <- if (aspc$repertoire[i -1] != aspc$repertoire[i]) 1 else aspc$rep.id[i -1] + 1
# order by rep.id
aspc <- aspc[order(aspc$rep.id), ]
# subset to correct n
aspc <- aspc[1:grd$n[x], ]
# get regression on log
sm.reg.log <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
sm.reg <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
#put in a data frame
out <- try(data.frame(grd[x, , drop = FALSE], low.size = min(n.elements), hi.size = max(n.elements), log.pvalue = sm.reg.log$coefficients[2, 4], log.effect.size = sm.reg.log$coefficients[2, 1], log.pvalue = sm.reg$coefficients[2, 4]), silent = TRUE)
return(out)
})
# remove errors
output.l <- output.l[sapply(output.l, class) != "try-error"]
# put in a dataframe
budgie_pwr_ac_sp <- do.call(rbind, output.l)
saveRDS(budgie_pwr_ac_sp, "budgie_power_analysis_acoustic_space_approach.RDS")
load("LBH syth data ap.trans.urf 21.rda")
dat <- read.csv("transformed acoustic parms synthetic LBH 21.csv")
# repertoire sizes
target.sizes <- c(5, 10, 15, 20, 50)
# number of individuals
n <- seq(5, 100, 5)
# simulation parameter grid
grd <- expand.grid(target.sizes = target.sizes, n = n)
# repeat each combination 30 times
grd <- do.call(rbind, lapply(1:30, function(x) grd))
# for testing
# grd <- grd[sample(1:nrow(grd), 6), ]
pboptions(type = "timer")
output.l <- pblapply(1:nrow(grd), cl = parallel::detectCores() - 2, function(x){
# variance around mean repertoire size
n.elements <- round(seq(grd$target.sizes[x], grd$target.sizes[x] * 1.9, length.out = grd$n[x]))
# start empty list to save acoustic spaces
aspc <- list()
# run fuction to get acoustics spaces til having n (sample size)
while(if (length(aspc) > 0) sum(sapply(aspc, nrow)) < grd$n[x] else TRUE){
aspc[[length(aspc) + 1]] <- try(acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE, n.elements = n.elements))
if(class(aspc[[length(aspc)]]) == "try-error") aspc <- aspc[-length(apsc)]
}
# put in a data frame
if (length(aspc) > 1)
aspc <- do.call(rbind, aspc) else aspc <- aspc[[1]]
# add repetition id to choose different repertoires
aspc <- aspc[order(aspc$repertoire), ]
aspc$rep.id <- 1
for(i in 2:nrow(aspc))
aspc$rep.id[i] <- if (aspc$repertoire[i -1] != aspc$repertoire[i]) 1 else aspc$rep.id[i -1] + 1
# order by rep.id
aspc <- aspc[order(aspc$rep.id), ]
# subset to correct n
aspc <- aspc[1:grd$n[x], ]
# get regression on log
sm.reg.log <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
sm.reg <- summary(reg <- lm(aspc$area ~ aspc$repertoire))
#put in a data frame
out <- try(data.frame(grd[x, , drop = FALSE], low.size = min(n.elements), hi.size = max(n.elements), pvalue = sm.reg.log$coefficients[2, 4], effect.size = sm.reg.log$coefficients[2, 1], pvalue2 = sm.reg$coefficients[2, 4]), silent = TRUE)
return(out)
})
# remove errors
output.l <- output.l[sapply(output.l, class) != "try-error"]
# put in a dataframe
lbh_pwr_ac_sp <- do.call(rbind, output.l)
saveRDS(lbh_pwr_ac_sp, "lbh_power_analysis_acoustic_space_approach.RDS")
## Power vs sample size plot
load("real.lbh.ap.trans.urf.rda")
dat <- read.csv("Acoustic parameters on realLBH data.csv")
# repertoire sizes
target.sizes <- c(5, 10, 15, 20, 50)
# number of individuals
n <- seq(5, 100, 5)
# simulation parameter grid
grd <- expand.grid(target.sizes = target.sizes, n = n)
# repeat each combination 30 times
grd <- do.call(rbind, lapply(1:30, function(x) grd))
# for testing
# grd <- grd[sample(1:nrow(grd), 6), ]
pboptions(type = "timer")
output.l <- pblapply(1:nrow(grd), cl = parallel::detectCores() - 2, function(x){
# variance around mean repertoire size
n.elements <- round(seq(grd$target.sizes[x], grd$target.sizes[x] * 1.9, length.out = grd$n[x]))
# start empty list to save acoustic spaces
aspc <- list()
# run fuction to get acoustics spaces til having n (sample size)
while(if (length(aspc) > 0) sum(sapply(aspc, nrow)) < grd$n[x] else TRUE){
aspc[[length(aspc) + 1]] <- try(acoustic_spaces(X = ap.trans.urf$proximity, labels = dat$elm.type, cl = 3, pb = FALSE, n.elements = n.elements))
if(class(aspc[[length(aspc)]]) == "try-error") aspc <- aspc[-length(apsc)]
}
# put in a data frame
if (length(aspc) > 1)
aspc <- do.call(rbind, aspc) else aspc <- aspc[[1]]
# add repetition id to choose different repertoires
aspc <- aspc[order(aspc$repertoire), ]
aspc$rep.id <- 1
for(i in 2:nrow(aspc))
aspc$rep.id[i] <- if (aspc$repertoire[i -1] != aspc$repertoire[i]) 1 else aspc$rep.id[i -1] + 1
# order by rep.id
aspc <- aspc[order(aspc$rep.id), ]
# subset to correct n
aspc <- aspc[1:grd$n[x], ]
# get regression on log
sm.reg.log <- summary(reg <- lm(aspc$area ~ log(aspc$repertoire)))
sm.reg <- summary(reg <- lm(aspc$area ~ aspc$repertoire))
#put in a data frame
out <- try(data.frame(grd[x, , drop = FALSE], low.size = min(n.elements), hi.size = max(n.elements), log.pvalue = sm.reg.log$coefficients[2, 4], effect.size = sm.reg.log$coefficients[2, 1], pvalue = sm.reg$coefficients[2, 4]), silent = TRUE)
return(out)
})
# remove errors
output.l <- output.l[sapply(output.l, class) != "try-error"]
# put in a dataframe
lbh_pwr_ac_sp <- do.call(rbind, output.l)
saveRDS(lbh_pwr_ac_sp, "lbh_real_power_analysis_acoustic_space_approach.RDS")
Power is defined as the proportion of tests that detect a significant pattern.
Points show observed values, lines are loess function interpolation
Repertoire size (colors) is the the minimum repertoire size of the individuals in a simulation
Repertoirse size was allowed to vary 1.9 times the orignal size (rep.size + rep.size * 1.9; so almost twice the orignal size)
Vertical dotted line shows 0.8 power threshold (what is regarded as OK)
pwr_ac_sp <- readRDS("budgie_power_analysis_acoustic_space_approach.RDS")
library(ggplot2)
pwr_ac_sp$target.sizes <- factor(as.character(pwr_ac_sp$target.sizes), levels = sort(unique(pwr_ac_sp$target.sizes)))
pwr_fun <- function(x) sum(x < 0.05) / length(x)
agg_pwr_ac_sp <- aggregate(log.pvalue ~ target.sizes + n, pwr_ac_sp, pwr_fun)
ggplot(data = agg_pwr_ac_sp, aes(x = n, y = log.pvalue, color = target.sizes)) +
geom_hline(yintercept = 0.8, lty = 2, col = "gray", size = 1.2) +
geom_smooth(size = 1.6, alpha = 0.1) +
scale_color_viridis_d(alpha = 0.5, name = "Repertoire\n size") +
scale_fill_viridis_d(guide = FALSE) +
geom_point(size = 3) +
labs(x = "Number of individuals", y = "Power") +
theme_classic(base_size = 16)
pwr_ac_sp <- readRDS("lbh_power_analysis_acoustic_space_approach.RDS")
library(ggplot2)
pwr_ac_sp$target.sizes <- factor(as.character(pwr_ac_sp$target.sizes), levels = sort(unique(pwr_ac_sp$target.sizes)))
pwr_fun <- function(x) sum(x < 0.05) / length(x)
agg_pwr_ac_sp <- aggregate(pvalue ~ target.sizes + n, pwr_ac_sp, pwr_fun)
ggplot(data = agg_pwr_ac_sp, aes(x = n, y = pvalue, color = target.sizes, fill = target.sizes)) +
geom_hline(yintercept = 0.8, lty = 2, col = "gray", size = 1.2) +
geom_smooth(size = 1.6, alpha = 0.1) +
scale_color_viridis_d(alpha = 0.5, name = "Repertoire\n size") +
scale_fill_viridis_d(guide = FALSE) +
geom_point(size = 3) +
labs(x = "Number of individuals", y = "Power") +
theme_classic(base_size = 16)
pwr_ac_sp <- readRDS("lbh_real_power_analysis_acoustic_space_approach.RDS")
library(ggplot2)
pwr_ac_sp$target.sizes <- factor(as.character(pwr_ac_sp$target.sizes), levels = sort(unique(pwr_ac_sp$target.sizes)))
pwr_fun <- function(x) sum(x < 0.05) / length(x)
agg_pwr_ac_sp <- aggregate(pvalue ~ target.sizes + n, pwr_ac_sp, pwr_fun)
ggplot(data = agg_pwr_ac_sp, aes(x = n, y = pvalue, color = target.sizes, fill = target.sizes)) +
geom_hline(yintercept = 0.8, lty = 2, col = "gray", size = 1.2) +
geom_smooth(size = 1.6, alpha = 0.1) +
scale_color_viridis_d(alpha = 0.5, name = "Repertoire\n size") +
scale_fill_viridis_d(guide = FALSE) +
geom_point(size = 3) +
labs(x = "Number of individuals", y = "Power") +
theme_classic(base_size = 16)