Load packages
## vector with package names
x <- c( "pbapply", "parallel", "ggplot2", "readxl", "knitr", "kableExtra", "viridis", "baRulho", "ggplot2")
aa <- lapply(x, function(y) {
# check if installed, if not then install
if (!y %in% installed.packages()[,"Package"])
if (y != "Biostrings")
install.packages(y) else
BiocManager::install("Biostrings")
# load package
try(require(y, character.only = T), silent = T)
})
Functions and parameters
# recalculate
recal <- TRUE
last_day <- TRUE
save.csv <- FALSE
random <- TRUE
reps <- 2000
#functions and parameters
knitr::opts_knit$set(root.dir = normalizePath(".."))
knitr::opts_chunk$set(dpi = 200)
# ggplot2 theme
theme_set(theme_classic(base_size = 90))
read_excel_df <- function(...) data.frame(read_excel(...))
# file name for experiment sequence
file.name <- paste("./data/raw/combinations_", Sys.time(), ".csv", sep = "")
Load data
# read data
grp <- read_excel_df("./data/raw/GC_lab_thyroptera_database.xlsx", sheet = "group_capture_data")
grp$date <- as.Date(grp$date, format = "%m/%d/%Y")
ind <- read_excel_df("./data/raw/GC_lab_thyroptera_database.xlsx", sheet = "individual_capture data")
ind <- ind[!is.na(ind$transponder_ID), ]
adult_ind <- ind[ind$age == "adult", ]
if (last_day)
adult_ind <- adult_ind[adult_ind$group_capture_ID %in% grp$group_capture_ID[grp$date %in% max(grp$date)], ]
# else adult_ind <- adult_ind_all
dyad_vocal_exp <- read_excel_df("./data/raw/pairwise_vocal_response_experiment.xlsx")
dyad_vocal_exp <- dyad_vocal_exp[!is.na(dyad_vocal_exp$Group), ]
dyad_vocal_exp$replicate <- ifelse(duplicated(dyad_vocal_exp$dyad), 2, 1)
dyad_vocal_exp$dyad.replicate <- paste(dyad_vocal_exp$dyad, dyad_vocal_exp$replicate, sep = "-")
# remove those than need to be repeated
dyad_vocal_exp <- dyad_vocal_exp[dyad_vocal_exp$Status == "done", ]
Last day recorded is 2021-07-30
Adult individuals caught on the last day on record
kb <-kable(adult_ind[, c( "group_ID", "transponder_ID")], row.names = FALSE, align = "c")
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
group_ID
|
transponder_ID
|
5
|
982126058484290
|
5
|
900200000279415
|
5
|
982126051278549
|
9
|
982126051278521
|
9
|
982126058484263
|
9
|
982126058484300
|
9
|
982126058484315
|
6
|
982126051278470
|
6
|
900200000279470
|
6
|
900200000279506
|
6
|
900200000279517
|
6
|
982126057845067
|
6
|
982126057845225
|
11
|
982126058484299
|
11
|
982126058484302
|
7
|
982000359237615
|
7
|
982126051278504
|
7
|
982126051278564
|
7
|
982126052945896
|
7
|
982126058484305
|
dyads <- lapply(unique(adult_ind$group_ID), function(i){
# print(i)
indvs <- adult_ind$transponder_ID[adult_ind$group_ID == i]
indvs <- unique(indvs[!is.na(indvs)])
combs <- combn(indvs, 2)
df1 <- data.frame(t(combs))
df2 <- df1[, 2:1]
names(df1) <- names(df2) <- c("fly", "leaf")
dfrep1 <- rbind(df1, df2)
dfrep1$replicate <- 2
dfrep1 <- dfrep1[sample(1:nrow(dfrep1)), ]
dfrep2 <- dfrep1
dfrep2$replicate <- 1
df <- rbind(dfrep2, dfrep1)
df$group <- i
# df <- df[, c(3, 1, 2)]
df$dyad <- paste(df$fly, df$leaf, sep = "-")
df$dyad.rep <- paste(df$dyad, df$replicate, sep = "-")
donegrp <- dyad_vocal_exp[dyad_vocal_exp$Group == i, ]
df$status <- ifelse(df$dyad.rep %in% donegrp$dyad.replicate, "done", "not done")
df <- df[order(df$status), ]
rownames(df) <- 1:nrow(df)
return(df)
# print(df3)
})
names(dyads) <- unique(adult_ind$group_ID)
dyads_missing <- lapply(dyads, function(x){
miss <- x[x$status == "not done", ]
out <- if (nrow(miss) == 0) "All done!" else miss
return(out)
})
names(dyads_missing) <- names(dyads)
dyads_missing <- dyads_missing[sapply(dyads_missing, is.data.frame)]
# optimize so individuals are not tested more than twice for each experiment (fly or leaf)
if (recal){
best_seq_by_group <- pblapply(dyads_missing, cl = 1, function(x){
# print(x$group[5])
out <- lapply(1:reps, function(w){
if (!random)
set.seed(w)
x <- x[sample(1:nrow(x)), ]
x$max_fly <- c(1, sapply(2:nrow(x), function(y)
max(table(x$fly[1:y]))
)
)
x$max_leaf <- c(1, sapply(2:nrow(x), function(y)
max(table(x$leaf[1:y]))
)
)
x$max_repeated_experiment <- x$max_repeated_experiment_dummy <- apply(x[, c("max_fly", "max_leaf")], 1, max)
x$day <- 1
for(e in 1:(nrow(x) - 1)){
if(x$max_repeated_experiment_dummy[e] == 3) {
x$day[e:nrow(x)] <- x$day[e - 1] + 1
x$max_fly[(e):nrow(x)] <- sapply(e:nrow(x), function(y)
max(table(x$fly[e:y])))
x$max_leaf[(e):nrow(x)] <- sapply(e:nrow(x), function(y)
max(table(x$leaf[e:y])))
# x$max_repeated_experiment_dummy[e:nrow(x)] <- x$max_repeated_experiment_dummy[e:nrow(x)] - (x$max_repeated_experiment_dummy[e] - 1)
x$max_repeated_experiment <- x$max_repeated_experiment_dummy <- apply(x[, c("max_fly", "max_leaf")], 1, max)
}
}
x$day[nrow(x)] <- if (x$max_repeated_experiment_dummy[nrow(x)] > 2) max(x$day) + 1 else max(x$day)
# sort so day with more experiments go first
if (length(unique(x$day)) > 1){
dat_cnt <- table(x$day)
dat_cnt <- sort(dat_cnt, decreasing = TRUE)
x$day <- sapply(1:nrow(x), function(o) names(dat_cnt[x$day[o]]))
}
x <- x[order(x$day, x$replicate, decreasing = FALSE),]
x$rep <- w
x$penalization <- c(0, sapply(2:nrow(x), function(u) if(any(c(x$fly[u], x$leaf[u]) %in% c(x$fly[u - 1], x$leaf[u - 1])) & x$day[u] == x$day[u-1]) 1 else 0))
x$individual_in_previous_experiment <- ifelse(x$penalization > 0, paste(x$penalization, "bat(s)"), "")
# x$penalization <- x$penalization + x$replicate
return(list(summary = data.frame(group = x$group[1], rep = w, min_days = max(x$day), penalization = sum(x$penalization), mean_rep_day1 = mean(x$replicate[x$day == 1])), data.frame = x))
})
# dfs <- lapply(out, `[[`, 2)
# names(dfs) <- 1:reps
res <- do.call(rbind, lapply(out, `[[`, 1))
res <- res[order(res$min_days, res$mean_rep_day1, res$penalization, decreasing = FALSE), ]
return(out[[res$rep[1]]][[2]])
})
names(best_seq_by_group) <- names(dyads_missing)
saveRDS(best_seq_by_group, "./data/raw/best_combinations_temporary.RDS")
} else
best_seq_by_group <- readRDS("./data/raw/best_combinations_temporary.RDS")
best_exp_seq <- do.call(rbind, best_seq_by_group)
Optimized sequence for several groups for 30-06-2021 (today)
first_day <- best_exp_seq[best_exp_seq$day == "1", c("group", "replicate","fly", "leaf")]
first_day_combinations <- lapply(1:reps, function(u){
x <- first_day[sample(1:nrow(first_day)), ]
x <- x[order(x$replicate, decreasing = FALSE), ]
x$penalization <- c(0, sapply(2:nrow(x), function(u) if(any(c(x$fly[u], x$leaf[u]) %in% c(x$fly[u - 1], x$leaf[u - 1]))) 1 else 0))
x$individual_in_previous_experiment <- ifelse(x$penalization > 0, paste(x$penalization, "bat(s)"), "")
return(list(summary = data.frame(rep = u, penalization = sum(x$penalization)), data.frame = x))
})
res <- do.call(rbind, lapply(first_day_combinations, `[[`, 1))
res <- res[order(res$penalization, decreasing = FALSE), ]
# best
best <- first_day_combinations[[res$rep[1]]][[2]]
best$sound_file <- ""
kb <-kable(best[, c("group", "sound_file", "fly", "leaf", "individual_in_previous_experiment", "replicate")], row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
group
|
sound_file
|
fly
|
leaf
|
individual_in_previous_experiment
|
replicate
|
7
|
|
982126051278504
|
982000359237615
|
|
1
|
9
|
|
982126058484315
|
982126058484263
|
|
1
|
7
|
|
982126051278564
|
982126051278504
|
|
1
|
6
|
|
982126051278470
|
900200000279470
|
|
1
|
5
|
|
982126058484290
|
900200000279415
|
|
1
|
7
|
|
982126058484305
|
982126051278504
|
|
1
|
6
|
|
900200000279506
|
900200000279470
|
|
1
|
5
|
|
900200000279415
|
982126051278549
|
|
1
|
6
|
|
982126057845067
|
982126051278470
|
|
1
|
9
|
|
982126058484300
|
982126051278521
|
|
1
|
7
|
|
982126052945896
|
982126058484305
|
|
1
|
9
|
|
982126058484263
|
982126058484315
|
|
2
|
11
|
|
982126058484302
|
982126058484299
|
|
2
|
9
|
|
982126058484300
|
982126051278521
|
|
2
|
5
|
|
982126058484290
|
982126051278549
|
|
2
|
7
|
|
982000359237615
|
982126052945896
|
|
2
|
6
|
|
900200000279470
|
900200000279506
|
|
2
|
7
|
|
982126052945896
|
982126051278564
|
|
2
|
11
|
|
982126058484299
|
982126058484302
|
|
2
|
6
|
|
900200000279506
|
982126057845067
|
|
2
|
9
|
|
982126058484263
|
982126058484300
|
|
2
|
5
|
|
982126051278549
|
982126058484290
|
|
2
|
6
|
|
982126051278470
|
982126057845067
|
|
2
|
7
|
|
982126051278564
|
982000359237615
|
|
2
|
Counts per individual from previous table
- No individual should have counts higher than 2!
For flights
fly_cnt <- as.data.frame(table(best$fly))
leaf_cnt <- as.data.frame(table(best$leaf))
names(fly_cnt) <- names(leaf_cnt) <- c("ID", "Counts")
kb <-kable(fly_cnt, row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
ID
|
Counts
|
900200000279415
|
1
|
900200000279470
|
1
|
900200000279506
|
2
|
982000359237615
|
1
|
982126051278470
|
2
|
982126051278504
|
1
|
982126051278549
|
1
|
982126051278564
|
2
|
982126052945896
|
2
|
982126057845067
|
1
|
982126058484263
|
2
|
982126058484290
|
2
|
982126058484299
|
1
|
982126058484300
|
2
|
982126058484302
|
1
|
982126058484305
|
1
|
982126058484315
|
1
|
For leaves
kb <-kable(leaf_cnt, row.names = FALSE)
kb <- kable_styling(kb, bootstrap_options = c("striped", "hover", "condensed", "responsive"))
print(kb)
ID
|
Counts
|
900200000279415
|
1
|
900200000279470
|
2
|
900200000279506
|
1
|
982000359237615
|
2
|
982126051278470
|
1
|
982126051278504
|
2
|
982126051278521
|
2
|
982126051278549
|
2
|
982126051278564
|
1
|
982126052945896
|
1
|
982126057845067
|
2
|
982126058484263
|
1
|
982126058484290
|
1
|
982126058484299
|
1
|
982126058484300
|
1
|
982126058484302
|
1
|
982126058484305
|
1
|
982126058484315
|
1
|
R session information
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_CR.UTF-8 LC_COLLATE=pt_BR.UTF-8
## [5] LC_MONETARY=es_CR.UTF-8 LC_MESSAGES=pt_BR.UTF-8
## [7] LC_PAPER=es_CR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_CR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] baRulho_1.0.5 warbleR_1.1.27 NatureSounds_1.0.4 seewave_2.1.6
## [5] tuneR_1.3.3 viridis_0.6.1 viridisLite_0.4.0 kableExtra_1.3.4
## [9] knitr_1.33 readxl_1.3.1 ggplot2_3.3.3 pbapply_1.4-3
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.6 svglite_2.0.0 fftw_1.0-6 assertthat_0.2.1
## [5] digest_0.6.27 utf8_1.2.1 R6_2.5.0 cellranger_1.1.0
## [9] signal_0.7-7 evaluate_0.14 httr_1.4.2 highr_0.9
## [13] pillar_1.6.1 rlang_0.4.11 rstudioapi_0.13 jquerylib_0.1.4
## [17] rmarkdown_2.8 webshot_0.5.2 stringr_1.4.0 RCurl_1.98-1.3
## [21] munsell_0.5.0 proxy_0.4-26 compiler_4.0.5 xfun_0.24
## [25] pkgconfig_2.0.3 systemfonts_1.0.2 htmltools_0.5.1.1 tidyselect_1.1.1
## [29] tibble_3.1.2 gridExtra_2.3 dtw_1.22-3 fansi_0.4.2
## [33] crayon_1.4.1 dplyr_1.0.6 withr_2.4.2 MASS_7.3-54
## [37] bitops_1.0-7 grid_4.0.5 jsonlite_1.7.2 gtable_0.3.0
## [41] lifecycle_1.0.0 DBI_1.1.1 magrittr_2.0.1 scales_1.1.1
## [45] stringi_1.6.2 xml2_1.3.2 bslib_0.2.5.1 ellipsis_0.3.2
## [49] generics_0.1.0 vctrs_0.3.8 rjson_0.2.20 tools_4.0.5
## [53] glue_1.4.2 purrr_0.3.4 yaml_2.2.1 colorspace_2.0-1
## [57] rvest_1.0.0 sass_0.4.0