Testing the ability of the IndEffectTest function to detect ISF when different individuals have different ammounts of data. The test was developed for the Detecting recurrent sources of variability in animal tracking studies manuscript by Virginia Morera-Pujol et al. submitted to Ecological Applications in 2021
Here, we generate 9 different datasets of a hundred tracks of a hundred positions each (10000 rows in total) with different individual sampling imbalances:
91:1:1:1:1:1:1:1:1:1
46:46:1:1:1:1:1:1:1:1
31:31:31:1:1:1:1:1:1:1
23:23:23:23:1:1:1:1:1:1
19:19:19:19:19:1:1:1:1:1
16:16:16:16:16:16:1:1:1:1
13:13:13:13:13:13:13:1:1:1
12:12:12:12:12:12:12:12:1:1
11:11:11:11:11:11:11:11:11:1
10:10:10:10:10:10:10:10:10:10
We will generate the different simulated datasets following the instructions specified in the Demo vignette, so the different steps are not explained here again, just shown.
CRW <- species(state.CRW(0.4))
set.seed(1234)
test <- simulate(CRW, coords = c(0, 0), time = 100)
# create empty base raster for resistance surfaces
baseRaster <- raster(xmn = -50, xmx = 50, ymn = -50, ymx = 50, res = 0.1)
values(baseRaster) <- 1
baseGrid <- as(baseRaster, "SpatialPixels")
# create an vector of random angles
origin_points <- data.frame(x = runif(10, -25, 25), y = runif(10, -25, 25))
# simulate 10 trips for each origin (i.e. 10 trips per individual)
template_tracks <- list()
template_UDs <- list()
for (i in 1:10) {
# i = 1
# First we need to create a template trip to get the UD for the resistance
# surface
set.seed(12)
template <- simulate(CRW, coords = c(origin_points[i, 1], origin_points[i, 2]),
time = 100)
template_tracks[i] <- list(template)
# lines(template, col = brewer.pal(10,'Set3')[i]) points(template, col =
# brewer.pal(10,'Set3')[i], pch = 16)
spTemplate <- SpatialPoints(coords = template[, 1:2], proj4string = baseGrid@proj4string)
KDE <- kernelUD(spTemplate, h = "href", grid = baseGrid)
UD50 <- getverticeshr(KDE, percent = 50)
template_UDs[i] <- list(UD50)
}
We generate the simulated datasets and run the IndEffectTest functin steps inside a loop for all the specified unbalanced datasets. We will simulate a high ISF, with a resistance value of 1 outside the “allowed” area.
repeated_birds <- c(1:9)
list_of_results <- list()
plotlist <- list()
for(j in repeated_birds) {
# j = 1
cat(j, "repeated bird(s) \n")
set.seed(j)
x <- sample(1:10, j, replace = FALSE)
xlist <- list(CRW)
splist <- rep(xlist, floor((100-(10-j))/j))
simulated_tracks <- list()
# simulate tracks for each individual
for (i in x) {
# i = 3
res_surface <- resistanceFromShape(template_UDs[[i]], baseRaster = baseRaster,
field = 0, background = 1)
# plot(res_surface)
sim_tracks <- simulate(splist,
start.resistance = 0,
time = 100, resist = res_surface)
colnames(sim_tracks) <- paste(rep(c("x", "y", "z"), floor((100-(10-j))/j)),
rep(1:floor((100-(10-j))/j), each = 3),
sep = "_")
sim_tracks <- sim_tracks %>%
as.data.frame()
x_vector <- sim_tracks %>%
dplyr::select(contains('x')) %>%
mutate(posID = seq(1:100)) %>%
pivot_longer(!posID, names_to = "track_coord", values_to = "degrees") %>%
separate(track_coord, into = c("coord", "trackID")) %>%
as.data.frame() %>%
dplyr::select(trackID, posID, x = degrees)
y_vector <- sim_tracks %>%
dplyr::select(contains('y')) %>%
mutate(posID = seq(1:100)) %>%
pivot_longer(!posID, names_to = "track_coord", values_to = "degrees") %>%
separate(track_coord, into = c("coord", "trackID")) %>%
as.data.frame() %>%
dplyr::select(trackID, posID, y = degrees)
sim_tracks2 <- x_vector %>%
left_join(y_vector) %>%
arrange(trackID, posID)
spSim <- SpatialPointsDataFrame(coords = sim_tracks2[,3:4],
data = data.frame(TrackID = paste(rep(i, nrow(sim_tracks2)),
sim_tracks2$trackID, sep = "_"),
BirdID = rep(i, nrow(sim_tracks2))))
spSim@proj4string <- baseGrid@proj4string
simulated_tracks[i] <- list(spSim)
spSim$BirdID <- NULL
# KUD_sim <- kernelUD(spSim, grid = baseGrid)
# print(mean(kerneloverlap(spSim, method = "BA", percent = 50, conditional = T)))
# sp::plot(spSim, col = factor(spSim$TrackID), pch = 20, cex = 0.5)
}
y <- 1:10
for (i in y[-x]) {
res_surface <- resistanceFromShape(template_UDs[[i]], baseRaster = baseRaster,
field = 0, background = 0.5)
# plot(res_surface)
sim_tracks <- simulate(xlist,
start.resistance = 0,
time = 100, resist = res_surface)
xcoords <- as.vector(sim_tracks[,1])
ycoords <- as.vector(sim_tracks[,2])
spSim <- SpatialPointsDataFrame(coords = cbind(xcoords, ycoords),
data = data.frame(TrackID = paste(i,
rep(1, each = 100),
sep = "_"),
BirdID = rep(i, 100)))
spSim@proj4string <- baseGrid@proj4string
simulated_tracks[i] <- list(spSim)
spSim$BirdID <- NULL
# KUD_sim <- kernelUD(spSim, grid = baseGrid)
}
list.df <- lapply(seq_along(simulated_tracks),
function(i){simulated_tracks[i][[1]]})
# apply rbind to the list
simulated_dataset <- do.call("rbind",
list.df)
simulated_dataset@proj4string <- baseGrid@proj4string
sds <- data.frame(TrackID = simulated_dataset$TrackID,
BirdID = simulated_dataset$BirdID,
xcoords = simulated_dataset@coords[,1],
ycoords = simulated_dataset@coords[,2])
sds$BirdID <- NULL
sds <- sds %>%
dplyr::group_by(TrackID) %>%
dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), ymd_hms('2020-01-30 12:10:00'),
length.out = n())) %>%
mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>%
dplyr::select(TrackID,
Longitude = xcoords, Latitude = ycoords,
datetime = Date_Time) %>%
ungroup() %>%
as.data.frame()
message_filename = paste("j", j, 'script_messages.txt', sep = "_")
try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
sink(message_file, type="message")
sink(message_file, type="output")
sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")
FIT.list <- list()
before <- Sys.time()
for(i in 1:length(sd_tel)){
print(i)
one <- sd_tel[[i]]
# 2
GUESS <- ctmm.guess(one, interactive=F)
# 3
FITS <- ctmm.select(one, GUESS, trace=TRUE, verbose=TRUE, cores=-1)
print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
FIT.list[[i]] <- FITS[[1]]
}
sink(file = NULL, type="message")
sink(file = NULL, type="output")
# closeAllConnections()
Sys.time() - before
# 4
ov <- ctmm::overlap(FIT.list, level = 0.95)
# save(FIT.list, ov, file = paste("powerTest". j, ".RDS"))
# we select the "middle layer" of the array which contains the point estimate (for now ignoring the CI)
X05 <- ov[,,2]
Xlo <- ov[,,1]
Xhi <- ov[,,3]
X05[lower.tri(X05, diag = T)] <- NA
Xlo[lower.tri(Xlo, diag = T)] <- NA
Xhi[lower.tri(Xhi, diag = T)] <- NA
Result1 <- list()
Result1[1] <- list(list(X05, Xlo, Xhi))
# assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
rownames(X05) <- colnames(X05) <- gid
rownames(Xlo) <- colnames(Xlo) <- gid
rownames(Xhi) <- colnames(Xhi) <- gid
# separate within (WI) and between (BW) group overlaps
# median
WI05 <- NULL
BW05 <- NULL
# lower
WIlo <- NULL
BWlo <- NULL
# higher
WIhi <- NULL
BWhi <- NULL
for (i in seq_along(rownames(X05))) {
# i = 1
# medians
x1 <- X05[i,]
x2 <- x1[which(names(x1) == rownames(X05)[i])]
x3 <- x1[which(names(x1) != rownames(X05)[i])]
WI05 <- c(WI05, x2)
BW05 <- c(BW05, x3)
# BW intervals
x1 <- Xlo[i,]
x3 <- x1[which(names(x1) != rownames(Xlo)[i])]
BWlo <- c(BWlo, x3)
x1 <- Xhi[i,]
x3 <- x1[which(names(x1) != rownames(Xhi)[i])]
BWhi <- c(BWhi, x3)
}
# medians
BW05 <- BW05[!is.na(BW05)]
WI05 <- WI05[!is.na(WI05)]
# BW intervals
BWlo <- BWlo[!is.na(BWlo)]
BWhi <- BWhi[!is.na(BWhi)]
# organize values in a dataframe for plotting
Overlaps <- data.frame(
Overlap = c(WI05, BW05),
Type = c(rep("Within", length(WI05)), rep("Between", length(BW05))),
Test = "ind_1"
)
Result1[2] <- list(Overlaps)
# plot boxplot
plot1 <- ggplot(data = Overlaps) +
geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) +
ggtitle("Density of within individual vs. between individual median overlaps") +
theme_bw()
Result1[3] <- list(plot1)
WI_median <- quantile(WI05, 0.5)
BWint <- data.frame(Low = BWlo,
High = BWhi)
BWint <- BWint %>%
mutate(Contains = "NA") %>%
mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))
proportions <- as.data.frame(prop.table(table(BWint$Contains)))
names(proportions) <- c("WI_Median_Position", "Percentage")
proportions$Percentage <- round(proportions$Percentage*100, 2)
(proportions_wide <- spread(proportions, WI_Median_Position, Percentage))
Result1[4] <- list(proportions_wide)
list_of_results[j] <- list(Result1)
plotlist[j] <- list(plot1)
}
## 1 repeated bird(s)
## 2 repeated bird(s)
## 3 repeated bird(s)
## 4 repeated bird(s)
## 5 repeated bird(s)
## 6 repeated bird(s)
## 7 repeated bird(s)
## 8 repeated bird(s)
## 9 repeated bird(s)
grid.arrange(grobs = plotlist, layout_matrix = rbind(c(1, 2, 3), c(4, 5, 6), c(7,
8, 9)))
Wether there’s a strong unbalance (1 individual repeatedly sampled and the rest only sampled once) or data are more even (9 individuals repeatedly sampled, only one sampled only once), the function is able to detect individual site fidelity