This document contains all the supplementary material of the manuscript Detecting recurrent sources of variability in animal tracking studies by Virginia Morera-Pujol et al. submitted to Ecological Applications in 2021.

1 Simulating animal tracks

This section shows how to generate, using the package SIMRiv, simulated animal movement tracks to test the detection of Individual Site Fidelity by the test developed for the MS “Detecting recurrent sources of variability in animal tracking studies”, by Virginia Morera-Pujol et al. (2020)

1.1 Simulate template track

First, simulate a correlated RW. We’ve tried to simulate a Lévy Walk but since ideally we’re not using commuting points in our real-data analysis, CRW looks more similar to a real dataset

CRW15 <- species(state.CRW(0.25) + 7)
simCRW <- simulate(CRW15, coords = matrix(rep(0, 2), ncol = 2), time = 10000)
plot(simCRW[,1:2], type = "l", 
     xlim = c(min(simCRW[,1]), max(simCRW[,1])), 
     ylim = c(min(simCRW[,2]), max(simCRW[,2])), 
     main = "Simulation of a correlated random walk with 10,000 steps")

spSimCRW <- SpatialPoints(coords = simCRW[,1:2])

1.2 Turn template into resistance surface

Then, generate a convex hull of that correlated RW

example_MCP <- mcp(spSimCRW, percent = 100)
plot(example_MCP, main = "100% Minimum convex polygon of the simulated CRW")
plot(spSimCRW, add = T, pch = 20, cex = 0.5, col = "gray")

Convert this convex hull into a resistance surface from SiMRiv to limit distribution of newly simulated tracks

res_surface <- resistanceFromShape(example_MCP, res = 1, margin = c(500, 500))
plot(res_surface, main = "Resistance surface generated from the previous MCP")

1.3 Simulate tracks within the resistance surface

Now simulate tracks within the resistance shape

simCRW_res <- simulate(list(CRW15, CRW15, CRW15, CRW15), coords = matrix(rep(0, 8), 
                                                                         ncol = 2), 
                       time = 10000, resist = res_surface)
spSimCRW_res <- SpatialPointsDataFrame(coords = cbind(c(simCRW_res[,1], simCRW_res[,4], 
                                                        simCRW_res[,7], simCRW_res[,10]),
                                                      c(simCRW_res[,2], simCRW_res[,5], 
                                                        simCRW_res[,8], simCRW_res[,11])),
                                       data = data.frame(ID = c(rep(1, 10000), 
                                                                rep(2, 10000), 
                                                                rep(3, 10000),
                                                                rep(4, 10000))))

plot(res_surface, 
     main = "Simulated tracks within the MCP \nthrough the resistance surface")
plot(spSimCRW_res, pch = 20, cex = 0.5, add = T, col = spSimCRW_res$ID)

This is the basic workflow to generate simulated datasets displaying individual site fidelity. Different levels of fidelity can be simulated by changing the resistance values outside the convex hull. The higher the resistance, the more intense the “fidelity”. We are going to use it to generate different types of data to test aspects of the indEffectTest function developed for the manuscript.

2 Individual site fidelity test

Using the procedure outlined in the previous section, we’ll generate first a dataset of 10 individuals with 10 trips each, with different origins, and then a dataset of 10 individuals with 10 trips each with the same origin (central place forager scenario) to test how the indEffect_test() function works.

2.1 For animals with different origins

2.1.1 Generating the data

We use a value for resistance = 1, which means we’re simulating maximum ISF (simulated tracks are forced within the template area of no resistance because resistance outside the template area is 1).

set.seed(123)
# set movement parameters of the "species". 
# We're going to configure a Correlated Random walker
CRW <- species(state.CRW(0.3))
test <- simulate(CRW, coords = c(10, 15), time = 100)

# create empty base raster for resistance surfaces
baseRaster <- raster(xmn = -50, xmx = 50, ymn = -50, ymx = 50, res = 0.1)
values(baseRaster) <- 1
baseGrid <- as(baseRaster, "SpatialPixels")
baseGrid@proj4string <- CRS(as.character(NA))

# generate 10 random origin points within a 100000x100000 square, one per "individual"
origin_points <- data.frame(x = runif(10, -25, 25), 
                            y = runif(10, -25, 25))

# simulate 10 trips for each origin (i.e. 10 trips per individual)
template_tracks <- list()
template_UDs <- list()
resistance_surfaces <- list()
simulated_tracks <- list()

for (i in 1:10) {
  # i = 2
  # First we need to create a template trip to get the UD for the resistance surface
  template <- simulate(CRW, coords = c(origin_points[i,1], origin_points[i, 2]), 
                       time = 100)
  template_tracks[i] <- list(template)
  
  spTemplate <- SpatialPoints(coords = template[,1:2], proj4string = baseGrid@proj4string)
  KDE <- kernelUD(spTemplate, h = "href", grid = baseGrid, extent = 2) 
  UD50 <- getverticeshr(KDE, percent = 50)
  template_UDs[i] <- list(UD50) 
  
  # Second, we create the resistance surface
  res_surface <- resistanceFromShape(UD50, baseRaster = baseRaster, field = 0, 
                                     background = 1)
  resistance_surfaces[i] <- list(res_surface)
  
  # Third, we simulate 10 tracks from each surface (i.e. from each individual)
  sim_tracks <- simulate(list(CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW), 
                         start.resistance   = 0,
                         time = 100, resist = res_surface)
  spSim <- SpatialPointsDataFrame(coords = cbind(c(sim_tracks[,1], sim_tracks[,4], 
                                                   sim_tracks[,7], sim_tracks[,10], 
                                                   sim_tracks[,13],sim_tracks[,16],
                                                   sim_tracks[,19], sim_tracks[,22], 
                                                   sim_tracks[,25], sim_tracks[,28]),
                                               c(sim_tracks[,2], sim_tracks[,5], 
                                                 sim_tracks[,8], sim_tracks[,11], 
                                                 sim_tracks[,14], sim_tracks[,17],
                                                 sim_tracks[,20], sim_tracks[,23], 
                                                 sim_tracks[,26], sim_tracks[,29])), 
                                data = data.frame(TrackID = paste(i, 
                                                                  rep(1:10, each = 100),
                                                                  sep = "_"), 
                                                  BirdID = rep(i, 1000)))
  spSim@proj4string <- baseGrid@proj4string
  simulated_tracks[i] <- list(spSim)
  spSim$BirdID <- NULL
  KUD_sim <- kernelUD(spSim, grid = baseGrid)
  
}

Now the list simulated_tracks has 10 elements, each of them containing the 10 tracks belonging to each “individual”. We need to reformat that in a way that we can proceed with the Individual Effect Test

list.df <- lapply(seq_along(simulated_tracks),
                  function(i){simulated_tracks[i][[1]]})

# apply rbind to the list
simulated_dataset <- do.call("rbind",
                             list.df)
# assign null projection as X/Y values don't mean anything in this case
simulated_dataset@proj4string <- CRS(as.character(NA))

2.1.2 Visualising the data

# first create a version of the data without BirdID to calculate trip overlaps 
sds <- as.data.frame(simulated_dataset)

# convert it in order to be able to plot it 
sdf <- sds %>% 
  st_as_sf(coords = c("coords.x1", "coords.x2")) %>% 
  tidyr::separate(TrackID, into = c("BirdID", "TrackID"), sep = "_") %>% 
  mutate(BirdID = factor(BirdID, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")))

col1 <- ggplot() + 
  geom_sf(data = sdf, aes(col = BirdID), alpha = 0.3, size = 0.7) + 
  xlab("Longitude") + ylab("Latitude") +
  # scale_shape_manual(values = 1:10, guide = "none") +
  theme_bw() + 
  theme(legend.position = "right", legend.box = "horizontal") + 
  guides(col = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  # scale_shape(guide = "none") +
  ggtitle("Simulated positions of 10 non-colonial animals") + 
  NULL 
print(col1)

2.1.3 Testing the function

We now run a simplified version of the function IndEffectTest without all the code re the spatial projection, crossing datelines, etc. and as plain code outside the function for easy tweaking.

2.1.3.1 Calculate akde for all tracks and pairwise overlaps

# To calculate the overlap we:
# 1 - Turn simulated tracks into "telemetry" objects"
# 2 - fit a guesstimate movement model to each track
# 3 - using the parameters of the guesstimate, fit a series of competing movement models
# 4 - Calculate the debiased BA between top-selected models for each track
# (uncertainty from the model fits is propagated into the overlap estimate under the
#    approximation that the BA is a chi-square random variable)

# 1
sds %>% 
  dplyr::group_by(TrackID) %>%
  dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), ymd_hms('2020-01-30 12:10:00'),
                                length.out = 100)) %>% 
  mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
  dplyr::select(individual.local.identifier = TrackID, 
                Longitude = coords.x1, Latitude = coords.x2, 
                datetime = Date_Time) %>% 
  ungroup() -> sds
sds <- as.data.frame(sds)

sdst <- sds %>% 
  filter(str_detect(individual.local.identifier, "^7")) 

# to remove output from console
message_filename = 'ISF_test_fun1_run_console_output.txt'
try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
sink(message_file, type="message")
sink(message_file, type="output")

sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")

# FIT.list <- list()
cl <- makeCluster(10, type="PSOCK") ## specify number of cores, and type of cluster (PSOCK or FORK)
registerDoParallel(cl) ## register the backend into a virtual node

before <- Sys.time()

## Execute parallel operation
FIT.list <- foreach(i = 1:length(sd_tel), .packages=c("dplyr", "ctmm", "ggplot2", "lubridate")) %dopar% {
  # print(i)
  one <- sd_tel[[i]]
  # 2
  GUESS <- ctmm.guess(one, interactive=F)
  # 3
  FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
  print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
  x <- FITS[[1]]
}
sink()

Sys.time() - before

# 4
ov <- ctmm::overlap(FIT.list, level = 0.95)
save(FIT.list, ov, file = "testing_function.RDS")

2.1.3.2 Obtain median, lower and upper bounds of CI of the overlap and name the matrices with the “BirdID”

The array ov contains three “layers”, with the median of the overlap distribution in the middle one, and the upper and lower bounds of the CI selected in de function in the first and last. We extract the three matrices from there

load("testing_function.RDS")
# separate the layers of the median and CI
X05 <- ov[,,2]
Xlo <- ov[,,1]
Xhi <- ov[,,3]

# remove lower triangle of each (repeated values since it's a symetric matrix)
X05[lower.tri(X05, diag = T)] <- NA
Xlo[lower.tri(Xlo, diag = T)] <- NA
Xhi[lower.tri(Xhi, diag = T)] <- NA


# assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
rownames(X05) <- colnames(X05) <- gid
rownames(Xlo) <- colnames(Xlo) <- gid
rownames(Xhi) <- colnames(Xhi) <- gid

2.1.3.3 Separate within (WI) and between (BW) group overlaps

The function works by testing how many of the Between Individual overlap CIs contain the median of all the within individual overlaps. It provides three values, the proportion of between individual overlap CIs that contain the median of the WI overlaps, the proportion of BW overlap CIs in which the median of the WI overlaps falls outside of the BW and is higher (above) and the proportion in which is lower (below).

# median
WI05 <- NULL
BW05 <- NULL

# BW lower and higher CI bounds
BWlo <- NULL
BWhi <- NULL

for (i in seq_along(rownames(X05))) {
  # i = 2
  
  # medians
  x1 <- X05[i,] 
  x2 <- x1[which(names(x1) == rownames(X05)[i])]
  x3 <- x1[which(names(x1) != rownames(X05)[i])]
  
  WI05 <- c(WI05, x2)
  BW05 <- c(BW05, x3)
  
  # BW intervals
  y1 <- Xlo[i,] 
  y2 <- y1[which(names(y1) != rownames(Xlo)[i])]
  BWlo <- c(BWlo, y2)
  
  z1 <- Xhi[i,] 
  z2 <- z1[which(names(z1) != rownames(Xhi)[i])]
  BWhi <- c(BWhi, z2)
  
}

# remove NAs
WI05 <- WI05[!is.na(WI05)]
BW05 <- BW05[!is.na(BW05)]
BWlo <- BWlo[!is.na(BWlo)]
BWhi <- BWhi[!is.na(BWhi)]

# organize values in a dataframe for plotting
Overlaps <- data.frame(
  Overlap = c(WI05, BW05),
  Type = c(rep("Within", length(WI05)), rep("Between", length(BW05)))
)

2.1.4 Visualising results

First of all we visualise the different densities of the WI and BW medians (without taking into account CI of individual overlaps)

(plot <- ggplot(data = Overlaps) + 
  geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) + 
  ggtitle("Density of within individual vs. between individual median overlaps") +
  theme_bw())

2.1.5 Calculating the WBR (Within-Between ratio)

Now we calculate the median of the WI overlaps, to use as reference values (see if it is contained, above or below the BW CIs)

WI_median <- quantile(WI05, 0.5)

BWint <- data.frame(Low = BWlo,
                    High = BWhi)

Finally we produce a one row dataset containing the proportion of Between individual overlaps in which the Within Individual median is below, inside or above the CI.

BWint <- BWint %>% 
    mutate(Contains = "NA") %>% 
    mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))
  
proportions <- as.data.frame(prop.table(table(BWint$Contains)))

names(proportions) <- c("WI_Median_Position", "Percentage")
proportions$Percentage <- round(proportions$Percentage*100, 2)

proportions_wide <- spread(proportions, WI_Median_Position, Percentage)

proportions_wide %>% 
  kable(digits = 2, align = "rrrr") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Above (WBR) NotAbove
83.09 16.91

The Within-Between Ratio (WBR) is the percenatge of the cases in which the median of the Within Individual overlaps falls above the 95% CI of the between individual overlap distribution. A high WBR indicates that, when run on an artificial dataset, purposely simulating a high degree of individual site fidelity, our function is able to detect individual site fidelity in the simulated dataset.

2.2 For animals with the same origin but different outward angles

2.2.1 Generating the data

In case of foraging trips of central place foragers, all trips will have the same origin. We create a dataset that simulates those conditions too, to see if the indEffectTest function is still able to detect individual site fidelity.

Again, we simulate the dataset with resistance = 1, which will simulate high ISF conditions

set.seed(2106)
CRW <- species(state.CRW(0.2))
test <- simulate(CRW, coords = c(0, 0), time = 100)

# create empty base raster for resistance surfaces
baseRaster <- raster(xmn = -50, xmx = 50, ymn = -50, ymx = 50, res = 0.1)
values(baseRaster) <- 1
baseGrid <- as(baseRaster, "SpatialPixels")

# create a vector of random angles
angles <- runif(10, 0, 360)
# simulate 10 trips for each origin (i.e. 10 trips per individual)
template_tracks <- list()
template_UDs <- list()
resistance_surfaces <- list()
simulated_tracks <- list()

for (i in 1:10) {
  # i = 1
  
  # First we need to create a template trip to get the UD for the resistance surface
  template <- simulate(CRW, coords = c(0, 0), angles = angles[i], time = 100)
  template_tracks[i] <- list(template)
  spTemplate <- SpatialPoints(coords = template[,1:2], proj4string = baseGrid@proj4string)
  KDE <- kernelUD(spTemplate, h = "href", grid = baseGrid) 
  UD50 <- getverticeshr(KDE, percent = 50)
  template_UDs[i] <- list(UD50) 
  # plot(KDE)
  # plot(UD50, add = T)
  # plot(spTemplate, pch = 20, cex = 0.5, add = T)
  
  # Second, we create the resistance surface
  res_surface <- resistanceFromShape(UD50, baseRaster = baseRaster, field = 0, 
                                     background = 1)
  resistance_surfaces[i] <- list(res_surface)
  # plot(res_surface)
  
  # Third, we simulate 10 tracks from each surface (i.e. from each individual)
  sim_tracks <- simulate(list(CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW), 
                         start.resistance   = 0,
                         time = 100, resist = res_surface)
  spSim <- SpatialPointsDataFrame(coords = cbind(c(sim_tracks[,1], sim_tracks[,4],
                                                   sim_tracks[,7], sim_tracks[,10],
                                                   sim_tracks[,13], sim_tracks[,16],
                                                   sim_tracks[,19], sim_tracks[,22],
                                                   sim_tracks[,25],sim_tracks[,28]),
                                                 c(sim_tracks[,2], sim_tracks[,5],
                                                   sim_tracks[,8], sim_tracks[,11],
                                                   sim_tracks[,14],sim_tracks[,17],
                                                   sim_tracks[,20], sim_tracks[,23],
                                                   sim_tracks[,26], sim_tracks[,29])), 
                                  data = data.frame(TrackID = paste(i, rep(1:10, 
                                                                           each = 100),
                                                                    sep = "_"), 
                                                  BirdID = rep(i, 1000)))
  spSim@proj4string <- baseGrid@proj4string
  simulated_tracks[i] <- list(spSim)
  spSim$BirdID <- NULL
  KUD_sim <- kernelUD(spSim, grid = baseGrid)
    # print(mean(kerneloverlap(spSim, method = "BA", percent = 50, conditional = T)))
  # plot(spSim, add = T, col = spSim$TrackID, pch = 20, cex = 0.5)
}

We adapt data to indEffectTest same as before

list.df <- lapply(seq_along(simulated_tracks),
                  function(i){simulated_tracks[i][[1]]})

# apply rbind to the list
simulated_dataset <- do.call("rbind",
                             list.df)

# assign null projection as X/Y values don't mean anything in this case
simulated_dataset@proj4string <- CRS(as.character(NA))

2.2.2 Visualising the data

# first create a version of the data without BirdID to calculate trip overlaps 
sds <- as.data.frame(simulated_dataset)

# convert it in order to be able to plot it 
sdf <- sds %>% 
  st_as_sf(coords = c("coords.x1", "coords.x2")) %>% 
  tidyr::separate(TrackID, into = c("BirdID", "TrackID"), sep = "_") %>% 
  mutate(BirdID = factor(BirdID, levels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10")))

col2 <- ggplot() + 
  geom_sf(data = sdf, aes(col = BirdID), alpha = 0.3, size = 0.7) + 
  xlab("Longitude") + ylab("Latitude") +
  # scale_shape_manual(values = 1:10) +
  theme_bw() + 
  theme(legend.position = "right", legend.box = "horizontal") + 
  guides(col = guide_legend(override.aes = list(size = 3, alpha = 1))) +
  ggtitle("Simulated positions of 10 colonial animals") + 
  NULL
print(col2)

2.2.3 Testing the function

2.2.3.1 Calculate akde for all tracks and pairwise overlaps

# To calculate the overlap we:
# 1 - Turn simulated tracks into "telemetry" objects"
# 2 - fit a guesstimate movement model to each track
# 3 - using the parameters of the guesstimate, fit a series of competing movement models
# 4 - Calculate the debiased BA between top-selected models for each track
# (uncertainty from the model fits is propagated into the overlap estimate under the
#     approximation that the BA is a chi-square random variable)

# 1
sds %>% 
  dplyr::group_by(TrackID) %>%
  dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), ymd_hms('2020-01-30 12:10:00'),
                                length.out = 100)) %>% 
  mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
  dplyr::select(individual.local.identifier = TrackID, 
                Longitude = coords.x1, Latitude = coords.x2, 
                datetime = Date_Time) %>% 
  ungroup() -> sds
sds <- as.data.frame(sds)

message_filename = 'script_messages.txt'
try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
sink(message_file, type="message")
sink(message_file, type="output")

sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")


# FIT.list <- list()
cl <- makeCluster(10, type="PSOCK") ## specify number of cores, and type of cluster (PSOCK or FORK)
registerDoParallel(cl) ## register the backend into a virtual node

before <- Sys.time()

## Execute parallel operation
FIT.list <- foreach(i = 1:length(sd_tel), .packages=c("dplyr", "ctmm", "ggplot2", "lubridate")) %dopar% {
  #  print(i)
  one <- sd_tel[[i]]
  # 2
  GUESS <- ctmm.guess(one, interactive=F)
  # 3
  FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
  print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
  x <- FITS[[1]]
}
sink()
Sys.time() - before

# 4
ov <- ctmm::overlap(FIT.list, level = 0.95)

save(FIT.list, ov, file = "testing_function2.RDS")

2.2.3.2 Obtain median, lower and upper bounds of CI of the overlap and name the matrices with the “BirdID”

The array ov contains three “layers”, with the median of the overlap distribution in the middle one, and the upper and lower bounds of the CI selected in de function in the first and last. We extract the three matrices from there

load("testing_function2.RDS")
# separate the layers of the median and CI
X05 <- ov[,,2]
Xlo <- ov[,,1]
Xhi <- ov[,,3]

# remove lower triangle of each (repeated values since it's a symetric matrix)
X05[lower.tri(X05, diag = T)] <- NA
Xlo[lower.tri(Xlo, diag = T)] <- NA
Xhi[lower.tri(Xhi, diag = T)] <- NA

# assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
rownames(X05) <- colnames(X05) <- gid
rownames(Xlo) <- colnames(Xlo) <- gid
rownames(Xhi) <- colnames(Xhi) <- gid

2.2.3.3 Separate within (WI) and between (BW) group overlaps

The function works by testing how many of the Between Individual overlap CIs contain the median of all the within individual overlaps. It provides three values, the proportion of between individual overlap CIs that contain the median of the WI overlaps, the proportion of BW overlap CIs in which the median of the WI overlaps falls outside of the BW and is higher (above) and the proportion in which is lower (below).

# mediana
WI05 <- NULL
BW05 <- NULL

# BW lower and higher CI bounds
BWlo <- NULL
BWhi <- NULL

for (i in seq_along(rownames(X05))) {
  # i = 2
  # medians
  x1 <- X05[i,] 
  x2 <- x1[which(names(x1) == rownames(X05)[i])]
  x3 <- x1[which(names(x1) != rownames(X05)[i])]
  
  WI05 <- c(WI05, x2)
  BW05 <- c(BW05, x3)
  
  # BW intervals
  y1 <- Xlo[i,] 
  y2 <- y1[which(names(y1) != rownames(Xlo)[i])]
  BWlo <- c(BWlo, y2)
  
  z1 <- Xhi[i,] 
  z2 <- z1[which(names(z1) != rownames(Xhi)[i])]
  BWhi <- c(BWhi, z2)
  
}

# remove NAs
WI05 <- WI05[!is.na(WI05)]
BW05 <- BW05[!is.na(BW05)]
BWlo <- BWlo[!is.na(BWlo)]
BWhi <- BWhi[!is.na(BWhi)]

# organize values in a dataframe for plotting
Overlaps <- data.frame(
  Overlap = c(WI05, BW05),
  Type = c(rep("Within", length(WI05)), rep("Between", length(BW05)))
)

2.2.4 Visualising results

First of all we visualise the different densities of the WI and BW medians (without taking into account CI of individual overlaps)

(plot <- ggplot(data = Overlaps) + 
  geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) + 
  ggtitle("Density of within individual vs. between individual median overlaps") +
  theme_bw())

2.2.5 Calculating the WBR (Within-Between ratio)

Now we calculate the median of the WI overlaps, to use as reference values (see if it is contained, above or below the BW CIs)

WI_median <- quantile(WI05, 0.5)

BWint <- data.frame(Low = BWlo,
                    High = BWhi)

Finally we produce a one row dataset containing the proportion of Between individual overlaps in which the Within Individual median is below, inside or above the CI.

BWint <- BWint %>% 
    mutate(Contains = "NA") %>% 
    mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))
  
proportions <- as.data.frame(prop.table(table(BWint$Contains)))

names(proportions) <- c("WI_Median_Position", "Percentage")
proportions$Percentage <- round(proportions$Percentage*100, 2)

proportions_wide <- spread(proportions, WI_Median_Position, Percentage)

proportions_wide %>% 
  kable(digits = 2, align = "rrrr") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Above (WBR) NotAbove
62.69 37.31

The Within-Between Ratio (WBR) is the percenatge of the cases in which the median of the Within Individual overlaps falls above the 95% CI of the between individual overlap distribution. A high WBR indicates that, when run on an artificial dataset, purposely simulating a high degree of individual site fidelity, our function is able to detect individual site fidelity in the simulated dataset.

3 Power Test

In this section, I will run the indEffect_test() function over a sequence of datasets with increasing levels of individual site fidelity for data of both colonial and non-colonial simulated “species”. To simulate the different degrees of ISF we change the resistance of the layer outside the “allowed” polygon.

3.1 For animals with different origins

3.1.1 Generating the data

For more detail on this procedure, see the Demo_simulating_ISF_tracks in this repository

set.seed(2020)
CRW <- species(state.CRW(0.2))
test <- simulate(CRW, coords = c(0, 0), time = 100)

# create empty base raster for resistance surfaces
baseRaster <- raster(xmn = -50, xmx = 50, ymn = -50, ymx = 50, res = 0.1)
values(baseRaster) <- 1
baseGrid <- as(baseRaster, "SpatialPixels")

# create an set of random origin points
origin_points <- data.frame(x = runif(10, -25, 25), 
                            y = runif(10, -25, 25))

# simulate 10 trips for each origin (i.e. 10 trips per individual)
template_tracks <- list()
template_UDs <- list()
resistance_surfaces <- list()
simulated_tracks <- list()

plot(1, xlim = c(-50, 50), ylim = c(-50, 50), type = "n", 
     main = "All the 'template' tracks")

# 
resistances <- c(1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0)

for (j in seq_along(resistances)) {
  # j = 5 
  res <- resistances[j]
  
  for (i in 1:10) {
    # i = 1
    
    # First we need to create a template trip to get the UD for the resistance surface
    template <- simulate(CRW, coords = c(origin_points[i,1], origin_points[i, 2]), 
                       time = 100)
    template_tracks[i] <- list(template)
    lines(template, col = brewer.pal(10,"Set3")[i])
    points(template, col = brewer.pal(10,"Set3")[i], pch = 16)
    spTemplate <- SpatialPoints(coords = template[,1:2], proj4string = baseGrid@proj4string)
    KDE <- kernelUD(spTemplate, h = "href", grid = baseGrid) 
    UD50 <- getverticeshr(KDE, percent = 50)
    template_UDs[i] <- list(UD50) 
    # plot(KDE)
    # plot(UD50, add = T)
    # plot(spTemplate, pch = 20, cex = 0.5, add = T)
    
    # Second, we create the resistance surface
    res_surface <- resistanceFromShape(UD50, baseRaster = baseRaster, field = 0, 
                                       binary = F, background = 1)
    res_surface[res_surface == 1] <- res
    # plot(res_surface)
    resistance_surfaces[i] <- list(res_surface)
    # plot(res_surface)
    
    # Third, we simulate 10 tracks from each surface (i.e. from each individual)
    sim_tracks <- simulate(list(CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW), 
                           start.resistance = 0,
                           time = 100, resist = res_surface)
    spSim <- SpatialPointsDataFrame(coords = cbind(c(sim_tracks[,1], sim_tracks[,4],
                                                     sim_tracks[,7], sim_tracks[,10],
                                                     sim_tracks[,13], sim_tracks[,16],
                                                     sim_tracks[,19], sim_tracks[,22],
                                                     sim_tracks[,25],sim_tracks[,28]),
                                                   c(sim_tracks[,2], sim_tracks[,5],
                                                     sim_tracks[,8], sim_tracks[,11],
                                                     sim_tracks[,14],sim_tracks[,17],
                                                     sim_tracks[,20], sim_tracks[,23],
                                                     sim_tracks[,26], sim_tracks[,29])), 
                                    data = data.frame(TrackID = paste(i, rep(1:10, 
                                                                             each = 100),
                                                                      sep = "_"), 
                                                      BirdID = rep(i, 1000)))
    spSim@proj4string <- baseGrid@proj4string
    simulated_tracks[i] <- list(spSim)
    spSim$BirdID <- NULL
    # KUD_sim <- kernelUD(spSim, grid = baseGrid)
    # print(mean(kerneloverlap(spSim, method = "BA", percent = 50, conditional = T)))
    # plot(spSim, add = T, col = factor(spSim$TrackID), pch = ".", cex = 0.1)
  }
  
  list.df <- lapply(seq_along(simulated_tracks),
                    function(i){simulated_tracks[i][[1]]})
  
  # apply rbind to the list
  simulated_dataset <- do.call("rbind",
                               list.df)
  
  # assign null projection as X/Y values don't mean anything in this case
  simulated_dataset@proj4string <- CRS(as.character(NA))
  save(simulated_dataset, file = paste0("simulated_dataset_noncolonial_res", res, ".RDS"))
}

3.1.2 Testing the function

Plot first all the datasets used for the powertest

# plot the different datasets to compare ISF visually
rdslist <- list.files(pattern = ".RDS")

rdslist <- rdslist[grepl('simulated_dataset_noncolonial', rdslist)]
plotlist <- list()
# for (j in c(10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11)) {
for(j in seq_along(rdslist)){  
  # j = 10
  
  load(rdslist[j])
  sds <- as.data.frame(simulated_dataset)

  sds %>% 
    dplyr::group_by(TrackID) %>%
    dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), 
                                  ymd_hms('2020-01-30 12:10:00'), 
                                  length.out = 100)) %>% 
    mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
    dplyr::select(individual.local.identifier = TrackID, 
                  Longitude = coords.x1, Latitude = coords.x2, 
                  datetime = Date_Time) %>% 
    ungroup() -> sds
  sds <- as.data.frame(sds)
  
  sdf <- sds %>% 
    separate(individual.local.identifier, c("Ind", "Trip")) %>% 
    st_as_sf(coords = c("Longitude", "Latitude")) %>% 
    mutate(Ind = factor(as.character(Ind), levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))) %>% 
    mutate(Trip = factor(as.character(Trip), levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)))
  plot_title <- paste0("Outside resistance ", 
                       gsub("res", "", 
                            strsplit(tools::file_path_sans_ext(rdslist[j]), "_")[[1]][[4]]))
  
  plt <- ggplot() + 
    geom_sf(data = sdf, aes(col = Ind), alpha = 0.3, size = 0.7) + 
    xlab("") + ylab("") +
    theme_bw() + 
    theme_bw() + 
    theme(legend.position = "right", legend.box = "horizontal") + 
    guides(col = guide_legend(override.aes = list(size = 3, alpha = 1))) +
    coord_sf(xlim = c(-50, 50),
             ylim = c(-50, 50), expand = T) +
    # scale_shape(guide = "none") +
    ggtitle(plot_title)
    NULL
  plotlist[j] <- list(plt)
  names(plotlist)[j] <- plot_title
  # print(plt)
  rm(simulated_dataset)
}

plotlist <- plotlist[
  c("Outside resistance 0", "Outside resistance 0.1", "Outside resistance 0.2", 
           "Outside resistance 0.3", "Outside resistance 0.4", "Outside resistance 0.5",
           "Outside resistance 0.6", "Outside resistance 0.7", "Outside resistance 0.8",
           "Outside resistance 0.9", "Outside resistance 1")]
ggpubr::ggarrange(plotlist = plotlist, common.legend = T)

We now run a simplified version of the function IndEffectTest without all the code re the spatial projection, crossing datelines, etc. and as plain code outside the function for easy tweaking.

# To calculate the overlap we:
# 1 - Turn simulated tracks into "telemetry" objects"
# 2 - fit a guesstimate movement model to each track
# 3 - using the parameters of the guesstimate, fit a series of competing movement models
# 4 - Calculate the debiased BA between top-selected models for each track
# (uncertainty from the model fits is propagated into the overlap estimate under the
#     approximation that the BA is a chi-square random variable)

cl <- makeCluster(10, type="PSOCK") ## specify number of cores, and type of cluster (PSOCK or FORK)
registerDoParallel(cl) ## register the backend into a virtual node

# results_list <- list()
## Execute parallel operation
results_list <- foreach(j = 1:11, .packages=c("dplyr", "ctmm", "ggplot2", "lubridate")) %dopar% {
          # for (j in seq_along(rdslist)) {
          Result <- list()
          # j = 1
          load(rdslist[j])
  sds <- as.data.frame(simulated_dataset)

  sds %>% 
    dplyr::group_by(TrackID) %>%
    dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), 
                                  ymd_hms('2020-01-30 12:10:00'), 
                                  length.out = 100)) %>% 
    mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
    dplyr::select(individual.local.identifier = TrackID, 
                  Longitude = coords.x1, Latitude = coords.x2, 
                  datetime = Date_Time) %>% 
    ungroup() -> sds
  
  sds <- as.data.frame(sds)
  # 1
  sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")
  
  
  FIT.list <- list()
  
  before <- Sys.time()
  
  for(i in 1:length(sd_tel)){
  # for(i in 1:10){
    print(i)
    one <- sd_tel[[i]]
    # 2
    GUESS <- ctmm.guess(one, interactive=F)
    # 3
    FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
    print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
    FIT.list[[i]] <- FITS[[1]]
  }
  Sys.time() - before
  
  # 4
  ov <- ctmm::overlap(FIT.list, level = 0.95)
  
  # we select the "middle layer" of the array which contains the point estimate 
  # (for now ignoring the CI)
  X05 <- ov[,,2]
  Xlo <- ov[,,1]
  Xhi <- ov[,,3]
  
  X05[lower.tri(X05, diag = T)] <- NA
  Xlo[lower.tri(Xlo, diag = T)] <- NA
  Xhi[lower.tri(Xhi, diag = T)] <- NA
  
  Result[1] <- list(list(X05, Xlo, Xhi))
  # assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
  gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
  rownames(X05) <- colnames(X05) <- gid
  rownames(Xlo) <- colnames(Xlo) <- gid
  rownames(Xhi) <- colnames(Xhi) <- gid
  
  # separate within (WI) and between (BW) group overlaps
  
  # median
  WI05 <- NULL
  BW05 <- NULL
  
  # lower
  WIlo <- NULL
  BWlo <- NULL
  
  # higher
  WIhi <- NULL
  BWhi <- NULL
  
  for (i in seq_along(rownames(X05))) {
    # i = 1
    
    # medians
    x1 <- X05[i,] 
    x2 <- x1[which(names(x1) == rownames(X05)[i])]
    x3 <- x1[which(names(x1) != rownames(X05)[i])]
    WI05 <- c(WI05, x2)
    BW05 <- c(BW05, x3)
    
    # BW intervals
    x1 <- Xlo[i,] 
    x3 <- x1[which(names(x1) != rownames(Xlo)[i])]
    BWlo <- c(BWlo, x3)
    
    x1 <- Xhi[i,] 
    x3 <- x1[which(names(x1) != rownames(Xhi)[i])]
    BWhi <- c(BWhi, x3)
    
  }
  
  # medians
  BW05 <- BW05[!is.na(BW05)]
  WI05 <- WI05[!is.na(WI05)]
  
  # BW intervals
  BWlo <- BWlo[!is.na(BWlo)]
  BWhi <- BWhi[!is.na(BWhi)]
  
  # organize values in a dataframe for plotting
  Overlaps <- data.frame(
    Overlap = c(WI05, BW05),
    Type = c(rep("Within", length(WI05)), rep("Between", length(BW05))),
    Test = rdslist[j]
    )
  Result[2] <- list(Overlaps)
  # plot boxplot
  
  plot1 <- ggplot(data = Overlaps) + 
    geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) + 
    ggtitle("Density of within individual vs. between individual median overlaps") +
    theme_bw()
  
  Result[3] <- list(plot1)
  
  
  WI_median <- quantile(WI05, 0.5)
  
  BWint <- data.frame(Low = BWlo,
                      High = BWhi)
  
  BWint <- BWint %>% 
    mutate(Contains = "NA") %>% 
    mutate(Contains = if_else(High > WI_median, "NotAbove", "Above"))
  
  proportions <- as.data.frame(prop.table(table(BWint$Contains)))
  
  names(proportions) <- c("WI_Median_Position", "Percentage")
  proportions$Percentage <- round(proportions$Percentage*100, 1)
  
  Result[4] <- list(proportions)
  
  # results_list[j] <- list(Result)
  
  save(FIT.list, ov, Result, 
       file = paste("noncolonial_powerTest", strsplit(rdslist[j], "_")[[1]][4], sep = "_"))
}
stopCluster(cl)

3.1.3 Explore sensitivity tests outputs

powertestfiles <- list.files(pattern = "noncolonial_powerTest")

res_list <- list()
plot_list <- list()
for(i in c(10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11)) {
  # i = 10
  load(powertestfiles[i])
  Test = str_sub(powertestfiles[i], end = -5)
  res <- Result[[4]]
  res2 <- cbind(Test, res)
  res_list[i] <- list(res2)
  plot_list[i] <- list(Result[[3]])
  names(plot_list)[i] <- Test
}

test_results <- bind_rows(res_list)

test_results %>% 
  pivot_wider(names_from = "WI_Median_Position", values_from = "Percentage") %>% 
  separate(Test, into = c("Type of test", "test", "Resistance"), sep = "_", remove = T) %>% 
  mutate(Resistance = as.numeric(gsub("res", "", Resistance))) %>% 
  arrange(Resistance) %>% 
  dplyr::select("Resistance", "Above (WBR)" = "Above", "NotAbove") %>% 
  kable(digits = 2, align = "rrrr", caption = "Power Test - noncolonial") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Power Test - noncolonial
Resistance Above (WBR) NotAbove
0.0 6.9 93.1
0.1 8.6 91.4
0.2 6.4 93.6
0.3 10.4 89.6
0.4 14.5 85.5
0.5 16.3 83.7
0.6 26.2 73.8
0.7 25.5 74.5
0.8 32.7 67.3
0.9 56.7 43.3
1.0 85.8 14.2

3.2 For animals with the same origin

3.2.1 Generating the data

For more detail on this procedure, see the Demo_simulating_ISF_tracks in this repository

set.seed(205)
CRW <- species(state.CRW(0.1))
test <- simulate(CRW, coords = c(0, 0), time = 100)

# create empty base raster for resistance surfaces
baseRaster <- raster(xmn = -70, xmx = 70, ymn = -70, ymx = 70, res = 0.1)
values(baseRaster) <- 1
baseGrid <- as(baseRaster, "SpatialPixels")

# create a vector of random angles
angles <- runif(10, 0, 360)

# simulate 10 trips for each origin (i.e. 10 trips per individual)
template_tracks <- list()
template_UDs <- list()
resistance_surfaces <- list()
simulated_tracks <- list()

plot(1, xlim = c(-70, 70), ylim = c(-70, 70), type = "n", 
     main = "All the 'template' tracks")

resistances <- c(1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0)

for (j in seq_along(resistances)) {
  # j = 11
  res <- resistances[j]
  
  for (i in 1:10) {
    # i = 1
    
    # First we need to create a template trip to get the UD for the resistance surface
    template <- simulate(CRW, coords = c(0, 0), angles = angles[i], time = 1000)
    template <- as.data.frame(template)
    template <- template %>% 
      filter(V1 > -60 & V1 < 60) %>% 
      filter(V2 > -60 & V2 < 60)
    
    template_tracks[i] <- list(template)
    lines(template, col = brewer.pal(10,"Set3")[i])
    points(template, col = brewer.pal(10,"Set3")[i], pch = 16)
    spTemplate <- SpatialPoints(coords = template[,1:2], proj4string = baseGrid@proj4string)
    
    print(i)
    KDE <- kernelUD(spTemplate, h = "href", grid = baseGrid, extent = 2) 
    UD50 <- getverticeshr(KDE, percent = 50)
    template_UDs[i] <- list(UD50) 
    
    # Second, we create the resistance surface
    res_surface <- resistanceFromShape(UD50, baseRaster = baseRaster, field = 0, 
                                       binary = F, background = 1)
    res_surface[res_surface == 1] <- res
    resistance_surfaces[i] <- list(res_surface)
    
    # Third, we simulate 10 tracks from each surface (i.e. from each individual)
    sim_tracks <- simulate(list(CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW, CRW), 
                           start.resistance = 0,
                           time = 100, resist = res_surface)
    spSim <- SpatialPointsDataFrame(coords = cbind(c(sim_tracks[,1], sim_tracks[,4],
                                                     sim_tracks[,7], sim_tracks[,10],
                                                     sim_tracks[,13], sim_tracks[,16],
                                                     sim_tracks[,19], sim_tracks[,22],
                                                     sim_tracks[,25],sim_tracks[,28]),
                                                   c(sim_tracks[,2], sim_tracks[,5],
                                                     sim_tracks[,8], sim_tracks[,11],
                                                     sim_tracks[,14],sim_tracks[,17],
                                                     sim_tracks[,20], sim_tracks[,23],
                                                     sim_tracks[,26], sim_tracks[,29])), 
                                    data = data.frame(TrackID = paste(i, rep(1:10, 
                                                                             each = 100),
                                                                      sep = "_"), 
                                                      BirdID = rep(i, 1000)))
    spSim@proj4string <- baseGrid@proj4string
    simulated_tracks[i] <- list(spSim)
    spSim$BirdID <- NULL
  }
  
  list.df <- lapply(seq_along(simulated_tracks),
                    function(i){simulated_tracks[i][[1]]})
  
  # apply rbind to the list
  simulated_dataset <- do.call("rbind",
                               list.df)
  
  # assign null projection as X/Y values don't mean anything in this case
  simulated_dataset@proj4string <- CRS(as.character(NA))
  save(simulated_dataset, file = paste0("simulated_dataset_colonial_res", res, ".RDS"))
}

3.2.2 Testing the function

Plot first all the datasets used for the powertest

# plot the different datasets to compare ISF visually
rdslist <- list.files(pattern = ".RDS")

rdslist <- rdslist[grepl('simulated_dataset_colonial', rdslist)]

plotlist <- list()
for (j in seq_along(rdslist)) {
  # j = 1
  
  load(rdslist[j])
  sds <- as.data.frame(simulated_dataset)

  sds %>% 
    dplyr::group_by(TrackID) %>%
    dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), 
                                  ymd_hms('2020-01-30 12:10:00'), 
                                  length.out = 100)) %>% 
    mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
    dplyr::select(individual.local.identifier = TrackID, 
                  Longitude = coords.x1, Latitude = coords.x2, 
                  datetime = Date_Time) %>% 
    ungroup() -> sds
  sds <- as.data.frame(sds)
  
  sdf <- sds %>% 
    separate(individual.local.identifier, c("Ind", "Trip")) %>% 
    st_as_sf(coords = c("Longitude", "Latitude")) %>% 
    mutate(Ind = factor(as.character(Ind), levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))) %>% 
    mutate(Trip = factor(as.character(Trip), levels = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)))
  
  plot_title <- paste0("Outside resistance ", 
                       gsub("res", "", 
                            strsplit(tools::file_path_sans_ext(rdslist[j]), "_")[[1]][[4]]))
  
  plt <- ggplot() + 
    geom_sf(data = sdf, aes(col = Ind), alpha = 0.3, size = 0.7) + 
    xlab("") + ylab("") +
    theme_bw() + 
    theme_bw() + 
    theme(legend.position = "right", legend.box = "horizontal") + 
    guides(col = guide_legend(override.aes = list(size = 3, alpha = 1))) +
    coord_sf(xlim = c(-50, 50),
             ylim = c(-50, 50), expand = T) +
    # scale_shape(guide = "none") +
    ggtitle(plot_title)
    NULL
  plotlist[j] <- list(plt)
  names(plotlist)[j] <- plot_title
  # print(plt)
  rm(simulated_dataset)
}

plotlist <- plotlist[
  c("Outside resistance 0", "Outside resistance 0.1", "Outside resistance 0.2", 
           "Outside resistance 0.3", "Outside resistance 0.4", "Outside resistance 0.5",
           "Outside resistance 0.6", "Outside resistance 0.7", "Outside resistance 0.8",
           "Outside resistance 0.9", "Outside resistance 1")]
ggpubr::ggarrange(plotlist = plotlist, common.legend = T)

We now run a simplified version of the function IndEffectTest without all the code re the spatial projection, crossing datelines, etc. and as plain code outside the function for easy tweaking.

# To calculate the overlap we:
# 1 - Turn simulated tracks into "telemetry" objects"
# 2 - fit a guesstimate movement model to each track
# 3 - using the parameters of the guesstimate, fit a series of competing movement models
# 4 - Calculate the debiased BA between top-selected models for each track
# (uncertainty from the model fits is propagated into the overlap estimate under the
#     approximation that the BA is a chi-square random variable)
rdslist <- list.files(pattern = ".RDS")

rdslist <- rdslist[grepl('simulated_dataset_colonial', rdslist)]

cl <- makeCluster(10, type="PSOCK") ## specify number of cores, and type of cluster (PSOCK or FORK)
registerDoParallel(cl) ## register the backend into a virtual node

# results_list <- list()
## Execute parallel operation
results_list <- foreach(j = 1:11, .packages=c("dplyr", "ctmm", "ggplot2", "lubridate")) %dopar% {
  Result <- list()
  # j = 1
  
  load(rdslist[j])
  sds <- as.data.frame(simulated_dataset)

  sds %>% 
    dplyr::group_by(TrackID) %>%
    dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), 
                                  ymd_hms('2020-01-30 12:10:00'), 
                                  length.out = 100)) %>% 
    mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
    dplyr::select(individual.local.identifier = TrackID, 
                  Longitude = coords.x1, Latitude = coords.x2, 
                  datetime = Date_Time) %>% 
    ungroup() -> sds
  
  sds <- as.data.frame(sds)
  # 1
  sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")
  
  
  FIT.list <- list()
  
  before <- Sys.time()
  
  for(i in 1:length(sd_tel)){
    print(i)
    one <- sd_tel[[i]]
    # 2
    GUESS <- ctmm.guess(one, interactive=F)
    # 3
    FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
    print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
    FIT.list[[i]] <- FITS[[1]]
  }
  Sys.time() - before
  
  # 4
  ov <- ctmm::overlap(FIT.list, level = 0.95)

  # we select the "middle layer" of the array which contains the point estimate 
  # (for now ignoring the CI)
  X05 <- ov[,,2]
  Xlo <- ov[,,1]
  Xhi <- ov[,,3]
  
  X05[lower.tri(X05, diag = T)] <- NA
  Xlo[lower.tri(Xlo, diag = T)] <- NA
  Xhi[lower.tri(Xhi, diag = T)] <- NA
  
  Result[1] <- list(list(X05, Xlo, Xhi))
  # assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
  gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
  rownames(X05) <- colnames(X05) <- gid
  rownames(Xlo) <- colnames(Xlo) <- gid
  rownames(Xhi) <- colnames(Xhi) <- gid
  
  # separate within (WI) and between (BW) group overlaps
  
  # median
  WI05 <- NULL
  BW05 <- NULL
  
  # lower
  WIlo <- NULL
  BWlo <- NULL
  
  # higher
  WIhi <- NULL
  BWhi <- NULL
  
  for (i in seq_along(rownames(X05))) {
    # i = 1
    
    # medians
    x1 <- X05[i,] 
    x2 <- x1[which(names(x1) == rownames(X05)[i])]
    x3 <- x1[which(names(x1) != rownames(X05)[i])]
    WI05 <- c(WI05, x2)
    BW05 <- c(BW05, x3)
    
    # BW intervals
    x1 <- Xlo[i,] 
    x3 <- x1[which(names(x1) != rownames(Xlo)[i])]
    BWlo <- c(BWlo, x3)
    
    x1 <- Xhi[i,] 
    x3 <- x1[which(names(x1) != rownames(Xhi)[i])]
    BWhi <- c(BWhi, x3)
    
  }
  
  # medians
  BW05 <- BW05[!is.na(BW05)]
  WI05 <- WI05[!is.na(WI05)]
  
  # BW intervals
  BWlo <- BWlo[!is.na(BWlo)]
  BWhi <- BWhi[!is.na(BWhi)]
  
  # organize values in a dataframe for plotting
  Overlaps <- data.frame(
    Overlap = c(WI05, BW05),
    Type = c(rep("Within", length(WI05)), rep("Between", length(BW05))),
    Test = rdslist[j]
    )
  Result[2] <- list(Overlaps)

  plot1 <- ggplot(data = Overlaps) + 
    geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) + 
    ggtitle("Density of within individual vs. between individual median overlaps") +
    theme_bw()
  
  Result[3] <- list(plot1)
  
  
  WI_median <- quantile(WI05, 0.5)
  
  BWint <- data.frame(Low = BWlo,
                      High = BWhi)
  
  BWint <- BWint %>% 
    mutate(Contains = "NA") %>% 
    mutate(Contains = if_else(High > WI_median, "Above", "NotAbove"))
  
  proportions <- as.data.frame(prop.table(table(BWint$Contains)))
  
  names(proportions) <- c("WI_Median_Position", "Percentage")
  proportions$Percentage <- round(proportions$Percentage*100, 1)
  
  Result[4] <- list(proportions)
  
  results_list[j] <- list(Result)
  
  save(FIT.list, ov, Result, 
       file = paste("colonial_powerTest", strsplit(rdslist[j], "_")[[1]][4], sep = "_"))
}
stopCluster(cl)

3.2.3 Explore sensitivity tests outputs

powertestfiles <- list.files(pattern = "^colonial_powerTest")

res_list <- list()
plot_list <- list()
for(i in seq_along(powertestfiles)) {
  # i = 1
  load(powertestfiles[i])
  Test = str_sub(powertestfiles[i], end = -5)
  res <- Result[[4]]
  res2 <- cbind(Test, res)
 res_list[i] <- list(res2)
 plot_list[i] <- list(Result[[3]])
 names(plot_list)[i] <- Test
}

test_results <- bind_rows(res_list)

test_results %>% 
  pivot_wider(names_from = "WI_Median_Position", values_from = "Percentage") %>% 
  separate(Test, into = c("Type of test", "test", "Resistance"), sep = "_", remove = T) %>% 
  mutate(Resistance = as.numeric(gsub("res", "", Resistance))) %>% 
  arrange(Resistance) %>% 
  dplyr::select("Resistance", "Above (WBR)" = "Above", "NotAbove") %>% 
  kable(digits = 2, align = "rrrr", caption = "Power Test - colonial") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Power Test - colonial
Resistance Above (WBR) NotAbove
0.0 93.6 6.4
0.1 85.0 15.0
0.2 85.5 14.5
0.3 87.2 12.8
0.4 82.2 17.8
0.5 83.4 16.6
0.6 85.7 14.3
0.7 77.7 22.3
0.8 73.7 26.3
0.9 71.8 28.2
1.0 65.2 34.8

4 Sampling effort test

In this section we’ll test the ability of the indEffect_test() function to detect ISF when different individuals have different ammounts of data. For that, we generate 9 different datasets of a hundred tracks of a hundred positions each (10000 rows in total) with different individual sampling imbalances:

  • 91:1:1:1:1:1:1:1:1:1

  • 46:46:1:1:1:1:1:1:1:1

  • 31:31:31:1:1:1:1:1:1:1

  • 23:23:23:23:1:1:1:1:1:1

  • 19:19:19:19:19:1:1:1:1:1

  • 16:16:16:16:16:16:1:1:1:1

  • 13:13:13:13:13:13:13:1:1:1

  • 12:12:12:12:12:12:12:12:1:1

  • 11:11:11:11:11:11:11:11:11:1

  • 10:10:10:10:10:10:10:10:10:10

4.1 Generate the templates

CRW <- species(state.CRW(0.4))
set.seed(1234)
test <- simulate(CRW, coords = c(0, 0), time = 100)

# create empty base raster for resistance surfaces
baseRaster <- raster(xmn = -50, xmx = 50, ymn = -50, ymx = 50, res = 0.1)
values(baseRaster) <- 1
baseGrid <- as(baseRaster, "SpatialPixels")

# create an vector of random angles
origin_points <- data.frame(x = runif(10, -25, 25), 
                            y = runif(10, -25, 25))

# simulate 10 trips for each origin (i.e. 10 trips per individual)
template_tracks <- list()
template_UDs <- list()


for (i in 1:10) {
  # i = 1
  
  # First we need to create a template trip to get the UD for the resistance surface
  set.seed(12)
  template <- simulate(CRW, 
                       coords = c(origin_points[i,1], origin_points[i, 2]), 
                       time = 100)
  template_tracks[i] <- list(template)
  spTemplate <- SpatialPoints(coords = template[,1:2], proj4string = baseGrid@proj4string)
  KDE <- kernelUD(spTemplate, h = "href", grid = baseGrid) 
  UD50 <- getverticeshr(KDE, percent = 50)
  template_UDs[i] <- list(UD50) 
}

4.2 Generate the data and run the function

We generate the simulated datasets and run the IndEffectTest functin steps inside a loop for all the specified unbalanced datasets. We will simulate a high ISF, with a resistance value of 1 outside the “allowed” area.

repeated_birds <- c(1:9) 

plotlist <- list()

cl <- makeCluster(10, type="PSOCK") ## specify number of cores, and type of cluster (PSOCK or FORK)
registerDoParallel(cl) ## register the backend into a virtual node
# results_list <- list()
## Execute parallel operation

foreach(j = 1:9, .packages=c("dplyr", "ctmm", "ggplot2", "lubridate", "SiMRiv", "tidyr", "sp")) %dopar% {
  # j = 1
  cat(j, "repeated bird(s) \n")
  set.seed(j)
  x <- sample(1:10, j, replace = FALSE)
  
  xlist <- list(CRW)
  splist <- rep(xlist, floor((100-(10-j))/j))
  
  simulated_tracks <- list()
  
  # simulate tracks for each individual 
  for (i in x) {
    # i = 3
    res_surface <- resistanceFromShape(template_UDs[[i]], baseRaster = baseRaster, 
                                       field = 0, background = 1)
    sim_tracks <- simulate(splist, 
                           start.resistance = 0,
                           time = 100, resist = res_surface)
    
    colnames(sim_tracks) <- paste(rep(c("x", "y", "z"), floor((100-(10-j))/j)),
                                  rep(1:floor((100-(10-j))/j), each = 3), 
                                  sep = "_")
    
    sim_tracks <- sim_tracks %>% 
      as.data.frame() 
    
    
    x_vector <- sim_tracks %>% 
      dplyr::select(contains('x')) %>% 
      mutate(posID = seq(1:100)) %>% 
      pivot_longer(!posID, names_to = "track_coord", values_to = "degrees")  %>% 
      separate(track_coord, into = c("coord", "trackID")) %>% 
      as.data.frame() %>% 
      dplyr::select(trackID, posID, x = degrees)
    
    y_vector <- sim_tracks %>% 
      dplyr::select(contains('y')) %>% 
      mutate(posID = seq(1:100)) %>% 
      pivot_longer(!posID, names_to = "track_coord", values_to = "degrees")  %>% 
      separate(track_coord, into = c("coord", "trackID")) %>% 
      as.data.frame() %>% 
      dplyr::select(trackID, posID, y = degrees)
    
    sim_tracks2 <- x_vector %>% 
      left_join(y_vector) %>% 
      arrange(trackID, posID)
    
    spSim <- SpatialPointsDataFrame(coords = sim_tracks2[,3:4], 
                                    data = data.frame(TrackID = paste(rep(i,
                                                                          nrow(sim_tracks2)), 
                                                                      sim_tracks2$trackID, 
                                                                      sep = "_"),
                                                      BirdID = rep(i, nrow(sim_tracks2))))
    spSim@proj4string <- baseGrid@proj4string
    simulated_tracks[i] <- list(spSim)
    spSim$BirdID <- NULL
   }  
  
  y <- 1:10
  
  for (i in y[-x]) {
    res_surface <- resistanceFromShape(template_UDs[[i]], baseRaster = baseRaster, 
                                       field = 0, background = 0.5)
    sim_tracks <- simulate(xlist, 
                           
                           start.resistance = 0,
                           time = 100, resist = res_surface)
    xcoords <- as.vector(sim_tracks[,1])
    ycoords <- as.vector(sim_tracks[,2])
    spSim <- SpatialPointsDataFrame(coords = cbind(xcoords, ycoords), 
                                    data = data.frame(TrackID = paste(i, 
                                                                      rep(1, each = 100),
                                                                      sep = "_"), 
                                                      BirdID = rep(i, 100)))
    spSim@proj4string <- baseGrid@proj4string
    simulated_tracks[i] <- list(spSim)
    spSim$BirdID <- NULL
  }
  
  list.df <- lapply(seq_along(simulated_tracks),
                    function(i){simulated_tracks[i][[1]]})
  
  # apply rbind to the list
  simulated_dataset <- do.call("rbind",
                               list.df)
  
  simulated_dataset@proj4string <- baseGrid@proj4string
  
  
  sds <- data.frame(TrackID = simulated_dataset$TrackID, 
                    BirdID = simulated_dataset$BirdID, 
                    xcoords = simulated_dataset@coords[,1],
                    ycoords = simulated_dataset@coords[,2])
  
  sds$BirdID <- NULL
  
  sds <- sds %>% 
    dplyr::group_by(TrackID) %>%
    dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), 
                                  ymd_hms('2020-01-30 12:10:00'), 
                                  length.out = n())) %>% 
    mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
    dplyr::select(TrackID, 
                  Longitude = xcoords, Latitude = ycoords, 
                  datetime = Date_Time) %>% 
    ungroup() %>% 
    as.data.frame()
  
  message_filename = paste("j", j, 'script_messages.txt', sep = "_")
  try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
  sink(message_file, type="message")
  sink(message_file, type="output")
  
  sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")
  
  
  FIT.list <- list()
  
  before <- Sys.time()
  
  for(i in 1:length(sd_tel)){
    print(i)
    one <- sd_tel[[i]]
    # 2
    GUESS <- ctmm.guess(one, interactive=F)
    # 3
    FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
    print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
    FIT.list[[i]] <- FITS[[1]]
  }
  
  sink(file = NULL, type="message")
  sink(file = NULL, type="output")
  # closeAllConnections() 
  Sys.time() - before
  
  # 4
  ov <- ctmm::overlap(FIT.list, level = 0.95)

  # we select the "middle layer" of the array which contains the point estimate 
  # (for now ignoring the CI)
  X05 <- ov[,,2]
  Xlo <- ov[,,1]
  Xhi <- ov[,,3]
  
  X05[lower.tri(X05, diag = T)] <- NA
  Xlo[lower.tri(Xlo, diag = T)] <- NA
  Xhi[lower.tri(Xhi, diag = T)] <- NA
  
  Result1 <- list()
  Result1[1] <- list(list(X05, Xlo, Xhi))
  # assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
  gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
  rownames(X05) <- colnames(X05) <- gid
  rownames(Xlo) <- colnames(Xlo) <- gid
  rownames(Xhi) <- colnames(Xhi) <- gid
  
  # separate within (WI) and between (BW) group overlaps
  
  # median
  WI05 <- NULL
  BW05 <- NULL
  
  # lower
  WIlo <- NULL
  BWlo <- NULL
  
  # higher
  WIhi <- NULL
  BWhi <- NULL
  
  for (i in seq_along(rownames(X05))) {
    # i = 1
    
    # medians
    x1 <- X05[i,] 
    x2 <- x1[which(names(x1) == rownames(X05)[i])]
    x3 <- x1[which(names(x1) != rownames(X05)[i])]
    WI05 <- c(WI05, x2)
    BW05 <- c(BW05, x3)
    
    # BW intervals
    x1 <- Xlo[i,] 
    x3 <- x1[which(names(x1) != rownames(Xlo)[i])]
    BWlo <- c(BWlo, x3)
    
    x1 <- Xhi[i,] 
    x3 <- x1[which(names(x1) != rownames(Xhi)[i])]
    BWhi <- c(BWhi, x3)
    
  }
  
  # medians
  BW05 <- BW05[!is.na(BW05)]
  WI05 <- WI05[!is.na(WI05)]
  
  # BW intervals
  BWlo <- BWlo[!is.na(BWlo)]
  BWhi <- BWhi[!is.na(BWhi)]
  
  # organize values in a dataframe for plotting
  Overlaps <- data.frame(
    Overlap = c(WI05, BW05),
    Type = c(rep("Within", length(WI05)), rep("Between", length(BW05))),
    Test = "ind_1"
  )
  Result1[2] <- list(Overlaps)
  # plot boxplot
  
  plot1 <- ggplot(data = Overlaps) + 
    geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) + 
    ggtitle(paste(j, " repeated birds")) +
    theme_bw()
  
  Result1[3] <- list(plot1)
  
  
  WI_median <- quantile(WI05, 0.5)
  
  BWint <- data.frame(Low = BWlo,
                      High = BWhi)
  
  BWint <- BWint %>% 
    mutate(Contains = "NA") %>% 
    mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))
  
  proportions <- as.data.frame(prop.table(table(BWint$Contains)))
  
  names(proportions) <- c("WI_Median_Position", "Percentage")
  proportions$Percentage <- round(proportions$Percentage*100, 2)
  
  (proportions_wide <- spread(proportions, WI_Median_Position, Percentage))
  
  Result1[4] <- list(proportions_wide)
  
  #list_of_results[j] <- list(Result1)
  plotlist[j] <- list(plot1)
  saveRDS(Result1, file = paste0(j, "_repeated_birds.RDS"))
}

4.3 Compare results

rdslist <- list.files(pattern = "repeated_birds.RDS")

list_of_results <- list()
for (i in seq_along(rdslist)) {
  res <- readRDS(rdslist[i])
  list_of_results[[i]] <- res
}

final_df <- dplyr::bind_rows(
  list_of_results[[1]][[4]], list_of_results[[2]][[4]], list_of_results[[3]][[4]], list_of_results[[4]][[4]], 
  list_of_results[[5]][[4]], list_of_results[[6]][[4]], list_of_results[[7]][[4]], list_of_results[[9]][[4]], 
  list_of_results[[9]][[4]])

final_df$Test <- paste(seq(1, 9), "repeated birds")

final_df %>% 
  dplyr::select("Test", "Above (WBR)", "NotAbove") %>% 
  kable(digits = 2, align = "rrrr", caption = "Test sampling unbalance") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Test sampling unbalance
Test Above (WBR) NotAbove
1 repeated birds 85.96 14.04
2 repeated birds 67.05 32.95
3 repeated birds 59.58 40.42
4 repeated birds 85.03 14.97
5 repeated birds 73.28 26.72
6 repeated birds 73.74 26.26
7 repeated birds 71.16 28.84
8 repeated birds 79.98 20.02
9 repeated birds 79.98 20.02

Whether there’s a strong unbalance (1 individual repeatedly sampled and the rest only sampled once) or data are more even (9 individuals repeatedly sampled, only one sampled only once), the function is able to detect individual site fidelity

5 Sampling frequency test

Testing the ability of the IndEffectTest function to detect ISF at different sampling frequencies. Here, we use the dataset of the non-colonial simulated species with a 1 resistance surface (high ISF). From it, we will subset every fifth position, and then every tenth, and compare the results of the individual site fidelity test.

5.1 Test ISF with the full simulated dataset of high ISF

sds <- as.data.frame(simulated_dataset)

sds %>% 
  dplyr::group_by(TrackID) %>%
  dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), ymd_hms('2020-01-30 12:10:00'), 
                                length.out = 100)) %>% 
  mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>% 
  dplyr::select(individual.local.identifier = TrackID, 
                Longitude = coords.x1, Latitude = coords.x2, 
                datetime = Date_Time) %>% 
  ungroup() -> sds
sds <- as.data.frame(sds)

message_filename = "output_fullds.txt"
try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
sink(message_file, type="message")
sink(message_file, type="output")
sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")


FIT.list <- list()

before <- Sys.time()
for(i in 1:length(sd_tel)){
  # print(i)
  one <- sd_tel[[i]]
  # 2
  GUESS <- ctmm.guess(one, interactive=F)
  # 3
  FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
  # print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
  FIT.list[[i]] <- FITS[[1]]
}
sink(file = NULL, type="message")
sink(file = NULL, type="output")
# closeAllConnections() 
Sys.time() - before

# 4
ov <- ctmm::overlap(FIT.list, level = 0.95)

# separate the layers of the median and CI
X05 <- ov[,,2]
Xlo <- ov[,,1]
Xhi <- ov[,,3]

# remove lower triangle of each (repeated values since it's a symetric matrix)
X05[lower.tri(X05, diag = T)] <- NA
Xlo[lower.tri(Xlo, diag = T)] <- NA
Xhi[lower.tri(Xhi, diag = T)] <- NA


# assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
rownames(X05) <- colnames(X05) <- gid
rownames(Xlo) <- colnames(Xlo) <- gid
rownames(Xhi) <- colnames(Xhi) <- gid

# median
WI05 <- NULL
BW05 <- NULL

# BW lower and higher CI bounds
BWlo <- NULL
BWhi <- NULL

for (i in seq_along(rownames(X05))) {
  # i = 2
  
  # medians
  x1 <- X05[i,] 
  x2 <- x1[which(names(x1) == rownames(X05)[i])]
  x3 <- x1[which(names(x1) != rownames(X05)[i])]
  
  WI05 <- c(WI05, x2)
  BW05 <- c(BW05, x3)
  
  # BW intervals
  y1 <- Xlo[i,] 
  y2 <- y1[which(names(y1) != rownames(Xlo)[i])]
  BWlo <- c(BWlo, y2)
  
  z1 <- Xhi[i,] 
  z2 <- z1[which(names(z1) != rownames(Xhi)[i])]
  BWhi <- c(BWhi, z2)
  
}

# remove NAs
WI05 <- WI05[!is.na(WI05)]
BW05 <- BW05[!is.na(BW05)]
BWlo <- BWlo[!is.na(BWlo)]
BWhi <- BWhi[!is.na(BWhi)]

# organize values in a dataframe for plotting
Overlaps <- data.frame(
  Overlap = c(WI05, BW05),
  Type = c(rep("Within", length(WI05)), rep("Between", length(BW05)))
)

plotFull <- ggplot(data = Overlaps) + 
  geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) + 
  ggtitle("Full dataset") +
  theme_bw()

WI_median <- quantile(WI05, 0.5)

BWint <- data.frame(Low = BWlo,
                    High = BWhi)

BWint <- BWint %>% 
  mutate(Contains = "NA") %>% 
  mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))

proportions <- as.data.frame(prop.table(table(BWint$Contains)))

names(proportions) <- c("WI_Median_Position", "Percentage")
proportions$Percentage <- round(proportions$Percentage*100, 2)

proportions_wide1 <- spread(proportions, WI_Median_Position, Percentage)
write.csv(proportions_wide1, file = "proportions_wide1.csv", row.names = F)

5.2 With the same dataset but subset just one every 5 positions

# turn to sf for easy subsetting and subset

sim_5th <- st_as_sf(simulated_dataset) %>%
  slice(which(row_number() %% 5 == 1))

sim_5th <- as_Spatial(sim_5th)

sds <- as.data.frame(sim_5th)

sds %>%
  dplyr::group_by(TrackID) %>%
  dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), ymd_hms('2020-01-30 12:10:00'),
                                length.out = 20)) %>%
  mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>%
  dplyr::select(individual.local.identifier = TrackID,
                Longitude = coords.x1, Latitude = coords.x2,
                datetime = Date_Time) %>%
  ungroup() -> sds

sds <- as.data.frame(sds)

message_filename = "output_5thds.txt"
try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
sink(message_file, type="message")
sink(message_file, type="output")

# 1
sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")

FIT.list <- list()

before <- Sys.time()
for(i in 1:length(sd_tel)){
  # print(i)
  one <- sd_tel[[i]]
  # 2
  GUESS <- ctmm.guess(one, interactive=F)
  # 3
  FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
  # print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
  FIT.list[[i]] <- FITS[[1]]
}
sink(file = NULL, type="message")
sink(file = NULL, type="output")
# closeAllConnections()
Sys.time() - before

# 4
ov <- ctmm::overlap(FIT.list, level = 0.95)

# separate the layers of the median and CI
X05 <- ov[,,2]
Xlo <- ov[,,1]
Xhi <- ov[,,3]

# remove lower triangle of each (repeated values since it's a symetric matrix)
X05[lower.tri(X05, diag = T)] <- NA
Xlo[lower.tri(Xlo, diag = T)] <- NA
Xhi[lower.tri(Xhi, diag = T)] <- NA


# assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
rownames(X05) <- colnames(X05) <- gid
rownames(Xlo) <- colnames(Xlo) <- gid
rownames(Xhi) <- colnames(Xhi) <- gid

# median
WI05 <- NULL
BW05 <- NULL

# BW lower and higher CI bounds
BWlo <- NULL
BWhi <- NULL

for (i in seq_along(rownames(X05))) {
  # i = 2

  # medians
  x1 <- X05[i,]
  x2 <- x1[which(names(x1) == rownames(X05)[i])]
  x3 <- x1[which(names(x1) != rownames(X05)[i])]

  WI05 <- c(WI05, x2)
  BW05 <- c(BW05, x3)

  # BW intervals
  y1 <- Xlo[i,]
  y2 <- y1[which(names(y1) != rownames(Xlo)[i])]
  BWlo <- c(BWlo, y2)

  z1 <- Xhi[i,]
  z2 <- z1[which(names(z1) != rownames(Xhi)[i])]
  BWhi <- c(BWhi, z2)

}

# remove NAs
WI05 <- WI05[!is.na(WI05)]
BW05 <- BW05[!is.na(BW05)]
BWlo <- BWlo[!is.na(BWlo)]
BWhi <- BWhi[!is.na(BWhi)]

# organize values in a dataframe for plotting
Overlaps <- data.frame(
  Overlap = c(WI05, BW05),
  Type = c(rep("Within", length(WI05)), rep("Between", length(BW05)))
)

plot1 <- ggplot(data = Overlaps) +
  geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) +
  ggtitle("Every 5th position") +
  theme_bw()

WI_median <- quantile(WI05, 0.5)

BWint <- data.frame(Low = BWlo,
                    High = BWhi)

BWint <- BWint %>%
    mutate(Contains = "NA") %>%
    mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))

proportions <- as.data.frame(prop.table(table(BWint$Contains)))

names(proportions) <- c("WI_Median_Position", "Percentage")
proportions$Percentage <- round(proportions$Percentage*100, 2)

proportions_wide2 <- spread(proportions, WI_Median_Position, Percentage)
write.csv(proportions_wide2, file = "proportions_wide2.csv", row.names = F)

5.3 With the same dataset but subset just one every 10 positions

# turn to sf for easy subsetting and subset

sim_10th <- st_as_sf(simulated_dataset) %>%
  slice(which(row_number() %% 10 == 1))

sim_10th <- as_Spatial(sim_10th)

sds <- as.data.frame(sim_10th)

sds %>%
  dplyr::group_by(TrackID) %>%
  dplyr::mutate(Date_Time = seq(ymd_hms('2019-11-30 12:10:00'), ymd_hms('2020-01-30 12:10:00'),
                                length.out = 10)) %>%
  mutate(Date_Time = format(Date_Time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC")) %>%
  dplyr::select(individual.local.identifier = TrackID,
                Longitude = coords.x1, Latitude = coords.x2,
                datetime = Date_Time) %>%
  ungroup() -> sds

sds <- as.data.frame(sds)

message_filename = "output_10thds.txt"
try(message_file <- file(message_filename, open="at")) # open file for appending in text mode
sink(message_file, type="message")
sink(message_file, type="output")
sd_tel <- as.telemetry(object = sds, timeformat = "%Y/%m/%d %H:%M:%S")

FIT.list <- list()

before <- Sys.time()
for(i in 1:length(sd_tel)){
  # print(i)
  one <- sd_tel[[i]]
  # 2
  GUESS <- ctmm.guess(one, interactive=F)
  # 3
  FITS <- ctmm.select(one, GUESS,  trace=TRUE, verbose=TRUE, cores=0)
  # print(paste("The selected model for", names(sd_tel)[i], "is", names(FITS)[1], sep = " "))
  FIT.list[[i]] <- FITS[[1]]
}
sink(file = NULL, type="message")
sink(file = NULL, type="output")
# closeAllConnections()
Sys.time() - before

# 4
ov <- ctmm::overlap(FIT.list, level = 0.95)

# separate the layers of the median and CI
X05 <- ov[,,2]
Xlo <- ov[,,1]
Xhi <- ov[,,3]

# remove lower triangle of each (repeated values since it's a symetric matrix)
X05[lower.tri(X05, diag = T)] <- NA
Xlo[lower.tri(Xlo, diag = T)] <- NA
Xhi[lower.tri(Xhi, diag = T)] <- NA


# assign value of BirdID that we rescue from the simulated_dataset df to rows and columns
gid <- simulated_dataset@data[!duplicated(simulated_dataset@data[, "TrackID"]), ][["BirdID"]]
rownames(X05) <- colnames(X05) <- gid
rownames(Xlo) <- colnames(Xlo) <- gid
rownames(Xhi) <- colnames(Xhi) <- gid

# median
WI05 <- NULL
BW05 <- NULL

# BW lower and higher CI bounds
BWlo <- NULL
BWhi <- NULL

for (i in seq_along(rownames(X05))) {
  # i = 2

  # medians
  x1 <- X05[i,]
  x2 <- x1[which(names(x1) == rownames(X05)[i])]
  x3 <- x1[which(names(x1) != rownames(X05)[i])]

  WI05 <- c(WI05, x2)
  BW05 <- c(BW05, x3)

  # BW intervals
  y1 <- Xlo[i,]
  y2 <- y1[which(names(y1) != rownames(Xlo)[i])]
  BWlo <- c(BWlo, y2)

  z1 <- Xhi[i,]
  z2 <- z1[which(names(z1) != rownames(Xhi)[i])]
  BWhi <- c(BWhi, z2)

}

# remove NAs
WI05 <- WI05[!is.na(WI05)]
BW05 <- BW05[!is.na(BW05)]
BWlo <- BWlo[!is.na(BWlo)]
BWhi <- BWhi[!is.na(BWhi)]

# organize values in a dataframe for plotting
Overlaps <- data.frame(
  Overlap = c(WI05, BW05),
  Type = c(rep("Within", length(WI05)), rep("Between", length(BW05)))
)

plot2 <- ggplot(data = Overlaps) +
  geom_density(aes(x = Overlap, col = Type, fill = Type), alpha = 0.5) +
  ggtitle("Every 10th position") +
  theme_bw()

WI_median <- quantile(WI05, 0.5)

BWint <- data.frame(Low = BWlo,
                    High = BWhi)

BWint <- BWint %>%
    mutate(Contains = "NA") %>%
    mutate(Contains = if_else(High > WI_median, "NotAbove", "Above (WBR)"))

proportions <- as.data.frame(prop.table(table(BWint$Contains)))

names(proportions) <- c("WI_Median_Position", "Percentage")
proportions$Percentage <- round(proportions$Percentage*100, 2)

proportions_wide3 <- spread(proportions, WI_Median_Position, Percentage)
write.csv(proportions_wide3, file = "proportions_wide3.csv", row.names = F)

5.4 Compare the results

prop_files <- list.files(pattern = "proportions_wide")
prop_list <- map(prop_files, read.csv)

final_df <- bind_rows(prop_list)

final_df$Test <- c("original", "every 5th", "every 10th")

final_df %>%
  dplyr::select("Test", "Above (WBR)" = "Above..WBR.", "NotAbove") %>% 
  kable(digits = 2, align = "rrrr", caption = "Test sampling frequency") %>%
  kable_classic(full_width = F, html_font = "Cambria")
Test sampling frequency
Test Above (WBR) NotAbove
original 85.80 14.20
every 5th 83.09 16.91
every 10th 77.13 22.87

Either with one 5th or with one 10th of the locations, the function is still able to detect individual site fidelity as well as the full dataset. This demonstrates the function is robust to different sampling frequencies as well.