ARES 40340 Applied Spatial Ecology (Tutorials)

Author

Martin Cooper

Week 3 (First tutorial)

MCP_KDE Tutorial

This tutorial is an introduction to some basic methods of Home Range Estimation. MCP is Minimum Convex Polygons and KDE is Kernel Density Estimates.

The package-loading chunk creates a list of the required packages as the object List.of.packages, then calls this list, checks installation status, installs any missing packages, and then loads them all ready for our use.
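A minimal sketch of that pattern is shown below; the exact package list here is an assumption based on the functions used later in this tutorial, not the original chunk.

# Sketch of the package-loading pattern described above (package list assumed
# from the functions used in this tutorial)
List.of.packages <- c("dplyr", "tidyr", "ggplot2", "leaflet", "tmap",
                      "kableExtra", "sf", "sp", "adehabitatHR")

# identify any packages that are not yet installed
new.packages <- List.of.packages[!(List.of.packages %in%
                                     installed.packages()[, "Package"])]

# install the missing ones
if (length(new.packages) > 0) install.packages(new.packages)

# load them all
invisible(lapply(List.of.packages, library, character.only = TRUE))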

Data Preparation and Initial Exploration

# import dataset
dat <- read.csv("WD_hedgehogs2022.csv",
               header = TRUE)

# select only the relevant variables

df <- dat %>%
 dplyr::select ( #select following variables
    eventid = event.id,
    id = individual.local.identifier,
    sex = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x = utm.easting,
    utm_y = utm.northing,
    long = location.long,
    lat = location.lat
    
     )
# check structure
str(df)
'data.frame':   5899 obs. of  9 variables:
 $ eventid  : num  2.38e+10 2.38e+10 2.38e+10 2.38e+10 2.38e+10 ...
 $ id       : chr  "Anne 21" "Anne 21" "Anne 21" "Anne 21" ...
 $ sex      : chr  "F" "F" "F" "F" ...
 $ behaviour: chr  "R" "R" "R" "R" ...
 $ timestamp: chr  "2021-05-01 19:50:09.000" "2021-05-01 20:40:10.000" "2021-05-01 21:00:11.000" "2021-05-01 21:10:06.000" ...
 $ utm_x    : num  549255 549257 549257 549264 549272 ...
 $ utm_y    : num  5872578 5872581 5872569 5872579 5872569 ...
 $ long     : num  -2.27 -2.27 -2.27 -2.27 -2.27 ...
 $ lat      : num  53 53 53 53 53 ...

This has selected the variables of interest from the dataset.

# make sure the date/time is in POSIXct format 
df$timestamp <- as.POSIXct(df$timestamp, format="%Y-%m-%d %H:%M:%S", tz="UTC")
# confirm
str(df$timestamp)
 POSIXct[1:5899], format: "2021-05-01 19:50:09" "2021-05-01 20:40:10" "2021-05-01 21:00:11" ...
# Count rows with any NA values - works with numerical variables
cat("Number of rows with NA: ", sum(rowSums(is.na(df)) > 0))
Number of rows with NA:  0
# order dataset by id and timestamp
df <- df[order(df$id, df$timestamp),]
head (df)
         eventid   id sex behaviour           timestamp    utm_x   utm_y
4406 23752857921 Alan   M         W 2022-05-07 00:00:04 548309.5 5873005
4407 23752857926 Alan   M         W 2022-05-07 00:10:11 548235.7 5873004
4408 23752857941 Alan   M         W 2022-05-07 01:20:05 548240.9 5873016
4409 23752857945 Alan   M         W 2022-05-07 01:30:04 548296.7 5873010
4410 23752857947 Alan   M         W 2022-05-07 01:40:04 548334.3 5873007
4411 23752857950 Alan   M         W 2022-05-07 01:50:04 548326.1 5873018
         long      lat
4406 -2.28007 53.00442
4407 -2.28117 53.00442
4408 -2.28109 53.00453
4409 -2.28026 53.00447
4410 -2.27970 53.00444
4411 -2.27982 53.00454
# Identify timestamp duplicates for each hedgehog
duplicates <- df %>%
  group_by(id) %>%
  filter(duplicated(timestamp) | duplicated(timestamp, fromLast = TRUE))

# Count duplicates per id
duplicates_count <- duplicates %>%
  count(id)

print(duplicates_count)
# A tibble: 0 × 2
# Groups:   id [0]
# ℹ 2 variables: id <chr>, n <int>

To help identify outliers, let's look at the data. Remember we are choosing the appropriate visual to check our data - as this is movement data, it makes sense to look at it as a map.

# basic plotting

id_colour <- factor (df$id) # hedgehog ID made factor so we can set up a colour for each level of the variable id

plot(df$utm_x, df$utm_y, # simple plot 
     col=id_colour)      # different colour by hedgehog id

This suggests a group of outliers where df$utm_x is less than 547000 - let's remove them:

# let's remove all locations with utm_x < 547000
# this needs to be done using a data frame object
df <- df[df$utm_x > 547000, ]

# confirm this has worked (recompute the colour factor so it matches the filtered data)
plot(df$utm_x, df$utm_y,
     col = factor(df$id))

Now we can start to look at our data to help answer any questions:

# variables of interest 
  # id (unique hedgehog identifier)
  # sex (F for female, M for male)
  # behaviour (R for rehabilitated, W for wild)
  # timestamp (tracking date and time)

# Convert timestamp to Date (ignoring time)
df$date <- as.Date(df$timestamp)

# Summarize the data by sex and behaviour
summary_stats <- df %>%
  group_by(sex, behaviour) %>% # group hedgehogs by their sex and behaviour
  # For each hedgehog, calculate the number of days tracked and locations
  summarise(
    num_hedgehogs = n_distinct(id),  # Number of unique hedgehogs
    total_days_tracked = n_distinct(date),  # Total number of days tracked (unique dates)
    avg_days_tracked = round(mean(
      sapply(id, function(x) length(unique(df[df$id == x,]$date)))
    ), 2),  # Average number of days tracked (per hedgehog), rounded to 2 decimal points
    min_days_tracked = min(
      sapply(id, function(x) length(unique(df[df$id == x,]$date)))
    ),  # Minimum days tracked (per hedgehog)
    max_days_tracked = max(
      sapply(id, function(x) length(unique(df[df$id == x,]$date)))
    ),  # Maximum days tracked (per hedgehog)
    total_locations = n_distinct(paste(utm_x, utm_y)),  # Total number of unique locations
    avg_locations = round(mean(
      sapply(id, function(x) length(unique(paste(df[df$id == x,]$utm_x, df[df$id == x,]$utm_y))))
    ), 2),  # Average number of locations (per hedgehog), rounded to 2 decimal points
    min_locations = min(
      sapply(id, function(x) length(unique(paste(df[df$id == x,]$utm_x, df[df$id == x,]$utm_y))))
    ),  # Minimum locations (per hedgehog)
    max_locations = max(
      sapply(id, function(x) length(unique(paste(df[df$id == x,]$utm_x, df[df$id == x,]$utm_y))))
    )   # Maximum locations (per hedgehog)
  ) %>%
  ungroup()
`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.
# Create a nice table using kableExtra
summary_table <- summary_stats %>%
  kable("html", caption = "Summary of Hedgehog Tracking Data by Sex and Behaviour") %>%
  kable_styling(
     bootstrap_options = c("striped", "hover", "condensed"), # alternative row colours, highlights rows when mouse hovers and reduction of row height to make the table more compact
    full_width = FALSE, # keeps the table width narrow
    position = "center" # position of the table on the page
  ) %>%
  column_spec(1:2, bold = TRUE) %>%  # Make the first two columns bold
  column_spec(3:10)  # the data columns (no additional styling applied here)

# Print the table
summary_table
Summary of Hedgehog Tracking Data by Sex and Behaviour
sex behaviour num_hedgehogs total_days_tracked avg_days_tracked min_days_tracked max_days_tracked total_locations avg_locations min_locations max_locations
F R 13 38 8.21 4 11 2124 198.17 32 334
F W 9 36 7.87 7 9 2069 239.17 144 310
M R 8 34 7.92 6 11 1416 194.72 51 256
M W 3 11 5.41 1 8 100 43.77 16 59
Code Explanation

Explanation of the code used to calculate the Average Days Tracked:

This part calculates the average number of days each hedgehog (id) was tracked, based on the unique dates when GPS locations were recorded.

avg_days_tracked = round(mean(
  sapply(id, function(x) length(unique(df[df$id == x, ]$date)))
), 2)

Steps:

df[df$id == x,]: For each hedgehog (id == x), we filter the data frame df to get all rows corresponding to that hedgehog.

df[df$id == x, ]$date: Extracts the date column for the specific hedgehog.

unique(…): Finds all unique dates for the specific hedgehog. This ensures that repeated detections on the same day are not counted multiple times.

length(…): Counts the number of unique days that a particular hedgehog was tracked.

sapply(id, function(x) …): The sapply function applies this calculation for each unique id (hedgehog) in the dataset.

mean(…): The mean() function calculates the average number of days tracked across all hedgehogs.

round(…, 2): Finally, the result is rounded to 2 decimal points for easier interpretation.
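As a cross-check, the same per-hedgehog calculation can be written with dplyr alone. This is a sketch, not part of the original workflow; it should give the same average days tracked per sex/behaviour group.

# Sketch: compute days tracked per hedgehog first, then average by group
days_per_hog <- df %>%
  group_by(id, sex, behaviour) %>%
  summarise(days_tracked = n_distinct(date), .groups = "drop")

days_per_hog %>%
  group_by(sex, behaviour) %>%
  summarise(avg_days_tracked = round(mean(days_tracked), 2), .groups = "drop")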

Let's map it!

# Ensure df$id has categorical values (factors or character) for color assignment
palette <- colorFactor(palette = "Set1", domain = df$id)

# Create the leaflet map
m <- leaflet(df) %>%             # Create leaflet object using the dataframe
  addTiles() %>%                 # Add the default basemap
  addCircleMarkers(              # Add circles for stations
    lng = df$long, lat = df$lat,  # Ensure these are the correct columns
    color = ~palette(id)          # Apply the color factor based on 'id'
  )
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
# Show the map
m
Base Maps

You can change the base map and use some satellite imagery.

# Ensure df$id has categorical values (factors or character) for color assignment
palette <- colorFactor(palette = "Set1", domain = df$id)

# Create the leaflet map
m <- leaflet(df) %>%     # Create leaflet object using the dataframe  
  addProviderTiles(providers$Esri.WorldImagery) %>% # Add Esri World imagery
  addCircleMarkers(              # Add circles for stations
    lng = df$long, lat = df$lat,  # Ensure these are the correct columns
    color = ~palette(id)          # Apply the color factor based on 'id'
  )
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
# Show the map
m

Home Range Estimation

Minimum Convex Polygon

This is the simplest delineation of a home range, obtained by creating the polygon of minimum area around a certain percentage of relocation points obtained by telemetry. Due to its simplicity, it has been widely used in ecology.
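Before building the full workflow below, here is a minimal conceptual sketch of the idea using base R's chull() on simulated points; mcp() additionally discards a given percentage of the most outlying locations before drawing the hull.

# Conceptual sketch only: convex hull around simulated points with base R
set.seed(42)
xy <- cbind(x = rnorm(50), y = rnorm(50))  # fake relocations
hull_idx <- chull(xy)                      # indices of the points on the hull
plot(xy, asp = 1, main = "Convex hull around simulated points")
polygon(xy[hull_idx, ], border = "blue")   # the 100% minimum convex polygon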

Creating Spatial Objects (SF)

Spatial objects integrate both attribute data (e.g., animal id, timestamps) and spatial geometry (e.g., coordinates, polygons) into a single structure. The SF format ensures the data is in a standardised form that is compatible with spatial analysis tools and GIS platforms. Many R packages and functions for home range estimation, such as adehabitatHR and visualization tools like ggplot2, require spatial objects as input. The SF format makes the data compatible with these tools, streamlining workflows.

# Create a Simple Features (SF) object where each row in the data represents a point
GPS_sf <- st_sf(
  IND = df$id,  # The id column as an attribute
  geometry = st_sfc(lapply(1:nrow(df), function(i) st_point(c(df$utm_x[i],    df$utm_y[i]))))  # Create points for each row.  
  )

# Set the CRS (Coordinate Reference System) to UTM Zone 30N (EPSG: 32630)
GPS_sf <- st_set_crs(GPS_sf, 32630)

For the first part of this function:

st_sf(): This function creates a Simple Features object, which is a standard format used to represent spatial data (points, lines, polygons, etc.) in R. It’s part of the sf package.

The first argument inside st_sf() defines the attribute (non-spatial) data you want to include in the SF object. In this case, IND = df$id assigns the id column from your df dataset to a new attribute named IND in the GPS_sf object.

The geometry argument specifies the spatial component (coordinates) of the SF object.

lapply(1:nrow(df), function(i) st_point(c(df$utm_x[i], df$utm_y[i]))):

lapply() iterates over each row of your dataset (df).

For each row i, it creates a point using st_point(). This function takes a vector of two coordinates (x and y) and creates a point geometry for that location.

df$utm_x[i] and df$utm_y[i] are the UTM coordinates for each point in your dataset. The i indicates that the function applies to each row of the dataset.

The result is a list of points, one for each row in your dataset.

This creates a Simple Features object where each row in the df dataset corresponds to a point (geometry) in the GPS_sf object.

The next part:

st_set_crs(): This function assigns a Coordinate Reference System (CRS) to the SF object.

EPSG: 32630: This is the code for the UTM (Universal Transverse Mercator) coordinate system, Zone 30N, which covers the northern hemisphere between 6°W and 0° longitude (including most of England!).

By setting the CRS to EPSG: 32630, you’re telling R that your point coordinates are in this specific projection system (UTM Zone 30N).
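A quick way to confirm the CRS has been applied as intended (a one-line check, using sf):

# check the EPSG code stored in the sf object (should return 32630)
st_crs(GPS_sf)$epsg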

Generate the Minimum Convex Polygon

# Convert SF object to an SP object
GPS_sp <- as(GPS_sf, "Spatial")
    # this is because the package adehabitatHR uses an older spatial library (sp)

# Calculate MCP for each individual (id) using the adehabitatHR function mcp()
mcp_result <- mcp(GPS_sp, percent = 95)  # For 95% MCP

# Convert the result into a Simple Features (SF) object
mcp_sf <- st_as_sf(mcp_result)

We have now created the object, so we need to plot it:

# easiest plot
ggplot() +
  geom_sf(data = mcp_sf,  # MCP polygons
          aes(fill = id), # colour by hedgehog
          alpha = 0.5) +  # transparency
  geom_sf(data = GPS_sf,  # locations
          aes(color = IND), # colour by hedgehog
          show.legend = FALSE) +   # not showing legend for locations
  labs(fill = "Hedgehog") + #legend title
 theme_minimal() +
  theme(
    legend.position = "bottom",          # Position the legend at the bottom
    legend.key.size = unit(0.4, "cm"),   # Adjust the size of the legend keys
    legend.text = element_text(size = 6), # Adjust the size of the  text
    legend.title = element_text(size = 8) # Adjust the size of the  title
  )

We can also plot this using the tmap package (an alternative to leaflet).

# plotting the MCPs using tmap package

# Create the map with all categories displayed
tm_shape(mcp_sf) + 
  tm_tiles("OpenStreetMap") +  # Add a basemap
  tm_borders(lwd = 2) +  # Add borders for MCP polygons
  tm_fill(
    col = "id",  # Color polygons by 'id'
    palette = "Set2",  # Use a palette (consider changing if needed)
    alpha = 0.5,
    legend.show = TRUE,  # Ensure legend is shown
    grouping = FALSE  # Prevent combining categories
  ) + 
  tm_scale_bar(position = c("left", "bottom")) +  # Add a scale bar
  tm_layout(
    legend.outside = TRUE,  # Place legend outside
    main.title = "Minimum Convex Polygons (MCPs) Map",
    main.title.size = 1.5,
    legend.text.size = 0.8,  # Adjust text size for clarity
    legend.title.size = 1.0  # Adjust legend title size
  )
── tmap v3 code detected ───────────────────────────────────────────────────────
[v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
= tm_scale(<HERE>).
[v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
(instead of 'col'), and 'col' for the outlines (instead of 'border.col').
[v3->v4] `tm_polygons()`: use `fill_alpha` instead of `alpha`.
[tm_polygons()] Argument `grouping` unknown.
! `tm_scale_bar()` is deprecated. Please use `tm_scalebar()` instead.
[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
[cols4all] color palettes: use palettes from the R package cols4all. Run
`cols4all::c4a_gui()` to explore them. The old palette name "Set2" is named
"brewer.set2"
Multiple palettes called "set2" found: "brewer.set2", "hcl.set2". The first one, "brewer.set2", is returned.
Warning: Number of levels of the variable assigned to the aesthetic "fill" of
the layer "polygons" is 33, which is larger than n.max (which is 30), so levels
are combined.
[plot mode] fit legend/component: Some legend items or map compoments do not
fit well, and are therefore rescaled.
ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

I had a few goes at this, but I could not increase the number of categories beyond 30 - for this dataset, tmap is not a suitable alternative to leaflet.

Now we have the MCP, we can calculate the area for each individual; in this example we have asked for a 95% estimate:

  mcp_area_95 <- mcp.area(GPS_sp,     # spatial object
                        percent = 95, # percentage of locations
                        unin = c("m"), # input is in m (UTM)
                        unout = c("ha"), # output in hectares
                        plotit = FALSE) # we want the numbers, not a plot
  print (mcp_area_95)
       Alan  Anne 21   Anne 22    Bessie    Betsy Carolina  Crunchy    Diana
95 1.656166 0.129607 0.9191941 0.2849418 2.988091 2.388883 4.853271 2.347311
    Dorothy      Erin  Frannie   George  Hedgey   Hettie     Jean      Jeff
95 0.416141 0.6820773 0.296092 2.733784 1.28539 4.265623 5.525065 0.4321219
        Joe Loggerhead    Monty Moonface 2022 Mr Whatshisname   Pumpkin
95 96.56358  0.1619925 7.424848      5.128733        4.285415 0.6924634
       Remy     Rosie Saucepan Screamer   Shelia    Silkie  Smiggle    Stone
95 1.953729 0.6090874 0.366028 12.90894 4.966871 0.2799846 1.247345 1.419408
     Timmy  Twitch     Vera
95 4.02342 2.21729 4.637315

We can examine how the home range estimate varies with the percentage of data used for an individual. This is helpful as it can tell us how much of our data is needed for an accurate assessment of home range size.

GPS_sp.alan <- GPS_sp[GPS_sp$IND == "Alan", ] # subset the data for Alan

mcp.area(GPS_sp.alan, 
         percent = seq(20,100, by = 5), # from 20 to 100% in 5% increments
         unin = c("m"), # specify that you have entered the data in m
         unout = c("ha"), #specify that you want the output in ha
         plotit = TRUE) # we want to plot it

         Alan
20  0.2024922
25  0.2024922
30  0.4495936
35  0.4579919
40  0.5477975
45  0.5477975
50  0.5633997
55  0.5664978
60  0.5760531
65  0.5760531
70  0.6999771
75  1.1331053
80  1.1793516
85  1.1793516
90  1.4767719
95  1.6561663
100 2.4299672

We can see for the hedgehog Alan that, even with all the data included, the home range size is still not stabilising. It may be that we need more data to be able to estimate Alan’s home range accurately.

Saving the MCPs for Spatial Analysis

# save a shapefile containing all MCPs in the working folder

# Directory to save shapefiles
shapefile_dir <- "HogsMCPoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)
st_write(mcp_sf,
         file.path(shapefile_dir, paste0("MCP95.shp")),
    delete_layer = TRUE,
    quiet = TRUE)

Kernel Density Estimation (KDE): Utilisation Distribution (UD)

The concept of using kernel density estimators to define utilisation distributions (i.e. a probabilistic representation of how an animal uses its habitat within a given spatial area over a specific period) was introduced by Worton in 1989 (B. J. Worton 1989). This is a more complex approach than simply delimiting the borders of an area using the relocations of animals (e.g. MCP).

In KDE, a kernel distribution (i.e. a three-dimensional hill or kernel) is placed on each telemetry location. The height of the hill is determined by the bandwidth of the distribution, and many distributions and methods are available (e.g. fixed versus adaptive, univariate versus bivariate bandwidth). The choice of the bandwidth introduces a clear element of bias in this method.

  • Smaller bandwidth: Captures finer details but may overfit.

  • Larger bandwidth: Produces a smoother UD but may miss finer details.

The most common bandwidth selection methods used in ecology are Reference bandwidth selection (href), Least Squares Cross-Validation (LSCV) and Plug-In bandwidth selection:

  • href: Uses a rule-of-thumb formula based on the standard deviation of the data and assumes data are normally distributed.

  • LSCV: Selects the bandwidth by minimizing the error between the estimated and actual density at the data points, using a cross-validation approach.

  • Plug-In: It works by estimating the second derivative (curvature) of the true density, which tells how much the density changes. This information is then “plugged in” to calculate a bandwidth that balances smoothness and detail in the density estimate. It’s faster than cross-validation and works well when the data has a relatively simple structure.

Cross Validation Explanation

Cross-validation is a statistical method used to evaluate and optimize models by splitting data into training and validation subsets.

The model is trained on one subset and tested on another. This process is repeated across multiple splits to assess performance.

Cross-validation helps identify the best parameters or model configuration by minimizing prediction error and avoiding overfitting.
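To make the idea concrete, here is a minimal sketch of leave-one-out cross-validation for a one-dimensional kernel bandwidth, using simulated data and base R only. Note that this scores candidate bandwidths with a cross-validated likelihood rather than the least-squares criterion adehabitatHR uses for LSCV, but the train/validate logic is the same.

# Sketch: leave-one-out cross-validation for a 1-D Gaussian kernel bandwidth
set.seed(1)
x <- rnorm(100)  # simulated data, for illustration only

# cross-validated log-likelihood of a candidate bandwidth h
loocv_score <- function(h, x) {
  n <- length(x)
  sum(sapply(seq_len(n), function(i) {
    # density at x[i], estimated from all points except x[i]
    log(mean(dnorm(x[i], mean = x[-i], sd = h)))
  }))
}

# evaluate a grid of candidate bandwidths and keep the best
h_grid <- seq(0.05, 1.5, by = 0.05)
scores <- sapply(h_grid, loocv_score, x = x)
h_grid[which.max(scores)]  # bandwidth with the highest cross-validated score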

Bandwidth Method   Advantages                                       Disadvantages
href               Easy, fast, and works for simple distributions   Oversmooths complex patterns (Seaman et al. 1999)
LSCV               Data-driven, flexible, and accurate              Computationally expensive, undersmoothing risk
Plug-In            Balanced smoothing, handles various datasets     Computationally demanding, may oversmooth complex areas

KDE (href)

Estimation of UD by individual:

# if we have not created the spatial object containing the coordinates of the locations with the hedgehog identities, we will need to run the following 3 steps:
# 1) Create a Simple Features (SF) object where each row in the data represents a point
#GPS_sf <- st_sf(
#  IND = df$id,  # The id column as an attribute
#  geometry = st_sfc(lapply(1:nrow(df), function(i) st_point(c(df$utm_x[i], #   df$utm_y[i]))))  # Create points for each row.  
 # )
# 2) Set the CRS (Coordinate Reference System) to UTM Zone 30N (EPSG: 32630)
# GPS_sf <- st_set_crs(GPS_sf, 32630)
# 3) Convert SF object to an SP object
# GPS_sp <- as(GPS_sf, "Spatial")

#We can skip this as we did this earlier

# now we calculate the UD for ALL hedgehogs 
allUD <- kernelUD(subset(GPS_sp, select=IND), # to produce a KDE for each id
                  h="href", # href bandwidth method
                  grid=150, # grid resolution  for the estimation of UD
                  same4all=FALSE) # for each id a different href is used 
# a 150x150 grid will result in 22,500 cells. 
# Higher resolution provides more detailed density but is more computationally intensive.
# Each of these cells contains a value of the estimated density for that area.
image(allUD)

The above plots show different shapes of UDs, from a very narrow and vertical UD (e.g. Jeff) to a long horizontal UD (e.g. Monty). They also show how some hedgehogs seem to have multiple centers of activity (e.g. Screamer).

This is a helpful visual display, but we want to compare the sizes, so we need to calculate the area of these ranges. Here, we use the function getverticeshr to convert the continuous UD into a discrete polygon representing a specific threshold, typically the home range for a given percentage of utilisation. For example, for a 95% HR, the function draws a polygon around the area that covers 95% of the total UD area.

# for Alan

# Subset allUD to only include "Alan" by indexing the list
allUDAlan <- allUD[["Alan"]]

# Calculate the home range for Alan with a 95% contour - removing outliers
allUDHrefiso95Alan <- getverticeshr(allUDAlan, percent = 95)
# Calculate the home range for Alan with a 90% contour - most of HR
allUDHrefiso90Alan <- getverticeshr(allUDAlan, percent = 90)
# Calculate the home range for Alan with a 50% contour - core area
allUDHrefiso50Alan <- getverticeshr(allUDAlan, percent = 50)

# Plot all the contours overlapping
ggplot() +
  # Plot the 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso95Alan), 
          fill = "blue", 
          alpha = 0.3) + # transparency level 0.30 opaque, 0.7 transparent
  # Plot the 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Alan), fill = "red", 
          alpha = 0.3) +
  # Plot the 50% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Alan), 
          fill = "green", 
          alpha = 0.3) +
  theme_minimal() +
  labs(title = "Home Range Contours for Alan",
       fill = "Contour Percent")

# Subset allUD to only include "Screamer" by indexing the list
allUDScreamer <- allUD[["Screamer"]]

# Calculate the home range for Screamer with different contours
allUDHrefiso95Screamer <- getverticeshr(allUDScreamer, percent = 95)
allUDHrefiso90Screamer <- getverticeshr(allUDScreamer, percent = 90)
allUDHrefiso50Screamer <- getverticeshr(allUDScreamer, percent = 50)

# Plot all the contours overlapping for Screamer
ggplot() +
  # Plot the 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso95Screamer), 
          fill = "blue", 
          alpha = 0.3) +
  # Plot the 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Screamer), 
          fill = "red", 
          alpha = 0.3) +
  # Plot the 50% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Screamer), 
          fill = "green", 
          alpha = 0.3) +
  theme_minimal() +
  labs(title = "Home Range Contours for Screamer",
       fill = "Contour Percent")

These show a difference in the space used by Screamer and Alan - this is good!

If we want the areas for all hogs, we can extract the 95% contours for every individual in one go. I have disabled the plot from running as it would not render!

allUDHref95 <- getverticeshr(allUD, 
                                percent = 95)

# Convert to Simple Features (SF) object for plotting
allUDHref95_sf <- st_as_sf(allUDHref95)
# Plot with different fill colors based on hedgehog ID

ggplot() +
 geom_sf(data = allUDHref95_sf, 
         aes(fill = id), 
         alpha = 0.5) +
 scale_fill_viridis_d() +  # Using a color scale for discrete values
 theme_minimal() +
 labs(title = "95% Home Range Contour by Hedgehog ID", 
      fill = "Hedgehog ID") +
 theme(
   legend.position = "bottom",          # Position the legend at the bottom
   legend.key.size = unit(0.4, "cm"),  # Adjust the size of the legend keys
   legend.text = element_text(size = 6), # Adjust the size of the  text
   legend.title = element_text(size = 8) # Adjust the size of the  title
)

And then to calculate the areas:

as.data.frame(allUDHref95)
                             id        area
Alan                       Alan  10.8816742
Anne 21                 Anne 21   0.2387152
Anne 22                 Anne 22   1.1429249
Bessie                   Bessie   0.4678121
Betsy                     Betsy   4.4207416
Carolina               Carolina   2.8500652
Crunchy                 Crunchy   9.7106681
Diana                     Diana   2.6482701
Dorothy                 Dorothy   0.7216981
Erin                       Erin   0.9812055
Frannie                 Frannie   0.4671336
George                   George   4.5300362
Hedgey                   Hedgey   2.9363113
Hettie                   Hettie   5.6547323
Jean                       Jean   9.6023052
Jeff                       Jeff   1.0346386
Joe                         Joe 154.5607442
Loggerhead           Loggerhead   0.5481730
Monty                     Monty  16.3311304
Moonface 2022     Moonface 2022  21.1008450
Mr Whatshisname Mr Whatshisname   6.0410639
Pumpkin                 Pumpkin   1.0647545
Remy                       Remy   2.7423964
Rosie                     Rosie   0.9276362
Saucepan               Saucepan   0.6496031
Screamer               Screamer  35.5762332
Shelia                   Shelia   6.6063573
Silkie                   Silkie   0.4829821
Smiggle                 Smiggle   2.0679313
Stone                     Stone   1.6419157
Timmy                     Timmy   7.6381287
Twitch                   Twitch   2.9055795
Vera                       Vera   6.2948385

To save these for QGIS etc use:

# Directory to save shapefiles
shapefile_dir <- "hogsKDEoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)
st_write(allUDHref95_sf,
         file.path(shapefile_dir, paste0("KDE_href_95.shp")),
    delete_layer = TRUE,
    quiet = TRUE)

Least Squares Cross-Validation (LSCV)

Create all UDs using the LSCV method:

# now we calculate the UD for ALL hedgehogs 
allUD_LSCV <- kernelUD(subset(GPS_sp, select=IND), # to produce a KDE for each id
                  h="LSCV", # LSCV bandwidth method
                  grid=150, # grid resolution for the estimation of UD
                  same4all=FALSE) # for each id a different bandwidth is used
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge 
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge 
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge 
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge 
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge 
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge 
within the specified range of hlim: try to increase it
image(allUD_LSCV)

Errors

The following warning appeared after running the above chunk:

Warning: The algorithm did not converge within the specified range of hlim: try to increase it

This might be due to different reasons:

Sparse or Unevenly Distributed Data

If the data for some individuals (IND) is sparse, unevenly distributed, or clustered, the LSCV algorithm may fail to find an optimal bandwidth.

Range of hlim Too Narrow

The default range for hlim (the range of bandwidth values searched) might not be suitable for your dataset. If the optimal bandwidth lies outside this range, convergence will not be achieved (see the sketch after this list).

Small Number of Points: THIS IS THE LIKELY REASON IN THIS CASE

LSCV requires a sufficient number of data points to calculate a reliable bandwidth. If some individuals have very few observations, the algorithm may struggle.

Highly Clustered Locations:

If locations are highly clustered, the kernel density estimate might become unstable, causing issues in bandwidth estimation.
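If the narrow hlim range were the cause, one option would be to widen the search range via kernelUD()'s hlim argument, which is expressed as multipliers of href. This is a sketch only (not run here) and the values are illustrative assumptions.

# Sketch only: widen the LSCV search range (hlim values are illustrative)
allUD_LSCV_wide <- kernelUD(subset(GPS_sp, select = IND),
                            h = "LSCV",
                            hlim = c(0.1, 5),  # wider multiplier range than the default
                            grid = 150,
                            same4all = FALSE)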

Similar to the href method used before, we can plot the contours by percentage for each individual hog:

# for Alan

# Subset allUD to only include "Alan" by indexing the list
allUDAlan <- allUD_LSCV[["Alan"]]

# Calculate the home range for Alan with a 95% contour - removing outliers
allUDHrefiso95Alan <- getverticeshr(allUDAlan, percent = 95)
# Calculate the home range for Alan with a 90% contour - most of HR
allUDHrefiso90Alan <- getverticeshr(allUDAlan, percent = 90)
# Calculate the home range for Alan with a 50% contour - core area
allUDHrefiso50Alan <- getverticeshr(allUDAlan, percent = 50)

# Plot all the contours overlapping
ggplot() +
  # Plot the 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso95Alan), 
          fill = "blue", 
          alpha = 0.3) + # transparency level 0.30 opaque, 0.7 transparent
  # Plot the 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Alan), fill = "red", 
          alpha = 0.3) +
  # Plot the 50% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Alan), 
          fill = "green", 
          alpha = 0.3) +
  theme_minimal() +
  labs(title = "Home Range Contours for Alan",
       fill = "Contour Percent")

And for Screamer:

# for Screamer

# Subset allUD to only include "Screamer" by indexing the list
allUDScreamer <- allUD_LSCV[["Screamer"]]

# Calculate the home range for Screamer with different contours
allUDHrefiso95Screamer <- getverticeshr(allUDScreamer, percent = 95)
allUDHrefiso90Screamer <- getverticeshr(allUDScreamer, percent = 90)
allUDHrefiso50Screamer <- getverticeshr(allUDScreamer, percent = 50)

# Plot all the contours overlapping for Screamer
ggplot() +
  # Plot the 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso95Screamer), 
          fill = "blue", 
          alpha = 0.3) +
  # Plot the 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Screamer), 
          fill = "red", 
          alpha = 0.3) +
  # Plot the 50% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Screamer), 
          fill = "green", 
          alpha = 0.3) +
  theme_minimal() +
  labs(title = "Home Range Contours for Screamer",
       fill = "Contour Percent")

This analysis is suffering from undersmoothing, which is a risk associated with the LSCV smoothing method.

This is the end of this tutorial so I am clearing all from the current session:

rm(list=ls())

Home Ranges and Utilisation Distributions: dynamic Brownian Bridge Movement Model and Autocorrelated Kernel Density Estimates

With the widespread use of GPS technology to track animals in near real-time, estimators of home range and movement have been developed to deal with more intense and highly autocorrelated data. Traditional point-based estimators (e.g. MCP and KDE, explored above) only incorporate the density of locations into home range estimation.

Newer methodologies to estimate the spatial use of animals take advantage of the temporal structure and movement paths inherent in high-density GPS data. These methods, such as autocorrelated kernel density estimation (AKDE) and dynamic Brownian bridge movement models (dBBMM), explicitly account for the autocorrelation between consecutive locations. By incorporating information about the time and sequence of animal movements, these approaches provide more accurate and ecologically meaningful representations of spatial use, capturing the underlying movement processes more effectively than traditional point-based estimators.

Data Call and Preparation

# import dataset
dat <- read.csv("WD_hedgehogs2022.csv",
               header = TRUE)

# select only the relevant variables

df <- dat %>%
 dplyr::select ( #select following variables
    eventid = event.id,
    id = individual.local.identifier,
    sex = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x = utm.easting,
    utm_y = utm.northing,
    long = location.long,
    lat = location.lat
    )
# check structure
str(df)
'data.frame':   5899 obs. of  9 variables:
 $ eventid  : num  2.38e+10 2.38e+10 2.38e+10 2.38e+10 2.38e+10 ...
 $ id       : chr  "Anne 21" "Anne 21" "Anne 21" "Anne 21" ...
 $ sex      : chr  "F" "F" "F" "F" ...
 $ behaviour: chr  "R" "R" "R" "R" ...
 $ timestamp: chr  "2021-05-01 19:50:09.000" "2021-05-01 20:40:10.000" "2021-05-01 21:00:11.000" "2021-05-01 21:10:06.000" ...
 $ utm_x    : num  549255 549257 549257 549264 549272 ...
 $ utm_y    : num  5872578 5872581 5872569 5872579 5872569 ...
 $ long     : num  -2.27 -2.27 -2.27 -2.27 -2.27 ...
 $ lat      : num  53 53 53 53 53 ...
# Remove rows with any missing data
clean_data <- df %>%
  drop_na()

# Ensure `id` is a character vector
df$id <- as.character(df$id)

# make sure the date/time is in POSIXct format 
df$timestamp <- as.POSIXct(df$timestamp, format="%Y-%m-%d %H:%M:%S", tz="UTC")

# Ensure data in date order
df <- df[order(df$timestamp), ]

# Check for duplicates in the 'id' column and 'timestamp'
duplicates <- df[duplicated(df[, c("id", "timestamp")]), ]
duplicates
[1] eventid   id        sex       behaviour timestamp utm_x     utm_y    
[8] long      lat      
<0 rows> (or 0-length row.names)

We are using the same dataset as above and have already performed the basic data exploration, so we will not be repeating it here.

Spatial Use - Utilisation Distribution

Unlike the previous home range estimations, here we are calculating utilisation distributions, which represent a more realistic and dynamic approach to understanding the spatial use of animals. They can do this because GPS and similar tracking devices produce Lagrangian movement data (following the individual's path through time), whereas the methods above treated the locations in a Eulerian way (as points in space).

Dynamic Brownian Bridge Movement Models (dBBMM)

Methods such as the Brownian Bridge Movement Model (BBMM) quantify the utilisation distribution of an animal based on its movement path rather than individual points, and account for temporal autocorrelation and high data volumes. However, BBMM assumes homogeneous movement behaviour across all data, which is not a realistic view of the complex ecology of some species. This is especially true for species that show seasonal changes in their movement patterns and habitat use.

The dynamic Brownian Bridge Movement Model (dBBMM) allows for changes in behaviour, using likelihood statistics to determine change points along the animal’s movement path; it is also able to work with tracks that are not sampled regularly (due, for example, to failed fixes or dynamic sampling schedules) (Kranstauber et al. 2012).

The dBBMM is a method to calculate the occurrence distribution of a given track, i.e. it estimates where the animal was located during the observed (tracking) period. It calculates the probability landscape for the transition between any two known consecutive locations given the amount of time it had available (assuming a conditional random (Brownian) motion between locations). It is “dynamic” because it allows the variance to change along the trajectory, using the behavioral change point analysis in a sliding window.

# Define parameters for the dBBMM analysis.
set_grid.ext <- 10          # Grid extension for spatial analysis
set_dimsize <- 600         # Dimension size for the grid
ws <- 15                    # Window size for smoothing
mrg <- 5                    # Margin size for dynamic calculations
settings <- paste0("mrg", mrg, "_ws", ws) # Create a string to represent settings
gps.acc <- 10               # Average GPS accuracy in meters

# Create directories to store outputs if they don't exist
dir.create("dBBMMfigures", showWarnings = FALSE)
dir.create("dBBMMshapefiles", showWarnings = FALSE)

# Initialize lists to store results across individuals
iter <- 0
data.var.list <- vector(mode = "list", length = length(unique(df$id)))
all.area.list <- vector(mode = "list", length = length(unique(df$id)))
dbbmm.list <- vector(mode = "list", length = length(unique(df$id)))
Code Explanation

dBBM Parameters explanation:

Grid Extension (set_grid.ext):

This determines how far beyond the furthest point your grid will extend. If your area is small, say under 5 km², reducing this value will ensure you’re not unnecessarily adding a large buffer around your points.

  • Start by examining the range of your animal’s movement (e.g., the spatial extent of GPS points in UTM coordinates).

  • Choose a value that ensures the area of interest (e.g., home range or activity space) is fully covered.

  • Test values empirically, e.g., 5–10 units larger than the maximum observed range, to ensure no truncation of important movement data.

Grid Size (set_dimsize):

This sets the resolution of your grid. A smaller study area will generally require a finer grid resolution, so setting this too large might result in unnecessary computation or inefficient processing.

  • The resolution should balance computational efficiency and detail. Higher values create finer grids but increase computation time.

  • Use a value that corresponds to the precision of your GPS data (e.g., if locations are accurate to 10 meters, ensure grid cells are smaller than this).

  • Typical starting points:

    • For high-accuracy data (e.g., <10 m), try 500–1000.

    • For lower-accuracy data, try 100–500.

Our study area is roughly 2,000 x 2,000 meters. However, the actual area used by individual hedgehogs is no bigger than about 750 by 750 meters. By using set_grid.ext = 10 we extended the analysis area by 10 meters on each side.

Based on this, set_dimsize = 600 means that we have divided the analysis area into 600 x 600 cells, each typically smaller than 1.5 m across.
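A quick back-of-envelope check (a sketch) of the cell size this implies, using the observed coordinate range for one hedgehog; the true raster extent will be slightly larger because of the grid extension, and any remaining outlying locations would inflate the range.

# Rough cell-size check for one individual (Alan, as an example)
rng_x <- diff(range(df$utm_x[df$id == "Alan"]))  # east-west extent in meters
rng_y <- diff(range(df$utm_y[df$id == "Alan"]))  # north-south extent in meters
max(rng_x, rng_y) / set_dimsize                  # approximate cell size in meters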

ws <- 15 (Window Size for Smoothing):

The ws parameter defines the size of the moving window used in the dynamic Brownian Bridge Movement Model (dBBMM) analysis. This window determines the range of locations considered when calculating the dynamic behavior of the animal’s movement. A larger window size results in smoother estimates of movement, while a smaller window captures finer details.

  • Consider the frequency and regularity of your GPS data:

    • For frequent data (e.g., locations every 1–5 minutes), a larger ws (e.g., 15–25) may be needed to smooth short-term variations.

    • For sparse data (e.g., locations every hour or day), use a smaller ws (e.g., 5–10) to capture finer-scale dynamics.

  • Adjust based on biological knowledge:

    • If the animal is known to move over larger distances consistently, a larger window might make sense.

    • Use smaller windows if you want to capture fine-scale movement details.

mrg <- 5 (Margin Size for Dynamic Calculations):

The mrg parameter specifies the margin size for the smoothing window. It controls the buffer around the moving window to handle edge effects during the calculations. The margin ensures that movement dynamics are accurately captured without losing information near the edges of the data.

  • Should typically be an odd number and smaller than ws (e.g., 1/3 or 1/4 of ws).

  • Larger margins can smooth over potential noise at the edges, but excessively large margins may dampen dynamic variability.

  • Test values empirically (e.g., 3–7) and evaluate how they influence the results.

The code below works correctly, but does not render, so I have disabled it from running here.

# Initialize a list to store the final area data
final_area_df <- data.frame()

# Loop Through Each Individual
# Analyze movement data for each individual in the dataset

for (indi in unique(df$id)) {
  
  # Subset data for the current individual and ensure unique timestamps
  data <- df %>% 
    filter(id == indi) %>% 
    distinct(timestamp, # duplicate rows where timestamp is the same
             .keep_all = TRUE) # retains all other columns
  iter <- iter + 1 # keeps track of the iteration number, which is associated with each hedgehog id (iter was initialised to 0 before the loop)
  
  # Convert the individual's data to a move object for spatial analysis
  move <- move(
    x = data$utm_x, 
    y = data$utm_y, 
    proj = "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs", 
    time = as.POSIXct(data$timestamp, tz = "UTC")
  )
  
  # Calculate dBBMM and Extract Motion Variance
  dbbmm <- brownian.bridge.dyn(
    object = move,            # move object
    location.error = gps.acc, # location error
    margin = mrg,             # margin
    window.size = ws,         # window size
    ext = set_grid.ext,       # grid extension
    dimSize = set_dimsize,    # resolution
    verbose = FALSE)           # if TRUE the function prints the progress of calculation...we don't need that
  
  data$var <- getMotionVariance(dbbmm) # Add motion variance to the dataset

  # Plot and Save Motion Variance Over Time
  ggplot(data) +
    geom_area(aes(x = timestamp, y = var), 
              alpha = 0.5, 
              fill = "black") +
    geom_line(aes(x = timestamp, y = var)) +
    theme_bw() +
    scale_x_datetime(
      labels = scales::date_format("%d-%b-%Y"),  # Format as day-month-year
      breaks = scales::date_breaks("1 day"))+     # Breaks every day
    labs(
      x = "Date", 
      y = expression(paste(sigma^2, " m")), # variation in movement (m)
      title = paste0(indi, " Motion Variance")
    ) +
    theme(
      axis.text.x = element_text(angle = 45, 
                                 hjust = 1, 
                                 size = 7),
      axis.title.y = element_text(angle = 90, 
                                  vjust = 0.5, 
                                  face = 2),
      axis.title.x = element_text(hjust = 0.5, 
                                  face = 2),
      plot.title = element_text(face = 4)
    )
# Ensure the directory exists
output_dir <- "hogdBBMMfigures"
if (!dir.exists(output_dir)) {
  dir.create(output_dir)
}
# Save the plot
ggsave(
  file = paste0(output_dir, "/MotionVariance_", indi, ".png"),
  width = 180, height = 100, dpi = 600, units = "mm"
)

  # Calculate and Store dBBMM Contours
  dbbmm.sp <- as(dbbmm, "SpatialPixelsDataFrame") # transforms the output from the dbbmm analyis in a spatial dataframe
  dbbmm.sp.ud <- new("estUD", dbbmm.sp) # creates the UD
  dbbmm.sp.ud@vol <- FALSE # volume of the UD not calculated as it is not needed right now
  dbbmm.sp.ud@h$meth <- "dBBMM" # UD is estimated using dBBMM
  dbbmm.ud <- getvolumeUD(dbbmm.sp.ud, standardize = TRUE) # now is the time to compute the volume under the UD. 
  
  # Initialize lists for storing contour and area data for each individual
  area.list <- vector(mode = "list", length = 3)
  j <- 0
  
  # Loop through confidence intervals (75%, 90%, 95%)
  for (con in c(75, 90, 95)) {
    j <- j + 1
    k <- which (c(75, 90, 95) == con)
    
    # Generate contours and convert to sf object
    poly <- getverticeshr(dbbmm.ud, percent = con)  # delimits the contour
    poly_sf <- st_as_sf(poly)
    
    # Save contour as shapefile
    if (!dir.exists("hogdBBMMshapefiles")) {
  dir.create("hogdBBMMshapefiles")
}

    invisible(suppressMessages({
  suppressWarnings({
    st_write(
      poly_sf,
      dsn = paste0("hogdBBMMshapefiles/", indi, "_dBBMM_", con, "_", settings, ".shp"),
      layer = paste0(indi, "_dBBMM_", con, "_", settings),
      delete_layer = TRUE
    )
  })
}))
    
        # Calculate contour area and store
    area <- as.numeric(sum(st_area(poly_sf)) / 10000) # Area in hectares
    area.list[[j]] <- data.frame("id" = indi, 
                                 "Contour" = con, # 75,90 or 95%
                                 "Area" = area)   # area in hectares
   }

   # Combine all contour and area data
   area.df <- dplyr::bind_rows(area.list)

   # Store individual results
   all.area.list[[iter]] <- area.df

  # Save results for the individual
  dbbmm.list[[iter]] <- dbbmm
  data.var.list[[iter]] <- data
  
  # Combine individual area results into a data frame
  area.df <- dplyr::bind_rows(area.list)
  
  # Reshape the data frame so each contour is a separate column
  area_wide <- reshape(area.df, 
                       idvar = "id", 
                       timevar = "Contour", 
                       direction = "wide",
                       v.names = "Area")
  
  # Clean column names to make them more readable
  colnames(area_wide) <- gsub("Area.", "", colnames(area_wide))
  colnames(area_wide) <- paste0("Contour_", colnames(area_wide))
  
  # Merge the reshaped table into the final table
  final_area_df <- rbind(final_area_df, area_wide)
}

We can then print and save this table:

# Display the final table, use:
print(final_area_df)
# Optionally, save the table to a CSV file
write.csv(final_area_df, "dBBMM_contour_areas.csv", row.names = FALSE)
Motion Variance

These motion variance graphs have been generated for all the hogs as part of the above code, but how do we interpret them?

X-axis (Date):

Represents the timestamps (data$timestamp) of the observations for the specific individual hedgehog (indi).

Y-axis (Motion Variance σ²):

Displays the motion variance values (data$var) estimated at each timestamp.

The black-filled area:

Shows the area under the curve, which emphasizes the magnitude of motion variance over time.

The line:

Represents the trend or fluctuation in motion variance (σ²) across time.

Peaks in the line suggest moments of higher movement variance, while troughs indicate periods of lower variance.

Interpreting the Motion Variance:

Motion Variance (σ²):

A higher variance suggests increased mobility or less predictable movement patterns during that time frame. A lower variance indicates more restricted or consistent movement patterns.

Trends:

If the variance fluctuates significantly over time, it may correspond to changes in the hedgehog’s activity (e.g., foraging vs. resting behavior) or external factors (e.g., weather, habitat structure).

Periods of consistent variance might indicate steady movement within a limited area.

The Motion Variance for Whatshisname seems to indicate that it was active during the 25th to 27th of April, but its level of activity dropped after possibly returning to its nest on the 27th.

The shapefile of the 75, 90 and 95% UD contours for Whatshisname, obtained by dBBMM seem to indicate different centers of activity and how the main activity of this hedgehog is limited to the Memorial Garden and the Walled garden.

This is the end of this tutorial so I am clearing all from the current session:

rm(list=ls())

Autocorrelated Kernel Density Estimation (AKDE)

Autocorrelated Kernel Density Estimation (AKDE) methods were designed to be statistically efficient while explicitly dealing with the complexities and biases of modern movement data, such as autocorrelation, small sample sizes, and missing or irregularly sampled data.

We will continue using the hedgehog dataset previously presented, which was downloaded from Movebank.

We will be using the ctmm package (Calabrese, Fleming, and Gurarie 2016) Reference manual.

Data call and prep:

# Load dataset 
dat <- read.csv("WD_hedgehogs2022.csv", header = TRUE)

# Select relevant variables and clean the data
df <- dat %>%
  dplyr::select(
    eventid = event.id,
    id = individual.local.identifier,
    sex = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x = utm.easting,
    utm_y = utm.northing,
    long = location.long,
    lat = location.lat
  ) %>%
  drop_na() %>%  # Remove rows with missing data
  mutate(
    id = as.character(id),  # Ensure `id` is a character vector
    timestamp = as.POSIXct(timestamp, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  ) %>%
  arrange(timestamp)  # Ensure timestamps are in order

# Check for duplicates
duplicates <- df[duplicated(df[, c("id", "timestamp")]), ]
if (nrow(duplicates) > 0) {
  warning("Duplicates found in the dataset. Consider addressing them before proceeding.")
}

Example for a single individual - Mr Whatshisname

Filtering the dataset to select a single individual

# select only Mr Whatshisname
what_df <- df %>%
  filter(id == "Mr Whatshisname")  # Replace with the desired hedgehog ID

Data formatting for this analysis:

# convert the dataset to a telemetry object
telemetry <- as.telemetry(what_df,
                          timeformat="auto",
                          timezone="UTC",
                          projection="+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs",
                          datum="WGS84")
Minimum sampling interval of 9.9 minutes in Mr Whatshisname
plot(telemetry)
DOP values missing. Assuming DOP=1.

In general, the as.telemetry() function will identify the columns and check they are correctly named, convert the projection if needed, and then output the minimum sampling interval for each individual in the dataset. In this example, Mr Whatshisname has a minimum sampling interval of 9.9 minutes (R console output). The plot shows the locations for this hedgehog as circles.

DOP

The message DOP values missing. Assuming DOP=1. appears because the telemetry object does not include Dilution of Precision (DOP) values.

DOP is typically used in GPS data to indicate the quality of location fixes, with lower values representing higher precision. If these values are not available in your dataset, the ctmm package assumes a default DOP value of 1 for all fixes.

This assumption is generally fine for exploratory analysis or for datasets where precision is expected to be uniform. Otherwise, you will need to supply DOP information alongside the locations (e.g. an HDOP column) so that as.telemetry can make use of it.

Selecting the movement model

It is necessary to choose a home range estimator that accounts for the autocorrelated structure of the data. We need to test what movement model may explain the autocorrelated structure of our tracking data.

We can run different movement processes with maximum likelihood (ML) or other parameter estimators, such as perturbative Hybrid REML (pHREML). The default is pHREML. In this case ML has been used because later on the model would not work for some hedgehogs when using pHREML.

# calculate fit guess object
  GUESS <- ctmm.guess(telemetry, interactive= FALSE) # produces a guess of the initial value of the maximum likelihood parameter
  
#Fit a movement model 
  model <- ctmm.fit(telemetry, GUESS,method = 'ML')
  
summary(model)
$name
[1] "OUF anisotropic"

$DOF
      mean       area  diffusion      speed 
 9.6149250 13.3290393 72.7624533  0.1378726 

$CI
                                  low       est         high
area (hectares)          4.445827e+00  8.274388 1.327238e+01
τ[position] (hours)      3.323986e+00  6.744769 1.368595e+01
τ[velocity] (seconds)    2.992233e-11 24.886290 2.069783e+13
speed (kilometers/day)   6.188969e-05 18.772321 5.555578e+01
diffusion (hectares/day) 2.458836e+00  3.138232 3.899248e+00

What this means: Within these summaries,

  • $name provides the selected best-fit model,

  • $DOF provides information on the degrees of freedom (where $DOF[“area”] corresponds to the effective sample size of the home-range area estimate)

    • Effective sample size (N): the number of range crossings that occurred during the observation period. It can be roughly estimated by dividing the duration of the tracking dataset by the home range crossing time, i.e. the time required for an animal to cross the linear extent of its home range (Fleming et al. 2019)
  • CI are the parameter outputs (area, position autocorrelation timescale, velocity autocorrelation timescale, and speed).
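The $CI block shown above is a matrix, so individual parameters can be pulled out programmatically. For example (a sketch, assuming the row label exactly as printed above):

# extract the home-range area row (low / estimate / high, in hectares)
summary(model)$CI["area (hectares)", ]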

Estimating the Home Range

Now we can fit this movement process into the akde() function, and estimate the home range of Mr Whatshisname. This function currently defaults to the area-corrected AKDE, or AKDEc (Silva et al. 2021).

# Calculate UD (AKDE object) - area-corrected AKDE
UD <- akde(telemetry, model, level.UD = 0.95)

# area of the 95% confidence interval
summary (UD)
$DOF
     area bandwidth 
 13.32904  14.49390 

$CI
                     low      est     high
area (hectares) 4.068557 7.572227 12.14609

attr(,"class")
[1] "area"

This output shows a home range for Mr Whatshisname of 7.57 ha (with 95% confidence intervals of 4.07 - 12.15 ha).

Plotting and saving AKDE of Home Range

# Plot data with AKDE 
EXT <- extent(UD, level = 0.95)

plot(telemetry,UD=UD, ext = EXT)
DOP values missing. Assuming DOP=1.

# saving the results as shapefiles  
  
# Directory to save shapefiles
shapefile_dir <- "AKDEoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)

# save the UD as shapefile
writeVector(UD, 
            file.path(shapefile_dir, 
                      paste0("MrWhatshisname.shp")), 
            filetype="ESRI Shapefile",
            overwrite=TRUE)
  #save the locations as shapefile
locations<-SpatialPointsDataFrame.telemetry(
                  telemetry)
locations_sf <- st_as_sf(locations)
  
st_write(locations_sf,
          file.path(shapefile_dir, 
                       paste0("locations.shp")),
     delete_layer = TRUE,
      quiet = TRUE)

Example for a population

In the below code, we run the same process as above, but in a single chunk and loop through for each individual in our population.

# Load dataset (example)
dat <- read.csv("WD_hedgehogs2022.csv", header = TRUE)

# Select relevant variables and clean the data
df <- dat %>%
  dplyr::select(
    eventid = event.id,
    id = individual.local.identifier,
    sex = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x = utm.easting,
    utm_y = utm.northing,
    long = location.long,
    lat = location.lat
  ) %>%
  drop_na() %>%  # Remove rows with missing data
  mutate(
    id = as.character(id),  # Ensure `id` is a character vector
    timestamp = as.POSIXct(timestamp, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  ) %>%
  arrange(timestamp)  # Ensure timestamps are in order

# Check for duplicates
duplicates <- df[duplicated(df[, c("id", "timestamp")]), ]
if (nrow(duplicates) > 0) {
  warning("Duplicates found in the dataset. Consider addressing them before proceeding.")
}

# Directory to save shapefiles
shapefile_dir <- "AKDEoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)

# Initialize an empty list to store summaries
summaries <- list()

# Iterate through all unique hedgehog IDs
unique_ids <- unique(df$id)

for (hedgehog_id in unique_ids) {
  # Filter for the current hedgehog ID
  hedgehog_df <- df %>% filter(id == hedgehog_id)
  
  # Convert to telemetry object
  telemetry <- as.telemetry(
    hedgehog_df,
    timeformat = "auto",
    timezone = "UTC",
    projection = "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs",
    datum = "WGS84"
  )
  
  # Fit GUESS object and model
  GUESS <- ctmm.guess(telemetry, interactive = FALSE)
  
  # Fit a movement model 
  model <- ctmm.fit(telemetry, GUESS,method = 'ML')
  
  # Calculate UD (AKDE object)
  UD <- akde(telemetry, model)
  
  # Extract summary for the individual
  ud_summary <- summary(UD)
  ud_summary$id <- hedgehog_id  # Add the ID for tracking
  summaries[[hedgehog_id]] <- ud_summary
  
 #Save UD as a shapefile
  writeVector(
    UD,
    file.path(shapefile_dir, paste0(hedgehog_id, 
                                    "_UD.shp")),
    filetype = "ESRI Shapefile",
    overwrite = TRUE)

  # Save locations as a shapefile
  locations <- SpatialPointsDataFrame.telemetry(telemetry)
  locations_sf <- st_as_sf(locations)
  st_write(
    locations_sf,
    file.path(shapefile_dir, paste0(hedgehog_id, "_locations.shp")),
    delete_layer = TRUE,
    quiet = TRUE)
  
}

# Combine all summaries into a single data frame
summary_df <- do.call(rbind, summaries)

# Save combined summary as a CSV file
write.csv(summary_df, "hedgehog_AKDE_summaries.csv", row.names = FALSE)
rm(list=ls())

Track Analysis (starting 5th of March)