This tutorial is an introduction to some basic methods of home range estimation: MCP (Minimum Convex Polygon) and KDE (Kernel Density Estimation).
The format of the chunk above is to create a list of the required packages as the object List.of.packages, then call this list, check installation status, install any missing packages and then load them all ready for our use.
Data Preparation and Initial Exploration
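As a reminder, a minimal sketch of that pattern is shown below; the package list here is illustrative, so use the list from the chunk above.
# illustrative package list - replace with the one used in the chunk above
List.of.packages <- c("dplyr", "ggplot2", "sf", "adehabitatHR", "leaflet")
# which of these are not yet installed?
new.packages <- List.of.packages[!(List.of.packages %in% installed.packages()[, "Package"])]
# install any missing packages
if (length(new.packages) > 0) install.packages(new.packages)
# load them all
invisible(lapply(List.of.packages, library, character.only = TRUE))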
# import dataset
dat <- read.csv("WD_hedgehogs2022.csv", header = TRUE)

# select only the relevant variables
df <- dat %>%
  dplyr::select(                               # select the following variables
    eventid   = event.id,
    id        = individual.local.identifier,
    sex       = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x     = utm.easting,
    utm_y     = utm.northing,
    long      = location.long,
    lat       = location.lat
  )

# check structure
str(df)
'data.frame': 5899 obs. of 9 variables:
$ eventid : num 2.38e+10 2.38e+10 2.38e+10 2.38e+10 2.38e+10 ...
$ id : chr "Anne 21" "Anne 21" "Anne 21" "Anne 21" ...
$ sex : chr "F" "F" "F" "F" ...
$ behaviour: chr "R" "R" "R" "R" ...
$ timestamp: chr "2021-05-01 19:50:09.000" "2021-05-01 20:40:10.000" "2021-05-01 21:00:11.000" "2021-05-01 21:10:06.000" ...
$ utm_x : num 549255 549257 549257 549264 549272 ...
$ utm_y : num 5872578 5872581 5872569 5872579 5872569 ...
$ long : num -2.27 -2.27 -2.27 -2.27 -2.27 ...
$ lat : num 53 53 53 53 53 ...
This has selected the variables of interest from the dataset.
# make sure the date/time is in POSIXct format
df$timestamp <- as.POSIXct(df$timestamp, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

# confirm
str(df$timestamp)
# Count rows with any NA values - works with numerical variables
cat("Number of rows with NA: ", sum(rowSums(is.na(df)) > 0))
Number of rows with NA: 0
# order dataset by id and timestamp
df <- df[order(df$id, df$timestamp), ]
head(df)
eventid id sex behaviour timestamp utm_x utm_y
4406 23752857921 Alan M W 2022-05-07 00:00:04 548309.5 5873005
4407 23752857926 Alan M W 2022-05-07 00:10:11 548235.7 5873004
4408 23752857941 Alan M W 2022-05-07 01:20:05 548240.9 5873016
4409 23752857945 Alan M W 2022-05-07 01:30:04 548296.7 5873010
4410 23752857947 Alan M W 2022-05-07 01:40:04 548334.3 5873007
4411 23752857950 Alan M W 2022-05-07 01:50:04 548326.1 5873018
long lat
4406 -2.28007 53.00442
4407 -2.28117 53.00442
4408 -2.28109 53.00453
4409 -2.28026 53.00447
4410 -2.27970 53.00444
4411 -2.27982 53.00454
# Identify timestamp duplicates for each hedgehog
duplicates <- df %>%
  group_by(id) %>%
  filter(duplicated(timestamp) | duplicated(timestamp, fromLast = TRUE))

# Count duplicates per id
duplicates_count <- duplicates %>% count(id)
print(duplicates_count)
# A tibble: 0 × 2
# Groups: id [0]
# ℹ 2 variables: id <chr>, n <int>
To help identify outliers, let's look at the data. Remember we are choosing the appropriate visual to check our data - as this is movement data, it makes sense to look at it as a map.
# basic plotting
id_colour <- factor(df$id)   # hedgehog ID made a factor so we can set up a colour for each level of the variable id

plot(df$utm_x, df$utm_y,     # simple plot
     col = id_colour)        # different colour by hedgehog id
This suggests a group of outliers where df$utm_x is less than 547,000 - let's remove them:
# let's remove all locations with utm_x < 547000
# this needs to be done using a data frame object
df <- df[df$utm_x > 547000, ]

# confirm this has worked (recompute the colour factor so it matches the filtered rows)
plot(df$utm_x, df$utm_y, col = factor(df$id))
Now we can start to look at our data to help answer any questions:
# variables of interest
# id        (unique hedgehog identifier)
# sex       (F for female, M for male)
# behaviour (R for rehabilitated, W for wild)
# timestamp (tracking date and time)

# Convert timestamp to Date (ignoring time)
df$date <- as.Date(df$timestamp)

# Summarize the data by sex and behaviour
summary_stats <- df %>%
  group_by(sex, behaviour) %>%   # group hedgehogs by their sex and behaviour
  # For each group, calculate the number of days tracked and locations
  summarise(
    num_hedgehogs      = n_distinct(id),    # Number of unique hedgehogs
    total_days_tracked = n_distinct(date),  # Total number of days tracked (unique dates)
    avg_days_tracked   = round(mean(sapply(id, function(x) length(unique(df[df$id == x, ]$date)))), 2),  # Average days tracked (per hedgehog), rounded to 2 decimal places
    min_days_tracked   = min(sapply(id, function(x) length(unique(df[df$id == x, ]$date)))),             # Minimum days tracked (per hedgehog)
    max_days_tracked   = max(sapply(id, function(x) length(unique(df[df$id == x, ]$date)))),             # Maximum days tracked (per hedgehog)
    total_locations    = n_distinct(paste(utm_x, utm_y)),                                                # Total number of unique locations
    avg_locations      = round(mean(sapply(id, function(x) length(unique(paste(df[df$id == x, ]$utm_x, df[df$id == x, ]$utm_y))))), 2),  # Average locations (per hedgehog), rounded to 2 decimal places
    min_locations      = min(sapply(id, function(x) length(unique(paste(df[df$id == x, ]$utm_x, df[df$id == x, ]$utm_y))))),             # Minimum locations (per hedgehog)
    max_locations      = max(sapply(id, function(x) length(unique(paste(df[df$id == x, ]$utm_x, df[df$id == x, ]$utm_y)))))              # Maximum locations (per hedgehog)
  ) %>%
  ungroup()
`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.
# Create a nice table using kableExtra
summary_table <- summary_stats %>%
  kable("html", caption = "Summary of Hedgehog Tracking Data by Sex and Behaviour") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),  # alternating row colours, row highlight on hover and condensed row height to make the table more compact
    full_width = FALSE,                                      # keeps the table width narrow
    position   = "center"                                    # position of the table on the page
  ) %>%
  column_spec(1:2, bold = TRUE) %>%   # Make the first two columns bold
  column_spec(3:10)                   # the data columns (no extra styling applied)

# Print the table
summary_table
Summary of Hedgehog Tracking Data by Sex and Behaviour

sex  behaviour  num_hedgehogs  total_days_tracked  avg_days_tracked  min_days_tracked  max_days_tracked  total_locations  avg_locations  min_locations  max_locations
F    R          13             38                  8.21              4                 11                2124             198.17         32             334
F    W          9              36                  7.87              7                 9                 2069             239.17         144            310
M    R          8              34                  7.92              6                 11                1416             194.72         51             256
M    W          3              11                  5.41              1                 8                 100              43.77          16             59
Code Explanation
Explanation of the code used to calculate the Average Days Tracked:
This part calculates the average number of days each hedgehog (id) was tracked, based on the unique dates when GPS locations were recorded.
avg_days_tracked = round(mean(
  sapply(id, function(x) length(unique(df[df$id == x, ]$date)))
), 2)
Steps:
df[df$id == x,]: For each hedgehog (id == x), we filter the data frame df to get all rows corresponding to that hedgehog.
df[df$id == x, ]$date: Extracts the date column for the specific hedgehog.
unique(…): Finds all unique dates for the specific hedgehog. This ensures that repeated detections on the same day are not counted multiple times.
length(…): Counts the number of unique days that a particular hedgehog was tracked.
sapply(id, function(x) …): The sapply function applies this calculation for each unique id (hedgehog) in the dataset.
mean(…): The mean() function calculates the average number of days tracked across all hedgehogs.
round(…, 2): Finally, the result is rounded to 2 decimal places for easier interpretation.
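As an aside, the same kind of summary can be computed more directly by first calculating one row per hedgehog and then summarising by group. A sketch is shown below (column names assumed from the data prepared above); note that this gives an unweighted per-hedgehog average, which may differ slightly from the row-weighted value that the sapply() approach above produces.
# one row per hedgehog: days tracked and unique locations
per_hog <- df %>%
  group_by(sex, behaviour, id) %>%
  summarise(days_tracked = n_distinct(date),
            locations    = n_distinct(paste(utm_x, utm_y)),
            .groups = "drop")

# then summarise per sex/behaviour group
per_hog %>%
  group_by(sex, behaviour) %>%
  summarise(num_hedgehogs    = n(),
            avg_days_tracked = round(mean(days_tracked), 2),
            avg_locations    = round(mean(locations), 2),
            .groups = "drop")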
Let's map it!
# Ensure df$id has categorical values (factors or character) for colour assignment
palette <- colorFactor(palette = "Set1", domain = df$id)

# Create the leaflet map
m <- leaflet(df) %>%               # Create leaflet object using the data frame
  addTiles() %>%                   # Add the default basemap
  addCircleMarkers(                # Add circles for locations
    lng = df$long, lat = df$lat,   # Ensure these are the correct columns
    color = ~palette(id)           # Apply the colour factor based on 'id'
  )
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
# Show the map
m
Base Maps
You can change the base map and use some satellite imagery instead:
# Ensure df$id has categorical values (factors or character) for colour assignment
palette <- colorFactor(palette = "Set1", domain = df$id)

# Create the leaflet map
m <- leaflet(df) %>%                                  # Create leaflet object using the data frame
  addProviderTiles(providers$Esri.WorldImagery) %>%   # Add the Esri World Imagery basemap
  addCircleMarkers(                                   # Add circles for locations
    lng = df$long, lat = df$lat,   # Ensure these are the correct columns
    color = ~palette(id)           # Apply the colour factor based on 'id'
  )
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
Warning in RColorBrewer::brewer.pal(max(3, n), palette): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors
# Show the map
m
Home Range Estimation
Minimum Convex Polygon
This is the simplest delineation of a home range, obtained by creating the polygon of minimum area around a certain percentage of relocation points obtained by telemetry. Due to its simplicity, it has been widely used in ecology.
Creating Spatial Objects (SF)
Spatial objects integrate both attribute data (e.g., animal id, timestamps) and spatial geometry (e.g., coordinates, polygons) into a single structure. The SF format ensures the data is in a standardised form that is compatible with spatial analysis tools and GIS platforms. Many R packages and functions for home range estimation, such as adehabitatHR and visualization tools like ggplot2, require spatial objects as input. The SF format makes the data compatible with these tools, streamlining workflows.
# Create a Simple Features (SF) object where each row in the data represents a point
GPS_sf <- st_sf(
  IND = df$id,   # The id column as an attribute
  geometry = st_sfc(lapply(1:nrow(df), function(i) st_point(c(df$utm_x[i], df$utm_y[i]))))   # Create a point for each row
)

# Set the CRS (Coordinate Reference System) to UTM Zone 30N (EPSG: 32630)
GPS_sf <- st_set_crs(GPS_sf, 32630)
For the first part of this function:
st_sf(): This function creates a Simple Features object, which is a standard format used to represent spatial data (points, lines, polygons, etc.) in R. It’s part of the sf package.
The first argument inside st_sf() defines the attribute (non-spatial) data you want to include in the SF object. In this case, IND = df$id assigns the id column from your df dataset to a new attribute named IND in the GPS_sf object.
The geometry argument specifies the spatial component (coordinates) of the SF object.
lapply() iterates over each row of your dataset (df).
For each row i, it creates a point using st_point(). This function takes a vector of two coordinates (x and y) and creates a point geometry for that location.
df$utm_x[i] and df$utm_y[i] are the UTM coordinates for each point in your dataset. The i indicates that the function applies to each row of the dataset.
The result is a list of points, one for each row in your dataset.
This creates a Simple Features object where each row in the df dataset corresponds to a point (geometry) in the GPS_sf object.
The next part:
st_set_crs(): This function assigns a Coordinate Reference System (CRS) to the SF object.
EPSG: 32630: This is the code for the UTM (Universal Transverse Mercator) coordinate system for Zone 30N, which covers the northern hemisphere between 6°W and 0° longitude (which includes most of England!).
By setting the CRS to EPSG: 32630, you’re telling R that your point coordinates are in this specific projection system (UTM Zone 30N).
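If you prefer, sf also offers a one-line shortcut that builds the same kind of point object directly from the data frame. A sketch, keeping the column names used above, is:
# build point geometries straight from the coordinate columns and set the CRS in one call
GPS_sf2 <- st_as_sf(df, coords = c("utm_x", "utm_y"), crs = 32630, remove = FALSE)
GPS_sf2$IND <- GPS_sf2$id   # keep an IND column to match the object built above
Note that this keeps all the other columns of df as attributes, whereas the version above keeps only IND.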
Generate the Minimum Convex Polygon
# Convert SF object to an SP object
GPS_sp <- as(GPS_sf, "Spatial")   # this is because the adehabitatHR package uses an older spatial library (sp)

# Calculate MCP for each individual (id) using the adehabitatHR function mcp
mcp_result <- mcp(GPS_sp, percent = 95)   # For 95% MCP

# Convert the result into a Simple Features (SF) object
mcp_sf <- st_as_sf(mcp_result)
We have now created the object, so we need to plot it:
# easiest plot
ggplot() +
  geom_sf(data = mcp_sf,           # MCP polygons
          aes(fill = id),          # colour by hedgehog
          alpha = 0.5) +           # transparency
  geom_sf(data = GPS_sf,           # locations
          aes(color = IND),        # colour by hedgehog
          show.legend = FALSE) +   # not showing a legend for locations
  labs(fill = "Hedgehog") +        # legend title
  theme_minimal() +
  theme(
    legend.position = "bottom",                 # Position the legend at the bottom
    legend.key.size = unit(0.4, "cm"),          # Adjust the size of the legend keys
    legend.text     = element_text(size = 6),   # Adjust the size of the text
    legend.title    = element_text(size = 8)    # Adjust the size of the title
  )
We can also plot this using the tmap package (an alternative to leaflet).
# plotting the MCPs using the tmap package
# Create the map with all categories displayed
tm_shape(mcp_sf) +
  tm_tiles("OpenStreetMap") +   # Add a basemap
  tm_borders(lwd = 2) +         # Add borders for MCP polygons
  tm_fill(
    col = "id",            # Colour polygons by 'id'
    palette = "Set2",      # Use a palette (consider changing if needed)
    alpha = 0.5,
    legend.show = TRUE,    # Ensure legend is shown
    grouping = FALSE       # Prevent combining categories
  ) +
  tm_scale_bar(position = c("left", "bottom")) +   # Add a scale bar
  tm_layout(
    legend.outside    = TRUE,   # Place legend outside
    main.title        = "Minimum Convex Polygons (MCPs) Map",
    main.title.size   = 1.5,
    legend.text.size  = 0.8,    # Adjust text size for clarity
    legend.title.size = 1.0     # Adjust legend title size
  )
[v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
= tm_scale(<HERE>).
[v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
(instead of 'col'), and 'col' for the outlines (instead of 'border.col').
[v3->v4] `tm_polygons()`: use `fill_alpha` instead of `alpha`.
[tm_polygons()] Argument `grouping` unknown.
! `tm_scale_bar()` is deprecated. Please use `tm_scalebar()` instead.
[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
[cols4all] color palettes: use palettes from the R package cols4all. Run
`cols4all::c4a_gui()` to explore them. The old palette name "Set2" is named
"brewer.set2"
Multiple palettes called "set2" found: "brewer.set2", "hcl.set2". The first one, "brewer.set2", is returned.
Warning: Number of levels of the variable assigned to the aesthetic "fill" of
the layer "polygons" is 33, which is larger than n.max (which is 30), so levels
are combined.
[plot mode] fit legend/component: Some legend items or map compoments do not
fit well, and are therefore rescaled.
ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
I had a few goes at this, but I could not increase the number of categories beyond 30 - for this dataset tmap is not a suitable alternative to leaflet (see the leaflet sketch below for a workaround).
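Since leaflet has no such category limit, one option is to overlay the MCP polygons on a leaflet basemap instead. A hedged sketch is shown below; note the polygons must first be transformed from UTM to longitude/latitude (EPSG:4326) for leaflet, and the viridis palette is used here because Set1 only supports nine colours.
mcp_palette <- colorFactor(palette = "viridis", domain = mcp_sf$id)   # avoids the 9-colour limit of Set1

leaflet() %>%
  addTiles() %>%                                    # default basemap
  addPolygons(data = st_transform(mcp_sf, 4326),    # reproject MCPs from UTM to WGS84
              color = ~mcp_palette(id),             # one colour per hedgehog
              weight = 2,
              fillOpacity = 0.3)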
Now we have the MCPs, we can calculate the area for each individual; in this example we have asked for a 95% estimate:
mcp_area_95 <- mcp.area(GPS_sp,          # spatial object
                        percent = 95,    # percentage of locations
                        unin = c("m"),   # input is in m (UTM)
                        unout = c("ha"), # output in hectares
                        plotit = FALSE)  # we want the numbers, not a plot
print(mcp_area_95)
Alan Anne 21 Anne 22 Bessie Betsy Carolina Crunchy Diana
95 1.656166 0.129607 0.9191941 0.2849418 2.988091 2.388883 4.853271 2.347311
Dorothy Erin Frannie George Hedgey Hettie Jean Jeff
95 0.416141 0.6820773 0.296092 2.733784 1.28539 4.265623 5.525065 0.4321219
Joe Loggerhead Monty Moonface 2022 Mr Whatshisname Pumpkin
95 96.56358 0.1619925 7.424848 5.128733 4.285415 0.6924634
Remy Rosie Saucepan Screamer Shelia Silkie Smiggle Stone
95 1.953729 0.6090874 0.366028 12.90894 4.966871 0.2799846 1.247345 1.419408
Timmy Twitch Vera
95 4.02342 2.21729 4.637315
We can examine how the home range estimate varies with the percentage of data used for an individual. This is helpful as it can inform us how much of our data is needed for an accurate assessment of home range size.
GPS_sp.alan <- GPS_sp[GPS_sp$IND == "Alan", ]   # subset the data for Alan

mcp.area(GPS_sp.alan,
         percent = seq(20, 100, by = 5),   # from 20 to 100% in 5% increments
         unin = c("m"),                    # specify that you have entered the data in m
         unout = c("ha"),                  # specify that you want the output in ha
         plotit = TRUE)                    # we want to plot it
We can see that for the hedgehog Alan, even with all the data included, the home range size is still not stabilising. It may be that we need more data to be able to estimate Alan's home range accurately.
Saving the MCPs for Spatial Analysis
# save a shapefile containing all MCPs in the working folder
# Directory to save shapefiles
shapefile_dir <- "HogsMCPoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)

st_write(mcp_sf,
         file.path(shapefile_dir, paste0("MCP95.shp")),
         delete_layer = TRUE,
         quiet = TRUE)
Kernel Density Estimation (KDE): Utilisation Distribution (UD)
The concept of using kernel density estimators to define utilisation distributions (i.e. probabilistic representations of how an animal uses its habitat within a given spatial area over a specific period) was coined by Worton in 1989 (B. J. Worton 1989). This is a more complex approach than simply delimiting the borders of an area using the relocations of animals (e.g. MCP).
In KDE, a kernel distribution (i.e. a three-dimensional hill or kernel) is placed on each telemetry location. The height of the hill is determined by the bandwidth of the distribution, and many distributions and methods are available (e.g. fixed versus adaptive, univariate versus bivariate bandwidth). The choice of the bandwidth introduces a clear element of bias in this method.
Smaller bandwidth: Captures finer details but may overfit.
Larger bandwidth: Produces a smoother UD but may miss finer details.
The most common bandwidth selection methods used in ecology are the reference bandwidth (href), Least Squares Cross-Validation (LSCV) and plug-in bandwidth selection:
href: Uses a rule-of-thumb formula based on the standard deviation of the data and assumes data are normally distributed.
LSCV: Selects the bandwidth by minimizing the error between the estimated and actual density at the data points, using a cross-validation approach.
Plug-In: It works by estimating the second derivative (curvature) of the true density, which tells how much the density changes. This information is then “plugged in” to calculate a bandwidth that balances smoothness and detail in the density estimate. It’s faster than cross-validation and works well when the data has a relatively simple structure.
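To see what these choices mean in practice, you can fit a UD for a single hedgehog and inspect the bandwidth (h) that was actually selected. A sketch, assuming the GPS_sp object created above (slot names may vary slightly between adehabitatHR versions):
# fit a UD for one hedgehog with each bandwidth method
ud_alan_href <- kernelUD(GPS_sp[GPS_sp$IND == "Alan", ], h = "href")   # reference bandwidth
ud_alan_lscv <- kernelUD(GPS_sp[GPS_sp$IND == "Alan", ], h = "LSCV")   # cross-validated bandwidth

# inspect the bandwidth value and method actually used
ud_alan_href[["Alan"]]@h
ud_alan_lscv[["Alan"]]@h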
Cross Validation Explanation
Cross-validation is a statistical method used to evaluate and optimize models by splitting data into training and validation subsets.
The model is trained on one subset and tested on another. This process is repeated across multiple splits to assess performance.
Cross-validation helps identify the best parameters or model configuration by minimizing prediction error and avoiding overfitting.
However, it is computationally demanding and may oversmooth complex areas.
KDE (href)
Estimation of UD by individual:
# If we had not already created the spatial object containing the coordinates of the
# locations with the hedgehog identities, we would need to run the following 3 steps:
#
# 1) Create a Simple Features (SF) object where each row in the data represents a point
# GPS_sf <- st_sf(
#   IND = df$id,   # The id column as an attribute
#   geometry = st_sfc(lapply(1:nrow(df), function(i) st_point(c(df$utm_x[i], df$utm_y[i]))))
# )
#
# 2) Set the CRS (Coordinate Reference System) to UTM Zone 30N (EPSG: 32630)
# GPS_sf <- st_set_crs(GPS_sf, 32630)
#
# 3) Convert SF object to an SP object
# GPS_sp <- as(GPS_sf, "Spatial")
#
# We can skip this as we did it earlier.

# now we calculate the UD for ALL hedgehogs
allUD <- kernelUD(subset(GPS_sp, select = IND),   # to produce a KDE for each id
                  h = "href",                     # href bandwidth method
                  grid = 150,                     # grid resolution for the estimation of the UD
                  same4all = FALSE)               # a different href is used for each id

# A 150x150 grid will result in 22,500 cells.
# Higher resolution provides more detailed density but is more computationally intensive.
# Each of these cells contains a value of the estimated density for that area.
image(allUD)
The plots above show different shapes of UDs, from a very narrow and vertical UD (e.g. Jeff) to a long horizontal UD (e.g. Monty). They also show how some hedgehogs seem to have multiple centres of activity (e.g. Screamer).
This is a helpful visual display, but we want to compare the sizes, so we need to calculate the area of these ranges. Here, we use the function getverticeshr to convert the continuous UD into a discrete polygon representing a specific threshold, typically the home range for a given percentage of utilisation. For example, for a 95% HR, the function draws a polygon around the area that covers 95% of the total UD volume.
# for Alan
# Subset allUD to only include "Alan" by indexing the list
allUDAlan <- allUD[["Alan"]]

# Calculate the home range for Alan with a 95% contour - removing outliers
allUDHrefiso95Alan <- getverticeshr(allUDAlan, percent = 95)
# Calculate the home range for Alan with a 90% contour - most of the HR
allUDHrefiso90Alan <- getverticeshr(allUDAlan, percent = 90)
# Calculate the home range for Alan with a 50% contour - core area
allUDHrefiso50Alan <- getverticeshr(allUDAlan, percent = 50)

# Plot all the contours overlapping
ggplot() +
  # Plot the 95% contour (alpha = 0.3 means 30% opaque / 70% transparent)
  geom_sf(data = st_as_sf(allUDHrefiso95Alan), fill = "blue", alpha = 0.3) +
  # Plot the 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Alan), fill = "red", alpha = 0.3) +
  # Plot the 50% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Alan), fill = "green", alpha = 0.3) +
  theme_minimal() +
  labs(title = "Home Range Contours for Alan",
       fill = "Contour Percent")
# Subset allUD to only include "Screamer" by indexing the list
allUDScreamer <- allUD[["Screamer"]]

# Calculate the home range for Screamer with different contours
allUDHrefiso95Screamer <- getverticeshr(allUDScreamer, percent = 95)
allUDHrefiso90Screamer <- getverticeshr(allUDScreamer, percent = 90)
allUDHrefiso50Screamer <- getverticeshr(allUDScreamer, percent = 50)

# Plot all the contours overlapping for Screamer
ggplot() +
  geom_sf(data = st_as_sf(allUDHrefiso95Screamer), fill = "blue", alpha = 0.3) +   # 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Screamer), fill = "red", alpha = 0.3) +    # 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Screamer), fill = "green", alpha = 0.3) +  # 50% contour
  theme_minimal() +
  labs(title = "Home Range Contours for Screamer",
       fill = "Contour Percent")
These show a difference in the space used by Screamer and Alan - this is good!
If we want the areas for all hogs, we can extract the 95% contours for every individual at once. I have disabled the plot from running as it would not render!
allUDHref95 <- getverticeshr(allUD, percent = 95)

# Convert to a Simple Features (SF) object for plotting
allUDHref95_sf <- st_as_sf(allUDHref95)
# Plot with different fill colours based on hedgehog ID
ggplot() +
  geom_sf(data = allUDHref95_sf, aes(fill = id), alpha = 0.5) +
  scale_fill_viridis_d() +   # Use a colour scale for discrete values
  theme_minimal() +
  labs(title = "95% Home Range Contour by Hedgehog ID", fill = "Hedgehog ID") +
  theme(
    legend.position = "bottom",                 # Position the legend at the bottom
    legend.key.size = unit(0.4, "cm"),          # Adjust the size of the legend keys
    legend.text     = element_text(size = 6),   # Adjust the size of the text
    legend.title    = element_text(size = 8)    # Adjust the size of the title
  )
And then to calculate the areas:
as.data.frame(allUDHref95)
id area
Alan Alan 10.8816742
Anne 21 Anne 21 0.2387152
Anne 22 Anne 22 1.1429249
Bessie Bessie 0.4678121
Betsy Betsy 4.4207416
Carolina Carolina 2.8500652
Crunchy Crunchy 9.7106681
Diana Diana 2.6482701
Dorothy Dorothy 0.7216981
Erin Erin 0.9812055
Frannie Frannie 0.4671336
George George 4.5300362
Hedgey Hedgey 2.9363113
Hettie Hettie 5.6547323
Jean Jean 9.6023052
Jeff Jeff 1.0346386
Joe Joe 154.5607442
Loggerhead Loggerhead 0.5481730
Monty Monty 16.3311304
Moonface 2022 Moonface 2022 21.1008450
Mr Whatshisname Mr Whatshisname 6.0410639
Pumpkin Pumpkin 1.0647545
Remy Remy 2.7423964
Rosie Rosie 0.9276362
Saucepan Saucepan 0.6496031
Screamer Screamer 35.5762332
Shelia Shelia 6.6063573
Silkie Silkie 0.4829821
Smiggle Smiggle 2.0679313
Stone Stone 1.6419157
Timmy Timmy 7.6381287
Twitch Twitch 2.9055795
Vera Vera 6.2948385
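Alternatively, adehabitatHR's kernel.area() returns home range sizes for a whole series of percentage levels at once, which is handy for comparing how the estimate grows with the contour level. A sketch using the href UDs computed above:
kde_areas <- kernel.area(allUD,
                         percent = seq(50, 95, by = 5),   # from the 50% core up to the 95% home range
                         unin = "m",                      # input coordinates in metres
                         unout = "ha")                    # output in hectares
head(kde_areas)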
To save these for QGIS etc use:
# Directory to save shapefiles
shapefile_dir <- "hogsKDEoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)

st_write(allUDHref95_sf,
         file.path(shapefile_dir, paste0("KDE_href_95.shp")),
         delete_layer = TRUE,
         quiet = TRUE)
Least Squares Cross-Validation (LSCV)
Create all UDs using the LSCV method:
# now we calculate the UD for ALL hedgehogs
allUD_LSCV <- kernelUD(subset(GPS_sp, select = IND),   # to produce a KDE for each id
                       h = "LSCV",                     # LSCV bandwidth method
                       grid = 150,                     # grid resolution for the estimation of the UD
                       same4all = FALSE)               # a different bandwidth is used for each id
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge
within the specified range of hlim: try to increase it
Warning in .kernelUDs(SpatialPoints(x, proj4string = CRS(as.character(pfs1))), : The algorithm did not converge
within the specified range of hlim: try to increase it
image(allUD_LSCV)
Errors
The following warning appeared after running the above chunk:
Warning: The algorithm did not converge within the specified range of hlim: try to increase it

This might be due to different reasons:
Sparse or Unevenly Distributed Data
If the data for some individuals (IND) is sparse, unevenly distributed, or clustered, the LSCV algorithm may fail to find an optimal bandwidth.
Range of hlim Too Narrow
The default range for hlim (the range of bandwidth values) might not be suitable for your dataset. If the optimal bandwidth lies outside this range, convergence will not be achieved.
Small Number of Points: THIS IS THE LIKELY REASON IN THIS CASE
LSCV requires a sufficient number of data points to calculate a reliable bandwidth. If some individuals have very few observations, the algorithm may struggle.
Highly Clustered Locations:
If locations are highly clustered, the kernel density estimate might become unstable, causing issues in bandwidth estimation.
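A quick way to see which individuals are affected is to plot the LSCV criterion against the candidate bandwidths; where the curve has no clear minimum, the algorithm has not converged. A sketch using the object created above:
plotLSCV(allUD_LSCV)   # one panel per hedgehog: the CV criterion as a function of h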
Similar to the href method used before, we can plot the results by percentage for each individual hog:
# for Alan
# Subset allUD_LSCV to only include "Alan" by indexing the list
allUDAlan <- allUD_LSCV[["Alan"]]

# Calculate the home range for Alan with a 95% contour - removing outliers
allUDHrefiso95Alan <- getverticeshr(allUDAlan, percent = 95)
# Calculate the home range for Alan with a 90% contour - most of the HR
allUDHrefiso90Alan <- getverticeshr(allUDAlan, percent = 90)
# Calculate the home range for Alan with a 50% contour - core area
allUDHrefiso50Alan <- getverticeshr(allUDAlan, percent = 50)

# Plot all the contours overlapping
ggplot() +
  geom_sf(data = st_as_sf(allUDHrefiso95Alan), fill = "blue", alpha = 0.3) +   # 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Alan), fill = "red", alpha = 0.3) +    # 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Alan), fill = "green", alpha = 0.3) +  # 50% contour
  theme_minimal() +
  labs(title = "Home Range Contours for Alan",
       fill = "Contour Percent")
And for Screamer:
# for Screamer
# Subset allUD_LSCV to only include "Screamer" by indexing the list
allUDScreamer <- allUD_LSCV[["Screamer"]]

# Calculate the home range for Screamer with different contours
allUDHrefiso95Screamer <- getverticeshr(allUDScreamer, percent = 95)
allUDHrefiso90Screamer <- getverticeshr(allUDScreamer, percent = 90)
allUDHrefiso50Screamer <- getverticeshr(allUDScreamer, percent = 50)

# Plot all the contours overlapping for Screamer
ggplot() +
  geom_sf(data = st_as_sf(allUDHrefiso95Screamer), fill = "blue", alpha = 0.3) +   # 95% contour
  geom_sf(data = st_as_sf(allUDHrefiso90Screamer), fill = "red", alpha = 0.3) +    # 90% contour
  geom_sf(data = st_as_sf(allUDHrefiso50Screamer), fill = "green", alpha = 0.3) +  # 50% contour
  theme_minimal() +
  labs(title = "Home Range Contours for Screamer",
       fill = "Contour Percent")
This analysis is suffering from undersmoothing, which is a risk associated with the LSCV smoothing method.
This is the end of this tutorial so I am clearing all from the current session:
rm(list=ls())
Home Ranges and Utilisation Distributions: dynamic Brownian Bridge Movement Model and Autocorrelated Kernel Density Estimates
With the widespread use of GPS technology to track animals in near real-time, estimators of home range and movement have been developed to deal with more intense and highly autocorrelated data. Traditional point-based estimators (e.g., MCP and KDE, explored above) only incorporate the density of locations into home range estimation.
Newer methodologies to estimate the spatial use of animals take advantage of the temporal structure and movement paths inherent in high-density GPS data. These methods, such as autocorrelated kernel density estimation (AKDE) and dynamic Brownian bridge movement models (dBBMM), explicitly account for the autocorrelation between consecutive locations. By incorporating information about the time and sequence of animal movements, these approaches provide more accurate and ecologically meaningful representations of spatial use, capturing the underlying movement processes more effectively than traditional point-based estimators.
Data Call and Preparation
# import dataset
dat <- read.csv("WD_hedgehogs2022.csv", header = TRUE)

# select only the relevant variables
df <- dat %>%
  dplyr::select(                               # select the following variables
    eventid   = event.id,
    id        = individual.local.identifier,
    sex       = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x     = utm.easting,
    utm_y     = utm.northing,
    long      = location.long,
    lat       = location.lat
  )

# check structure
str(df)
'data.frame': 5899 obs. of 9 variables:
$ eventid : num 2.38e+10 2.38e+10 2.38e+10 2.38e+10 2.38e+10 ...
$ id : chr "Anne 21" "Anne 21" "Anne 21" "Anne 21" ...
$ sex : chr "F" "F" "F" "F" ...
$ behaviour: chr "R" "R" "R" "R" ...
$ timestamp: chr "2021-05-01 19:50:09.000" "2021-05-01 20:40:10.000" "2021-05-01 21:00:11.000" "2021-05-01 21:10:06.000" ...
$ utm_x : num 549255 549257 549257 549264 549272 ...
$ utm_y : num 5872578 5872581 5872569 5872579 5872569 ...
$ long : num -2.27 -2.27 -2.27 -2.27 -2.27 ...
$ lat : num 53 53 53 53 53 ...
# Remove rows with any missing data
clean_data <- df %>% drop_na()

# Ensure `id` is a character vector
df$id <- as.character(df$id)

# make sure the date/time is in POSIXct format
df$timestamp <- as.POSIXct(df$timestamp, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

# Ensure data are in date order
df <- df[order(df$timestamp), ]

# Check for duplicates in the 'id' and 'timestamp' columns
duplicates <- df[duplicated(df[, c("id", "timestamp")]), ]
duplicates
[1] eventid id sex behaviour timestamp utm_x utm_y
[8] long lat
<0 rows> (or 0-length row.names)
We are using the same dataset as above and have already performed the basic data exploration, so we will not be repeating this here.
Spatial Use - Utilisation Distribution
Unlike the previous home range estimations, here we are calculating utilisation distributions. These represent a more realistic and dynamic approach to understanding the spatial use of animals, because they treat GPS tracks and similar Lagrangian movement data as movement paths, whereas the methods above were essentially Eulerian (point-based).
Dynamic Brownian Bridge Movement Models (dBBMM)
Methods such as the Brownian Bridge Movement Model (BBMM) quantify the utilisation distribution of an animal based on its movement path rather than individual points, and account for temporal autocorrelation and high data volumes. However, BBMM assumes homogeneous movement behaviour across all data, which is not a realistic view of the complex ecology of some species. This is especially true for species that show seasonal changes in their movement patterns and habitat use.
The dynamic Brownian Bridge Movement Model (dBBMM) allows for changes in behaviour, using likelihood statistics to determine change points along the animal's movement path; it is also able to work with tracks that are not sampled regularly, due for example to failed fixes or dynamic sampling schedules (Kranstauber et al. 2012).
The dBBMM is a method to calculate the occurrence distribution of a given track, i.e. it estimates where the animal was located during the observed (tracking) period. It calculates the probability landscape for the transition between any two known consecutive locations given the amount of time it had available (assuming a conditional random (Brownian) motion between locations). It is “dynamic” because it allows the variance to change along the trajectory, using the behavioral change point analysis in a sliding window.
# Define parameters for the dBBMM analysis
set_grid.ext <- 10    # Grid extension for spatial analysis
set_dimsize  <- 600   # Dimension size for the grid
ws  <- 15             # Window size for smoothing
mrg <- 5              # Margin size for dynamic calculations
settings <- paste0("mrg", mrg, "_ws", ws)   # Create a string to represent the settings
gps.acc <- 10         # Average GPS accuracy in metres

# Create directories to store outputs if they don't exist
dir.create("dBBMMfigures", showWarnings = FALSE)
dir.create("dBBMMshapefiles", showWarnings = FALSE)

# Initialise lists to store results across individuals
iter <- 0
data.var.list <- vector(mode = "list", length = length(unique(df$id)))
all.area.list <- vector(mode = "list", length = length(unique(df$id)))
dbbmm.list    <- vector(mode = "list", length = length(unique(df$id)))
Code Explanation
dBBMM parameter explanation:
Grid Extension (set_grid.ext):
This determines how far beyond the furthest point your grid will extend. If your area is small, say under 5 km², reducing this value will ensure you’re not unnecessarily adding a large buffer around your points.
Start by examining the range of your animal’s movement (e.g., the spatial extent of GPS points in UTM coordinates).
Choose a value that ensures the area of interest (e.g., home range or activity space) is fully covered.
Test values empirically, e.g., 5–10 units larger than the maximum observed range, to ensure no truncation of important movement data.
Grid Size (set_dimsize):
This sets the resolution of your grid. A smaller study area will generally require a finer grid resolution, so setting this too large might result in unnecessary computation or inefficient processing.
The resolution should balance computational efficiency and detail. Higher values create finer grids but increase computation time.
Use a value that corresponds to the precision of your GPS data (e.g., if locations are accurate to 10 meters, ensure grid cells are smaller than this).
Typical starting points:
For high-accuracy data (e.g., <10 m), try 500–1000.
For lower-accuracy data, try 100–500.
Our study area is roughly 2,000 x 2,000 m. However, the actual area used by individual hedgehogs is no bigger than about 750 by 750 metres. By using set_grid.ext = 10 we extended the analysis area by 10 metres on each side.
Based on this, set_dimsize = 600 means that we have divided the analysis area into 600x600 cells, with cells that will typically be smaller than 1.5 m (see the extent check sketched after this list).
ws <- 15 (Window Size for Smoothing):
The ws parameter defines the size of the moving window used in the dynamic Brownian Bridge Movement Model (dBBMM) analysis. This window determines the range of locations considered when calculating the dynamic behavior of the animal’s movement. A larger window size results in smoother estimates of movement, while a smaller window captures finer details.
Consider the frequency and regularity of your GPS data:
For frequent data (e.g., locations every 1–5 minutes), a larger ws (e.g., 15–25) may be needed to smooth short-term variations.
For sparse data (e.g., locations every hour or day), use a smaller ws (e.g., 5–10) to capture finer-scale dynamics.
Adjust based on biological knowledge:
If the animal is known to move over larger distances consistently, a larger window might make sense.
Use smaller windows if you want to capture fine-scale movement details.
mrg <- 5 (Margin Size for Dynamic Calculations):
The mrg parameter specifies the margin size for the smoothing window. It controls the buffer around the moving window to handle edge effects during the calculations. The margin ensures that movement dynamics are accurately captured without losing information near the edges of the data.
Should typically be an odd number and smaller than ws (e.g., 1/3 or 1/4 of ws).
Larger margins can smooth over potential noise at the edges, but excessively large margins may dampen dynamic variability.
Test values empirically (e.g., 3–7) and evaluate how they influence the results.
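One way to sanity-check the grid settings is to look at the spatial extent of each hedgehog's locations before running the model. A sketch using the prepared df:
# spatial extent (in metres) of each individual's locations
df %>%
  group_by(id) %>%
  summarise(x_extent = diff(range(utm_x)),
            y_extent = diff(range(utm_y)),
            .groups = "drop") %>%
  arrange(desc(x_extent))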
The code below works correctly, but does not render, so I have disabled it:
# Initialize a data frame to store the final area data
final_area_df <- data.frame()

# Loop Through Each Individual
# Analyze movement data for each individual in the dataset
# (the iteration counter `iter` was initialised to 0 in the setup chunk above)
for (indi in unique(df$id)) {

  # Subset data for the current individual and ensure unique timestamps
  data <- df %>%
    filter(id == indi) %>%
    distinct(timestamp,          # drop duplicate rows where the timestamp is the same
             .keep_all = TRUE)   # retain all other columns

  iter <- iter + 1   # keeps track of the iteration number, which is associated with each hedgehog id

  # Convert the individual's data to a move object for spatial analysis
  move <- move(x = data$utm_x,
               y = data$utm_y,
               proj = "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs",
               time = as.POSIXct(data$timestamp, tz = "UTC"))

  # Calculate dBBMM and Extract Motion Variance
  dbbmm <- brownian.bridge.dyn(object = move,              # move object
                               location.error = gps.acc,   # location error
                               margin = mrg,               # margin
                               window.size = ws,           # window size
                               ext = set_grid.ext,         # grid extension
                               dimSize = set_dimsize,      # resolution
                               verbose = FALSE)            # if TRUE the function prints the progress of the calculation... we don't need that

  data$var <- getMotionVariance(dbbmm)   # Add motion variance to the dataset

  # Plot and Save Motion Variance Over Time
  p <- ggplot(data) +
    geom_area(aes(x = timestamp, y = var), alpha = 0.5, fill = "black") +
    geom_line(aes(x = timestamp, y = var)) +
    theme_bw() +
    scale_x_datetime(labels = scales::date_format("%d-%b-%Y"),   # Format as day-month-year
                     breaks = scales::date_breaks("1 day")) +    # Breaks every day
    labs(x = "Date",
         y = expression(paste(sigma^2, " m")),   # variation in movement (m)
         title = paste0(indi, " Motion Variance")) +
    theme(axis.text.x  = element_text(angle = 45, hjust = 1, size = 7),
          axis.title.y = element_text(angle = 90, vjust = 0.5, face = 2),
          axis.title.x = element_text(hjust = 0.5, face = 2),
          plot.title   = element_text(face = 4))

  # Ensure the output directory exists
  output_dir <- "hogdBBMMfigures"
  if (!dir.exists(output_dir)) {
    dir.create(output_dir)
  }

  # Save the plot (passed explicitly so the correct plot is saved inside the loop)
  ggsave(file = paste0(output_dir, "/MotionVariance_", indi, ".png"),
         plot = p,
         width = 180, height = 100, dpi = 600, units = "mm")

  # Calculate and Store dBBMM Contours
  dbbmm.sp <- as(dbbmm, "SpatialPixelsDataFrame")   # transforms the output of the dBBMM analysis into a spatial data frame
  dbbmm.sp.ud <- new("estUD", dbbmm.sp)             # creates the UD
  dbbmm.sp.ud@vol <- FALSE                          # volume of the UD not calculated as it is not needed right now
  dbbmm.sp.ud@h$meth <- "dBBMM"                     # UD is estimated using dBBMM
  dbbmm.ud <- getvolumeUD(dbbmm.sp.ud, standardize = TRUE)   # now is the time to compute the volume under the UD
  # Initialize lists for storing contour and area data for each individual
  area.list <- vector(mode = "list", length = 3)
  j <- 0

  # Loop through contour levels (75%, 90%, 95%)
  for (con in c(75, 90, 95)) {
    j <- j + 1
    k <- which(c(75, 90, 95) == con)

    # Generate contours and convert to an sf object
    poly <- getverticeshr(dbbmm.ud, percent = con)   # delimits the contour
    poly_sf <- st_as_sf(poly)

    # Save contour as shapefile
    if (!dir.exists("hogdBBMMshapefiles")) {
      dir.create("hogdBBMMshapefiles")
    }
    invisible(suppressMessages({
      suppressWarnings({
        st_write(poly_sf,
                 dsn = paste0("hogdBBMMshapefiles/", indi, "_dBBMM_", con, "_", settings, ".shp"),
                 layer = paste0(indi, "_dBBMM_", con, "_", settings),
                 delete_layer = TRUE)
      })
    }))

    # Calculate contour area and store
    area <- as.numeric(sum(st_area(poly_sf)) / 10000)   # Area in hectares
    area.list[[j]] <- data.frame("id"      = indi,
                                 "Contour" = con,    # 75, 90 or 95%
                                 "Area"    = area)   # area in hectares
  }

  # Combine all contour and area data for this individual
  area.df <- dplyr::bind_rows(area.list)

  # Store individual results
  all.area.list[[iter]] <- area.df
  dbbmm.list[[iter]]    <- dbbmm
  data.var.list[[iter]] <- data

  # Reshape the data frame so each contour is a separate column
  area_wide <- reshape(area.df,
                       idvar = "id",
                       timevar = "Contour",
                       direction = "wide",
                       v.names = "Area")

  # Clean column names to make them more readable
  colnames(area_wide) <- gsub("Area.", "", colnames(area_wide))
  colnames(area_wide) <- paste0("Contour_", colnames(area_wide))

  # Merge the reshaped table into the final table
  final_area_df <- rbind(final_area_df, area_wide)
}
We can then print and save this table:
# Display the final table
print(final_area_df)

# Optionally, save the table to a CSV file
write.csv(final_area_df, "dBBMM_contour_areas.csv", row.names = FALSE)
Motion Variance
These motion variance graphs have been generated for all the hogs as part of the above code, but how do we interpret them?
X-axis (Date):
Represents the timestamps (data$timestamp) of the observations for the specific individual hedgehog (indi).
Y-axis (Motion Variance σ²):
Displays the motion variance values (data$var) estimated at each timestamp.
The black-filled area:
Shows the area under the curve, which emphasizes the magnitude of motion variance over time.
The line:
Represents the trend or fluctuation in motion variance (σ²) across time.
Peaks in the line suggest moments of higher movement variance, while troughs indicate periods of lower variance.
Interpreting the Motion Variance:
Motion Variance (σ²):
A higher variance suggests increased mobility or less predictable movement patterns during that time frame. A lower variance indicates more restricted or consistent movement patterns.
Trends:
If the variance fluctuates significantly over time, it may correspond to changes in the hedgehog’s activity (e.g., foraging vs. resting behavior) or external factors (e.g., weather, habitat structure).
Periods of consistent variance might indicate steady movement within a limited area.
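If you want to go beyond visual inspection, you could flag the timestamps where an individual's motion variance is unusually high. A hedged sketch, assuming the per-individual data frames stored in data.var.list by the loop above (the 90th percentile threshold is arbitrary):
hog <- data.var.list[[1]]                           # first individual processed by the loop
threshold <- quantile(hog$var, 0.9, na.rm = TRUE)   # arbitrary cut-off: top 10% of motion variance
high_activity <- hog[hog$var > threshold, c("id", "timestamp", "var")]
head(high_activity)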
The motion variance for Whatshisname seems to indicate that it was active from the 25th to the 27th of April, but its level of activity dropped afterwards, possibly because it returned to its nest on the 27th.
The shapefiles of the 75, 90 and 95% UD contours for Whatshisname, obtained by dBBMM, seem to indicate different centres of activity and show that the main activity of this hedgehog is limited to the Memorial Garden and the Walled Garden.
This is the end of this tutorial so I am clearing all from the current session:
rm(list=ls())
Autocorrelated Kernel Density Estimation (AKDE)
Autocorrelated Kernel Density Estimation (AKDE) methods were designed to be statistically efficient while explicitly dealing with the complexities and biases of modern movement data, such as autocorrelation, small sample sizes, and missing or irregularly sampled data.
We will continue using the hedgehog dataset previously presented, which was downloaded from Movebank.
We will be using the ctmm package (Calabrese, Fleming, and Gurarie 2016); see the package reference manual for further details.
Data call and prep:
# Load dataset
dat <- read.csv("WD_hedgehogs2022.csv", header = TRUE)

# Select relevant variables and clean the data
df <- dat %>%
  dplyr::select(
    eventid   = event.id,
    id        = individual.local.identifier,
    sex       = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x     = utm.easting,
    utm_y     = utm.northing,
    long      = location.long,
    lat       = location.lat
  ) %>%
  drop_na() %>%   # Remove rows with missing data
  mutate(
    id = as.character(id),   # Ensure `id` is a character vector
    timestamp = as.POSIXct(timestamp, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  ) %>%
  arrange(timestamp)   # Ensure timestamps are in order

# Check for duplicates
duplicates <- df[duplicated(df[, c("id", "timestamp")]), ]
if (nrow(duplicates) > 0) {
  warning("Duplicates found in the dataset. Consider addressing them before proceeding.")
}
Example for a single individual - Mr Whatshisname
Filtering the dataset to select a single individual
# select only Mr Whatshisname
what_df <- df %>% filter(id == "Mr Whatshisname")   # Replace with the desired hedgehog ID
Data formatting for this analysis:
# convert the dataset to a telemetry object
telemetry <- as.telemetry(what_df,
                          timeformat = "auto",
                          timezone = "UTC",
                          projection = "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs",
                          datum = "WGS84")
Minimum sampling interval of 9.9 minutes in Mr Whatshisname
plot(telemetry)
DOP values missing. Assuming DOP=1.
In general, the as.telemetry() function will identify the columns and check they are correctly named, convert the projection if needed, and then output the minimum sampling interval for each individual in the dataset. In this example, Mr Whatshisname has a minimum sampling interval of 9.9 minutes (R console output). The plot shows the locations for this hedgehog as circles.
DOP
The message DOP values missing. Assuming DOP=1. appears because the telemetry object does not include Dilution of Precision (DOP) values.
DOP is typically used in GPS data to indicate the quality of location fixes, with lower values representing higher precision. If these values are not available in your dataset, the ctmm package assumes a default DOP value of 1 for all fixes.
This assumption is generally fine for exploratory analysis or for datasets where precision is expected to be uniform. Otherwise, you will need to supply the location-error information yourself, for example by including a DOP/HDOP column in the input data or by calibrating the error model.
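If you do have an estimate of the GPS error, one option in ctmm is to assign a user equivalent range error (UERE) to the telemetry object before model fitting. A hedged sketch, assuming a 10 m RMS error (see the ctmm error-calibration vignette before applying this in a real analysis):
uere(telemetry) <- 10   # assume a 10 m RMS location error for all fixes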
Selecting the movement model
It is necessary to choose a home range estimator that accounts for the autocorrelated structure of the data. We need to test what movement model may explain the autocorrelated structure of our tracking data.
We can run different movement processes with maximum likelihood (ML) or other parameter estimators, such as perturbative Hybrid REML (pHREML). The default is pHREML. In this case ML has been used because later on the model would not work for some hedgehogs when using pHREML.
# calculate fit guess object
GUESS <- ctmm.guess(telemetry, interactive = FALSE)   # produces a guess of the initial values of the maximum likelihood parameters

# Fit a movement model
model <- ctmm.fit(telemetry, GUESS, method = 'ML')
summary(model)
$name
[1] "OUF anisotropic"
$DOF
mean area diffusion speed
9.6149250 13.3290393 72.7624533 0.1378726
$CI
low est high
area (hectares) 4.445827e+00 8.274388 1.327238e+01
τ[position] (hours) 3.323986e+00 6.744769 1.368595e+01
τ[velocity] (seconds) 2.992233e-11 24.886290 2.069783e+13
speed (kilometers/day) 6.188969e-05 18.772321 5.555578e+01
diffusion (hectares/day) 2.458836e+00 3.138232 3.899248e+00
What this means: Within these summaries,
$name provides the selected best-fit model,
$DOF provides information on the degrees of freedom (where $DOF[“area”] corresponds to the effective sample size of the home-range area estimate)
Effective sample size (N): the number of range crossings that occurred during the observation period. It can be roughly estimated by dividing the duration of the tracking dataset by the home range crossing time, i.e. the time required for an animal to cross the linear extent of its home range (Fleming et al. 2019).
$CI provides the parameter estimates with confidence intervals (area, position autocorrelation timescale, velocity autocorrelation timescale, and speed).
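Rather than fitting only the single model suggested by ctmm.guess(), you can also let ctmm fit and compare a set of candidate movement models. A sketch, assuming the telemetry and GUESS objects created above:
# fit several candidate movement models; verbose = TRUE returns all of them
fits <- ctmm.select(telemetry, GUESS, method = "ML", verbose = TRUE)
summary(fits)   # models ranked by information criterion (AICc by default)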
Estimating the Home Range
Now we can pass this fitted movement model to the akde() function and estimate the home range of Mr Whatshisname. This function currently defaults to the area-corrected AKDE, or AKDEc (Silva et al. 2021).
# Calculate UD (AKDE object) - area-corrected AKDE
UD <- akde(telemetry, model, level.UD = 0.95)   # 95% utilisation contour
summary(UD)
$DOF
area bandwidth
13.32904 14.49390
$CI
low est high
area (hectares) 4.068557 7.572227 12.14609
attr(,"class")
[1] "area"
This output shows a home range for Mr Whatshisname of 7.57 ha (with 95% confidence intervals of 4.07 - 12.15 ha).
Plotting and saving AKDE of Home Range
# Plot data with AKDE
EXT <- extent(UD, level = 0.95)
plot(telemetry, UD = UD, ext = EXT)
DOP values missing. Assuming DOP=1.
# saving the results as shapefiles
# Directory to save shapefiles
shapefile_dir <- "AKDEoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)

# save the UD as a shapefile
writeVector(UD,
            file.path(shapefile_dir, paste0("MrWhatshisname.shp")),
            filetype = "ESRI Shapefile",
            overwrite = TRUE)

# save the locations as a shapefile
locations <- SpatialPointsDataFrame.telemetry(telemetry)
locations_sf <- st_as_sf(locations)
st_write(locations_sf,
         file.path(shapefile_dir, paste0("locations.shp")),
         delete_layer = TRUE,
         quiet = TRUE)
Example for a population
In the code below, we run the same process as above, but in a single chunk, looping through each individual in our population.
# Load dataset (example)
dat <- read.csv("WD_hedgehogs2022.csv", header = TRUE)

# Select relevant variables and clean the data
df <- dat %>%
  dplyr::select(
    eventid   = event.id,
    id        = individual.local.identifier,
    sex       = study.specific.measurement,
    behaviour = behavioural.classification,
    timestamp = timestamp,
    utm_x     = utm.easting,
    utm_y     = utm.northing,
    long      = location.long,
    lat       = location.lat
  ) %>%
  drop_na() %>%   # Remove rows with missing data
  mutate(
    id = as.character(id),   # Ensure `id` is a character vector
    timestamp = as.POSIXct(timestamp, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
  ) %>%
  arrange(timestamp)   # Ensure timestamps are in order

# Check for duplicates
duplicates <- df[duplicated(df[, c("id", "timestamp")]), ]
if (nrow(duplicates) > 0) {
  warning("Duplicates found in the dataset. Consider addressing them before proceeding.")
}

# Directory to save shapefiles
shapefile_dir <- "AKDEoutputs"
if (!dir.exists(shapefile_dir)) dir.create(shapefile_dir)

# Initialise an empty list to store summaries
summaries <- list()

# Iterate through all unique hedgehog IDs
unique_ids <- unique(df$id)
for (hedgehog_id in unique_ids) {

  # Filter for the current hedgehog ID
  hedgehog_df <- df %>% filter(id == hedgehog_id)

  # Convert to telemetry object
  telemetry <- as.telemetry(hedgehog_df,
                            timeformat = "auto",
                            timezone = "UTC",
                            projection = "+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs",
                            datum = "WGS84")

  # Fit GUESS object and movement model
  GUESS <- ctmm.guess(telemetry, interactive = FALSE)
  model <- ctmm.fit(telemetry, GUESS, method = 'ML')

  # Calculate UD (AKDE object)
  UD <- akde(telemetry, model)

  # Extract summary for the individual
  ud_summary <- summary(UD)
  ud_summary$id <- hedgehog_id   # Add the ID for tracking
  summaries[[hedgehog_id]] <- ud_summary

  # Save UD as a shapefile
  writeVector(UD,
              file.path(shapefile_dir, paste0(hedgehog_id, "_UD.shp")),
              filetype = "ESRI Shapefile",
              overwrite = TRUE)

  # Save locations as a shapefile
  locations <- SpatialPointsDataFrame.telemetry(telemetry)
  locations_sf <- st_as_sf(locations)
  st_write(locations_sf,
           file.path(shapefile_dir, paste0(hedgehog_id, "_locations.shp")),
           delete_layer = TRUE,
           quiet = TRUE)
}

# Combine all summaries into a single data frame
summary_df <- do.call(rbind, summaries)

# Save combined summary as a CSV file
write.csv(summary_df, "hedgehog_AKDE_summaries.csv", row.names = FALSE)