library("readr")
library("sf")
library("dplyr")
library(ggplot2)
library(tidyverse)
library("SimilarityMeasures")
library("patchwork")
wildschwein <- read_delim("Data/wildschwein_BE_2056.csv", ",")
# Careful! What Timezone is assumed?
sabi <- wildschwein |>
st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
filter(
TierName == "Sabi",
DatetimeUTC >= "2015-07-01",
DatetimeUTC < "2015-07-03"
)
ggplot(sabi) +
geom_sf() +
geom_path(aes(E, N)) +
theme_minimal()WEEK 4 EXERCISES
1 Input: Segmentation
1.1 Step a): Specify a temporal wondow
#define the function
distance_by_element <- function(later, now){
as.numeric(
st_distance(later, now, by_element = TRUE)
)
}
# Ensure your data is sorted by time
sabi <- sabi |>
arrange(DatetimeUTC) |>
mutate(
# Distance from pos[n-2] to pos[n]
dist_n_minus_2 = distance_by_element(lag(geometry, 2), geometry),
# Distance from pos[n-1] to pos[n]
dist_n_minus_1 = distance_by_element(lag(geometry, 1), geometry),
# Distance from pos[n] to pos[n+1]
dist_n_plus_1 = distance_by_element(geometry, lead(geometry, 1)),
# Distance from pos[n] to pos[n+2]
dist_n_plus_2 = distance_by_element(geometry, lead(geometry, 2))
)
sabi <- sabi |>
rowwise() |>
mutate(
stepMean = mean(c(dist_n_minus_2, dist_n_minus_1, dist_n_plus_1, dist_n_plus_2)) #with na.rm = TRUE the beginning and End would stay
)|>
ungroup()
sabiSimple feature collection with 192 features and 11 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 2569724 ymin: 1204916 xmax: 2570927 ymax: 1205957
Projected CRS: CH1903+ / LV95
# A tibble: 192 × 12
TierID TierName CollarID DatetimeUTC E N
<chr> <chr> <dbl> <dttm> <dbl> <dbl>
1 002A Sabi 12275 2015-06-30 22:00:13 2569972. 1205366.
2 002A Sabi 12275 2015-06-30 22:16:06 2569975. 1205637.
3 002A Sabi 12275 2015-06-30 22:30:19 2570266. 1205857.
4 002A Sabi 12275 2015-06-30 22:45:13 2570208. 1205913.
5 002A Sabi 12275 2015-06-30 23:00:10 2570247. 1205731.
6 002A Sabi 12275 2015-06-30 23:15:17 2570512. 1205279.
7 002A Sabi 12275 2015-06-30 23:30:38 2570684. 1205103.
8 002A Sabi 12275 2015-06-30 23:45:16 2570526. 1205051.
9 002A Sabi 12275 2015-07-01 00:00:10 2570532. 1205044.
10 002A Sabi 12275 2015-07-01 00:15:14 2570530. 1205059.
# ℹ 182 more rows
# ℹ 6 more variables: geometry <POINT [m]>, dist_n_minus_2 <dbl>,
# dist_n_minus_1 <dbl>, dist_n_plus_1 <dbl>, dist_n_plus_2 <dbl>,
# stepMean <dbl>
1.2 Step c): Remove “static points”
We can now determine if an animal is moving or not by specifying a threshold distance on stepMean. In our example, we use the mean value as a threshold: Positions with distances below this value are considered static.
sabi <- sabi |>
mutate(static = stepMean < mean(stepMean, na.rm = TRUE))
sabi_filter <- sabi |>
filter(!static)
sabi_filter |>
ggplot(aes(E, N)) +
geom_point(data = sabi, col = "red") +
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")2 Exercise A: Segmentation
With the skills from Input: Segmentation you can now implement the segmentation algorithm described in Laube and Purves (2011) to either your own movement data or to a different wild boar using different sampling intervals.
2.1 Task 1: Calculate distances
Now, you can Step a): Specify a temporal window v and Step b): Measure the distance to every point within v, which you had used with sabi, on your own movement data or to a different wild boar using different sampling intervals.
unique(wildschwein$TierName)[1] "Sabi" "Rosa" "Ruth"
now <- wildschwein$DatetimeUTC
later <- lead(now)
difftime_secs <- function(later, now){
as.numeric(difftime(later, now))
}
wildschwein <- wildschwein |>
group_by(TierID) |>
mutate(
timelag = difftime_secs(lead(DatetimeUTC), DatetimeUTC)
)
wildschwein |>
filter(TierName == "Rosa") |>
summarise(start_time = min(DatetimeUTC, na.rm = TRUE))# A tibble: 1 × 2
TierID start_time
<chr> <dttm>
1 016A 2014-11-07 07:45:44
rosa <- wildschwein |>
st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
filter(
TierName == "Rosa",
DatetimeUTC >= "2014-11-07",
DatetimeUTC < "2014-11-10"
)
ggplot(rosa) +
geom_sf() +
geom_path(aes(E, N)) +
theme_minimal()rosa <- rosa |>
arrange(DatetimeUTC) |>
mutate(
# Distance from pos[n-2] to pos[n]
dist_n_minus_2 = distance_by_element(lag(geometry, 2), geometry),
# Distance from pos[n-1] to pos[n]
dist_n_minus_1 = distance_by_element(lag(geometry, 1), geometry),
# Distance from pos[n] to pos[n+1]
dist_n_plus_1 = distance_by_element(geometry, lead(geometry, 1)),
# Distance from pos[n] to pos[n+2]
dist_n_plus_2 = distance_by_element(geometry, lead(geometry, 2))
)
rosa <- rosa |>
rowwise() |>
mutate(
stepMean = mean(c(dist_n_minus_2, dist_n_minus_1, dist_n_plus_1, dist_n_plus_2)) #with na.rm = TRUE the beginning and End would stay
)|>
ungroup()
rosaSimple feature collection with 253 features and 12 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 2569889 ymin: 1204537 xmax: 2570395 ymax: 1205037
Projected CRS: CH1903+ / LV95
# A tibble: 253 × 13
TierID TierName CollarID DatetimeUTC E N timelag
<chr> <chr> <dbl> <dttm> <dbl> <dbl> <dbl>
1 016A Rosa 13972 2014-11-07 07:45:44 2570385. 1204812. 866
2 016A Rosa 13972 2014-11-07 08:00:10 2570389. 1204821. 908
3 016A Rosa 13972 2014-11-07 08:15:18 2570375. 1204787. 905
4 016A Rosa 13972 2014-11-07 08:30:23 2570387. 1204828. 945
5 016A Rosa 13972 2014-11-07 08:46:08 2570387. 1204853. 843
6 016A Rosa 13972 2014-11-07 09:00:11 2570387. 1204857. 904
7 016A Rosa 13972 2014-11-07 09:15:15 2570392. 1204821. 896
8 016A Rosa 13972 2014-11-07 09:30:11 2570395. 1204818. 905
9 016A Rosa 13972 2014-11-07 09:45:16 2570391. 1204838. 928
10 016A Rosa 13972 2014-11-07 10:00:44 2569917. 1205031. 869
# ℹ 243 more rows
# ℹ 6 more variables: geometry <POINT [m]>, dist_n_minus_2 <dbl>,
# dist_n_minus_1 <dbl>, dist_n_plus_1 <dbl>, dist_n_plus_2 <dbl>,
# stepMean <dbl>
2.2 Task 2: Specify and apply threshold d
After calculating the Euclidean distances to positions within the temporal window v in task 1, you can explore these values (we stored them in the column stepMean) using summary statistics (histograms, boxplot, summary()): This way we can define a reasonable threshold value to differentiate between stops and moves. There is no “correct” way of doing this, specifying a threshold always depends on data as well as the question that needs to be answered. In this exercise, use the mean of all stepMean values.
Store the new information (boolean to differentiate between stops (TRUE) and moves (FALSE)) in a new column named static.
# Compute the threshold from the stepMean values
threshold <- mean(rosa$stepMean, na.rm = TRUE)
# Create the 'static' column: TRUE when stepMean is below the threshold (stop), FALSE otherwise (move)
rosa <- rosa |>
mutate(static = stepMean < threshold)
# Summary statistics
summary(rosa$stepMean) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.7326 2.3506 3.5855 11.5894 6.6988 265.2851 4
# Histogram of stepMean values
hist(rosa$stepMean,
breaks = 30,
main = "Histogram of stepMean Values",
xlab = "stepMean",
col = "lightblue",
border = "black")# Boxplot of stepMean values
boxplot(rosa$stepMean,
main = "Boxplot of stepMean Values",
ylab = "stepMean",
col = "lightgreen")threshold = mean = 11.5
2.3 Task 3: Visualize segmented trajectories
Now visualize the segmented trajectory spatially. Just like last week, you can use ggplot with geom_path(), geom_point() and coord_equal(). Assign colour = static within aes() to distinguish between segments with “movement” and without.
# Extract coordinates into columns
rosa_coords <- cbind(rosa, st_coordinates(rosa))
ggplot(rosa_coords) +
geom_path(aes(x = X, y = Y, color = static), size = 1) +
geom_point(aes(x = X, y = Y, color = static), size = 2) +
coord_equal() +
labs(title = "Segmented Trajectory of Rosa",
x = "Easting", y = "Northing",
color = "Static") +
theme_minimal()Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# or
rosa_filter <- rosa |>
filter(!static)
rosa_filter |>
ggplot(aes(E, N)) +
geom_point(data = rosa, col = "red") +
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")2.4 Task 4: Segment-based analysis
In applying Laube and Purves (2011), we’ve come as far as step b) in Figure 16.1. In order to complete the last steps (c and d), we need a unique ID for each segment that we can use as a grouping variable. The following function does just that (it assigns unique IDs based on the column static which you created in Task 2). You will learn about functions next week. For now, just copy the following code chunk into your script and run it.
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}
rosa <- rosa |>
mutate(segment_id = rle_id(static))
# Calculate duration for each segment in minutes
segment_info <- rosa |>
group_by(segment_id) |>
summarise(
start_time = min(DatetimeUTC),
end_time = max(DatetimeUTC),
duration = as.numeric(difftime(end_time, start_time, units = "mins"))
)
# Remove segments with duration < 5 minutes
valid_segments <- segment_info |>
filter(duration >= 5)
# Keep only observations in valid segments
rosa_valid <- rosa |>
filter(segment_id %in% valid_segments$segment_id)
# Extract coordinates for plotting
rosa_coords <- cbind(rosa_valid, st_coordinates(rosa_valid))
# Create the plot with path and point layers
ggplot(rosa_coords) +
geom_path(aes(x = X, y = Y, color = segment_id), size = 1) +
geom_point(aes(x = X, y = Y, color = segment_id), size = 2) +
coord_equal() +
labs(title = "Segmented Trajectory by Segment ID",
x = "Easting", y = "Northing",
color = "Segment ID") +
theme_minimal()3 Exercise B: Similarity
library(“SimilarityMeasures”) added in the first code chunk of Week 4 exercise. SimilarityMeasures is a package that provides a collection of similarity measures for time series data. The package is available on CRAN and can be installed using install.packages(“SimilarityMeasures”). ## Task 1
3.0.1 Read df
ped <- read_delim("Data/pedestrian.csv")Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (3): TrajID, E, N
dttm (1): DatetimeUTC
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ped <- ped |>
st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
mutate(TrajID = as.factor(TrajID))3.0.2 Visualize Trajectories
ggplot(ped, aes(x = E, y = N, colour = TrajID)) +
geom_point() +
geom_path(aes(E,N)) +
facet_wrap(~ TrajID) + # Small multiples by 'group'
labs(title = "Visual comparison of the 6 trajectories",
x = "E", y = "N")3.1 Task 2: Calculate similarity
3.1.1 Split trajcetories into matrices
tra1 <- ped |>
filter(TrajID == 1) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()
tra2 <- ped |>
filter(TrajID == 2) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()
tra3 <- ped |>
filter(TrajID == 3) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()
tra4 <- ped |>
filter(TrajID == 4) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()
tra5 <- ped |>
filter(TrajID == 5) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()
tra6 <- ped |>
filter(TrajID == 6) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()3.1.2 With function
ped_as_fUnction <- function(data, trajnr) {
tra <- data |>
filter(TrajID == trajnr) |>
select(c(E,N)) |>
st_drop_geometry() |>
as.matrix()
}
tra1 <- ped_as_fUnction(ped,1)
tra2 <- ped_as_fUnction(ped,2)
tra3 <- ped_as_fUnction(ped,3)
tra4 <- ped_as_fUnction(ped,4)
tra5 <- ped_as_fUnction(ped,5)
tra6 <- ped_as_fUnction(ped,6)3.2 Comparisons
Similarity Functions
help(package="SimilarityMeasures")3.2.1 DTW
The dynamic time warping algorithm (DTW) calculates the smallest warp path for the two trajectories. This is the DTW version discussed by Berndt and Clifford (1994). Several variations of calculating the warping cost exist. In this version, the warping path is equal to the sum of the distances at each point along the path.
dtw12 <- DTW(tra1, tra2, pointSpacing=-1)
dtw13 <- DTW(tra1, tra3, pointSpacing=-1)
dtw14 <- DTW(tra1, tra4, pointSpacing=-1)
dtw15 <- DTW(tra1, tra5, pointSpacing=-1)
dtw16 <- DTW(tra1, tra6, pointSpacing=-1)
comp_dtw <- c("2", "3", "4", "5", "6")
dtw <- c(dtw12, dtw13, dtw14, dtw15, dtw16)
dtw_comp <- tibble(comp_dtw, dtw)
ggplot(dtw_comp, aes(x = comp_dtw, y = dtw, fill = factor(comp_dtw))) +
geom_col() +
ggtitle("DTW") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)plot_dtw <- ggplot(dtw_comp, aes(x = comp_dtw, y = dtw, fill = factor(comp_dtw))) +
geom_col() +
ggtitle("DTW") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)3.2.2 EditDist
The edit distance algorithm calculates the minimum number of edits required to allow the two trajectories to be considered equivalent. This function uses Edit Distance on Real sequence (EDR) as described by Chen, Özsu, and Oria (2005).
ED12 <- EditDist(tra1, tra2, pointDistance=20)
ED13 <- EditDist(tra1, tra3, pointDistance=20)
ED14 <- EditDist(tra1, tra4, pointDistance=20)
ED15 <- EditDist(tra1, tra5, pointDistance=20)
ED16 <- EditDist(tra1, tra6, pointDistance=20)
comp_ED <- c("2", "3", "4", "5", "6")
ED <- c(ED12, ED13, ED14, ED15, ED16)
ED_comp <- tibble(comp_ED, ED)
ggplot(ED_comp, aes(x = comp_ED, y = ED, fill = factor(comp_ED))) +
geom_col() +
ggtitle("EditDist") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)plot_ED <- ggplot(ED_comp, aes(x = comp_ED, y = ED, fill = factor(comp_ED))) +
geom_col() +
ggtitle("EditDist") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)3.2.3 Frechet
This algorithm calculates the Frechet distance. The Frechet metric (or distance) is generally described in the following way: A man is walking a dog on a leash, the man walks on one curve while the dog walks on the other (Alt and Godau 1995). The dog and the man are able to vary their speeds, or even stop, but not go backwards. The Frechet metric is the minimum leash length required to complete the traversal of both curves.
F12 <- Frechet(tra1, tra2, testLeash=-1)
F13 <- Frechet(tra1, tra3, testLeash=-1)
F14 <- Frechet(tra1, tra4, testLeash=-1)
F15 <- Frechet(tra1, tra5, testLeash=-1)
F16 <- Frechet(tra1, tra6, testLeash=-1)
comp_F <- c("2", "3", "4", "5", "6")
F <- c(F12, F13, F14, F15, F16)
F_comp <- tibble(comp_F, F)
ggplot(F_comp, aes(x = comp_F, y = F, fill = factor(comp_F))) +
geom_col() +
ggtitle("Frechet") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)plot_F <- ggplot(F_comp, aes(x = comp_F, y = F, fill = factor(comp_F))) +
geom_col() +
ggtitle("Frechet") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)3.2.4 LCSS
The LCSS algorithm calculates the largest number of equivalent points between the two trajectories when traversed in a monotone way. Possible translations are calculated in each dimension and used to provide the maximum LCSS. The accuracy of the algorithm can be varied to provide faster or slower calculations (Vlachos, Kollios, and Gunopulos 2002).
LCSS12 <- LCSS(tra1, tra2, pointSpacing=-1, pointDistance=20,
errorMarg=5, returnTrans=FALSE)
LCSS13 <- LCSS(tra1, tra3, pointSpacing=-1, pointDistance=20,
errorMarg=5, returnTrans=FALSE)
LCSS14 <- LCSS(tra1, tra4, pointSpacing=-1, pointDistance=20,
errorMarg=5, returnTrans=FALSE)
LCSS15 <- LCSS(tra1, tra5, pointSpacing=-1, pointDistance=20,
errorMarg=5, returnTrans=FALSE)
LCSS16 <- LCSS(tra1, tra6, pointSpacing=-1, pointDistance=20,
errorMarg=5, returnTrans=FALSE)
comp_LCSS <- c("2", "3", "4", "5", "6")
LCSS <- c(LCSS12, LCSS13, LCSS14, LCSS15, LCSS16)
LCSS_comp <- tibble(comp_LCSS, LCSS)
ggplot(LCSS_comp, aes(x = comp_LCSS, y = LCSS, fill = factor(comp_LCSS))) +
geom_col() +
ggtitle("LCSS") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)plot_LCSS <- ggplot(LCSS_comp, aes(x = comp_LCSS, y = LCSS, fill = factor(comp_LCSS))) +
geom_col() +
ggtitle("LCSS") +
ylab("") +
xlab("Comparison") +
theme_minimal() + # Optional for better aesthetics
theme(
legend.position = "none", # Remove legend
plot.title = element_text(hjust = 0.5) # Center title
)3.3 Visualize results
plot_list <- list(
plot_dtw,
plot_ED,
plot_F,
plot_LCSS
)
combined_plot <- (plot_list[[1]] + plot_list[[2]] +
plot_list[[3]] + plot_list[[4]]) +
plot_layout(ncol = 2) & # Arrange plots in 2 columns
labs(x = "Comparison", y = "Value") # Shared axis labels
combined_plot + plot_annotation(title = "Compute Similarities")