library("sf")
library("sp")
library("readr")
library("ggplot2")
library("dplyr")
library("ggrepel")
library("readr")CMA 2025 - exercise week 4
Preparation
Input segmentation
wildschwein <- read_delim("data/wildschwein_BE_2056.csv", ",")
sabi <- wildschwein |>
st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
filter(
TierName == "Sabi",
DatetimeUTC >= "2015-07-01",
DatetimeUTC < "2015-07-03"
)# step a)
temporal_window <- 60
fixes_in_window <- temporal_window / 15 # 4 fixes
# step b) distance to point
distance_by_element <- function(later, now) {
as.numeric(
st_distance(later, now, by_element = TRUE)
)
}
sabi <- sabi |>
mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry), # distance to pos -30 minutes
nMinus1 = distance_by_element(lag(geometry, 1), geometry), # distance to pos -15 minutes
nPlus1 = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
nPlus2 = distance_by_element(geometry, lead(geometry, 2)) # distance to pos +30 minutes
)
# mean distance
sabi <- sabi |>
rowwise() |>
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
) |>
ungroup()
sabi
# step c) remove static points
sabi <- sabi |>
mutate(static = stepMean < mean(stepMean, na.rm = TRUE))
sabi_filter <- sabi |>
filter(!static)
# visualization
sabi_filter |>
ggplot(aes(E, N)) +
geom_point(data = sabi, col = "red") +
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")Exercise A: Segmentation
Task 1
Preparation
The following data is from a bike-fieldtrip last week.
# import
velo <- st_read("data/velotour/velo.gpx", layer = "track_points")
# keep only relevant
velo <- velo %>%
select(track_seg_point_id, ele, time, geometry) %>%
rename(id = track_seg_point_id, altitude = ele)
# transform to LV95
velo <- st_transform(velo, crs = 2056) %>%
st_as_sf(velo, coords = c("E", "N"), crs = 2056)
# Extract E (Easting) and N (Northing) from geometry
velo <- velo |> mutate(
E = st_coordinates(geometry)[,1],
N = st_coordinates(geometry)[,2]
)# look at map
library(tmap)
tmap_mode("plot")
tm_shape(velo) +
tm_dots()step a) temporal window v
We have a sampling interval of 1 second, the goal is to set a temporal window of 60 seconds, what means that the window includes 60 fixes.
# define function
distance_by_element <- function(later, now) {
as.numeric(st_distance(later, now, by_element = TRUE))
}
# data sorted by time
velo <- velo |> arrange(time)step b) measure distance to every point within v
# compute distances for a 60-second interval
velo <- velo |> mutate(
nMinus30 = distance_by_element(lag(geometry, 30), geometry), # distance to pos -30 seconds
nMinus15 = distance_by_element(lag(geometry, 15), geometry), # distance to pos -15 seconds
nPlus15 = distance_by_element(geometry, lead(geometry, 15)), # distance to pos +15 seconds
nPlus30 = distance_by_element(geometry, lead(geometry, 30)) # distance to pos +30 seconds
)
# mean distance
velo <- velo |>
rowwise() |>
mutate(
stepMean = mean(c(nMinus30, nMinus15, nPlus15, nPlus30))
) |>
ungroup()Task 2
Step c) remove static points
First, I use the mean value as a threshold, like we did before.
# step c) remove static points
velo <- velo |>
mutate(static = stepMean < mean(stepMean, na.rm = TRUE))
velo_filter <- velo |>
filter(!static)
velo_filter |>
ggplot(aes(E, N)) +
geom_point(data = velo, col = "red") +
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")Now, I use a fixed distance threshold to identify when I stopped or got off the bike. I’ve set the threshold to 100 meters per 60 seconds, which corresponds to a velocity of 6 km/h.
# step c) remove static points
velo <- velo |>
mutate(
static = stepMean < 100
)
# Filter out static points
velo_filter <- velo |>
filter(!static)
# Visualize data
velo_filter |>
ggplot(aes(E, N)) +
geom_point(data = velo, col = "red") +
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")# look at map for comparison
# look at map
library(tmap)
tmap_mode("plot")
tm_shape(velo_filter) +
tm_dots()tm_shape(velo) +
tm_dots(col = "static", palette = c("red", "green"), title = "Movement Status") However, we take a look at the summary statistics to find a reasonable threshold value.
We use the mean of all StepMean values.
# look at summary
summary(velo$stepMean) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.141 10.377 24.305 52.187 81.607 238.385 60
# histogram
ggplot(velo, aes(x = stepMean)) +
geom_histogram(binwidth = 5, fill = "lightblue", color = "black", alpha = 0.7) +
labs(title = "Histogram of stepMean", x = "Step Mean (Euclidean Distance)", y = "Frequency") +
theme_minimal()# boxplot
ggplot(velo, aes(y = stepMean)) +
geom_boxplot(fill = "lightblue", color = "black", alpha = 0.7) +
labs(title = "Boxplot of stepMean", y = "Step Mean (Euclidean Distance)") +
theme_minimal()# taking the mean of StepMean
threshold_d <- mean(velo$stepMean, na.rm = TRUE)
velo <- velo |>
mutate(
static = stepMean < threshold_d
)Task 3: Visualization
# take a look at result
ggplot(velo, aes(x = E, y = N, color = factor(static))) +
geom_point() +
scale_color_manual(values = c("red", "green")) +
theme_minimal() +
labs(title = "Static vs. Moving Points", color = "Static")velo <- velo |>
mutate(static = factor(static, levels = c(TRUE, FALSE), labels = c("Static", "Moving")))
# Plot colored by static/moving status
ggplot(velo, aes(x = E, y = N, color = static)) +
geom_path() +
geom_point() +
coord_equal() +
theme_minimal() +
labs(
title = "Segmented Trajectory of Movement (Static vs Moving)",
x = "Easting (E)",
y = "Northing (N)",
color = "Movement Status"
)Task 4: Segment-based analysis
# needed function
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}
# unique ID
velo <- velo |>
mutate(static = as.character(static),
segment_id = rle_id(static))
# segment durations
segment_durations <- velo |>
group_by(segment_id) |>
summarise(start_time = min(time),
end_time = max(time),
duration = as.numeric(difftime(max(time), min(time), units = "mins")))
# remove short segments < 5 min
long_segments <- segment_durations |> filter(duration >= 5)
velo_segments <- velo |> filter(segment_id %in% long_segments$segment_id)
ggplot(velo_segments, aes(x = E, y = N, color = segment_id)) +
geom_path(linewidth = 1) +
geom_point(size = 2, alpha = 0.7) +
coord_equal() +
labs(title = "Segmented Trajectory (> 5 min)",
x = "Longitude",
y = "Latitude",
color = "Segment ID") +
theme_minimal()Exercise B: Similarity
Task 1: Similarity measures
# import data
pedestrian_data <- read_csv("data/pedestrian.csv")
head(pedestrian_data)# A tibble: 6 × 4
TrajID E N DatetimeUTC
<dbl> <dbl> <dbl> <dttm>
1 1 2571414. 1205804. 2015-03-01 12:01:00
2 1 2571396. 1205791. 2015-03-01 12:02:00
3 1 2571373. 1205770. 2015-03-01 12:03:00
4 1 2571347. 1205753. 2015-03-01 12:04:00
5 1 2571336. 1205744. 2015-03-01 12:05:00
6 1 2571321. 1205732. 2015-03-01 12:06:00
# explore data
str(pedestrian_data)spc_tbl_ [289 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ TrajID : num [1:289] 1 1 1 1 1 1 1 1 1 1 ...
$ E : num [1:289] 2571414 2571396 2571373 2571347 2571336 ...
$ N : num [1:289] 1205804 1205791 1205770 1205753 1205744 ...
$ DatetimeUTC: POSIXct[1:289], format: "2015-03-01 12:01:00" "2015-03-01 12:02:00" ...
- attr(*, "spec")=
.. cols(
.. TrajID = col_double(),
.. E = col_double(),
.. N = col_double(),
.. DatetimeUTC = col_datetime(format = "")
.. )
- attr(*, "problems")=<externalptr>
# separate plots
ggplot(pedestrian_data, aes(x = E, y = N, group = TrajID)) +
geom_point(aes(color = factor(TrajID)), size = 2) +
geom_path(data = pedestrian_data, aes(x = E, y = N), color = "darkgrey", alpha = 0.5) +
facet_wrap(~TrajID) +
labs(title = "Visual comparison of the 6 trajectories",
subtitle = "Each subplot highlights a trajectory",
x = "E", y = "N") +
theme_minimal() +
theme(legend.position = "none") Task 2: Calculate Similarity
The trajectories 1 and 6 seem very similar, while 1 and 4 appear to be most dissimilar.
# packages
library(SimilarityMeasures)
library(ggplot2)
library(dplyr)
library(tidyr)
# Extract individual trajectories as matrices
get_trajectory_matrix <- function(traj_id) {
pedestrian_data %>%
filter(TrajID == traj_id) %>%
select(E, N) %>%
as.matrix()
}
# Reference trajectory (TrajID = 1)
traj1 <- get_trajectory_matrix(1)
# similarity measures for TrajID 1 against 2-6
similarity_results <- data.frame(
Comparison = 2:6,
DTW = sapply(2:6, function(i) DTW(traj1, get_trajectory_matrix(i))),
EditDist = sapply(2:6, function(i) EditDist(traj1, get_trajectory_matrix(i))),
Frechet = sapply(2:6, function(i) Frechet(traj1, get_trajectory_matrix(i)))
)
# Transform data for ggplot2
similarity_long <- similarity_results %>%
pivot_longer(cols = -Comparison, names_to = "Measure", values_to = "Value")
# Plot similarity measures
ggplot(similarity_long, aes(x = as.factor(Comparison), y = Value, fill = as.factor(Comparison))) +
geom_bar(stat = "identity") +
facet_wrap(~Measure, scales = "free") +
labs(
title = "Computed similarities using different measures\nbetween trajectory 1 to all other trajectories",
x = "Comparison trajectory",
y = "Value"
) +
theme_minimal() +
theme(legend.position = "none") # Remove legendFor a next time… maps were problematic ://
knitr::opts_chunk$set(eval = FALSE)