CMA 2025 - exercise week 4

Author

Anna

Preparation

library("sf")
library("sp")
library("readr")
library("ggplot2")
library("dplyr")
library("ggrepel")
library("readr")

Input segmentation

wildschwein <- read_delim("data/wildschwein_BE_2056.csv", ",")

sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )

# step a)
temporal_window <- 60
fixes_in_window <- temporal_window / 15  # 4 fixes

# step b) distance to point
distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}


sabi <- sabi |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )

# mean distance
sabi <- sabi |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()
sabi

# step c) remove static points
sabi <- sabi |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

sabi_filter <- sabi |>
    filter(!static)

# visualization
sabi_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = sabi, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise A: Segmentation

Task 1

Preparation

The following data is from a bike-fieldtrip last week.

# import
velo <- st_read("data/velotour/velo.gpx", layer = "track_points")

# keep only relevant
velo <- velo %>% 
  select(track_seg_point_id, ele, time, geometry) %>% 
  rename(id = track_seg_point_id, altitude = ele)

# transform to LV95
velo <- st_transform(velo, crs = 2056) %>% 
  st_as_sf(velo, coords = c("E", "N"), crs = 2056)

# Extract E (Easting) and N (Northing) from geometry
velo <- velo |> mutate(
  E = st_coordinates(geometry)[,1],  
  N = st_coordinates(geometry)[,2]   
)

# look at map
library(tmap)
tmap_mode("plot")

tm_shape(velo) + 
  tm_dots()

step a) temporal window v

We have a sampling interval of 1 second, the goal is to set a temporal window of 60 seconds, what means that the window includes 60 fixes.

# define function
distance_by_element <- function(later, now) {
  as.numeric(st_distance(later, now, by_element = TRUE))
}

# data sorted by time
velo <- velo |> arrange(time)

step b) measure distance to every point within v

# compute distances for a 60-second interval
velo <- velo |> mutate(
    nMinus30 = distance_by_element(lag(geometry, 30), geometry),  # distance to pos -30 seconds
    nMinus15 = distance_by_element(lag(geometry, 15), geometry),  # distance to pos -15 seconds
    nPlus15  = distance_by_element(geometry, lead(geometry, 15)), # distance to pos +15 seconds
    nPlus30  = distance_by_element(geometry, lead(geometry, 30))  # distance to pos +30 seconds
)

# mean distance
velo <- velo |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus30, nMinus15, nPlus15, nPlus30))
    ) |>
    ungroup()

Task 2

Step c) remove static points

First, I use the mean value as a threshold, like we did before.

# step c) remove static points

velo <- velo |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

velo_filter <- velo |>
    filter(!static)

velo_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = velo, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Now, I use a fixed distance threshold to identify when I stopped or got off the bike. I’ve set the threshold to 100 meters per 60 seconds, which corresponds to a velocity of 6 km/h.

# step c) remove static points
velo <- velo |>
  mutate(
    static = stepMean < 100  
  )

# Filter out static points 
velo_filter <- velo |>
  filter(!static) 

# Visualize data
velo_filter |>
  ggplot(aes(E, N)) +
  geom_point(data = velo, col = "red") + 
  geom_path() +                          
  geom_point() +                          
  coord_fixed() + 
  theme(legend.position = "bottom")

# look at map for comparison
# look at map
library(tmap)
tmap_mode("plot")

tm_shape(velo_filter) +
  tm_dots()

tm_shape(velo) + 
  tm_dots(col = "static", palette = c("red", "green"), title = "Movement Status")

However, we take a look at the summary statistics to find a reasonable threshold value.

We use the mean of all StepMean values.

# look at summary
summary(velo$stepMean)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  1.141  10.377  24.305  52.187  81.607 238.385      60

# histogram
ggplot(velo, aes(x = stepMean)) +
  geom_histogram(binwidth = 5, fill = "lightblue", color = "black", alpha = 0.7) +
  labs(title = "Histogram of stepMean", x = "Step Mean (Euclidean Distance)", y = "Frequency") +
  theme_minimal()

# boxplot
ggplot(velo, aes(y = stepMean)) +
  geom_boxplot(fill = "lightblue", color = "black", alpha = 0.7) +
  labs(title = "Boxplot of stepMean", y = "Step Mean (Euclidean Distance)") +
  theme_minimal()

# taking the mean of StepMean
threshold_d <- mean(velo$stepMean, na.rm = TRUE)

velo <- velo |>
  mutate(
    static = stepMean < threshold_d  
  )

Task 3: Visualization

# take a look at result
ggplot(velo, aes(x = E, y = N, color = factor(static))) +
  geom_point() +
  scale_color_manual(values = c("red", "green")) +
  theme_minimal() +
  labs(title = "Static vs. Moving Points", color = "Static")

velo <- velo |>
  mutate(static = factor(static, levels = c(TRUE, FALSE), labels = c("Static", "Moving")))

# Plot colored by static/moving status
ggplot(velo, aes(x = E, y = N, color = static)) +
  geom_path() +
  geom_point() +
  coord_equal() +
  theme_minimal() +
  labs(
    title = "Segmented Trajectory of Movement (Static vs Moving)",
    x = "Easting (E)",
    y = "Northing (N)",
    color = "Movement Status"
  )

Task 4: Segment-based analysis

# needed function
rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}

# unique ID
velo <- velo |>
    mutate(static = as.character(static),
           segment_id = rle_id(static))

# segment durations
segment_durations <- velo |> 
  group_by(segment_id) |> 
  summarise(start_time = min(time), 
            end_time = max(time),
            duration = as.numeric(difftime(max(time), min(time), units = "mins")))

# remove short segments < 5 min

long_segments <- segment_durations |> filter(duration >= 5)

velo_segments <- velo |> filter(segment_id %in% long_segments$segment_id)

ggplot(velo_segments, aes(x = E, y = N, color = segment_id)) +
  geom_path(linewidth = 1) +
  geom_point(size = 2, alpha = 0.7) +
  coord_equal() +
  labs(title = "Segmented Trajectory (> 5 min)",
       x = "Longitude",
       y = "Latitude",
       color = "Segment ID") +
  theme_minimal()

Exercise B: Similarity

Task 1: Similarity measures

# import data
pedestrian_data <- read_csv("data/pedestrian.csv")
head(pedestrian_data)

# A tibble: 6 × 4
  TrajID        E        N DatetimeUTC        
   <dbl>    <dbl>    <dbl> <dttm>             
1      1 2571414. 1205804. 2015-03-01 12:01:00
2      1 2571396. 1205791. 2015-03-01 12:02:00
3      1 2571373. 1205770. 2015-03-01 12:03:00
4      1 2571347. 1205753. 2015-03-01 12:04:00
5      1 2571336. 1205744. 2015-03-01 12:05:00
6      1 2571321. 1205732. 2015-03-01 12:06:00

# explore data
str(pedestrian_data)

spc_tbl_ [289 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ TrajID     : num [1:289] 1 1 1 1 1 1 1 1 1 1 ...
 $ E          : num [1:289] 2571414 2571396 2571373 2571347 2571336 ...
 $ N          : num [1:289] 1205804 1205791 1205770 1205753 1205744 ...
 $ DatetimeUTC: POSIXct[1:289], format: "2015-03-01 12:01:00" "2015-03-01 12:02:00" ...
 - attr(*, "spec")=
  .. cols(
  ..   TrajID = col_double(),
  ..   E = col_double(),
  ..   N = col_double(),
  ..   DatetimeUTC = col_datetime(format = "")
  .. )
 - attr(*, "problems")=<externalptr>

# separate plots
ggplot(pedestrian_data, aes(x = E, y = N, group = TrajID)) +
  geom_point(aes(color = factor(TrajID)), size = 2) +  
  geom_path(data = pedestrian_data, aes(x = E, y = N), color = "darkgrey", alpha = 0.5) +  
  facet_wrap(~TrajID) +  
  labs(title = "Visual comparison of the 6 trajectories",
       subtitle = "Each subplot highlights a trajectory",
       x = "E", y = "N") +
  theme_minimal() +
  theme(legend.position = "none")

Task 2: Calculate Similarity

The trajectories 1 and 6 seem very similar, while 1 and 4 appear to be most dissimilar.

# packages
library(SimilarityMeasures)
library(ggplot2)
library(dplyr)
library(tidyr)

# Extract individual trajectories as matrices
get_trajectory_matrix <- function(traj_id) {
  pedestrian_data %>%
    filter(TrajID == traj_id) %>%
    select(E, N) %>%
    as.matrix()
}

# Reference trajectory (TrajID = 1)
traj1 <- get_trajectory_matrix(1)

# similarity measures for TrajID 1 against 2-6
similarity_results <- data.frame(
  Comparison = 2:6,
  DTW = sapply(2:6, function(i) DTW(traj1, get_trajectory_matrix(i))),
  EditDist = sapply(2:6, function(i) EditDist(traj1, get_trajectory_matrix(i))),
  Frechet = sapply(2:6, function(i) Frechet(traj1, get_trajectory_matrix(i)))
)

# Transform data for ggplot2 
similarity_long <- similarity_results %>%
  pivot_longer(cols = -Comparison, names_to = "Measure", values_to = "Value")

# Plot similarity measures
ggplot(similarity_long, aes(x = as.factor(Comparison), y = Value, fill = as.factor(Comparison))) +
  geom_bar(stat = "identity") +
  facet_wrap(~Measure, scales = "free") +  
  labs(
    title = "Computed similarities using different measures\nbetween trajectory 1 to all other trajectories",
    x = "Comparison trajectory",
    y = "Value"
  ) +
  theme_minimal() +
  theme(legend.position = "none")  # Remove legend

For a next time… maps were problematic ://

knitr::opts_chunk$set(eval = FALSE)