E4

library("readr")
library("sf")
Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library("dplyr")

Attache Paket: 'dplyr'
Die folgenden Objekte sind maskiert von 'package:stats':

    filter, lag
Die folgenden Objekte sind maskiert von 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(SimilarityMeasures)
library(tidyr)

Input: Segmentation

wildschwein <- read_delim("data/wildschwein_BE_2056.csv", ",")
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Careful! What Timezone is assumed?
sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )
ggplot(sabi) +
  geom_sf()+
  geom_path(aes(E, N))

Step a) & b): Specify a temporal window v

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}


sabi <- sabi |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )

sabi <- sabi |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()

sabi
Simple feature collection with 192 features and 11 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2569724 ymin: 1204916 xmax: 2570927 ymax: 1205957
Projected CRS: CH1903+ / LV95
# A tibble: 192 × 12
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2015-06-30 22:00:13 2569972. 1205366.
 2 002A   Sabi        12275 2015-06-30 22:16:06 2569975. 1205637.
 3 002A   Sabi        12275 2015-06-30 22:30:19 2570266. 1205857.
 4 002A   Sabi        12275 2015-06-30 22:45:13 2570208. 1205913.
 5 002A   Sabi        12275 2015-06-30 23:00:10 2570247. 1205731.
 6 002A   Sabi        12275 2015-06-30 23:15:17 2570512. 1205279.
 7 002A   Sabi        12275 2015-06-30 23:30:38 2570684. 1205103.
 8 002A   Sabi        12275 2015-06-30 23:45:16 2570526. 1205051.
 9 002A   Sabi        12275 2015-07-01 00:00:10 2570532. 1205044.
10 002A   Sabi        12275 2015-07-01 00:15:14 2570530. 1205059.
# ℹ 182 more rows
# ℹ 6 more variables: geometry <POINT [m]>, nMinus2 <dbl>, nMinus1 <dbl>,
#   nPlus1 <dbl>, nPlus2 <dbl>, stepMean <dbl>

Step c): Remove “static points”

sabi <- sabi |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

sabi_filter <- sabi |>
    filter(!static)

sabi_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = sabi, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise A: Segmentation

Task 1

# Careful! What Timezone is assumed?
rosa <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Rosa", 
      DatetimeUTC >= "2014-11-07",
      DatetimeUTC < "2014-11-09"
      )

# Übersicht
ggplot(rosa) +
  geom_sf()+
  geom_path(aes(E, N))

max(rosa$DatetimeUTC)
[1] "2014-11-08 22:45:09 UTC"
min(rosa$DatetimeUTC)
[1] "2014-11-07 07:45:44 UTC"
distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}


rosa <- rosa |>
    mutate(
        nMinus60 = distance_by_element(lag(geometry, 4), geometry),  # distance to pos -60 minutes
        nMinus45 = distance_by_element(lag(geometry, 3), geometry),  # distance to pos -45 minutes
        nMinus30 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus15 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -60 minutes
        nPlus15  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus30  = distance_by_element(geometry, lead(geometry, 2)),  # distance to pos +30 minutes
        nPlus45  = distance_by_element(geometry, lead(geometry, 3)),  # distance to pos +45 minutes
        nPlus60  = distance_by_element(geometry, lead(geometry, 4)),  # distance to pos +45 minutes
    )

rosa <- rosa |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus60, nMinus45, nMinus30, nMinus15, nPlus15, nPlus30,  nPlus45,  nPlus60))
    ) |>
    ungroup()

Task 2

hist(rosa$stepMean, breaks = 12) # 50 schein sinnvoll

# Mittlere Distanz zwischen den Zeitschritten (15 min-Intervall), darf über ein Zeitfenster von 1h nicht kleiner 50(?) sein.
rosa <- rosa |>
    mutate(static = stepMean < 50)

rosa_filter <- rosa |>
    filter(!static)

Task 3

ggplot(rosa, aes(E, N)) +
    geom_point(data = rosa, aes(col = static)) +
    geom_path() +
    coord_equal() +
    theme(legend.position = "bottom")+
  theme_minimal()

Task 4

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}


rosa_summary <- rosa |>
  filter(!is.na(static)) |> 
  mutate(segment_id = rle_id(static)) 

rosa_summary |> 
  filter(static == FALSE) |> 
ggplot(aes(E, N)) +
    geom_point(aes(col = segment_id)) +
    geom_path() +
    coord_equal() +
    theme(legend.position = "bottom")+
  theme_minimal()

min_duration_thr <- 300

rosa_summary |> 
  group_by(
    segment_id) |> 
  summarise(
    max_dur = max(DatetimeUTC),
    min_dur = min(DatetimeUTC),
    duration = max_dur - min_dur
  ) |> 
  filter(
    duration > min_duration_thr
  )
Simple feature collection with 4 features and 4 fields
Geometry type: MULTIPOINT
Dimension:     XY
Bounding box:  xmin: 2569889 ymin: 1204645 xmax: 2570395 ymax: 1205037
Projected CRS: CH1903+ / LV95
# A tibble: 4 × 5
  segment_id max_dur             min_dur             duration  
* <fct>      <dttm>              <dttm>              <drtn>    
1 2          2014-11-07 10:45:13 2014-11-07 09:00:11  6302 secs
2 3          2014-11-07 23:30:30 2014-11-07 11:00:09 45021 secs
3 4          2014-11-08 01:15:33 2014-11-07 23:45:48  5385 secs
4 5          2014-11-08 21:45:15 2014-11-08 01:30:18 72897 secs
# ℹ 1 more variable: geometry <MULTIPOINT [m]>

Exercise B

Task 1

# Import
ped <- read_delim("data/pedestrian.csv") |> 
  mutate(
    TrajID = as.factor(TrajID),
    E = as.numeric(E),
    N = as.numeric(N)
  )
Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Plot
ggplot(ped, aes(E, N, colour = TrajID)) +
  facet_wrap(~TrajID) +
  geom_point() +
  geom_line() +
  coord_equal() +
  theme(
    legend.position = "none"
  )

Task 2

Trajectory pairs T1_T2, T1_T3, T1_T6 seem to be similar.

High similarity of T1-T3 is shown in DTW and in Frechet metric.

Similarity of T1-T6 is visible in the LCSS metric

The intuitive similarity of T1-T2 is not shown in any metric.

ped_sel <- ped |> 
  select(E, N)

# Jede Trajektorie in eine eigene Matrix umwandeln
trajectories <- split(ped_sel, ped$TrajID)
traj_names <- c("t1", "t2", "t3", "t4", "t5", "t6")
names(trajectories) <- traj_names



traj1 <- as.matrix(trajectories[["t1"]])
traj2 <- as.matrix(trajectories[["t2"]])
traj3 <- as.matrix(trajectories[["t3"]])
traj4 <- as.matrix(trajectories[["t4"]])
traj5 <- as.matrix(trajectories[["t5"]])
traj6 <- as.matrix(trajectories[["t6"]])

DTW12 <- DTW(traj1, traj2)
DTW13 <- DTW(traj1, traj3)
DTW14 <- DTW(traj1, traj4)
DTW15 <- DTW(traj1, traj5)
DTW16 <- DTW(traj1, traj6)

EditDist12 <- EditDist(traj1, traj2)
EditDist13 <- EditDist(traj1, traj3)
EditDist14 <- EditDist(traj1, traj4)
EditDist15 <- EditDist(traj1, traj5)
EditDist16 <- EditDist(traj1, traj6)

Frechet12 <- Frechet(traj1, traj2)
Frechet13 <- Frechet(traj1, traj3)
Frechet14 <- Frechet(traj1, traj4)
Frechet15 <- Frechet(traj1, traj5)
Frechet16 <- Frechet(traj1, traj6)

LCSS12 <- LCSS(traj1, traj2, pointSpacing = 1, pointDistance = 40, errorMarg = 0.4)
LCSS13 <- LCSS(traj1, traj3, pointSpacing = 1, pointDistance = 40, errorMarg = 0.4)
LCSS14 <- LCSS(traj1, traj4, pointSpacing = 1, pointDistance = 40, errorMarg = 0.4)
LCSS15 <- LCSS(traj1, traj5, pointSpacing = 1, pointDistance = 40, errorMarg = 0.4)
LCSS16 <- LCSS(traj1, traj6, pointSpacing = 1, pointDistance = 40, errorMarg = 0.4)

results <- data.frame(
  Pair = c("t1-t2", "t1-t3", "t1-t4", "t1-t5", "t1-t6"),
  DTW = c(DTW12, DTW13, DTW14, DTW15, DTW16),
  EditDist = c(EditDist12, EditDist13, EditDist14, EditDist15, EditDist16),
  Frechet = c(Frechet12, Frechet13, Frechet14, Frechet15, Frechet16),
  LCSS = c(LCSS12, LCSS13, LCSS14, LCSS15, LCSS16)
) |>  
  pivot_longer(cols = c(DTW, EditDist, Frechet, LCSS),
               names_to = "Metric",
               values_to = "Value")
  

ggplot(results, aes(Pair, Value)) +
  geom_bar(stat = "identity") + 
  facet_wrap(~Metric, scales = "free_y")