Segmentation & Similarity

Segmentation

library("readr")

wildschwein <- read_delim("wildschwein_BE_2056.csv", ",")
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library("sf")
Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library("dplyr")

Attache Paket: 'dplyr'

Die folgenden Objekte sind maskiert von 'package:stats':

    filter, lag

Die folgenden Objekte sind maskiert von 'package:base':

    intersect, setdiff, setequal, union
sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )
library(ggplot2)
ggplot(sabi) +
  geom_sf() +
  geom_path(aes(E, N))

Step B

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}

sabi <- sabi |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )
sabi <- sabi |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()

sabi
Simple feature collection with 192 features and 11 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2569724 ymin: 1204916 xmax: 2570927 ymax: 1205957
Projected CRS: CH1903+ / LV95
# A tibble: 192 × 12
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2015-06-30 22:00:13 2569972. 1205366.
 2 002A   Sabi        12275 2015-06-30 22:16:06 2569975. 1205637.
 3 002A   Sabi        12275 2015-06-30 22:30:19 2570266. 1205857.
 4 002A   Sabi        12275 2015-06-30 22:45:13 2570208. 1205913.
 5 002A   Sabi        12275 2015-06-30 23:00:10 2570247. 1205731.
 6 002A   Sabi        12275 2015-06-30 23:15:17 2570512. 1205279.
 7 002A   Sabi        12275 2015-06-30 23:30:38 2570684. 1205103.
 8 002A   Sabi        12275 2015-06-30 23:45:16 2570526. 1205051.
 9 002A   Sabi        12275 2015-07-01 00:00:10 2570532. 1205044.
10 002A   Sabi        12275 2015-07-01 00:15:14 2570530. 1205059.
# ℹ 182 more rows
# ℹ 6 more variables: geometry <POINT [m]>, nMinus2 <dbl>, nMinus1 <dbl>,
#   nPlus1 <dbl>, nPlus2 <dbl>, stepMean <dbl>

Step C

sabi <- sabi |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

sabi_filter <- sabi |>
    filter(!static)

sabi_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = sabi, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise A

Task 1: Calculate distances

Temporal window v of 90 min

  1. pos[n-3] to pos[n]

  2. pos[n-2] to pos[n]

  3. pos[n-1] to pos[n]

  4. pos[n] to pos[n+1]

  5. pos[n] to pos[n+2]

  6. pos[n] to pos[n+3]

library("readr")
library("sf")
library("dplyr")

wildschwein <- read_delim("wildschwein_BE_2056.csv", ",")
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ruth <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Ruth", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}

ruth <- ruth |>
    mutate(
        nMinus3 = distance_by_element(lag(geometry, 3), geometry),  # distance to pos -45 minutes
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2)),  # distance to pos +30 minutes
        nPlus3  = distance_by_element(geometry, lead(geometry, 3)), # distance to pos +45 mintues
    )

Task 2: Specify and apply threshold d

summary(ruth)
    TierID            TierName            CollarID    
 Length:180         Length:180         Min.   :13974  
 Class :character   Class :character   1st Qu.:13974  
 Mode  :character   Mode  :character   Median :13974  
                                       Mean   :13974  
                                       3rd Qu.:13974  
                                       Max.   :13974  
                                                      
  DatetimeUTC                          E                 N          
 Min.   :2015-06-30 22:01:14.0   Min.   :2568162   Min.   :1205617  
 1st Qu.:2015-07-01 09:27:08.5   1st Qu.:2568471   1st Qu.:1205917  
 Median :2015-07-01 22:23:19.0   Median :2568877   Median :1206042  
 Mean   :2015-07-01 22:03:18.7   Mean   :2568801   Mean   :1206102  
 3rd Qu.:2015-07-02 10:35:05.5   3rd Qu.:2568919   3rd Qu.:1206090  
 Max.   :2015-07-02 21:46:01.0   Max.   :2570123   Max.   :1207095  
                                                                    
          geometry      nMinus3             nMinus2         
 POINT        :180   Min.   :   0.7328   Min.   :   0.3887  
 epsg:2056    :  0   1st Qu.:   5.3496   1st Qu.:   4.7802  
 +proj=some...:  0   Median :  21.6565   Median :  15.6777  
                     Mean   : 146.1967   Mean   : 108.7790  
                     3rd Qu.: 137.1378   3rd Qu.:  71.0626  
                     Max.   :1370.0820   Max.   :1339.5191  
                     NA's   :3           NA's   :2          
    nMinus1              nPlus1              nPlus2         
 Min.   :   0.1296   Min.   :   0.1296   Min.   :   0.3887  
 1st Qu.:   4.4568   1st Qu.:   4.4568   1st Qu.:   4.7802  
 Median :  11.6751   Median :  11.6751   Median :  15.6777  
 Mean   :  64.5200   Mean   :  64.5200   Mean   : 108.7790  
 3rd Qu.:  48.0356   3rd Qu.:  48.0356   3rd Qu.:  71.0626  
 Max.   :1155.2635   Max.   :1155.2635   Max.   :1339.5191  
 NA's   :1           NA's   :1           NA's   :2          
     nPlus3         
 Min.   :   0.7328  
 1st Qu.:   5.3496  
 Median :  21.6565  
 Mean   : 146.1967  
 3rd Qu.: 137.1378  
 Max.   :1370.0820  
 NA's   :3          
barplot(ruth$nMinus3)

ruth <- ruth |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3))
    ) |>
    ungroup()

ruth <- ruth |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

Task 3: Visualize segmented trajectories

ggplot(ruth, aes(E, N, colour = static)) +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Task 4: Segment-based analysis

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}

ruth <- ruth |>
    mutate(segment_id = rle_id(static))

ruth_filter <- ruth |>
    filter(!static)

ggplot(ruth_filter, aes(E, N, colour = segment_id)) +
    geom_path() +
    geom_point() +
    coord_fixed()

ruth_filter_clean <- ruth_filter |> 
  group_by(segment_id) |> 
  mutate(
    duration = as.numeric(difftime(max(DatetimeUTC), min(DatetimeUTC), units = "mins"))
  ) |> 
  filter(duration > 20) |> 
  ungroup()

ggplot(ruth_filter_clean, aes(E, N, colour = segment_id)) +
    geom_path() +
    geom_point() +
    coord_fixed()

Exercise B

Task 1: Similarity measures

library(ggplot2)
library(dplyr)
library(readr)

fussgaenger <- read_csv("pedestrian.csv")
Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(fussgaenger, aes(x = E, y = N)) +
  geom_point(color = "grey") +
  geom_path(aes(color = factor(TrajID))) +
  facet_wrap(~ TrajID) +
  labs(title = "Visual comparison of the 6 trajectories", 
       x = "E", y = "N", color = "TrajID") +
  theme_minimal()

Task 2: Calculate similarity

DTW

library(SimilarityMeasures)

traj1 = as.matrix(subset(fussgaenger, TrajID == 1)[, c("E", "N")])
traj2 = as.matrix(subset(fussgaenger, TrajID == 2)[, c("E", "N")])
traj3 = as.matrix(subset(fussgaenger, TrajID == 3)[, c("E", "N")])
traj4 = as.matrix(subset(fussgaenger, TrajID == 4)[, c("E", "N")])
traj5 = as.matrix(subset(fussgaenger, TrajID == 5)[, c("E", "N")])
traj6 = as.matrix(subset(fussgaenger, TrajID == 6)[, c("E", "N")])

DTW2=DTW(traj1, traj2, pointSpacing = 2)
DTW3=DTW(traj1, traj3, pointSpacing = 2)
DTW4=DTW(traj1, traj4, pointSpacing = 2)
DTW5=DTW(traj1, traj5, pointSpacing = -1)
DTW6=DTW(traj1, traj6, pointSpacing = 2)

dtw_results = data.frame(
  TrajID = factor(c(2,3,4,5,6)),
  DTW = c(DTW2,DTW3,DTW4,DTW5,DTW6)
)

ggplot(dtw_results, aes(x = TrajID, y = DTW, fill=TrajID)) +
  geom_bar(stat = "identity")

LCSS: longest common subsequence

LCSS2=LCSS(traj1, traj2, pointSpacing=2, pointDistance=10)
LCSS3=LCSS(traj1, traj3, pointSpacing=2, pointDistance=10)
LCSS4=LCSS(traj1, traj4, pointSpacing=2, pointDistance=10)
LCSS5=LCSS(traj1, traj5, pointSpacing=2, pointDistance=10)
LCSS6=LCSS(traj1, traj6, pointSpacing=2, pointDistance=10)

lcss_results = data.frame(
  TrajID = factor(c(2,3,4,5,6)),
  LCSS = c(LCSS2,LCSS3,LCSS4,LCSS5,LCSS6)
)

ggplot(lcss_results, aes(x = TrajID, y = LCSS, fill=TrajID)) +
  geom_bar(stat = "identity")

Frechet distance

Fr2=Frechet(traj1, traj2, testLeash=-1)
Fr3=Frechet(traj1, traj3, testLeash=-1)
Fr4=Frechet(traj1, traj4, testLeash=-1)
Fr5=Frechet(traj1, traj5, testLeash=-1)
Fr6=Frechet(traj1, traj6, testLeash=-1)

frechet_results = data.frame(
  TrajID = factor(c(2,3,4,5,6)),
  Frechet = c(Fr2,Fr3,Fr4,Fr5,Fr6)
)

ggplot(frechet_results, aes(x = TrajID, y = Frechet, fill=TrajID)) +
  geom_bar(stat = "identity")

Edit Dist

ED2=EditDist(traj1, traj2, pointDistance=10)
ED3=EditDist(traj1, traj3, pointDistance=10)
ED4=EditDist(traj1, traj4, pointDistance=10)
ED5=EditDist(traj1, traj5, pointDistance=10)
ED6=EditDist(traj1, traj6, pointDistance=10)

ED_results = data.frame(
  TrajID = factor(c(2,3,4,5,6)),
  ED = c(ED2,ED3,ED4,ED5,ED6)
)

ggplot(ED_results, aes(x = TrajID, y = ED, fill=TrajID)) +
  geom_bar(stat = "identity")