Segmentation

Import library

Show/Hide Code
library("readr")
library("sf")
Warning: package 'sf' was built under R version 4.3.3
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
Show/Hide Code
library("dplyr")

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
Show/Hide Code
library ("ggplot2")
library("SimilarityMeasures")
library("tibble")
library("knitr")

import data and filter animal #sabi

Show/Hide Code
wildschwein <- read_delim("wildschwein_BE_2056.csv")
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show/Hide Code
wildschwein
# A tibble: 51,246 × 6
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2014-08-22 21:00:12 2570409. 1204752.
 2 002A   Sabi        12275 2014-08-22 21:15:16 2570402. 1204863.
 3 002A   Sabi        12275 2014-08-22 21:30:43 2570394. 1204826.
 4 002A   Sabi        12275 2014-08-22 21:46:07 2570379. 1204817.
 5 002A   Sabi        12275 2014-08-22 22:00:22 2570390. 1204818.
 6 002A   Sabi        12275 2014-08-22 22:15:10 2570390. 1204825.
 7 002A   Sabi        12275 2014-08-22 22:30:13 2570387. 1204831.
 8 002A   Sabi        12275 2014-08-22 22:45:11 2570381. 1204840.
 9 002A   Sabi        12275 2014-08-22 23:00:27 2570316. 1204935.
10 002A   Sabi        12275 2014-08-22 23:15:41 2570393. 1204815.
# ℹ 51,236 more rows
Show/Hide Code
# Careful! What Timezone is assumed? #

sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )

ggplot(sabi) +
  geom_sf()+
  geom_path(aes(E,N)) #connects the dots, specify names of columns. 

##Step a) : specify a temporal window v Consistency accross dataset is important -> here every 15 minutes. To pack into a 60 minutes windows: pos[n-2] to pos[n] pos[n-1] to pos[n] pos[n] to pos[n+1] pos[n] to pos[n+2]

##Step b) : Measure the distance to every point within v

Show/Hide Code
distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}

sabi <- sabi |> # calculates distance b and saves it into dataset
  mutate(
    nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
    nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
    nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues, use of lead to offset in the other direction.
    nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )


sabi <- sabi |>
  rowwise()|>
  mutate (
    stepMean = mean(c(nMinus2,nMinus1,nPlus1,nPlus2))
) |> 
  ungroup()

sabi <- sabi |>
  mutate(
    static = stepMean < mean(stepMean, na.rm = TRUE) # everything above mean of stepMean is moving point, everything under mean is stopping point
  )

sabi_filter <- sabi |> 
  filter(!static)

sabi_filter |>
ggplot(aes(E,N)) + 
  geom_point(data = sabi, col = "red") + # red points are the removed points. Black should be the sabi_filter one
  geom_path () + 
  geom_point()

Show/Hide Code
  coord_fixed()
<ggproto object: Class CoordFixed, CoordCartesian, Coord, gg>
    aspect: function
    backtransform_range: function
    clip: on
    default: FALSE
    distance: function
    expand: TRUE
    is_free: function
    is_linear: function
    labels: function
    limits: list
    modify_scales: function
    range: function
    ratio: 1
    render_axis_h: function
    render_axis_v: function
    render_bg: function
    render_fg: function
    setup_data: function
    setup_layout: function
    setup_panel_guides: function
    setup_panel_params: function
    setup_params: function
    train_panel_guides: function
    transform: function
    super:  <ggproto object: Class CoordFixed, CoordCartesian, Coord, gg>
Show/Hide Code
#nice way to quickly clean up data.

1 Exercice A Segmentation

1.1 Task 1 : Calculate distances

Step a) Temporal window -> every two hours

pos[n-4] to pos[n] pos[n-3] to pos[n] pos[n-2] to pos[n] pos[n-1] to pos[n] pos[n] to pos[n+1] pos[n] to pos[n+2] pos[n] to pos[n+3] pos[n] to pos[n+4]

Step b) Measure distance to every point

1.1.1 Ruth

Show/Hide Code
# select ruth's data
ruth <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Ruth", 
      DatetimeUTC >= "2014-11-07", 
      DatetimeUTC < "2014-11-09"
      )

#plot
ggplot(ruth) +
  geom_sf()+
  geom_path(aes(E,N)) #connects the dots, specify names of columns. 

Show/Hide Code
# calculates distance
ruth <- ruth |> # calculates distance b and saves it into dataset
  mutate(
    nMinus8 = distance_by_element(lag(geometry, 8), geometry),
    nMinus7 = distance_by_element(lag(geometry, 7), geometry),
    nMinus6 = distance_by_element(lag(geometry, 6), geometry),
    nMinus5 = distance_by_element(lag(geometry, 5), geometry), 
    nMinus4 = distance_by_element(lag(geometry, 4), geometry),
    nMinus3 = distance_by_element(lag(geometry, 3), geometry),
    nMinus2 = distance_by_element(lag(geometry, 2), geometry),
    nMinus1 = distance_by_element(lag(geometry, 1), geometry), 
    nPlus1  = distance_by_element(geometry, lead(geometry, 1)), 
    nPlus2  = distance_by_element(geometry, lead(geometry, 2)),
    nPlus3  = distance_by_element(geometry, lead(geometry, 3)),
    nPlus4  = distance_by_element(geometry, lead(geometry, 4)),
    nPlus5  = distance_by_element(geometry, lead(geometry, 5)), 
    nPlus6  = distance_by_element(geometry, lead(geometry, 6)),
    nPlus7  = distance_by_element(geometry, lead(geometry, 7)),
    nPlus8  = distance_by_element(geometry, lead(geometry, 8))
    )

1.2 Task 2: Specify and apply threshold d

Threshold = decision point between moving and stopping data.

Show/Hide Code
# Mean of distance is threshold

ruth <- ruth|>
  rowwise()|>
  mutate (
    stepMean = mean(c(nMinus8,nMinus7,nMinus6,nMinus5,nMinus3,nMinus4,nMinus2,nMinus1,nPlus1,nPlus2,nPlus3,nPlus4,nPlus5,nPlus6,nPlus7,nPlus8))
) |> 
  ungroup()


ruth <- ruth|>
  mutate(
    static = stepMean < mean (stepMean, na.rm = TRUE)
  )
    #store information 

1.3 Task 3: visualized segemented trajectories

1.4 Task 4: Segment-based Analysis

Show/Hide Code
rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}

ruth <- ruth |>
    mutate(segment_id = rle_id(static))

ruth_filter <- ruth |> 
  filter(!static)

ruth_filter |>
ggplot(aes(E,N)) + 
  geom_path (data = ruth, aes(E,N), col = "blue") +
  geom_point(data = ruth, col = "blue") + # red points are the removed points. Black should be the sabi_filter one 
  geom_point() +
  geom_path(data = ruth_filter, aes(E,N, col = segment_id)) +
  coord_fixed()

Show/Hide Code
#red points are probably the first and last points (beginning + not really moving)

#Todo : calculate the duration of each segment to decide if keep in of not segment 11. #NOTIZEN - Task 4: Segment-based Analysis

Show/Hide Code
ruth_filter
Simple feature collection with 20 features and 25 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2568218 ymin: 1205674 xmax: 2570097 ymax: 1206189
Projected CRS: CH1903+ / LV95
# A tibble: 20 × 26
   TierID TierName CollarID DatetimeUTC                E        N
 * <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 018A   Ruth        13974 2014-11-07 20:00:12 2569741. 1206132.
 2 018A   Ruth        13974 2014-11-07 20:15:09 2570071. 1206144.
 3 018A   Ruth        13974 2014-11-07 20:45:15 2570078. 1206075.
 4 018A   Ruth        13974 2014-11-07 21:00:44 2570067. 1206120.
 5 018A   Ruth        13974 2014-11-07 21:15:10 2570077. 1206139.
 6 018A   Ruth        13974 2014-11-07 21:30:14 2570097. 1206189.
 7 018A   Ruth        13974 2014-11-07 21:45:08 2569843. 1205935.
 8 018A   Ruth        13974 2014-11-07 22:00:13 2569470. 1206046.
 9 018A   Ruth        13974 2014-11-07 22:15:08 2568997. 1206018.
10 018A   Ruth        13974 2014-11-07 22:30:14 2568617. 1206021.
11 018A   Ruth        13974 2014-11-07 22:45:10 2568492. 1205940.
12 018A   Ruth        13974 2014-11-07 23:00:39 2568347. 1205857.
13 018A   Ruth        13974 2014-11-07 23:15:17 2568350. 1205859.
14 018A   Ruth        13974 2014-11-07 23:30:43 2568318. 1205752.
15 018A   Ruth        13974 2014-11-07 23:45:16 2568304. 1205745.
16 018A   Ruth        13974 2014-11-08 00:00:12 2568226. 1205674.
17 018A   Ruth        13974 2014-11-08 00:15:08 2568218. 1205682.
18 018A   Ruth        13974 2014-11-08 08:30:14 2568222. 1205683.
19 018A   Ruth        13974 2014-11-08 08:45:09 2568426. 1205863.
20 018A   Ruth        13974 2014-11-08 09:00:13 2568444. 1205902.
# ℹ 20 more variables: geometry <POINT [m]>, nMinus8 <dbl>, nMinus7 <dbl>,
#   nMinus6 <dbl>, nMinus5 <dbl>, nMinus4 <dbl>, nMinus3 <dbl>, nMinus2 <dbl>,
#   nMinus1 <dbl>, nPlus1 <dbl>, nPlus2 <dbl>, nPlus3 <dbl>, nPlus4 <dbl>,
#   nPlus5 <dbl>, nPlus6 <dbl>, nPlus7 <dbl>, nPlus8 <dbl>, stepMean <dbl>,
#   static <lgl>, segment_id <fct>
Show/Hide Code
# data set without movement point.
# remove segment that are not long enough to be considered as movement.
# For that, we assign ID to short segments. 
#only one segement -> can be due to long hours 

2 Exercice B Similarity

2.1 Task 1 : Similarity

Import data set

Show/Hide Code
pedestrian <- read_delim("pedestrian.csv")
Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Show/Hide Code
#create coordinates

pedestrian <- pedestrian|>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE)

pedestrian <- pedestrian |> select(-E, -N)

# visualise
pedestrian |>
  ggplot() +
  geom_sf() +
  facet_wrap(~ TrajID)

All trajectories show a V-shape; meaning pedestrians turn at a similar point. Pedestrian 4 has however an other starting point. The graphic shows different walking patterns in regards to the velocity: Ped1: fast at the beginning; slow after, Ped2 slow at the beginning, fast after, Ped 3 regular walking spead; Ped 4; regular walking speed;

Question : which of these trajectories are similar? TrajID 1 seems to be a slow trajectory. TrajID 2 seems to be a bit faster TrajID 3 seems to be rather regular. TrajId 4 starts where else. TrajID 5 person is very fast, then slow. Person seems to have gone to the right. TrajID 6 - similar to 5.

2.2 Task 2 : Calculate similarity

Show/Hide Code
help(package = "SimilarityMeasures") 

Which two trajectories to you percieve to be most similar, which are most dissimilar? Now visualize the results from the computed similarity measures. Which measure reflects your own intuition the closest?

Hypotheses 1 : 1 is most similar to 6. Hypotheses 2: 1 is most dissimilar to 3 and 4.

3 Run Dynamic Time Warping Algorithm and compare paths with DTW

Show/Hide Code
# use of generative A.I. to perform computation to extract trajectories into Matrix

get_trajectory <- function(pedestrian, traj_id) {
  pedestrian |>
    filter(TrajID == traj_id) %>%
    st_coordinates()
}

traj1 <- get_trajectory(pedestrian, 1)
traj2 <- get_trajectory(pedestrian, 2)
traj3 <- get_trajectory(pedestrian, 3)
traj4 <- get_trajectory(pedestrian, 4)
traj5 <- get_trajectory(pedestrian, 5)
traj6 <- get_trajectory(pedestrian, 6)

# Calculate DTW and create data frame

dtw_results <- data.frame(
  Pair = c("traj1-traj2", "traj1-traj3", "traj1-traj4", "traj1-traj5", "traj1-traj6"),
  DTW = c(DTW(traj1, traj2, 4),
               DTW(traj1, traj3, 4),
               DTW(traj1, traj4, 4),
               DTW(traj1, traj5, 4),
               DTW(traj1, traj6, 4))
)

# create table
kable(dtw_results)
Pair DTW
traj1-traj2 30298.975
traj1-traj3 50785.511
traj1-traj4 9377.846
traj1-traj5 -1.000
traj1-traj6 1152.718
Show/Hide Code
#visualise
dtw_results |>
  ggplot(aes(x = Pair, y = DTW)) +
  geom_bar(stat = "identity") +
  labs(title = "DTW comparaison")

Hypothese 1 : confirmed - 1 is most similar to 5 and 6. Hypothese 2: no confirmed. 1 is most disimlar to traj 3 and 2. Traj 4 and one are not so dissimilar.

–> 1, 5, 6 have similar walking path. –> 1’s path is much different from 2’s and 3’s path.

! Difference of value for DTW traj 1-5 comparing to solution

4 Run Frecht and compare paths according to it.

Show/Hide Code
# Calculate Frechet and create table with Frechet results

frechet_results <- data.frame(
  Pair = c("traj1-traj2", "traj1-traj3", "traj1-traj4", "traj1-traj5", "traj1-traj6"),
  frechet = c(Frechet(traj1,traj2),
              Frechet(traj1,traj3),
              Frechet(traj1,traj4),
              Frechet(traj1,traj5),
              Frechet(traj1,traj6))
) 

# create table
kable(frechet_results)
Pair frechet
traj1-traj2 28.54075
traj1-traj3 2307.84366
traj1-traj4 1069.22917
traj1-traj5 717.98159
traj1-traj6 38.96272
Show/Hide Code
#visualise
frechet_results |>
  ggplot(aes(x = Pair, y = frechet)) +
  geom_bar(stat = "identity") +
  labs(title = "Frechet comparaison")

Intepretation: traj 1 is according to frechet the most similar to traj 2 and 6 and least similar to traj3-5.

Results seem different than the solution, howcome?