Segmentation

Import library

library("readr")
library("sf")

Warning: package 'sf' was built under R version 4.3.3

Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

library("dplyr")


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library ("ggplot2")
library("SimilarityMeasures")

import data and filter animal #sabi

wildschwein <- read_delim("wildschwein_BE_2056.csv")

Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

wildschwein

# A tibble: 51,246 × 6
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2014-08-22 21:00:12 2570409. 1204752.
 2 002A   Sabi        12275 2014-08-22 21:15:16 2570402. 1204863.
 3 002A   Sabi        12275 2014-08-22 21:30:43 2570394. 1204826.
 4 002A   Sabi        12275 2014-08-22 21:46:07 2570379. 1204817.
 5 002A   Sabi        12275 2014-08-22 22:00:22 2570390. 1204818.
 6 002A   Sabi        12275 2014-08-22 22:15:10 2570390. 1204825.
 7 002A   Sabi        12275 2014-08-22 22:30:13 2570387. 1204831.
 8 002A   Sabi        12275 2014-08-22 22:45:11 2570381. 1204840.
 9 002A   Sabi        12275 2014-08-22 23:00:27 2570316. 1204935.
10 002A   Sabi        12275 2014-08-22 23:15:41 2570393. 1204815.
# ℹ 51,236 more rows

# Careful! What Timezone is assumed? #

sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )

ggplot(sabi) +
  geom_sf()+
  geom_path(aes(E,N)) #connects the dots, specify names of columns.

##Step a) : specify a temporal window v Consistency accross dataset is important -> here every 15 minutes. To pack into a 60 minutes windows: pos[n-2] to pos[n] pos[n-1] to pos[n] pos[n] to pos[n+1] pos[n] to pos[n+2]

##Step b) : Measure the distance to every point within v

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}

sabi <- sabi |> # calculates distance b and saves it into dataset
  mutate(
    nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
    nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
    nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues, use of lead to offset in the other direction.
    nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )


sabi <- sabi |>
  rowwise()|>
  mutate (
    stepMean = mean(c(nMinus2,nMinus1,nPlus1,nPlus2))
) |> 
  ungroup()

sabi <- sabi |>
  mutate(
    static = stepMean < mean(stepMean, na.rm = TRUE) # everything above mean of stepMean is moving point, everything under mean is stopping point
  )

sabi_filter <- sabi |> 
  filter(!static)

sabi_filter |>
ggplot(aes(E,N)) + 
  geom_point(data = sabi, col = "red") + # red points are the removed points. Black should be the sabi_filter one
  geom_path () + 
  geom_point()

  coord_fixed()

<ggproto object: Class CoordFixed, CoordCartesian, Coord, gg>
    aspect: function
    backtransform_range: function
    clip: on
    default: FALSE
    distance: function
    expand: TRUE
    is_free: function
    is_linear: function
    labels: function
    limits: list
    modify_scales: function
    range: function
    ratio: 1
    render_axis_h: function
    render_axis_v: function
    render_bg: function
    render_fg: function
    setup_data: function
    setup_layout: function
    setup_panel_guides: function
    setup_panel_params: function
    setup_params: function
    train_panel_guides: function
    transform: function
    super:  <ggproto object: Class CoordFixed, CoordCartesian, Coord, gg>

#nice way to quickly clean up data.

Exercice A Segmentation

Task 1 : Calculate distances

Step a) Temporal window -> every two hours

pos[n-4] to pos[n] pos[n-3] to pos[n] pos[n-2] to pos[n] pos[n-1] to pos[n] pos[n] to pos[n+1] pos[n] to pos[n+2] pos[n] to pos[n+3] pos[n] to pos[n+4]

Step b) Measure distance to every point

Ruth

# select ruth's data
ruth <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Ruth", 
      DatetimeUTC >= "2014-11-07", 
      DatetimeUTC < "2014-11-09"
      )

#plot
ggplot(ruth) +
  geom_sf()+
  geom_path(aes(E,N)) #connects the dots, specify names of columns.

# calculates distance
ruth <- ruth |> # calculates distance b and saves it into dataset
  mutate(
    nMinus8 = distance_by_element(lag(geometry, 8), geometry),
    nMinus7 = distance_by_element(lag(geometry, 7), geometry),
    nMinus6 = distance_by_element(lag(geometry, 6), geometry),
    nMinus5 = distance_by_element(lag(geometry, 5), geometry), 
    nMinus4 = distance_by_element(lag(geometry, 4), geometry),
    nMinus3 = distance_by_element(lag(geometry, 3), geometry),
    nMinus2 = distance_by_element(lag(geometry, 2), geometry),
    nMinus1 = distance_by_element(lag(geometry, 1), geometry), 
    nPlus1  = distance_by_element(geometry, lead(geometry, 1)), 
    nPlus2  = distance_by_element(geometry, lead(geometry, 2)),
    nPlus3  = distance_by_element(geometry, lead(geometry, 3)),
    nPlus4  = distance_by_element(geometry, lead(geometry, 4)),
    nPlus5  = distance_by_element(geometry, lead(geometry, 5)), 
    nPlus6  = distance_by_element(geometry, lead(geometry, 6)),
    nPlus7  = distance_by_element(geometry, lead(geometry, 7)),
    nPlus8  = distance_by_element(geometry, lead(geometry, 8))
    )

Task 2: Specify and apply threshold d

Threshold = decision point between moving and stopping data.

# Mean of distance is threshold

ruth <- ruth|>
  rowwise()|>
  mutate (
    stepMean = mean(c(nMinus8,nMinus7,nMinus6,nMinus5,nMinus3,nMinus4,nMinus2,nMinus1,nPlus1,nPlus2,nPlus3,nPlus4,nPlus5,nPlus6,nPlus7,nPlus8))
) |> 
  ungroup()


ruth <- ruth|>
  mutate(
    static = stepMean < mean (stepMean, na.rm = TRUE)
  )
    #store information

Task 3: visualized segemented trajectories

Task 4: Segment-based Analysis

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}

ruth <- ruth |>
    mutate(segment_id = rle_id(static))

ruth_filter <- ruth |> 
  filter(!static)

ruth_filter |>
ggplot(aes(E,N)) + 
  geom_path (data = ruth, aes(E,N), col = "blue") +
  geom_point(data = ruth, col = "blue") + # red points are the removed points. Black should be the sabi_filter one 
  geom_point() +
  geom_path(data = ruth_filter, aes(E,N, col = segment_id)) +
  coord_fixed()

#red points are probably the first and last points (beginning + not really moving)

#Todo : calculate the duration of each segment to decide if keep in of not segment 11. #NOTIZEN - Task 4: Segment-based Analysis

ruth_filter

Simple feature collection with 20 features and 25 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2568218 ymin: 1205674 xmax: 2570097 ymax: 1206189
Projected CRS: CH1903+ / LV95
# A tibble: 20 × 26
   TierID TierName CollarID DatetimeUTC                E        N
 * <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 018A   Ruth        13974 2014-11-07 20:00:12 2569741. 1206132.
 2 018A   Ruth        13974 2014-11-07 20:15:09 2570071. 1206144.
 3 018A   Ruth        13974 2014-11-07 20:45:15 2570078. 1206075.
 4 018A   Ruth        13974 2014-11-07 21:00:44 2570067. 1206120.
 5 018A   Ruth        13974 2014-11-07 21:15:10 2570077. 1206139.
 6 018A   Ruth        13974 2014-11-07 21:30:14 2570097. 1206189.
 7 018A   Ruth        13974 2014-11-07 21:45:08 2569843. 1205935.
 8 018A   Ruth        13974 2014-11-07 22:00:13 2569470. 1206046.
 9 018A   Ruth        13974 2014-11-07 22:15:08 2568997. 1206018.
10 018A   Ruth        13974 2014-11-07 22:30:14 2568617. 1206021.
11 018A   Ruth        13974 2014-11-07 22:45:10 2568492. 1205940.
12 018A   Ruth        13974 2014-11-07 23:00:39 2568347. 1205857.
13 018A   Ruth        13974 2014-11-07 23:15:17 2568350. 1205859.
14 018A   Ruth        13974 2014-11-07 23:30:43 2568318. 1205752.
15 018A   Ruth        13974 2014-11-07 23:45:16 2568304. 1205745.
16 018A   Ruth        13974 2014-11-08 00:00:12 2568226. 1205674.
17 018A   Ruth        13974 2014-11-08 00:15:08 2568218. 1205682.
18 018A   Ruth        13974 2014-11-08 08:30:14 2568222. 1205683.
19 018A   Ruth        13974 2014-11-08 08:45:09 2568426. 1205863.
20 018A   Ruth        13974 2014-11-08 09:00:13 2568444. 1205902.
# ℹ 20 more variables: geometry <POINT [m]>, nMinus8 <dbl>, nMinus7 <dbl>,
#   nMinus6 <dbl>, nMinus5 <dbl>, nMinus4 <dbl>, nMinus3 <dbl>, nMinus2 <dbl>,
#   nMinus1 <dbl>, nPlus1 <dbl>, nPlus2 <dbl>, nPlus3 <dbl>, nPlus4 <dbl>,
#   nPlus5 <dbl>, nPlus6 <dbl>, nPlus7 <dbl>, nPlus8 <dbl>, stepMean <dbl>,
#   static <lgl>, segment_id <fct>

# data set without movement point.
# remove segment that are not long enough to be considered as movement.
# For that, we assign ID to short segments. 
#only one segement -> can be due to long hours

Exercice B Similarity

Task 1 : Similarity

Import data set

pedestrian <- read_delim("pedestrian.csv")

Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

#create coordinates
pedestrian <- pedestrian|>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE)

# visualise
pedestrian |>
  ggplot(aes(E,N)) +
  geom_point() +
  facet_wrap(~ TrajID)

All trajectories show a V-shape; meaning pedestrians turn at a similar point. Pedestrian 4 has however an other starting point. The graphic shows different walking patterns in regards to the velocity: Ped1: fast at the beginning; slow after, Ped2 slow at the beginning, fast after, Ped 3 regular walking spead; Ped 4; regular walking speed;

Question : which of these trajectories are similar? TrajID 1 seems to be a slow trajectory. TrajID 2 seems to be a bit faster TrajID 3 seems to be rather regular. TrajId 4 starts where else. TrajID 5 person is very fast, then slow. Person seems to have gone to the right. TrajID 6 - similar to 5.

Task 2 : Calculate similarity

help(package = "SimilarityMeasures")

Which two trajectories to you percieve to be most similar, which are most dissimilar? Now visualize the results from the computed similarity measures. Which measure reflects your own intuition the closest?

1 is most similar to 6. And one is most disimmlar to 3 and 4.

Run Dynamic Time Warping Algorithm

#Segment
pedestrian <- pedestrian %>%
  mutate(E = as.numeric(E),
         N = as.numeric(N))

pedestrian <- pedestrian |>
    mutate(segment_id = rle_id(TrajID))

# use of generative A.I. to extract trajectories into Matrix

get_trajectory <- function(pedestrian, traj_id) {
  subset <- pedestrian[pedestrian$TrajID == traj_id, c("E", "N")]
  return(as.matrix(subset))  # Convert to matrix
}

traj1 <- get_trajectory(pedestrian, 1)
traj2 <- get_trajectory(pedestrian, 2)
traj3 <- get_trajectory(pedestrian, 3)
traj4 <- get_trajectory(pedestrian, 4)
traj5 <- get_trajectory(pedestrian, 5)
traj6 <- get_trajectory(pedestrian, 6)