Segmenting wildboar trajectories

Import dataset

library(readr)
Warning: Paket 'readr' wurde unter R Version 4.5.3 erstellt
wildschwein <- read_delim("wildschwein_BE_2056.csv") #read_delim assumes UTCtime
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(sf)
Warning: Paket 'sf' wurde unter R Version 4.5.3 erstellt
Linking to GEOS 3.14.1, GDAL 3.12.1, PROJ 9.7.1; sf_use_s2() is TRUE
wildschwein_sf <- st_as_sf(wildschwein, coords= c("E", "N"), crs=2058, remove=FALSE) #to get geometry from coords, but still let coords appear

library(dplyr)

Attache Paket: 'dplyr'

Die folgenden Objekte sind maskiert von 'package:stats':

    filter, lag

Die folgenden Objekte sind maskiert von 'package:base':

    intersect, setdiff, setequal, union
sabi <- filter(wildschwein_sf, TierName=="Sabi", DatetimeUTC > "2015-07-01", DatetimeUTC < "2015-07-03")

library(ggplot2)
Warning: Paket 'ggplot2' wurde unter R Version 4.5.3 erstellt
ggplot(sabi) + geom_sf() + geom_path(aes(E, N)) #specify at the end that axis are E and N, geom_path respects temporal order of data

Step a): Specifiy a temporal window v

#calculate 4 distances for each point

v is 60 minutes, our sampling interval is 15 min. So we need to calculate the following distances.

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE) #by_element=True, because otherwise all distances between all points are calculated, and not only between later and now
  )
}


sabi <- sabi |>
  mutate(nMinus2=distance_by_element(lag(geometry, n=2), geometry),
         nMinus1=distance_by_element(lag(geometry, n=1), geometry),
         nPlus1=distance_by_element(geometry, lead(geometry, n=1)),
         nPlus2=distance_by_element(geometry, lead(geometry, n=2)))
sabi <- sabi |>
  rowwise() |> #mean for each row,
  mutate(
    stepMean=mean(c(nMinus2, nMinus1, nPlus1, nPlus2), na.rn=FALSE) #mean of moving window of one point
  ) 

Step C) remove static points$

#first, specify treshold for asimple approach, wel use mean value as treshold value

#find means that are lowed, to be removed (non-movement)

treshold <- mean(sabi$stepMean, na.rm = TRUE)

sabi <- sabi |>
  mutate(
    static=stepMean < treshold
  )|> ungroup()
sabi
Simple feature collection with 192 features and 12 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2569724 ymin: 1204916 xmax: 2570927 ymax: 1205957
Projected CRS: ED50(ED77) / UTM zone 38N
# A tibble: 192 × 13
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2015-06-30 22:00:13 2569972. 1205366.
 2 002A   Sabi        12275 2015-06-30 22:16:06 2569975. 1205637.
 3 002A   Sabi        12275 2015-06-30 22:30:19 2570266. 1205857.
 4 002A   Sabi        12275 2015-06-30 22:45:13 2570208. 1205913.
 5 002A   Sabi        12275 2015-06-30 23:00:10 2570247. 1205731.
 6 002A   Sabi        12275 2015-06-30 23:15:17 2570512. 1205279.
 7 002A   Sabi        12275 2015-06-30 23:30:38 2570684. 1205103.
 8 002A   Sabi        12275 2015-06-30 23:45:16 2570526. 1205051.
 9 002A   Sabi        12275 2015-07-01 00:00:10 2570532. 1205044.
10 002A   Sabi        12275 2015-07-01 00:15:14 2570530. 1205059.
# ℹ 182 more rows
# ℹ 7 more variables: geometry <POINT [m]>, nMinus2 <dbl>, nMinus1 <dbl>,
#   nPlus1 <dbl>, nPlus2 <dbl>, stepMean <dbl>, static <lgl>
ggplot(sabi) +
  geom_path(aes(E,N))+
  geom_sf(aes(color=static))

sabi_move <- sabi |>
  filter(!static)

Exercise A: Segmentation (The following analysis was supported by the use of AI)

wildschwein <- read_delim("wildschwein_BE_2056.csv") #read_delim assumes UTCtime
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(sf)

wildschwein_sf <- st_as_sf(wildschwein, coords= c("E", "N"), crs=2058, remove=FALSE) #to get geometry from coords, but still let coords appear

library(dplyr)
Ruth <- filter(wildschwein_sf, TierName=="Ruth", DatetimeUTC > "2015-07-01", DatetimeUTC < "2015-07-03")

library(ggplot2)

ggplot(Ruth) + geom_sf() + geom_path(aes(E, N)) #specify at the end that axis are E and N, geom_path respects temporal order of data

Step a): Specifiy a temporal window v

#calculate 4 distances for each point

v is 60 minutes, our sampling interval is 15 min. So we need to calculate the following distances.

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE) #by_element=True, because otherwise all distances between all points are calculated, and not only between later and now
  )
}


Ruth <- Ruth |>
  mutate(nMinus2=distance_by_element(lag(geometry, n=2), geometry),
         nMinus1=distance_by_element(lag(geometry, n=1), geometry),
         nPlus1=distance_by_element(geometry, lead(geometry, n=1)),
         nPlus2=distance_by_element(geometry, lead(geometry, n=2)))
Ruth <- Ruth |>
  rowwise() |> #mean for each row,
  mutate(
    stepMean=mean(c(nMinus2, nMinus1, nPlus1, nPlus2), na.rn=FALSE) #mean of moving window of one point
  ) 

Explore

summary(Ruth$stepMean)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
  1.677   5.898  20.312  82.202  90.998 760.357       4 
hist(Ruth$stepMean)

threshold <- mean(Ruth$stepMean, na.rm = TRUE)

Ruth <- Ruth |>
  mutate(
    static = stepMean <= threshold
  )
ggplot(Ruth) +
  geom_path(aes(E, N, color = static, group = 1)) +
  geom_point(aes(E, N)) #specify at the end that axis are E and N, geom_path respects temporal order of data

Define id

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}
Ruth <- Ruth |>
  mutate(segment_id = rle_id(static))
ggplot(Ruth) +
  geom_path(aes(E, N, group = segment_id, color = segment_id)) +
  geom_point(aes(E, N)) +
  theme_minimal()

For row-based data

segment_info <- Ruth |>
  group_by(segment_id) |>
  summarise(
    duration = n(),
    .groups = "drop"
  )
valid_segments <- segment_info |>
  filter(duration >= 5)

Ruth_filtered <- Ruth |>
  mutate(duration = segment_info$duration[match(segment_id, segment_info$segment_id)]) |>
  filter(duration >= 5)

Task B, Similarity measures (supported by use of AI)

library(readr)

pedestrian <- read_delim("pedestrian.csv")
Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore trajectories

library(ggplot2)
library(dplyr)

pedestrian <- pedestrian |>
  arrange(TrajID, DatetimeUTC)

ggplot(pedestrian, aes(x=E, y=N, group=TrajID, color=factor(TrajID))) +
  geom_path() +
  geom_point(size=0.5) +
  theme_minimal()

help(package = "SimilarityMeasures")
library(SimilarityMeasures)
Warning: Paket 'SimilarityMeasures' wurde unter R Version 4.5.2 erstellt
trajs <- lapply(1:6, function(i) {
  as.matrix(pedestrian[pedestrian$TrajID == i, c("E", "N")])
})
dtw_vals <- sapply(2:6, function(i) DTW(trajs[[1]], trajs[[i]]))
edit_vals <- sapply(2:6, function(i) EditDist(trajs[[1]], trajs[[i]]))
frechet_vals <- sapply(2:6, function(i) {
  Frechet(trajs[[1]], trajs[[i]])
})
lcss_vals <- sapply(2:6, function(i) {
  LCSS(
    trajs[[1]],
    trajs[[i]],
    pointSpacing = 5,
    pointDistance = 2,
    errorMarg = 2
  )
})
results <- data.frame(
  Traj = 2:6,
  DTW = dtw_vals,
  EditDist = edit_vals,
  Frechet = frechet_vals,
  LCSS = lcss_vals
)
barplot(
  t(as.matrix(results[, c("DTW", "EditDist", "Frechet", "LCSS")])),
  beside = TRUE,
  names.arg = results$Traj,
  col = c("steelblue", "tomato", "darkgreen", "purple"),
  legend.text = c("DTW", "EditDist", "Frechet", "LCSS"),
  args.legend = list(x = "topright"),
  main = "Trajectory Distances Comparison",
  xlab = "Trajectory ID",
  ylab = "Distance"
)