Week_4

Input

library("readr")
library("sf")
Linking to GEOS 3.12.2, GDAL 3.9.3, PROJ 9.4.1; sf_use_s2() is TRUE
library("dplyr")

Attache Paket: 'dplyr'
Die folgenden Objekte sind maskiert von 'package:stats':

    filter, lag
Die folgenden Objekte sind maskiert von 'package:base':

    intersect, setdiff, setequal, union
library("ggplot2")
wildschwein <- read_delim("data/wildschwein_BE_2056.csv", ",")
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Careful! What Timezone is assumed?
sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03")
distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE))}

sabi <- sabi |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
        )
sabi <- sabi |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()
sabi <- sabi |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

sabi_filter <- sabi |>
    filter(!static)

sabi_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = sabi, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise A

wildschwein <- read_delim("data/wildschwein_BE_2056.csv", ",")
Rows: 51246 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Careful! What Timezone is assumed?
Rosa <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Rosa", 
      DatetimeUTC >= "2015-03-26", 
      DatetimeUTC < "2015-03-27")
distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE))}

Rosa <- Rosa |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
        )
Rosa <- Rosa |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()

ggplot(Rosa, aes(y = stepMean)) +
  geom_boxplot() #stepMean of 30 seams to be a fitting threshold
Warning: Removed 4 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Rosa <- Rosa |>
    mutate(static_1 = stepMean < mean(stepMean, na.rm = TRUE)) |>
  mutate(static_2 = stepMean < 50)

Rosa_filter_1 <- Rosa |> #Threshold 1
    filter(!static_1)

Rosa_filter_2 <- Rosa |> #Threshold 2
    filter(!static_2)

p1 <- Rosa_filter_1 |>
    ggplot(aes(E, N)) +
    geom_point(data = Rosa, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

p2 <- Rosa_filter_2 |>
    ggplot(aes(E, N)) +
    geom_point(data = Rosa, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

library(patchwork)
p1 + p2 #Differences between 2 thresholds 

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))}

Rosa_filter <- Rosa_filter_1 |>
    mutate(segment_id = rle_id(static_1))

ggplot(data = Rosa_filter, aes(E, N)) +
  geom_point(data = Rosa, col = "red") +
  geom_path() + #path without color, to connect different segments
  geom_path(aes(color = segment_id)) + #color for the path
  geom_point(aes(color = segment_id)) + 
  coord_fixed() +
  theme(legend.position = "bottom")

Exercise B

pedestrian <- read_delim("data/pedestrian.csv")
Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pedestrian |>
    ggplot(aes(E, N)) +
    facet_wrap(~TrajID) +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

#Create marix for each trajectory/pedestrian (needed for analysis)
pedestrian_1 <- pedestrian |>
  filter(TrajID == 1) |>
  select(E, N) |>
  as.matrix()
pedestrian_2 <- pedestrian |>
  filter(TrajID == 2) |>
  select(E, N) |>
  as.matrix()
pedestrian_3 <- pedestrian |>
  filter(TrajID == 3) |>
  select(E, N) |>
  as.matrix()
pedestrian_4 <- pedestrian |>
  filter(TrajID == 4) |>
  select(E, N) |>
  as.matrix()
pedestrian_5 <- pedestrian |>
  filter(TrajID == 5) |>
  select(E, N) |>
  as.matrix()
pedestrian_6 <- pedestrian |>
  filter(TrajID == 6) |>
  select(E, N) |>
  as.matrix()
library("SimilarityMeasures")
#Compare each pedestrian with all other pedestrians 
Compare_1 <- c(DTW(pedestrian_1, pedestrian_1),
                DTW(pedestrian_1, pedestrian_2),
                DTW(pedestrian_1, pedestrian_3),
                DTW(pedestrian_1, pedestrian_4),
                DTW(pedestrian_1, pedestrian_5),
                DTW(pedestrian_1, pedestrian_6))
Compare_2 <- c(DTW(pedestrian_2, pedestrian_1),
                DTW(pedestrian_2, pedestrian_2),
                DTW(pedestrian_2, pedestrian_3),
                DTW(pedestrian_2, pedestrian_4),
                DTW(pedestrian_2, pedestrian_5),
                DTW(pedestrian_2, pedestrian_6))
Compare_3 <- c(DTW(pedestrian_3, pedestrian_1),
                DTW(pedestrian_3, pedestrian_2),
                DTW(pedestrian_3, pedestrian_3),
                DTW(pedestrian_3, pedestrian_4),
                DTW(pedestrian_3, pedestrian_5),
                DTW(pedestrian_3, pedestrian_6))
Compare_4 <- c(DTW(pedestrian_4, pedestrian_1),
                DTW(pedestrian_4, pedestrian_2),
                DTW(pedestrian_4, pedestrian_3),
                DTW(pedestrian_4, pedestrian_4),
                DTW(pedestrian_4, pedestrian_5),
                DTW(pedestrian_4, pedestrian_6))
Compare_5 <- c(DTW(pedestrian_5, pedestrian_1),
                DTW(pedestrian_5, pedestrian_2),
                DTW(pedestrian_5, pedestrian_3),
                DTW(pedestrian_5, pedestrian_4),
                DTW(pedestrian_5, pedestrian_5),
                DTW(pedestrian_5, pedestrian_6))
Compare_6 <- c(DTW(pedestrian_6, pedestrian_1),
                DTW(pedestrian_6, pedestrian_2),
                DTW(pedestrian_6, pedestrian_3),
                DTW(pedestrian_6, pedestrian_4),
                DTW(pedestrian_6, pedestrian_5),
                DTW(pedestrian_6, pedestrian_6))

#Create a matrix with all values
DTW_matrix <- data.frame(pedestrian_1 = Compare_1, pedestrian_2 = Compare_2, pedestrian_3 = Compare_3, pedestrian_4 = Compare_4, pedestrian_5 = Compare_5, pedestrian_6 = Compare_6)
rownames(DTW_matrix) <- c("pedestrian_1", "pedestrian_2", "pedestrian_3", "pedestrian_4", "pedestrian_5", "pedestrian_6")
DTW_matrix[DTW_matrix == 0] <- NA

#Look for most similar and least similar trajectories
min(DTW_matrix, na.rm = TRUE) #1152.718 is the lowest value
[1] 1152.718
# pedestrian_1 and 6 are the most similar ones!
max(DTW_matrix, na.rm = TRUE) #57604.43 is the highest value
[1] 57604.43
# pedestrian_2 and 3 are the least similar ones!

#Same comparisons can be performed with the other functions
EditDist(pedestrian_1, pedestrian_2)
[1] 45
Frechet(pedestrian_1, pedestrian_2)
         
28.54075 
LCSS(pedestrian_1, pedestrian_5, 2, 2, 0.5)
[1] 3