cma_week4_rexercise

Author

simonbehringer

library("readr")
library("sf")

Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE

library("dplyr")


Attache Paket: 'dplyr'

Die folgenden Objekte sind maskiert von 'package:stats':

    filter, lag

Die folgenden Objekte sind maskiert von 'package:base':

    intersect, setdiff, setequal, union

library("ggplot2")

wildschwein <- read_delim("Datasets-20250219/wildschwein_BE_2056.csv", ",")

Rows: 51246 Columns: 6

── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (2): TierID, TierName
dbl  (3): CollarID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Careful! What Timezone is assumed?
sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )

ggplot(sabi)+
  geom_sf()+
  geom_path(aes(x=E,y=N))

Example

a) Specify a temporal window v

pos[n-2] to pos[n]
pos[n-1] to pos[n]
pos[n] to pos[n+1
pos[n] to pos[n+2]

b) Measure the distance to every point within v

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}


sabi <- sabi |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )

sabi <- sabi |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()

sabi

Simple feature collection with 192 features and 11 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2569724 ymin: 1204916 xmax: 2570927 ymax: 1205957
Projected CRS: CH1903+ / LV95
# A tibble: 192 × 12
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2015-06-30 22:00:13 2569972. 1205366.
 2 002A   Sabi        12275 2015-06-30 22:16:06 2569975. 1205637.
 3 002A   Sabi        12275 2015-06-30 22:30:19 2570266. 1205857.
 4 002A   Sabi        12275 2015-06-30 22:45:13 2570208. 1205913.
 5 002A   Sabi        12275 2015-06-30 23:00:10 2570247. 1205731.
 6 002A   Sabi        12275 2015-06-30 23:15:17 2570512. 1205279.
 7 002A   Sabi        12275 2015-06-30 23:30:38 2570684. 1205103.
 8 002A   Sabi        12275 2015-06-30 23:45:16 2570526. 1205051.
 9 002A   Sabi        12275 2015-07-01 00:00:10 2570532. 1205044.
10 002A   Sabi        12275 2015-07-01 00:15:14 2570530. 1205059.
# ℹ 182 more rows
# ℹ 6 more variables: geometry <POINT [m]>, nMinus2 <dbl>, nMinus1 <dbl>,
#   nPlus1 <dbl>, nPlus2 <dbl>, stepMean <dbl>

c) Remove “static points”

sabi <- sabi |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

sabi_filter <- sabi |>
    filter(!static)

sabi_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = sabi, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise A

another wildboar

wildschwein |> 
  distinct(TierName)

# A tibble: 3 × 1
  TierName
  <chr>   
1 Sabi    
2 Rosa    
3 Ruth

rosa <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Rosa", 
      DatetimeUTC >= "2015-04-01", 
      DatetimeUTC < "2015-04-05"
      )

Task 1 Calculate Distances

specifiy temporal window

pos[n-2] to pos[n]
pos[n-1] to pos[n]
pos[n] to pos[n+1
pos[n] to pos[n+2]

Measure the distance to every point within v

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}


rosa <- rosa |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )

rosa <- rosa |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()

rosa

Simple feature collection with 384 features and 11 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2570350 ymin: 1204328 xmax: 2571001 ymax: 1205636
Projected CRS: CH1903+ / LV95
# A tibble: 384 × 12
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 016A   Rosa        13972 2015-03-31 22:00:18 2570778. 1204985.
 2 016A   Rosa        13972 2015-03-31 22:15:16 2570795. 1204957.
 3 016A   Rosa        13972 2015-03-31 22:30:34 2570797. 1204928.
 4 016A   Rosa        13972 2015-03-31 22:45:13 2570810. 1204914.
 5 016A   Rosa        13972 2015-03-31 23:00:10 2570850. 1204876.
 6 016A   Rosa        13972 2015-03-31 23:15:19 2570844. 1204864.
 7 016A   Rosa        13972 2015-03-31 23:30:23 2570851. 1204853.
 8 016A   Rosa        13972 2015-03-31 23:45:14 2570861. 1204843.
 9 016A   Rosa        13972 2015-04-01 00:00:10 2570823. 1204800.
10 016A   Rosa        13972 2015-04-01 00:15:14 2570831. 1204794.
# ℹ 374 more rows
# ℹ 6 more variables: geometry <POINT [m]>, nMinus2 <dbl>, nMinus1 <dbl>,
#   nPlus1 <dbl>, nPlus2 <dbl>, stepMean <dbl>

Task 2 Specify and apply threshold d

rosa <- rosa |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

rosa_filter <- rosa |>
    filter(!static)

Task 3 Visualize segmented trajectories

rosa_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = rosa, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Task 4 Segment-based analysis

entspricht dem Schritt d) in Figure 16.1 (Input: Segmentation)

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}

Segment ID zuweisen

rosa_segments <- rosa |>
    mutate(segment_id = rle_id(static))

nach Segment ID darstellen

rosa_segments |>
    ggplot(aes(E, N)) +
    geom_path(aes(colour = segment_id)) +
    geom_point(aes(colour = segment_id)) +
    geom_point(data=rosa_segments|>filter(static),color="black")+
    coord_fixed() +
    theme(legend.position = "none")

Jetzt wollen wir nur Segmente einer bestimmten Dauer einbeziehen (z.B. mehr als 1000 Sekungen). Dazu berechnen wir die Segment-Dauer und filtern nach der Segmentdauer.

rosa_sum <- rosa_segments |> 
  group_by(segment_id) |> 
  summarise(segment_duration = last(DatetimeUTC) - first(DatetimeUTC)) |> 
  filter(segment_duration > 1000)

Darstellen der neuen gefilterten Segmente

rosa_segments |>
  filter(segment_id %in% rosa_sum$segment_id) |> 
    ggplot(aes(E, N)) +
    geom_path(aes(colour = segment_id)) +
    geom_point(aes(colour = segment_id))+
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise B

Task 1

pedestrian <- read_delim("Datasets-20250219/pedestrian.csv", ",")

Rows: 289 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl  (3): TrajID, E, N
dttm (1): DatetimeUTC

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

pedestrian <- pedestrian |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE)

ggplot(pedestrian)+
  geom_sf()+
  geom_path(aes(x=E,y=N))+
  facet_wrap(~TrajID)

library(ggplot2)

ggplot(pedestrian, aes(x = E, y = N, group = TrajID)) +
  geom_path(aes(color = factor(TrajID)), size = 1.2) +  # Farbige Pfade pro Trajektorie
  geom_point(aes(color = factor(TrajID)), size = 2) +   # Punkte für Messwerte
  facet_wrap(~TrajID) +  # Facettierte Darstellung nach Trajektorien-ID
  theme_minimal() +  
  theme(legend.position = "none")  # Legende entfernen, da jede Facette nur eine Trajektorie zeigt

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

most similiar: 1 & 6

most dissimilar: 1/2 oder 2/6

Task 2

Lösung mit ChatGPT und Codes von kinmar

#install.packages("SimilarityMeasures")
library(SimilarityMeasures)

trajectories  <- pedestrian |> 
  group_by(TrajID) |> 
  summarise(geometry = list(matrix(c(E, N), ncol = 2, byrow = FALSE))) |> 
  ungroup() |> 
  pull(geometry)

results <- tibble(TrajID = 2:6) |> 
  rowwise() |> 
  mutate(
    DTW = DTW(trajectories[[1]], trajectories[[TrajID]]),
    EditDist = EditDist(trajectories[[1]], trajectories[[TrajID]]),
    Frechet = Frechet(trajectories[[1]], trajectories[[TrajID]])
#    LCSS = LCSS(trajectories[[1]], trajectories[[TrajID]])
  ) |> 
  ungroup() |> 
  mutate(
    TrajID = as.factor(TrajID)
  )

library(tidyr)

results |> 
  pivot_longer(cols = c(DTW, EditDist, Frechet), names_to = "Method", values_to = "Similarity") |> 
  ggplot(aes(TrajID, y = Similarity, fill = TrajID)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_brewer(palette = "Greens") +
  theme_minimal() +
  facet_wrap(.~Method, scales="free")+
  labs(x = "Verglichene Trajektorie", y = "Ähnlichkeitswert")