Geo 880 Exercise 4

Author

Jan Krummenacher

Exercise 4

Preparation

library("readr")
library("ggplot2")

wildschwein <- read_delim("Data/wildschwein_BE_2056.csv", ",")


library("sf")
library("dplyr")

# Careful! What Timezone is assumed?
sabi <- wildschwein |>
    st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
    filter(
      TierName == "Sabi", 
      DatetimeUTC >= "2015-07-01", 
      DatetimeUTC < "2015-07-03"
      )

Plot my data

ggplot(sabi)+
  geom_sf()+
  geom_path(aes(E,N))

Specify temp window v & measure distance to every point within v

distance_by_element <- function(later, now) {
  as.numeric(
    st_distance(later, now, by_element = TRUE)
  )
}


sabi <- sabi |>
    mutate(
        nMinus2 = distance_by_element(lag(geometry, 2), geometry),  # distance to pos -30 minutes
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -15 minutes
        nPlus1  = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
        nPlus2  = distance_by_element(geometry, lead(geometry, 2))  # distance to pos +30 minutes
    )

Mean distance rowwise

sabi <- sabi |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
    ) |>
    ungroup()

sabi
Simple feature collection with 192 features and 11 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 2569724 ymin: 1204916 xmax: 2570927 ymax: 1205957
Projected CRS: CH1903+ / LV95
# A tibble: 192 × 12
   TierID TierName CollarID DatetimeUTC                E        N
   <chr>  <chr>       <dbl> <dttm>                 <dbl>    <dbl>
 1 002A   Sabi        12275 2015-06-30 22:00:13 2569972. 1205366.
 2 002A   Sabi        12275 2015-06-30 22:16:06 2569975. 1205637.
 3 002A   Sabi        12275 2015-06-30 22:30:19 2570266. 1205857.
 4 002A   Sabi        12275 2015-06-30 22:45:13 2570208. 1205913.
 5 002A   Sabi        12275 2015-06-30 23:00:10 2570247. 1205731.
 6 002A   Sabi        12275 2015-06-30 23:15:17 2570512. 1205279.
 7 002A   Sabi        12275 2015-06-30 23:30:38 2570684. 1205103.
 8 002A   Sabi        12275 2015-06-30 23:45:16 2570526. 1205051.
 9 002A   Sabi        12275 2015-07-01 00:00:10 2570532. 1205044.
10 002A   Sabi        12275 2015-07-01 00:15:14 2570530. 1205059.
# ℹ 182 more rows
# ℹ 6 more variables: geometry <POINT [m]>, nMinus2 <dbl>, nMinus1 <dbl>,
#   nPlus1 <dbl>, nPlus2 <dbl>, stepMean <dbl>

Remove “static points”

sabi <- sabi |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE))

sabi_filter <- sabi |>
    filter(!static)

sabi_filter |>
    ggplot(aes(E, N)) +
    geom_point(data = sabi, col = "red") +
    geom_path() +
    geom_point() +
    coord_fixed() +
    theme(legend.position = "bottom")

Exercise A Segmentation

Import own dataset

library(sf)
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
# Schritt 1: Entpacken der KMZ-Datei (ersetze den Pfad mit deinem tatsächlichen Dateipfad)
kmz_file <- "Data/Ella_1.kmz"  # Ersetze diesen Pfad mit dem tatsächlichen Pfad
unzip_dir <- "Data/"        # Ordner, in dem die KML-Datei gespeichert werden soll

# Entpacken der KMZ-Datei
unzip(kmz_file, exdir = unzip_dir)

# Schritt 2: KML-Datei einlesen 
kml_file <- file.path(unzip_dir, "doc.kml")  

# Einlesen der KML-Datei als SF-Objekt
ella_point <- st_read(kml_file, layer = "Points")
Reading layer `Points' from data source 
  `C:\Users\Jan Krummenacher\OneDrive - Universität Zürich UZH\Dokumente\Geo 880\Data\doc.kml' 
  using driver `KML'
Simple feature collection with 96 features and 2 fields
Geometry type: POINT
Dimension:     XYZ
Bounding box:  xmin: 8.54181 ymin: 47.39716 xmax: 8.548177 ymax: 47.3982
z_range:       zmin: 478.1163 zmax: 494.0036
Geodetic CRS:  WGS 84
ella_line <- st_read(kml_file, layer = "Route")
Reading layer `Route' from data source 
  `C:\Users\Jan Krummenacher\OneDrive - Universität Zürich UZH\Dokumente\Geo 880\Data\doc.kml' 
  using driver `KML'
Simple feature collection with 2 features and 2 fields
Geometry type: LINESTRING
Dimension:     XYZ
Bounding box:  xmin: 8.54181 ymin: 47.39716 xmax: 8.548177 ymax: 47.3982
z_range:       zmin: 478.1163 zmax: 494.0036
Geodetic CRS:  WGS 84
# Reproject to LV 95
ella_line <- st_transform(ella_line, crs = 2056)
ella_point <- st_transform(ella_point, crs = 2056)

# Schritt 2: Extrahieren der "Description"-Daten aus der KML-Datei
ella_point <- ella_point %>%
  mutate(description = as.character(Description))  # Falls die Description eine andere Struktur hat

# Schritt 3: Verwenden von regex, um relevante Informationen zu extrahieren

ella_point <- ella_point %>%
  mutate(
    # Extrahiere ID
    id = seq(1, nrow(ella_point)),
    
    # Extrahieren des Zeitstempels
    timestamp = str_extract(Description, "(?<=Zeit</dt><dd>)[^<]+"),
    
    # Extrahieren der Position (Koordinaten)
    position = str_extract(Description, "(?<=Position</dt><dd>)[^<]+"),
    
    # Extrahieren der Höhe
    altitude = str_extract(Description, "(?<=Altitude</dt><dd>)[^<]+"),
    
    # Extrahieren der Geschwindigkeit
    speed = str_extract(Description, "(?<=Geschwindigkeit</dt><dd>)[^<]+"),
    
    # Extrahieren der GPS-Genauigkeit
    gps_accuracy = str_extract(Description, "(?<=GPS-Genauigkeit</dt><dd>)[^<]+")
    )%>%
  
  mutate(
    altitude = str_remove(altitude, "m$"),  
    speed = str_remove(speed, "m/s$"),      
    gps_accuracy = str_remove(gps_accuracy, "m$")  
  ) %>%
  # Umwandeln der Daten, die Zahlenwerte enthalten, in numerische Werte
  mutate(
    id = as.character(id),
    timestamp = as.POSIXct(timestamp, format = "%d.%m.%Y, %H:%M:%S", tz = "CET"),
    altitude = as.numeric(altitude),
    speed = as.numeric(speed),
    gps_accuracy = as.numeric(gps_accuracy)
  ) |> 
  # Wähle relevante Spalten
  select(
    id, timestamp, position, altitude, speed, gps_accuracy, geometry
  )

# Anzeigen der ersten paar Zeilen des importierten Datensatzes
head(ella_point)
Simple feature collection with 6 features and 6 fields
Geometry type: POINT
Dimension:     XYZ
Bounding box:  xmin: 2683738 ymin: 1250184 xmax: 2683761 ymax: 1250202
z_range:       zmin: 492.3622 zmax: 493.9465
Projected CRS: CH1903+ / LV95
  id           timestamp                   position altitude speed gps_accuracy
1  1 2024-10-24 15:34:43 8°32′53.44″O 47°23′49.76″N      494  0.46            4
2  2 2024-10-24 15:34:47 8°32′53.21″O 47°23′49.94″N      494  1.41            3
3  3 2024-10-24 15:34:51 8°32′52.99″O 47°23′50.04″N      493  0.88            3
4  4 2024-10-24 15:34:55 8°32′52.72″O 47°23′50.14″N      493  1.46            3
5  5 2024-10-24 15:34:59 8°32′52.56″O 47°23′50.27″N      493  1.25            4
6  6 2024-10-24 15:35:03 8°32′52.34″O 47°23′50.35″N      492  1.17            4
                        geometry
1 POINT Z (2683761 1250184 49...
2 POINT Z (2683756 1250190 49...
3 POINT Z (2683752 1250193 49...
4 POINT Z (2683746 1250196 49...
5 POINT Z (2683743 1250200 49...
6 POINT Z (2683738 1250202 49...

Average temporal sampling interval = 4 sec. Timerange chosen for v = 8 sec.

dd <- ella_point |>
    mutate(
        nMinus1 = distance_by_element(lag(geometry, 1), geometry),  # distance to pos -4 sec
        nPlus1  = distance_by_element(geometry, lead(geometry, 1))  # distance to pos +4 sec
    )
dd <- dd |>
    rowwise() |>
    mutate(
        stepMean = mean(c(nMinus1, nPlus1))
    ) |>
    ungroup()

# Static defined as less than 75% of stepMean

dd <- dd |>
    mutate(static = stepMean < mean(stepMean, na.rm = TRUE)*0.75)

dd_filter <- dd |>
    filter(!static)

ggplot() +
  geom_point(data = dd, aes(x = st_coordinates(geometry)[,1], y = st_coordinates(geometry)[,2]), col = "red") + 
  geom_point(data = dd_filter, aes(x = st_coordinates(geometry)[,1], y = st_coordinates(geometry)[,2]), col = "black") + 
  geom_path(data = dd, aes(x = st_coordinates(geometry)[,1], y = st_coordinates(geometry)[,2]), col = "black") +
  coord_fixed() + 
  theme(legend.position = "bottom")

rle_id <- function(vec) {
    x <- rle(vec)$lengths
    as.factor(rep(seq_along(x), times = x))
}
dd <- dd |>
    mutate(segment_id = rle_id(static))

library(ggplot2)
library(sf)

ggplot() +
  geom_point(data = dd, aes(x = st_coordinates(geometry)[,1], y = st_coordinates(geometry)[,2]), col = "red") +
  geom_point(data = dd_filter, aes(x = st_coordinates(geometry)[,1], y = st_coordinates(geometry)[,2]), col = "black") +
  geom_path(data = dd, aes(x = st_coordinates(geometry)[,1], y = st_coordinates(geometry)[,2], color = factor(segment_id)))

Exercise B Similarity

Import data

pedestrian <- read_delim("Data/pedestrian.csv")

Visualise data

pedestrian <- pedestrian |> 
  st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |> 
  group_by(TrajID)
ggplot(pedestrian) +
  geom_sf(aes(color = as.factor(TrajID))) +   
  geom_path(aes(x = E, y = N)) +              
  facet_wrap(~TrajID, ncol = 3) +              
  theme_minimal() +                            
  scale_color_viridis_d()                      

library(SimilarityMeasures)

# Creating matrix 
traj_matrices <- pedestrian %>%
  group_by(TrajID) %>%
  arrange(TrajID) %>%
  summarize(
    coords_matrix = list(cbind(E, N))
  )
traj_matrix_1 <- traj_matrices$coords_matrix[[1]]  
traj_matrix_2 <- traj_matrices$coords_matrix[[2]]
traj_matrix_3 <- traj_matrices$coords_matrix[[3]]
traj_matrix_4 <- traj_matrices$coords_matrix[[4]]
traj_matrix_5 <- traj_matrices$coords_matrix[[5]]
traj_matrix_6 <- traj_matrices$coords_matrix[[6]]

# DTW

DTW2<- DTW(traj_matrix_1, traj_matrix_2)
DTW3<- DTW(traj_matrix_1, traj_matrix_3)
DTW4<- DTW(traj_matrix_1, traj_matrix_4)
DTW5<- DTW(traj_matrix_1, traj_matrix_5)
DTW6<- DTW(traj_matrix_1, traj_matrix_6)
dtw_data <- data.frame(
  TrajID = c("Traj2", "Traj3", "Traj4", "Traj5", "Traj6"),
  DTW_Distance = c(DTW2, DTW3, DTW4, DTW5, DTW6)
)

# Erstellen des Balkendiagramms
ggplot(dtw_data, aes(x = TrajID, y = DTW_Distance)) +
  geom_bar(stat = "identity", fill = "steelblue") +  # Balken erstellen
  labs(
    title = "DTW Distanzen für verschiedene Trajektorien",
    x = "TrajID", 
    y = "DTW Distanz"
  ) +
  theme_minimal()