library("readr")
library("sf")
library("dplyr")
library("ggplot2")Exercise_4
Input Segmentation
We will demonstrate implementing this method on the wild boar “Sabi”, restricting ourselves to a couple of tracking days. Your task will be to understand this implementation and apply it to your own movement data.
Open a RStudio Project for this week. Next, copy the wild boar data you downloaded last week (wildschwein_BE_2056.csv) to your project folder. If you cannot find this dataset on your computer, you can re-download it from moodle. Transform the data into an sf object, filter for the wild boar Sabi and a datetime between “2015-07-01” and “2015-07-03”.
wildschwein <- read_delim("data/wildschwein_BE_2056.csv", ",")
# Careful! What Timezone is assumed?
sabi <- wildschwein |>
st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
filter(
TierName == "Sabi",
DatetimeUTC >= "2015-07-01",
DatetimeUTC < "2015-07-03"
)
# Plot paths
ggplot(sabi) +
geom_sf() +
geom_path(aes(E, N))Temporal window and distance within
In the above dataset, the sampling interval is 15 minutes. If we take a temporal window of 60 minutes, that would mean including 4 fixes.
# Function:
distance_by_element <- function(later, now) {
as.numeric(
st_distance(later, now, by_element = TRUE)
)
}We can use the function distance_by_element from week 2 in combination with lead() and lag() to calculate the Euclidean distance. For example, to create the necessary offset of n-2, we use lag(x, 2).
# Definition the distances between:
sabi <- sabi |>
mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry), # distance to pos -30 minutes
nMinus1 = distance_by_element(lag(geometry, 1), geometry), # distance to pos -15 minutes
nPlus1 = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
nPlus2 = distance_by_element(geometry, lead(geometry, 2)) # distance to pos +30 minutes
)Mean distance
Now we want to calculate the mean distance of nMinus2, nMinus1, nPlus1, nPlus2 for each row. Since we want the mean value per Row, we have to explicitly specify this before mutate() with the function rowwise(). To remove this rowwise-grouping, we end the operation with ungroup().
sabi <- sabi |>
rowwise() |> # für jeden Punkt
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
) |>
ungroup()Remove “static points” (wo das tier sich nicht bewegt)
We can now determine if an animal is moving or not by specifying a threshold distance on stepMean. In our example, we use the mean value as a threshold: Positions with distances below this value are considered static.
sabi <- sabi |>
mutate(static = stepMean < mean(stepMean, na.rm = TRUE))
sabi_filter <- sabi |>
filter(!static)
sabi_filter |>
ggplot(aes(E, N)) +
geom_point(data = sabi, col = "red") + # rote Punkte wurden als "statisch" entfernt
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")Exercise A
library("readr")
library("sf")
library("dplyr")
library("ggplot2")# Function:
distance_by_element <- function(later, now) {
as.numeric(
st_distance(later, now, by_element = TRUE)
)
}Data informations: This GPS trajectory dataset was collected in (Microsoft Research Asia) Geolife project by 182 users in a period of over five years (from April 2007 to August 2012). For the following analysis the trajectory of one person at 2008-10-23 is used.
# Pakete laden
library(sf)
library(tmap)
# Datei einlesen, erste 6 Zeilen überspringen
file_path <- "data/Trajectory/20081023025304.plt"
gps_data <- read.csv(file_path, skip = 6, header = FALSE)
# Spaltennamen setzen
colnames(gps_data) <- c("Latitude", "Longitude", "Unused", "Altitude", "Timestamp", "Date", "Time")
# Unnötige Spalte entfernen
gps_data <- gps_data[, c("Latitude", "Longitude", "Altitude", "Timestamp", "Date", "Time")]
# In sf-Objekt umwandeln (WICHTIG: Longitude zuerst!)
gps_data_sf <- st_as_sf(gps_data, coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
# tmap interaktive Karte anzeigen
tmap_mode("view") # Interaktive Ansicht aktivieren
tm_shape(gps_data_sf) +
tm_dots(col = "red") # Punkte als Referenz anzeigenggplot(gps_data_sf) +
geom_sf() +
geom_path(aes(Longitude, Latitude)) +
theme_classic()Calculate distances
# Different windows:
gps_data_sf <- gps_data_sf |>
mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry),
nMinus1 = distance_by_element(lag(geometry, 1), geometry),
nPlus1 = distance_by_element(geometry, lead(geometry, 1)),
nPlus2 = distance_by_element(geometry, lead(geometry, 2))
)
# Mean of Point-Distances:
gps_data_sf <- gps_data_sf |>
rowwise() |> # für jeden Punkt
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
) |>
ungroup()Specify and apply threshold d
summary(gps_data_sf$stepMean) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.2127 7.9860 17.0496 23.9094 24.2946 1403.7360 4
ggplot(gps_data_sf, aes(x = stepMean)) +
geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
labs(title = "Histogram of stepMean Values", x = "stepMean", y = "Frequency")ggplot(gps_data_sf, aes(y = stepMean)) +
geom_boxplot(fill = "lightblue", color = "darkblue") +
labs(title = "Boxplot of stepMean Values", y = "stepMean")Remove “static points” (wo nicht bewegt)
gps_data_sf <- gps_data_sf |>
mutate(static = stepMean < quantile(stepMean, 0.50, na.rm = TRUE))
gps_data_filter <- gps_data_sf |>
filter(!static)
gps_data_filter |>
ggplot(aes(Longitude, Latitude)) +
geom_point(data = gps_data_sf, col = "red") + # rote Punkte wurden als "statisch" entfernt
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")Segment-based analysis
# Function
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}# assign unique IDs to subtrajectories
gps_data_sf <- gps_data_sf |>
mutate(segment_id = rle_id(static))Visualize the moving segments by colourizing them by segment_ID:
ggplot(gps_data_sf) +
geom_sf() +
geom_path(aes(Longitude, Latitude, color = factor(segment_id))) +
theme_classic() +
scale_color_viridis_d() + # Optional: Uses a colorblind-friendly palette
theme(legend.position = "none") # Removes the legendThen use segment_ID as a grouping variable to determine the segments duration and remove short segments (e.g. segments with a duration < 5 Minutes):
# Convert Timestamp to POSIXct using the correct origin date
gps_data_sf <- gps_data_sf |>
mutate(Timestamp = as.POSIXct(Timestamp * 86400, origin = "1899-12-30", tz = "UTC"))
# Compute segment durations and filter out short segments
gps_data_filter <- gps_data_sf |>
group_by(segment_id) |>
mutate(segment_duration = difftime(max(Timestamp), min(Timestamp), units = "mins")) |>
ungroup() |>
filter(segment_duration >= 5) # Keep only segments with duration >= 5 minutes
# Plot the filtered data
ggplot(gps_data_filter) +
geom_sf() +
geom_path(aes(Longitude, Latitude, color = factor(segment_id))) +
theme_classic() +
scale_color_viridis_d() + # Optional: Improves color mapping
theme(legend.position = "none") # Removes legendExercise B: Similarity
Similarity measures
We will now calculate similarties between trajectories using a new dataset pedestrian.csv (available on moodle). Download an import this dataset as a data.frame or tibble. It it a set of six different but similar trajectories from pedestrians walking on a path. For this task, explore the trajectories first and get an idea on how the pedestrians moved.
# Import data:
pedestrian <- read_delim("data/pedestrian.csv")
# create an sf object:
pedestrian <- st_as_sf(pedestrian, coords = c("E", "N"), crs = 2056, remove = F)
str(pedestrian)sf [289 × 5] (S3: sf/spec_tbl_df/tbl_df/tbl/data.frame)
$ TrajID : num [1:289] 1 1 1 1 1 1 1 1 1 1 ...
$ E : num [1:289] 2571414 2571396 2571373 2571347 2571336 ...
$ N : num [1:289] 1205804 1205791 1205770 1205753 1205744 ...
$ DatetimeUTC: POSIXct[1:289], format: "2015-03-01 12:01:00" "2015-03-01 12:02:00" ...
$ geometry :sfc_POINT of length 289; first list element: 'XY' num [1:2] 2571414 1205804
- attr(*, "spec")=
.. cols(
.. TrajID = col_double(),
.. E = col_double(),
.. N = col_double(),
.. DatetimeUTC = col_datetime(format = "")
.. )
- attr(*, "problems")=<externalptr>
- attr(*, "sf_column")= chr "geometry"
- attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
..- attr(*, "names")= chr [1:4] "TrajID" "E" "N" "DatetimeUTC"
# plot:
# ?facet_wrap
ggplot() +
# Grey background points for the entire data (repeated in every facet)
geom_point(data = transform(pedestrian, TrajID = NULL), aes(E, N), color = "grey70", alpha = 0.3) +
# Highlight paths for each specific TrajID
geom_path(data = pedestrian, aes(E, N, color = factor(TrajID)), size = 1) +
# Highlight points for each TrajID
geom_point(data = pedestrian, aes(E, N, color = factor(TrajID)), size = 2) +
theme_classic() +
scale_color_viridis_d() + # Colorblind-friendly color scale
theme(legend.position = "none") + # Removes the legend
facet_wrap(~TrajID)Calculate similarity
library(SimilarityMeasures)
library(sf)
library(dplyr)
library(ggplot2)
library(readr)
pedestrian <- read_delim("data/pedestrian.csv")
# Convert trajectories to matrices based on unique TrajID
trajectory_matrices <- pedestrian %>%
split(.$TrajID) %>%
lapply(function(x) as.matrix(x[, c("E", "N")]))# Compute similarity of Trajectory 1 with others (2 to 6)
similarity_results <- lapply(2:6, function(i) {
trajectory1 <- trajectory_matrices[[1]] # Reference trajectory
trajectory2 <- trajectory_matrices[[i]] # Comparison trajectory
# Compute similarity measures
dtw_dist <- DTW(trajectory1, trajectory2) # Dynamic Time Warping
edit_dist <- EditDist(trajectory1, trajectory2) # Edit Distance
frechet_dist <- Frechet(trajectory1, trajectory2) # Frechet Distance
lcss_dist <- LCSS(trajectory1, trajectory2, pointSpacing = 1, pointDistance = 5, errorMarg = 2)
# Longest Common Subsequence
return(data.frame(
Trajectory = i,
DTW = dtw_dist,
EditDist = edit_dist,
Frechet = frechet_dist,
LCSS = lcss_dist
))
})
# Combine results into one dataframe
similarity_df <- bind_rows(similarity_results)# Convert to long format for ggplot
similarity_long <- similarity_df %>%
tidyr::pivot_longer(cols = -Trajectory, names_to = "Measure", values_to = "Value")trajectory_colors <- c("2" = "#253B80", # Dunkelblau
"3" = "#1F78B4", # Blau
"4" = "#33A02C", # Grün
"5" = "#B2DF8A", # Hellgrün
"6" = "#FDBF00") # Gelb
# Create the ggplot visualization
ggplot(similarity_long, aes(x = as.factor(Trajectory), y = Value, fill = as.factor(Trajectory))) +
geom_bar(stat = "identity") +
scale_fill_manual(values = trajectory_colors) +
facet_wrap(~Measure, scales = "free") +
labs(
title = "Computed similarities using different measures\nbetween trajectory 1 to all other trajectories",
x = "Comparison trajectory",
y = "Value"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16),
axis.text.x = element_text(size = 12),
strip.text = element_text(size = 12)
) +
theme(legend.position = "none")