library("readr")
library("sf")
library("dplyr")
library("ggplot2")
Exercise_4
Input Segmentation
We will demonstrate implementing this method on the wild boar “Sabi”, restricting ourselves to a couple of tracking days. Your task will be to understand this implementation and apply it to your own movement data.
Open a RStudio Project for this week. Next, copy the wild boar data you downloaded last week (wildschwein_BE_2056.csv) to your project folder. If you cannot find this dataset on your computer, you can re-download it from moodle. Transform the data into an sf object, filter for the wild boar Sabi and a datetime between “2015-07-01” and “2015-07-03”.
<- read_delim("data/wildschwein_BE_2056.csv", ",")
wildschwein
# Careful! What Timezone is assumed?
<- wildschwein |>
sabi st_as_sf(coords = c("E", "N"), crs = 2056, remove = FALSE) |>
filter(
== "Sabi",
TierName >= "2015-07-01",
DatetimeUTC < "2015-07-03"
DatetimeUTC
)# Plot paths
ggplot(sabi) +
geom_sf() +
geom_path(aes(E, N))
Temporal window and distance within
In the above dataset, the sampling interval is 15 minutes. If we take a temporal window of 60 minutes, that would mean including 4 fixes.
# Function:
<- function(later, now) {
distance_by_element as.numeric(
st_distance(later, now, by_element = TRUE)
) }
We can use the function distance_by_element from week 2 in combination with lead() and lag() to calculate the Euclidean distance. For example, to create the necessary offset of n-2, we use lag(x, 2).
# Definition the distances between:
<- sabi |>
sabi mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry), # distance to pos -30 minutes
nMinus1 = distance_by_element(lag(geometry, 1), geometry), # distance to pos -15 minutes
nPlus1 = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +15 mintues
nPlus2 = distance_by_element(geometry, lead(geometry, 2)) # distance to pos +30 minutes
)
Mean distance
Now we want to calculate the mean distance of nMinus2, nMinus1, nPlus1, nPlus2 for each row. Since we want the mean value per Row, we have to explicitly specify this before mutate() with the function rowwise(). To remove this rowwise-grouping, we end the operation with ungroup().
<- sabi |>
sabi rowwise() |> # für jeden Punkt
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
|>
) ungroup()
Remove “static points” (wo das tier sich nicht bewegt)
We can now determine if an animal is moving or not by specifying a threshold distance on stepMean. In our example, we use the mean value as a threshold: Positions with distances below this value are considered static.
<- sabi |>
sabi mutate(static = stepMean < mean(stepMean, na.rm = TRUE))
<- sabi |>
sabi_filter filter(!static)
|>
sabi_filter ggplot(aes(E, N)) +
geom_point(data = sabi, col = "red") + # rote Punkte wurden als "statisch" entfernt
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")
Exercise A
library("readr")
library("sf")
library("dplyr")
library("ggplot2")
# Function:
<- function(later, now) {
distance_by_element as.numeric(
st_distance(later, now, by_element = TRUE)
) }
Data informations: This GPS trajectory dataset was collected in (Microsoft Research Asia) Geolife project by 182 users in a period of over five years (from April 2007 to August 2012). For the following analysis the trajectory of one person at 2008-10-23 is used.
# Pakete laden
library(sf)
library(tmap)
# Datei einlesen, erste 6 Zeilen überspringen
<- "data/Trajectory/20081023025304.plt"
file_path <- read.csv(file_path, skip = 6, header = FALSE)
gps_data
# Spaltennamen setzen
colnames(gps_data) <- c("Latitude", "Longitude", "Unused", "Altitude", "Timestamp", "Date", "Time")
# Unnötige Spalte entfernen
<- gps_data[, c("Latitude", "Longitude", "Altitude", "Timestamp", "Date", "Time")]
gps_data
# In sf-Objekt umwandeln (WICHTIG: Longitude zuerst!)
<- st_as_sf(gps_data, coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)
gps_data_sf
# tmap interaktive Karte anzeigen
tmap_mode("view") # Interaktive Ansicht aktivieren
tm_shape(gps_data_sf) +
tm_dots(col = "red") # Punkte als Referenz anzeigen
ggplot(gps_data_sf) +
geom_sf() +
geom_path(aes(Longitude, Latitude)) +
theme_classic()
Calculate distances
# Different windows:
<- gps_data_sf |>
gps_data_sf mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry),
nMinus1 = distance_by_element(lag(geometry, 1), geometry),
nPlus1 = distance_by_element(geometry, lead(geometry, 1)),
nPlus2 = distance_by_element(geometry, lead(geometry, 2))
)
# Mean of Point-Distances:
<- gps_data_sf |>
gps_data_sf rowwise() |> # für jeden Punkt
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
|>
) ungroup()
Specify and apply threshold d
summary(gps_data_sf$stepMean)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.2127 7.9860 17.0496 23.9094 24.2946 1403.7360 4
ggplot(gps_data_sf, aes(x = stepMean)) +
geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.7) +
labs(title = "Histogram of stepMean Values", x = "stepMean", y = "Frequency")
ggplot(gps_data_sf, aes(y = stepMean)) +
geom_boxplot(fill = "lightblue", color = "darkblue") +
labs(title = "Boxplot of stepMean Values", y = "stepMean")
Remove “static points” (wo nicht bewegt)
<- gps_data_sf |>
gps_data_sf mutate(static = stepMean < quantile(stepMean, 0.50, na.rm = TRUE))
<- gps_data_sf |>
gps_data_filter filter(!static)
|>
gps_data_filter ggplot(aes(Longitude, Latitude)) +
geom_point(data = gps_data_sf, col = "red") + # rote Punkte wurden als "statisch" entfernt
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")
Segment-based analysis
# Function
<- function(vec) {
rle_id <- rle(vec)$lengths
x as.factor(rep(seq_along(x), times = x))
}
# assign unique IDs to subtrajectories
<- gps_data_sf |>
gps_data_sf mutate(segment_id = rle_id(static))
Visualize the moving segments by colourizing them by segment_ID:
ggplot(gps_data_sf) +
geom_sf() +
geom_path(aes(Longitude, Latitude, color = factor(segment_id))) +
theme_classic() +
scale_color_viridis_d() + # Optional: Uses a colorblind-friendly palette
theme(legend.position = "none") # Removes the legend
Then use segment_ID as a grouping variable to determine the segments duration and remove short segments (e.g. segments with a duration < 5 Minutes):
# Convert Timestamp to POSIXct using the correct origin date
<- gps_data_sf |>
gps_data_sf mutate(Timestamp = as.POSIXct(Timestamp * 86400, origin = "1899-12-30", tz = "UTC"))
# Compute segment durations and filter out short segments
<- gps_data_sf |>
gps_data_filter group_by(segment_id) |>
mutate(segment_duration = difftime(max(Timestamp), min(Timestamp), units = "mins")) |>
ungroup() |>
filter(segment_duration >= 5) # Keep only segments with duration >= 5 minutes
# Plot the filtered data
ggplot(gps_data_filter) +
geom_sf() +
geom_path(aes(Longitude, Latitude, color = factor(segment_id))) +
theme_classic() +
scale_color_viridis_d() + # Optional: Improves color mapping
theme(legend.position = "none") # Removes legend
Exercise B: Similarity
Similarity measures
We will now calculate similarties between trajectories using a new dataset pedestrian.csv (available on moodle). Download an import this dataset as a data.frame or tibble. It it a set of six different but similar trajectories from pedestrians walking on a path. For this task, explore the trajectories first and get an idea on how the pedestrians moved.
# Import data:
<- read_delim("data/pedestrian.csv")
pedestrian # create an sf object:
<- st_as_sf(pedestrian, coords = c("E", "N"), crs = 2056, remove = F)
pedestrian str(pedestrian)
sf [289 × 5] (S3: sf/spec_tbl_df/tbl_df/tbl/data.frame)
$ TrajID : num [1:289] 1 1 1 1 1 1 1 1 1 1 ...
$ E : num [1:289] 2571414 2571396 2571373 2571347 2571336 ...
$ N : num [1:289] 1205804 1205791 1205770 1205753 1205744 ...
$ DatetimeUTC: POSIXct[1:289], format: "2015-03-01 12:01:00" "2015-03-01 12:02:00" ...
$ geometry :sfc_POINT of length 289; first list element: 'XY' num [1:2] 2571414 1205804
- attr(*, "spec")=
.. cols(
.. TrajID = col_double(),
.. E = col_double(),
.. N = col_double(),
.. DatetimeUTC = col_datetime(format = "")
.. )
- attr(*, "problems")=<externalptr>
- attr(*, "sf_column")= chr "geometry"
- attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA
..- attr(*, "names")= chr [1:4] "TrajID" "E" "N" "DatetimeUTC"
# plot:
# ?facet_wrap
ggplot() +
# Grey background points for the entire data (repeated in every facet)
geom_point(data = transform(pedestrian, TrajID = NULL), aes(E, N), color = "grey70", alpha = 0.3) +
# Highlight paths for each specific TrajID
geom_path(data = pedestrian, aes(E, N, color = factor(TrajID)), size = 1) +
# Highlight points for each TrajID
geom_point(data = pedestrian, aes(E, N, color = factor(TrajID)), size = 2) +
theme_classic() +
scale_color_viridis_d() + # Colorblind-friendly color scale
theme(legend.position = "none") + # Removes the legend
facet_wrap(~TrajID)
Calculate similarity
library(SimilarityMeasures)
library(sf)
library(dplyr)
library(ggplot2)
library(readr)
<- read_delim("data/pedestrian.csv")
pedestrian
# Convert trajectories to matrices based on unique TrajID
<- pedestrian %>%
trajectory_matrices split(.$TrajID) %>%
lapply(function(x) as.matrix(x[, c("E", "N")]))
# Compute similarity of Trajectory 1 with others (2 to 6)
<- lapply(2:6, function(i) {
similarity_results <- trajectory_matrices[[1]] # Reference trajectory
trajectory1 <- trajectory_matrices[[i]] # Comparison trajectory
trajectory2
# Compute similarity measures
<- DTW(trajectory1, trajectory2) # Dynamic Time Warping
dtw_dist <- EditDist(trajectory1, trajectory2) # Edit Distance
edit_dist <- Frechet(trajectory1, trajectory2) # Frechet Distance
frechet_dist <- LCSS(trajectory1, trajectory2, pointSpacing = 1, pointDistance = 5, errorMarg = 2)
lcss_dist # Longest Common Subsequence
return(data.frame(
Trajectory = i,
DTW = dtw_dist,
EditDist = edit_dist,
Frechet = frechet_dist,
LCSS = lcss_dist
))
})
# Combine results into one dataframe
<- bind_rows(similarity_results) similarity_df
# Convert to long format for ggplot
<- similarity_df %>%
similarity_long ::pivot_longer(cols = -Trajectory, names_to = "Measure", values_to = "Value") tidyr
<- c("2" = "#253B80", # Dunkelblau
trajectory_colors "3" = "#1F78B4", # Blau
"4" = "#33A02C", # Grün
"5" = "#B2DF8A", # Hellgrün
"6" = "#FDBF00") # Gelb
# Create the ggplot visualization
ggplot(similarity_long, aes(x = as.factor(Trajectory), y = Value, fill = as.factor(Trajectory))) +
geom_bar(stat = "identity") +
scale_fill_manual(values = trajectory_colors) +
facet_wrap(~Measure, scales = "free") +
labs(
title = "Computed similarities using different measures\nbetween trajectory 1 to all other trajectories",
x = "Comparison trajectory",
y = "Value"
+
) theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16),
axis.text.x = element_text(size = 12),
strip.text = element_text(size = 12)
+
) theme(legend.position = "none")