library(readr)
library(sf)
library(dplyr)
library(ggplot2)Exercise 4: Segmentation and Similarity
Exercise A: Segmentation
Load libraries
Read in data, select necessary part of data, tidy data
First look at the different layers of the gpx-File
st_layers("data/tractive_april.gpx")Read in the layer track_points & have a look at the dataset.
cat <- st_read("data/tractive_april.gpx", layer="track_points")
#View(cat)Select only the necessary columns. Then rename the column timeto date_time and cmtto calc_speed.
cat <- cat [, c("track_seg_point_id", "ele", "time", "cmt", "geometry")]
cat <- cat |>
rename(date_time = time,
calc_speed = cmt
)Change CRS from WGS84 to LV95 and add the separate x & y coordinates as well (they were not included in the raw data).
cat <- st_transform(cat, crs=2056)
cat <- cbind(cat, st_coordinates(cat))Select only a small timespan of the dataset.
cat_small <- cat |>
filter(
date_time >= "2024-04-03",
date_time < "2024-04-04"
)Now have a look at the smaller dataset in a plot.
ggplot(cat_small) +
geom_sf() +
geom_path(aes(X,Y))Task 1: Calculate distances
Step a.) Specify temporal window υ
The cat was logged approximately every 10 minutes. If we take a temporal window of 60 minutes, this includes 6 fixes. Therefore we need to calculate the following euclidean distances (pos representing single location):
pos[n-3] to pos[n]
pos[n-2] to pos[n]
pos[n-1] to pos[n]
pos[n] to pos[n+1]
pos[n] to pos[n+2]
pos[n] to pos[n+3]
Step b.): Measure the distance to every point within υ
Recreate function distance_by_element.
distance_by_element <- function(later, now) {
as.numeric(
st_distance(later, now, by_element = TRUE)
)
}Calculate the necessary distances and add them to the dataset.
cat_small <- cat_small |>
mutate(
nMinus3 = distance_by_element(lag(geometry, 3), geometry), #distance to pos -30 minutes
nMinus2 = distance_by_element(lag(geometry, 2), geometry), # distance to pos -20 minutes
nMinus1 = distance_by_element(lag(geometry, 1), geometry), # distance to pos -10 minutes
nPlus1 = distance_by_element(geometry, lead(geometry, 1)), # distance to pos +10 mintues
nPlus2 = distance_by_element(geometry, lead(geometry, 2)), # distance to pos +20 minutes
nPlus3 = distance_by_element(geometry, lead(geometry, 3)) #distance to pos +30 minutes
)Now we want to calculate the mean distance of nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3 for each row. Since we want the mean value per row, we have to explicitly specify this before mutate() with the function rowwise(). To remove this rowwise-grouping, we end the operation with ungroup(). (If we also would want to calculate the stepMeans of the rows with NAs in the distances (nMinus3 etc), we would need to add na.rm=TRUE. But this makes no sense.)
cat_small <- cat_small |>
rowwise() |>
mutate(
stepMean = mean(c(nMinus3, nMinus2, nMinus1, nPlus1, nPlus2, nPlus3))
) |>
ungroup()Task 2: Specify and apply threshold d
Have a look at the step-meansvia histogram (play with bindwidths), via summary()and via boxplot.
ggplot(cat_small, aes(x=stepMean)) +
geom_histogram(binwidth=20)
ggplot(cat_small, aes(y=stepMean)) +
geom_boxplot()summary(cat_small$stepMean) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
6.144 17.648 26.953 45.789 60.921 189.449 6
In this exercise just use the mean as threshold. Then remove the static segments from the dataset
cat_small <- cat_small |>
mutate(static = stepMean < mean(stepMean, na.rm = TRUE))
cat_small_filter <- cat_small |>
filter(!static)Task 3: Visualize segmented trajectories
Plot the filtered dataset and also add the removed points for comparison.
cat_small_filter |>
ggplot(aes(X, Y)) +
geom_point(data = cat_small, col = "red") +
geom_path() +
geom_point() +
coord_fixed() +
theme(legend.position = "bottom")Task 4: Segment-based analysis
Assign unique IDs to each segment.
1st create necessary function:
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}Then use the function to assign unique IDs to the subtrajectories:
cat_small <- cat_small |>
mutate(segment_id = rle_id(static))Plot all non-static subtrajectories that do not consist of a single point.
cat_small |>
filter(!static) |>
group_by(segment_id) |>
filter(n() > 1) |>
ungroup() |>
ggplot(aes(x=X, y=Y, group=segment_id, color = as.factor(segment_id))) +
geom_line() +
geom_point() +
labs(color="Segment ID")Exercise B: Similarity
Load necessary libraries
library(tmap)
library(SimilarityMeasures)Task 1: Similarity measures
Read in data and have a look at the different pedestrians.
pedestrian <- read_csv("data/pedestrian.csv")Plot every subtrajectory.
ggplot(pedestrian[pedestrian$TrajID == 1, ], aes(E,N)) +
geom_line(color=1) +
geom_point(color=1)
ggplot(pedestrian[pedestrian$TrajID == 2, ], aes(E,N)) +
geom_line(color=2) +
geom_point(color=2)
ggplot(pedestrian[pedestrian$TrajID == 3, ], aes(E,N)) +
geom_line(color=3) +
geom_point(color=3)
ggplot(pedestrian[pedestrian$TrajID == 4, ], aes(E,N)) +
geom_line(color=4) +
geom_point(color=4)
ggplot(pedestrian[pedestrian$TrajID == 5, ], aes(E,N)) +
geom_line(color=5) +
geom_point(color=5)
ggplot(pedestrian[pedestrian$TrajID == 6, ], aes(E,N)) +
geom_line(color=6) +
geom_point(color=6)Task 2: Calculate similarity
Install the package SimilarityMeasures (install.packages("SimilarityMeasures")). Familiarize yourself with this package by skimming through the function descriptions help(package = "SimilarityMeasures").
Split the dataframe to 6 individual matrices, assign a name to every matrice.
trajectories <- split(pedestrian, pedestrian$TrajID)
trajectories_matrices <- lapply(trajectories, function(df) as.matrix(df[, c("E", "N")]))
names(trajectories_matrices) <- paste0("TrajID_", names(trajectories_matrices))Now compare trajectory 1 to trajectories 2-6 using different similarity measures from the package. Your options are. DTW, EditDist, Frechet and LCSS. (I skipped LCSS due to time reasons.)
Compare trajectory 1 to every other trajectory using DTW.
dtw2 <- DTW(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_2, pointSpacing=-1 )
dtw3 <- DTW(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_3, pointSpacing=-1 )
dtw4 <- DTW(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_4, pointSpacing=-1 )
dtw5 <- DTW(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_5, pointSpacing=-1 )
dtw6 <- DTW(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_6, pointSpacing=-1 )
dtw <- data.frame(
trajectory_dtw = c("dtw2", "dtw3", "dtw4", "dtw5", "dtw6"),
value_dtw = c(dtw2, dtw3,dtw4, dtw5, dtw6)
)Compare trajectory 1 to every other trajectory using EditDist.
ed2 <- EditDist(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_2, pointDistance=20)
ed3 <- EditDist(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_3, pointDistance=20)
ed4 <- EditDist(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_4, pointDistance=20)
ed5 <- EditDist(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_5, pointDistance=20)
ed6 <- EditDist(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_6, pointDistance=20)
ed <- data.frame(
trajectory_ed = c("ed2", "ed3", "ed4", "ed5", "ed6"),
value_ed = c(ed2, ed3,ed4, ed5, ed6)
)Compare trajectory 1 to every other trajectory using Frechet.
fr2 <- Frechet(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_2, testLeash=-1)
fr3 <- Frechet(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_3, testLeash=-1)
fr4 <- Frechet(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_4, testLeash=-1)
fr5 <- Frechet(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_5, testLeash=-1)
fr6 <- Frechet(trajectories_matrices$TrajID_1, trajectories_matrices$TrajID_6, testLeash=-1)
fr <- data.frame(
trajectory_fr = c("fr2", "fr3", "fr4", "fr5", "fr6"),
value_fr = c(fr2, fr3,fr4, fr5, fr6)
)ggplot(dtw, aes(x=trajectory_dtw, y=value_dtw, fill=trajectory_dtw)) +
geom_bar(stat="identity") +
labs(title = "DTW")
ggplot(ed, aes(x=trajectory_ed, y=value_ed, fill=trajectory_ed)) +
geom_bar(stat="identity") +
labs(title = "EditDistance")
ggplot(fr, aes(x=trajectory_fr, y=value_fr, fill=trajectory_fr)) +
geom_bar(stat="identity") +
labs(title = "Frechet")