library(readr)
library(dplyr)
library(sf)
library(ggplot2)
Exercise_B_Similarity
Exercise B: Similarity
Task 1: Similarity measures
We will now calculate similarities between trajectories using a new dataset, pedestrian.csv (available on Moodle). Download and import this dataset as a data.frame or tibble. It is a set of six different but similar trajectories from pedestrians walking on a path.
For this task, explore the trajectories first and get an idea on how the pedestrians moved.
Loading libraries
Read Data
# Read the pedestrian trajectories. No delimiter is given, so readr has to
# guess it from the file.
pedestrians <- read_delim("Daten/pedestrian.csv")

# Turn the table into an sf point object (CRS 2056) built from the E/N
# columns. remove = FALSE keeps E and N as ordinary columns as well.
pedestrians <- st_as_sf(
  pedestrians,
  coords = c("E", "N"),
  crs = 2056,
  remove = FALSE
)
# EDA: one panel per trajectory, coloured by trajectory ID.
# geom_path() joins the points in row order (assumes rows are stored in
# recording order -- TODO confirm); geom_line() would sort them by E first
# and scramble the track. Colour is mapped inside aes() so it stays tied to
# the plotted rows instead of relying on an external vector.
pedestrians |>
  ggplot(aes(E, N, colour = factor(TrajID))) +
  geom_point() +
  geom_path() +
  facet_wrap(~TrajID) +
  theme_minimal() +
  theme(legend.position = "none")
Task 2: Calculate similarity
Install the package SimilarityMeasures (install.packages("SimilarityMeasures")). Familiarize yourself with this package by skimming through the function descriptions with help(package = "SimilarityMeasures"). Now compare trajectory 1 to trajectories 2-6 using different similarity measures from the package. Your options are: DTW, EditDist, Frechet and LCSS.
Before visualizing your results, think about the following: Which two trajectories do you perceive to be most similar, and which are most dissimilar? Now visualize the results from the computed similarity measures. Which measure reflects your own intuition the closest?
Note: All functions in the package need matrices as input, with one trajectory per matrix.
1 and 6 look really similar, but 2 and 3 also have a lot in common. 4 and 3 look very dissimilar.
library("SimilarityMeasures")
library(SimilarityMeasures)
# Split the data into one E/N table per trajectory (p1 ... p6).
# `pedestrians` is an sf object, and select() on an sf object keeps the
# geometry list-column, so the geometry is dropped first to leave only the
# numeric E and N columns.
pedestrian_coords <- st_drop_geometry(pedestrians)

p1 <- pedestrian_coords |>
  filter(TrajID == 1) |>
  select(E, N)

p2 <- pedestrian_coords |>
  filter(TrajID == 2) |>
  select(E, N)

p3 <- pedestrian_coords |>
  filter(TrajID == 3) |>
  select(E, N)

p4 <- pedestrian_coords |>
  filter(TrajID == 4) |>
  select(E, N)

p5 <- pedestrian_coords |>
  filter(TrajID == 5) |>
  select(E, N)

p6 <- pedestrian_coords |>
  filter(TrajID == 6) |>
  select(E, N)
# Convert each trajectory into a plain numeric n x 2 matrix (E, N), which is
# the input format the SimilarityMeasures functions need.
#
# Why the earlier as.matrix() + matrix(unlist(...)) workaround was needed and
# why it is replaced: if the input still carries an sf geometry list-column,
# as.matrix() returns a list matrix, and unlist() then appends the geometry
# coordinates after the E and N values. Reshaping that into two columns
# gives a 2n x 2 matrix that mixes E, N and geometry values instead of one
# (E, N) row per point. Selecting the E and N columns explicitly avoids this
# and works for sf objects, tibbles and data.frames alike.
to_path_matrix <- function(trajectory) {
  coords <- as.data.frame(trajectory)[, c("E", "N")]
  # unname() gives a matrix without dimnames, as matrix(unlist(...)) did.
  unname(as.matrix(coords))
}

path1 <- to_path_matrix(p1)
path2 <- to_path_matrix(p2)
path3 <- to_path_matrix(p3)
path4 <- to_path_matrix(p4)
path5 <- to_path_matrix(p5)
path6 <- to_path_matrix(p6)
# help(package = "SimilarityMeasures")
Calculating Similarities
# Trajectory 1 vs. trajectory 2 with each measure. The "recorded output"
# comments are the console results pasted into the original document.

# DTW
DTW12 <- DTW(path1, path2, pointSpacing = -1)

# EditDist
EditDist(path1, path2, pointDistance = 20)
# recorded output: [1] 93

# Frechet
Frechet(path1, path2, testLeash = -1)
# recorded output: [1] 964525.3

# LCSS
LCSS(
  path1,
  path2,
  pointSpacing = -1,
  pointDistance = 20,
  errorMarg = 100,
  returnTrans = FALSE
)
# recorded output: [1] 5
# or even easier with one big dataframe and all the results added
# Wrapper around LCSS() with errorMarg fixed at 100, so that LCSS can be
# called with just two trajectories like the other measures.
#
# path1, path2: trajectory matrices passed straight through to LCSS().
# Returns whatever LCSS() returns for that pair.
My_LCSS <- function(path1, path2) {
  LCSS(path1, path2, errorMarg = 100.0)
}
# Compare trajectory 1 with trajectories 2-6 using every similarity measure
# and collect the outcomes in one long data.frame with the columns
#   Name       - name of the similarity measure
#   Comparator - ID of the trajectory compared against trajectory 1
#   Result     - value returned by the measure
# Rows are ordered by measure (EditDist, DTW, Frechet, LCSS) and, within each
# measure, by comparator (2 ... 6). Every measure runs with its default
# arguments; LCSS goes through My_LCSS().
comparator_ids <- 2:6
comparator_paths <- list(path2, path3, path4, path5, path6)
measures <- list(
  EditDist = EditDist,
  DTW = DTW,
  Frechet = Frechet,
  LCSS = My_LCSS
)

# One data.frame per measure, bound together once at the end instead of
# growing `results` with a separate rbind() call per comparison.
results <- do.call(rbind, lapply(names(measures), function(measure_name) {
  measure <- measures[[measure_name]]
  data.frame(
    Name = measure_name,
    Comparator = as.numeric(comparator_ids),
    Result = vapply(
      comparator_paths,
      function(path) measure(path1, path),
      numeric(1)
    ),
    stringsAsFactors = FALSE
  )
}))
results
Visualize results
# One panel per similarity measure. The y scales are free because the
# measures return values of very different magnitude. Comparator is a
# trajectory ID, so it is mapped to a discrete fill via factor() rather than
# a continuous colour gradient. geom_col() is the direct equivalent of
# geom_bar(stat = "identity").
ggplot(results, aes(Comparator, Result, fill = factor(Comparator))) +
  geom_col() +
  facet_wrap(~Name, scales = "free_y") +
  labs(fill = "Comparator")
It is unclear to me why I get different values than the GitHub page shows, but I cannot find the error.
Which measure reflects your own intuition the closest? LCSS supports the same assumption, namely that 1 and 6 look very similar.