library(readr)
<- read_csv("track.csv") track
Monte Carlo Simulation with movement data
track
# A tibble: 34 × 3
time X Y
<dttm> <dbl> <dbl>
1 2024-05-04 15:05:02 2685915. 1248194.
2 2024-05-04 15:05:09 2685920. 1248218.
3 2024-05-04 15:05:18 2685942. 1248244.
4 2024-05-04 15:05:25 2685959. 1248260.
5 2024-05-04 15:05:36 2685988. 1248290.
6 2024-05-04 15:05:48 2686025. 1248322.
7 2024-05-04 15:05:59 2686050. 1248346.
8 2024-05-04 15:06:06 2686064. 1248362.
9 2024-05-04 15:06:11 2686075. 1248372.
10 2024-05-04 15:06:14 2686081. 1248376.
# ℹ 24 more rows
library(ggplot2)
ggplot(track, aes(X, Y)) +
geom_point() +
geom_path() +
coord_equal()
To determine the timelag (i.e. the time in between consecutive samples) we can use lead()
from dplyr
library(dplyr)
<- \(x) as.numeric(difftime(lead(x), x, units = "secs"))
get_timelag
<- track |>
track mutate(
timelag = get_timelag(time)
)
track
# A tibble: 34 × 4
time X Y timelag
<dttm> <dbl> <dbl> <dbl>
1 2024-05-04 15:05:02 2685915. 1248194. 7
2 2024-05-04 15:05:09 2685920. 1248218. 9
3 2024-05-04 15:05:18 2685942. 1248244. 7
4 2024-05-04 15:05:25 2685959. 1248260. 11
5 2024-05-04 15:05:36 2685988. 1248290. 12
6 2024-05-04 15:05:48 2686025. 1248322. 11
7 2024-05-04 15:05:59 2686050. 1248346. 7
8 2024-05-04 15:06:06 2686064. 1248362. 5
9 2024-05-04 15:06:11 2686075. 1248372. 3
10 2024-05-04 15:06:14 2686081. 1248376. 7
# ℹ 24 more rows
Similarly, to get the euclidean distance between two sample, we can use lead
together with the formula for 2D euclidean distance:
<- \(x,y){
get_distance sqrt((x - lead(x))^2 + (y - lead(y))^2)
}
<- track |>
track mutate(
steplength = get_distance(X, Y)
)
track
# A tibble: 34 × 5
time X Y timelag steplength
<dttm> <dbl> <dbl> <dbl> <dbl>
1 2024-05-04 15:05:02 2685915. 1248194. 7 23.9
2 2024-05-04 15:05:09 2685920. 1248218. 9 33.5
3 2024-05-04 15:05:18 2685942. 1248244. 7 24.1
4 2024-05-04 15:05:25 2685959. 1248260. 11 41.9
5 2024-05-04 15:05:36 2685988. 1248290. 12 48.6
6 2024-05-04 15:05:48 2686025. 1248322. 11 34.8
7 2024-05-04 15:05:59 2686050. 1248346. 7 21.2
8 2024-05-04 15:06:06 2686064. 1248362. 5 14.7
9 2024-05-04 15:06:11 2686075. 1248372. 3 7.21
10 2024-05-04 15:06:14 2686081. 1248376. 7 16.3
# ℹ 24 more rows
To calculate speed, we can just divide steplength
(meters) by timelag
(seconds).
<- track |>
track mutate(
speed = steplength / timelag
)
To calculate the turning angle:
<- function(x, y) {
get_turning_angle
= lag(x) - x
v1_x = lag(y) - y
v1_y = lead(x) - x
v2_x = lead(y) - y
v2_y
= v1_x * v2_x + v1_y * v2_y
dot = sqrt(v1_x^2 + v1_y^2)
mag1 = sqrt(v2_x^2 + v2_y^2)
mag2 = pmax(pmin(dot / (mag1 * mag2), 1), -1)
cos_theta = acos(cos_theta) * 180 / pi
angle
# We are interested in the offset to a straight line 180°
180 - angle
}
<- track |>
track mutate(
angle = get_turning_angle(X, Y)
)
To visualize the distribution of our speed and turning angles, we can pivot our data and use ggplot2
.
library(tidyr)
|>
track pivot_longer(c(speed, angle)) |>
ggplot() +
geom_boxplot(aes(x = name, y= value)) +
facet_wrap(~name, scale = "free")
To simulate locations, we can offset X
and Y
by a random value from a normal distribution. The parameter sd
determines how much offset we want the simulated point to have. With asd
of 5:
- 68.27% of all values will be offset +/- 5 Meters (\(1\sigma\))
- 99.73% of all values will be offset +/- 15 Meters (\(3\sigma\))
# This simulation is for illustration purposes only
<- track |>
track_sim mutate(
X = X + rnorm(X, sd = 5),
Y = Y + rnorm(Y, sd = 5)
)
ggplot(track, aes(X, Y)) +
geom_point(col = "black") +
geom_path() +
geom_point(data = track_sim, col = "red") +
geom_path(data = track_sim, col = "red") +
coord_equal()
To do muliple simulations, we recommend using the map_dfr
function from purrr
. This function works like a loop, is just more concise and easier to write:
library(purrr)
<- map_dfr(seq_len(100), \(x){
tracks_mc |>
track mutate(
X = X + rnorm(X, sd = 3),
Y = Y + rnorm(Y, sd = 3),
# To keep track of the iteration
iteration = x
) })
Now we can calculate timelag, steplength, speed, turning angle as before. The difference is, that we need to group by iteration
so that we dont calculate values across different simulations.
<- tracks_mc |>
tracks_mc group_by(iteration) |>
mutate(
timelag = get_timelag(time),
steplength = get_distance(X, Y),
speed = steplength / timelag,
angle = get_turning_angle(X, Y)
)
To compare the simulated data with the measured data, it’s best to combine them into a single dataframe. To differentiate the two datasets, we need an additional column type
:
$type <- "simulated"
tracks_mc$type <- "measured"
track
<- bind_rows(tracks_mc, track) tracks_bind
First, let’s take a look at the data spatially:
# Again, we need to group by iteration, so that the path is not drawn
# across simulations:
ggplot(tracks_bind, aes(X, Y, group = iteration, color = type)) +
geom_point() +
geom_path()
Now we can compare speed and angles:
|>
tracks_bind pivot_longer(c(speed, angle)) |>
ggplot() +
geom_boxplot(aes(x = type, y= value)) +
facet_wrap(~name, scale = "free")
To do multiple simulations, wen can just do two nested map_dfr
:
# the outer loop takes care of the offset distance
<- map_dfr(c(1, 3, 9), \(dist){
tracks_mc map_dfr(seq_len(100), \(x){
|>
track mutate(
X = X + rnorm(X, sd = dist),
Y = Y + rnorm(Y, sd = dist),
# to keep track of the distance
type = paste("simulated",dist),
# to keep track of the iteration
iteration = x
)
}) })
As before, we can calculate timelag, steplength, speed, turning angle.
<- tracks_mc |>
tracks_mc mutate(
timelag = get_timelag(time),
steplength = get_distance(X, Y),
speed = steplength / timelag,
angle = get_turning_angle(X, Y)
)
Bind the rows together:
<- bind_rows(tracks_mc, track) tracks_bind
First, let’s look at a spatial visualisation of our data
|>
tracks_bind ggplot(aes(X, Y, group = iteration)) +
geom_point(alpha = .1) +
geom_path(alpha = .1) +
coord_equal() +
facet_wrap(~type, nrow = 1)
|>
tracks_bind pivot_longer(c(speed, angle)) |>
ggplot() +
geom_boxplot(aes(x = type, y= value)) +
facet_wrap(~name, scale = "free")