Monte Carlo Simulation with movement data

library(readr)

track <- read_csv("track.csv")

track

# A tibble: 34 × 3
   time                       X        Y
   <dttm>                 <dbl>    <dbl>
 1 2024-05-04 15:05:02 2685915. 1248194.
 2 2024-05-04 15:05:09 2685920. 1248218.
 3 2024-05-04 15:05:18 2685942. 1248244.
 4 2024-05-04 15:05:25 2685959. 1248260.
 5 2024-05-04 15:05:36 2685988. 1248290.
 6 2024-05-04 15:05:48 2686025. 1248322.
 7 2024-05-04 15:05:59 2686050. 1248346.
 8 2024-05-04 15:06:06 2686064. 1248362.
 9 2024-05-04 15:06:11 2686075. 1248372.
10 2024-05-04 15:06:14 2686081. 1248376.
# ℹ 24 more rows

library(ggplot2)


ggplot(track, aes(X, Y)) +
  geom_point() +
  geom_path() +
  coord_equal()

To determine the timelag (i.e. the time in between consecutive samples) we can use lead() from dplyr

library(dplyr)

get_timelag <- \(x) as.numeric(difftime(lead(x), x, units = "secs"))


track <- track |> 
  mutate(
    timelag = get_timelag(time)
  )

track

# A tibble: 34 × 4
   time                       X        Y timelag
   <dttm>                 <dbl>    <dbl>   <dbl>
 1 2024-05-04 15:05:02 2685915. 1248194.       7
 2 2024-05-04 15:05:09 2685920. 1248218.       9
 3 2024-05-04 15:05:18 2685942. 1248244.       7
 4 2024-05-04 15:05:25 2685959. 1248260.      11
 5 2024-05-04 15:05:36 2685988. 1248290.      12
 6 2024-05-04 15:05:48 2686025. 1248322.      11
 7 2024-05-04 15:05:59 2686050. 1248346.       7
 8 2024-05-04 15:06:06 2686064. 1248362.       5
 9 2024-05-04 15:06:11 2686075. 1248372.       3
10 2024-05-04 15:06:14 2686081. 1248376.       7
# ℹ 24 more rows

Similarly, to get the euclidean distance between two sample, we can use lead together with the formula for 2D euclidean distance:

get_distance <- \(x,y){
  sqrt((x - lead(x))^2 + (y  - lead(y))^2)
}


track <- track |> 
  mutate(
    steplength = get_distance(X, Y)
  )

track

# A tibble: 34 × 5
   time                       X        Y timelag steplength
   <dttm>                 <dbl>    <dbl>   <dbl>      <dbl>
 1 2024-05-04 15:05:02 2685915. 1248194.       7      23.9 
 2 2024-05-04 15:05:09 2685920. 1248218.       9      33.5 
 3 2024-05-04 15:05:18 2685942. 1248244.       7      24.1 
 4 2024-05-04 15:05:25 2685959. 1248260.      11      41.9 
 5 2024-05-04 15:05:36 2685988. 1248290.      12      48.6 
 6 2024-05-04 15:05:48 2686025. 1248322.      11      34.8 
 7 2024-05-04 15:05:59 2686050. 1248346.       7      21.2 
 8 2024-05-04 15:06:06 2686064. 1248362.       5      14.7 
 9 2024-05-04 15:06:11 2686075. 1248372.       3       7.21
10 2024-05-04 15:06:14 2686081. 1248376.       7      16.3 
# ℹ 24 more rows

To calculate speed, we can just divide steplength (meters) by timelag (seconds).

track <- track |> 
  mutate(
    speed = steplength / timelag
  )

To calculate the turning angle:

get_turning_angle <- function(x, y) {
  
  v1_x = lag(x) - x
  v1_y = lag(y) - y
  v2_x = lead(x) - x
  v2_y = lead(y) - y
  
  dot = v1_x * v2_x + v1_y * v2_y
  mag1 = sqrt(v1_x^2 + v1_y^2)
  mag2 = sqrt(v2_x^2 + v2_y^2)
  cos_theta = pmax(pmin(dot / (mag1 * mag2), 1), -1)
  angle = acos(cos_theta) * 180 / pi
  
  # We are interested in the offset to a straight line 180°
  180 - angle
}

track <- track |> 
  mutate(
    angle = get_turning_angle(X, Y)
  )

To visualize the distribution of our speed and turning angles, we can pivot our data and use ggplot2.

library(tidyr)
track |> 
  pivot_longer(c(speed, angle)) |> 
  ggplot() +
  geom_boxplot(aes(x = name, y= value)) +
  facet_wrap(~name, scale = "free")

To simulate locations, we can offset X and Y by a random value from a normal distribution. The parameter sd determines how much offset we want the simulated point to have. With asd of 5:

68.27% of all values will be offset +/- 5 Meters (\(1\sigma\))
99.73% of all values will be offset +/- 15 Meters (\(3\sigma\))

# This simulation is for illustration purposes only
track_sim <- track |> 
  mutate(
    X = X + rnorm(X, sd = 5),
    Y = Y + rnorm(Y, sd = 5)
  )

ggplot(track, aes(X, Y)) +
  geom_point(col = "black") +
  geom_path() +
  geom_point(data = track_sim, col = "red") +
  geom_path(data = track_sim, col = "red") +
  coord_equal()

To do muliple simulations, we recommend using the map_dfr function from purrr. This function works like a loop, is just more concise and easier to write:

library(purrr)

tracks_mc <- map_dfr(seq_len(100), \(x){
  track |> 
  mutate(
    X = X + rnorm(X, sd = 3),
    Y = Y + rnorm(Y, sd = 3),
    # To keep track of the iteration
    iteration = x
  )
})

Now we can calculate timelag, steplength, speed, turning angle as before. The difference is, that we need to group by iteration so that we dont calculate values across different simulations.

tracks_mc <- tracks_mc |> 
  group_by(iteration) |> 
  mutate(
    timelag = get_timelag(time),
    steplength = get_distance(X, Y),
    speed = steplength / timelag,
    angle = get_turning_angle(X, Y)    
  )

To compare the simulated data with the measured data, it’s best to combine them into a single dataframe. To differentiate the two datasets, we need an additional column type:

tracks_mc$type <- "simulated"
track$type <- "measured"

tracks_bind <- bind_rows(tracks_mc, track)

First, let’s take a look at the data spatially:

# Again, we need to group by iteration, so that the path is not drawn
# across simulations:
ggplot(tracks_bind, aes(X, Y, group = iteration, color = type)) +
  geom_point() +
  geom_path()

Now we can compare speed and angles:

tracks_bind |> 
  pivot_longer(c(speed, angle)) |> 
  ggplot() +
  geom_boxplot(aes(x = type, y= value)) +
  facet_wrap(~name, scale = "free")

To do multiple simulations, wen can just do two nested map_dfr:

# the outer loop takes care of the offset distance
tracks_mc <- map_dfr(c(1, 3, 9), \(dist){ 
  map_dfr(seq_len(100), \(x){
    track |> 
    mutate(
      X = X + rnorm(X, sd = dist),
      Y = Y + rnorm(Y, sd = dist),
      # to keep track of the distance
      type = paste("simulated",dist),
      # to keep track of the iteration
      iteration = x
    )
  })
})

As before, we can calculate timelag, steplength, speed, turning angle.

tracks_mc <- tracks_mc |> 
  mutate(
    timelag = get_timelag(time),
    steplength = get_distance(X, Y),
    speed = steplength / timelag,
    angle = get_turning_angle(X, Y)    
  )

Bind the rows together:

tracks_bind <- bind_rows(tracks_mc, track)

First, let’s look at a spatial visualisation of our data

tracks_bind |> 
  ggplot(aes(X, Y, group = iteration)) +
  geom_point(alpha = .1) +
  geom_path(alpha = .1) +
  coord_equal() +
  facet_wrap(~type, nrow = 1)

tracks_bind |> 
  pivot_longer(c(speed, angle)) |> 
  ggplot() +
  geom_boxplot(aes(x = type, y= value)) +
  facet_wrap(~name, scale = "free")