# add weekday column
data <- data |>
mutate(day = as.Date(time), weekday = weekdays(time))Week 4 Exercise A: Segmentation
Read and Tidy Data
Create a weekday column.
Add coordinates as separate columns (in degrees). Change geometry column from degrees to metres.
Ensure correct CRS.
# geometry in metres, not degrees
data <- st_transform(data, crs=2056)
# add coordinates as separate columns (in metres)
coord <- st_coordinates(data)
# join
data_coord <- cbind(data, coord)There are 28 duplicated values in the time column. The duplicated values containing no speed value (cmt) will be removed.
# there are 28 duplicated
data_coord |>
st_drop_geometry(data_coord) |>
select(track_seg_point_id, time, cmt, weekday) |>
filter(duplicated(time)) |>
knitr::kable(format = "html") |>
kable_styling(font_size = 4)| track_seg_point_id | time | cmt | weekday |
|---|---|---|---|
| 6 | 2024-04-01 10:29:38 | NA | Monday |
| 11 | 2024-04-01 11:09:37 | NA | Monday |
| 14 | 2024-04-01 11:29:38 | NA | Monday |
| 239 | 2024-04-06 23:26:22 | NA | Saturday |
| 244 | 2024-04-07 00:07:06 | NA | Sunday |
| 251 | 2024-04-07 01:06:40 | NA | Sunday |
| 254 | 2024-04-07 02:26:23 | NA | Sunday |
| 257 | 2024-04-07 02:46:24 | NA | Sunday |
| 260 | 2024-04-07 03:06:37 | NA | Sunday |
| 273 | 2024-04-07 05:04:08 | NA | Sunday |
| 526 | 2024-04-10 06:44:25 | NA | Wednesday |
| 813 | 2024-04-13 00:41:43 | NA | Saturday |
| 820 | 2024-04-13 01:41:41 | NA | Saturday |
| 829 | 2024-04-13 03:01:52 | NA | Saturday |
| 831 | 2024-04-13 03:21:45 | NA | Saturday |
| 846 | 2024-04-13 05:41:43 | NA | Saturday |
| 852 | 2024-04-13 06:41:48 | NA | Saturday |
| 858 | 2024-04-13 07:21:46 | NA | Saturday |
| 877 | 2024-04-13 10:01:43 | NA | Saturday |
| 1005 | 2024-04-14 19:26:44 | NA | Sunday |
| 1012 | 2024-04-14 20:26:46 | NA | Sunday |
| 1023 | 2024-04-14 22:06:42 | NA | Sunday |
| 1026 | 2024-04-14 22:26:43 | NA | Sunday |
| 1029 | 2024-04-14 22:46:43 | NA | Sunday |
| 1197 | 2024-04-28 08:01:53 | NA | Sunday |
| 1202 | 2024-04-28 08:41:52 | NA | Sunday |
| 1205 | 2024-04-28 09:01:51 | NA | Sunday |
| 1214 | 2024-04-28 10:21:52 | NA | Sunday |
# remove the duplicated rows
data_coord <- data_coord |>
distinct(time, .keep_all=TRUE)Select a sample of two days and ensure correct CRS.
data_sample <- data_coord |>
st_as_sf(coords = c("X", "Y"), crs = 2056, remove = FALSE) |>
filter(
day >= "2024-04-01",
day < "2024-04-03"
)
data_no_geo <- st_drop_geometry(data_sample)Calculate time lag.
data_no_geo <- data_no_geo |>
arrange(time) |> # make sure order is correct
mutate(timestamp = difftime_secs(lead(time), time))Check timelags.
| count_na | mean_interval | median_interval | min_interval | max_interval | count_timestamp | count_ts_larger_15 |
|---|---|---|---|---|---|---|
| 0 | 1419.659 | 600 | 3 | 43198 | 92 | 7 |
There are 7 of total 92 time lag values which are larger than 15 minutes. Interestingly, the mean time lag is around 23 minutes, which indicates that the high outliers have a high influence on the mean value. Therefore, I calculated the median as well, which is 10 minutes. Data gaps occur because when the cat is at home, the GPS does not record any data. At the same time, it happens sometimes that the cat goes outside without the GPS tacker, which might have happened in the night from April 1 to April 2 (data gap of around 12 hours).
Task 1: Calculate Distances
Step a): Specify a temporal window v
The sampling interval should be 10 minutes. I will use a temporal window of 40 minutes which includes 4 fixes. Step b calculates the Euclidean distances.
Step b): Measure the distance (m) to every point within v
We can use the function distance_by_element from week 2 in combination with lead() and lag() to calculate the Euclidean distance.
data_sample <- data_sample |>
mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry), # distance -20 minutes
nMinus1 = distance_by_element(lag(geometry, 1), geometry), # distance -10 minutes
nPlus1 = distance_by_element(geometry, lead(geometry, 1)), # distance +10 mintues
nPlus2 = distance_by_element(geometry, lead(geometry, 2)) # distance +20 minutes
)Now we want to calculate the mean distance of nMinus2, nMinus1, nPlus1, nPlus2 for each row.
data_sample <- data_sample |>
rowwise() |>
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
) |>
ungroup()Task 2: Specify and apply threshold d
Exploratory Data Analysis
Figure 2 shows that the median value of the stepMean is around 74. There are more counts on the smaller stepMean spectrum.
Summary of column stepMean.
summary(data_sample$stepMean) Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
10.99 50.08 71.89 79.14 103.81 191.79 4
threshold = 50 # use first quantile as threshold
# store information of static or not static in a new column as TRUE or FALSE
data_sample <- data_sample |>
mutate(static = stepMean < threshold)
#mean(data_sample$stepMean, na.rm=TRUE)The first quantile of stepMean is around 50, which is chosen as threshold for classification as «static».
Task 3: Visualise segmented trajectories
data_filter <- data_sample |>
filter(!static) # filter the ones which are not staticTask 4: Segment-based analysis
Function for creating unique ID for each subtrajectories.
rle_id <- function(vec) {
x <- rle(vec)$lengths
as.factor(rep(seq_along(x), times = x))
}Create unique ID for each subtrajectories and filter the subtrajectories which are not static.
data_sample <- data_sample |>
mutate(segment_id = rle_id(static))
data_filter2 <- data_sample |>
filter(!static) # filter the ones which are not staticThen use segment_id as a grouping variable to determine the segments duration and remove short segments (e.g. segments with a duration < xx minutes).
data_filter2 <- data_filter2 |>
arrange(time) |> # make sure order is correct
group_by(segment_id) |> # group it by segment_id
mutate(seg_duration = difftime_secs(lead(time), time)) # calculate time differenceCalculate the duration per subtrajectory.
data_filter2 |>
st_drop_geometry(data_filter2) |>
group_by(segment_id) |>
summarise(seg_duration=sum(seg_duration, na.rm=TRUE)) |>
mutate(seg_duration_min = seg_duration/60) |>
knitr::kable()| segment_id | seg_duration | seg_duration_min |
|---|---|---|
| 3 | 1800 | 30.00000 |
| 5 | 3607 | 60.11667 |
| 7 | 1244 | 20.73333 |
| 9 | 624 | 10.40000 |
| 11 | 18377 | 306.28333 |
| 13 | 2395 | 39.91667 |
| 15 | 866 | 14.43333 |
| 17 | 2984 | 49.73333 |
| 19 | 7798 | 129.96667 |
| 21 | 3604 | 60.06667 |
Remove the subtrajectories with values < 15 min (subtrajectories 9 and 15).
data_filter2 = data_filter2[!(data_filter2$segment_id %in% c("9", "15")), ]Visualise.