# add weekday column
<- data |>
data mutate(day = as.Date(time), weekday = weekdays(time))
Week 4 Exercise A: Segmentation
Read and Tidy Data
Create a weekday column.
Add coordinates as separate columns (in degrees). Change geometry column from degrees to metres.
Ensure correct CRS.
# geometry in metres, not degrees
<- st_transform(data, crs=2056)
data
# add coordinates as separate columns (in metres)
<- st_coordinates(data)
coord
# join
<- cbind(data, coord) data_coord
There are 28 duplicated values in the time column. The duplicated values containing no speed value (cmt) will be removed.
# there are 28 duplicated
|>
data_coord st_drop_geometry(data_coord) |>
select(track_seg_point_id, time, cmt, weekday) |>
filter(duplicated(time)) |>
::kable(format = "html") |>
knitrkable_styling(font_size = 4)
track_seg_point_id | time | cmt | weekday |
---|---|---|---|
6 | 2024-04-01 10:29:38 | NA | Monday |
11 | 2024-04-01 11:09:37 | NA | Monday |
14 | 2024-04-01 11:29:38 | NA | Monday |
239 | 2024-04-06 23:26:22 | NA | Saturday |
244 | 2024-04-07 00:07:06 | NA | Sunday |
251 | 2024-04-07 01:06:40 | NA | Sunday |
254 | 2024-04-07 02:26:23 | NA | Sunday |
257 | 2024-04-07 02:46:24 | NA | Sunday |
260 | 2024-04-07 03:06:37 | NA | Sunday |
273 | 2024-04-07 05:04:08 | NA | Sunday |
526 | 2024-04-10 06:44:25 | NA | Wednesday |
813 | 2024-04-13 00:41:43 | NA | Saturday |
820 | 2024-04-13 01:41:41 | NA | Saturday |
829 | 2024-04-13 03:01:52 | NA | Saturday |
831 | 2024-04-13 03:21:45 | NA | Saturday |
846 | 2024-04-13 05:41:43 | NA | Saturday |
852 | 2024-04-13 06:41:48 | NA | Saturday |
858 | 2024-04-13 07:21:46 | NA | Saturday |
877 | 2024-04-13 10:01:43 | NA | Saturday |
1005 | 2024-04-14 19:26:44 | NA | Sunday |
1012 | 2024-04-14 20:26:46 | NA | Sunday |
1023 | 2024-04-14 22:06:42 | NA | Sunday |
1026 | 2024-04-14 22:26:43 | NA | Sunday |
1029 | 2024-04-14 22:46:43 | NA | Sunday |
1197 | 2024-04-28 08:01:53 | NA | Sunday |
1202 | 2024-04-28 08:41:52 | NA | Sunday |
1205 | 2024-04-28 09:01:51 | NA | Sunday |
1214 | 2024-04-28 10:21:52 | NA | Sunday |
# remove the duplicated rows
<- data_coord |>
data_coord distinct(time, .keep_all=TRUE)
Select a sample of two days and ensure correct CRS.
<- data_coord |>
data_sample st_as_sf(coords = c("X", "Y"), crs = 2056, remove = FALSE) |>
filter(
>= "2024-04-01",
day < "2024-04-03"
day
)
<- st_drop_geometry(data_sample) data_no_geo
Calculate time lag.
<- data_no_geo |>
data_no_geo arrange(time) |> # make sure order is correct
mutate(timestamp = difftime_secs(lead(time), time))
Check timelags.
count_na | mean_interval | median_interval | min_interval | max_interval | count_timestamp | count_ts_larger_15 |
---|---|---|---|---|---|---|
0 | 1419.659 | 600 | 3 | 43198 | 92 | 7 |
There are 7 of total 92 time lag values which are larger than 15 minutes. Interestingly, the mean time lag is around 23 minutes, which indicates that the high outliers have a high influence on the mean value. Therefore, I calculated the median as well, which is 10 minutes. Data gaps occur because when the cat is at home, the GPS does not record any data. At the same time, it happens sometimes that the cat goes outside without the GPS tacker, which might have happened in the night from April 1 to April 2 (data gap of around 12 hours).
Task 1: Calculate Distances
Step a): Specify a temporal window v
The sampling interval should be 10 minutes. I will use a temporal window of 40 minutes which includes 4 fixes. Step b calculates the Euclidean distances.
Step b): Measure the distance (m) to every point within v
We can use the function distance_by_element from week 2 in combination with lead() and lag() to calculate the Euclidean distance.
<- data_sample |>
data_sample mutate(
nMinus2 = distance_by_element(lag(geometry, 2), geometry), # distance -20 minutes
nMinus1 = distance_by_element(lag(geometry, 1), geometry), # distance -10 minutes
nPlus1 = distance_by_element(geometry, lead(geometry, 1)), # distance +10 mintues
nPlus2 = distance_by_element(geometry, lead(geometry, 2)) # distance +20 minutes
)
Now we want to calculate the mean distance of nMinus2, nMinus1, nPlus1, nPlus2 for each row.
<- data_sample |>
data_sample rowwise() |>
mutate(
stepMean = mean(c(nMinus2, nMinus1, nPlus1, nPlus2))
|>
) ungroup()
Task 2: Specify and apply threshold d
Exploratory Data Analysis
Figure 2 shows that the median value of the stepMean is around 74. There are more counts on the smaller stepMean spectrum.
Summary of column stepMean.
summary(data_sample$stepMean)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
10.99 50.08 71.89 79.14 103.81 191.79 4
= 50 # use first quantile as threshold
threshold
# store information of static or not static in a new column as TRUE or FALSE
<- data_sample |>
data_sample mutate(static = stepMean < threshold)
#mean(data_sample$stepMean, na.rm=TRUE)
The first quantile of stepMean is around 50, which is chosen as threshold for classification as «static».
Task 3: Visualise segmented trajectories
<- data_sample |>
data_filter filter(!static) # filter the ones which are not static
Task 4: Segment-based analysis
Function for creating unique ID for each subtrajectories.
<- function(vec) {
rle_id <- rle(vec)$lengths
x as.factor(rep(seq_along(x), times = x))
}
Create unique ID for each subtrajectories and filter the subtrajectories which are not static.
<- data_sample |>
data_sample mutate(segment_id = rle_id(static))
<- data_sample |>
data_filter2 filter(!static) # filter the ones which are not static
Then use segment_id as a grouping variable to determine the segments duration and remove short segments (e.g. segments with a duration < xx minutes).
<- data_filter2 |>
data_filter2 arrange(time) |> # make sure order is correct
group_by(segment_id) |> # group it by segment_id
mutate(seg_duration = difftime_secs(lead(time), time)) # calculate time difference
Calculate the duration per subtrajectory.
|>
data_filter2 st_drop_geometry(data_filter2) |>
group_by(segment_id) |>
summarise(seg_duration=sum(seg_duration, na.rm=TRUE)) |>
mutate(seg_duration_min = seg_duration/60) |>
::kable() knitr
segment_id | seg_duration | seg_duration_min |
---|---|---|
3 | 1800 | 30.00000 |
5 | 3607 | 60.11667 |
7 | 1244 | 20.73333 |
9 | 624 | 10.40000 |
11 | 18377 | 306.28333 |
13 | 2395 | 39.91667 |
15 | 866 | 14.43333 |
17 | 2984 | 49.73333 |
19 | 7798 | 129.96667 |
21 | 3604 | 60.06667 |
Remove the subtrajectories with values < 15 min (subtrajectories 9 and 15).
= data_filter2[!(data_filter2$segment_id %in% c("9", "15")), ] data_filter2
Visualise.