# Clear working space
# NOTE(review): this removes all (non-hidden) objects from the environment the
# script runs in; consider dropping it if the document is always rendered in a
# fresh R session - TODO confirm how this file is executed.
rm(list=ls())
#Import necessary libraries
library(readr)
library(dplyr)
library(stringr)
library(tidyverse)
library(lubridate)
library(scales)
library(ggtext)
library(ggplot2)
# Load the tracking data from the project's data folder
Nutcracker <- read_csv(file = "00_data/NuCra_Davos_all_data_2025-02-07_V2.csv")
# Drop the rows flagged as outliers. They are identified through the column
# `...1`, which appears to hold a running row number (see the str() output).
remove_ids <- c(48325, 15381, 15382, 15383, 45363)
Nutcracker <- filter(Nutcracker, !(`...1` %in% remove_ids))
Data_Exploratory_GEO880
Setup
Data structure
# Data structure: compact overview of the column names, types and first values
str(Nutcracker)spc_tbl_ [49,758 × 34] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ...1 : num [1:49758] 1 2 3 4 5 6 7 8 9 10 ...
$ id : chr [1:49758] ".458" ".458" ".458" ".458" ...
$ hdop : num [1:49758] 1.7 2.8 1.9 1.1 1.4 1.3 1.2 3.6 3.1 3 ...
$ longitude : num [1:49758] 9.91 9.91 9.91 9.87 9.89 ...
$ latitude : num [1:49758] 46.8 46.8 46.8 46.8 46.8 ...
$ altitude : num [1:49758] 2073 2072 2071 1998 1998 ...
$ satellites : chr [1:49758] "6/6" "5/5" "6/6" "6/7" ...
$ datetime : POSIXct[1:49758], format: "2022-08-02 11:00:10" "2022-08-02 12:00:11" ...
$ tag.type : chr [1:49758] "vhf" "vhf" "vhf" "vhf" ...
$ year : num [1:49758] 2022 2022 2022 2022 2022 ...
$ month : chr [1:49758] "08" "08" "08" "08" ...
$ season : chr [1:49758] "summer" "summer" "summer" "summer" ...
$ brutzeit : chr [1:49758] "no" "no" "no" "no" ...
$ ring_no : chr [1:49758] "K125908" "K125908" "K125908" "K125908" ...
$ bag : num [1:49758] 26 26 26 26 26 26 26 26 26 26 ...
$ bird_and_bag : num [1:49758] 233 233 233 233 233 233 233 233 233 233 ...
$ weight : num [1:49758] 207 207 207 207 207 207 207 207 207 207 ...
$ wing_length : num [1:49758] 191 191 191 191 191 191 191 191 191 191 ...
$ bill_depth : num [1:49758] 16.3 16.3 16.3 16.3 16.3 ...
$ bill_length : num [1:49758] 46.4 46.4 46.4 46.4 46.4 ...
$ tarsus_length : num [1:49758] 42 42 42 42 42 ...
$ feathers : chr [1:49758] "yes" "yes" "yes" "yes" ...
$ stage.at.capture : chr [1:49758] "juvenile" "juvenile" "juvenile" "juvenile" ...
$ photo : num [1:49758] 0 0 0 0 0 0 0 0 0 0 ...
$ datetime.at.capture : POSIXct[1:49758], format: "2022-08-02 09:36:00" "2022-08-02 09:36:00" ...
$ timediff : num [1:49758] 0 -1 -2 -8 -10 ...
$ steplength : num [1:49758] 0 19.4 26.1 2716.8 1490.7 ...
$ stepsize.from.last.hour: num [1:49758] 0 19.4 0 0 0 ...
$ stage.current : chr [1:49758] "juvenile" "juvenile" "juvenile" "juvenile" ...
$ id.stage : chr [1:49758] ".458.juvenile" ".458.juvenile" ".458.juvenile" ".458.juvenile" ...
$ date : Date[1:49758], format: "2022-08-02" "2022-08-02" ...
$ ndays.new : num [1:49758] 33 33 33 33 33 33 33 33 33 33 ...
$ n.datapoints : num [1:49758] 183 183 183 183 183 183 183 183 183 183 ...
$ dates.spanned.per.year : num [1:49758] 33 33 33 33 33 33 33 33 33 33 ...
- attr(*, "spec")=
.. cols(
.. ...1 = col_double(),
.. id = col_character(),
.. hdop = col_double(),
.. longitude = col_double(),
.. latitude = col_double(),
.. altitude = col_double(),
.. satellites = col_character(),
.. datetime = col_datetime(format = ""),
.. tag.type = col_character(),
.. year = col_double(),
.. month = col_character(),
.. season = col_character(),
.. brutzeit = col_character(),
.. ring_no = col_character(),
.. bag = col_double(),
.. bird_and_bag = col_double(),
.. weight = col_double(),
.. wing_length = col_double(),
.. bill_depth = col_double(),
.. bill_length = col_double(),
.. tarsus_length = col_double(),
.. feathers = col_character(),
.. stage.at.capture = col_character(),
.. photo = col_double(),
.. datetime.at.capture = col_datetime(format = ""),
.. timediff = col_double(),
.. steplength = col_double(),
.. stepsize.from.last.hour = col_double(),
.. stage.current = col_character(),
.. id.stage = col_character(),
.. date = col_date(format = ""),
.. ndays.new = col_double(),
.. n.datapoints = col_double(),
.. dates.spanned.per.year = col_double()
.. )
- attr(*, "problems")=<externalptr>
# Number and names of variables (length() of a data frame is its column count)
length(Nutcracker)[1] 34
names(Nutcracker) [1] "...1" "id"
[3] "hdop" "longitude"
[5] "latitude" "altitude"
[7] "satellites" "datetime"
[9] "tag.type" "year"
[11] "month" "season"
[13] "brutzeit" "ring_no"
[15] "bag" "bird_and_bag"
[17] "weight" "wing_length"
[19] "bill_depth" "bill_length"
[21] "tarsus_length" "feathers"
[23] "stage.at.capture" "photo"
[25] "datetime.at.capture" "timediff"
[27] "steplength" "stepsize.from.last.hour"
[29] "stage.current" "id.stage"
[31] "date" "ndays.new"
[33] "n.datapoints" "dates.spanned.per.year"
# Distinct bird IDs; the printed vector has 115 entries, i.e. 115 birds
unique(Nutcracker$id) [1] ".458" ".468" "0500" "0504" "0516" "0519" "061c" "062b" "0630" "0633"
[11] "063a" "063b" "19" "20" "21" "22" "24" "25" "27" "28"
[21] "29" "30" "31" "32" "33" "36" "37" "5449" "5450" "5451"
[31] "5454" "5995" "5996" "5997" "5998" "5999" "6000" "6001" "6002" "6003"
[41] "6004" "6005" "6006" "6447" "6448" "6449" "6450" "6451" "6452" "6453"
[51] "6454" "6455" "6456" "6457" "6458" "6459" "6460" "6461" "6462" "6463"
[61] "6464" "6465" "6466" "6467" "6468" "6522" "6523" "6524" "6525" "6526"
[71] "6527" "6528" "7314" "7315" "7316" "7317" "7318" "7319" "7320" "7321"
[81] "7322" "7323" "7324" "7325" "7326" "7327" "7328" "7329" "7330" "7331"
[91] "7332" "7930" "7931" "7932" "7933" "7934" "7935" "7936" "7937" "7938"
[101] "7939" "7940" "7941" "7942" "7943" "7944" "7945" "7946" "7947" "7948"
[111] "7949" "7950" "85" "88" "89"
About this chunk:
This dataset contains 34 variables, including: Bird_ID, individual metadata, GPS locations and additional information such as whether a datapoint is collected during breeding season or not.
The dataset contains 115 individuals
Number of data points
# Total number of data points (rows of the data frame)
nrow(Nutcracker)[1] 49758
#per individual
# Number of data points per bird ID
counts <- table(Nutcracker$id)
# extract names
ids <- names(counts)
values <- as.vector(counts)
n <- length(ids)
cols <- 3
rows <- ceiling(n / cols)
#create matrix
# Format every bird as "**id**: count" in one vectorised call, pad with NA up
# to rows * cols cells and let matrix() fill column by column. This gives the
# same layout as filling cell by cell and also works when there are no IDs.
cells <- sprintf("**%s**: %d", ids, values)
length(cells) <- rows * cols
mat <- matrix(cells, nrow = rows, ncol = cols)
#print
# Printing is a pure side effect, so a plain loop is used; unlike a top-level
# apply() call it does not echo a stray `NULL` after the table.
for (r in seq_len(rows)) {
  row_cells <- mat[r, ]
  cat(paste(ifelse(is.na(row_cells), "", row_cells), collapse = " | "), "\n")
}
**.458**: 183 | **6003**: 1134 | **7320**: 216
**.468**: 282 | **6004**: 642 | **7321**: 72
**0500**: 197 | **6005**: 189 | **7322**: 244
**0504**: 660 | **6006**: 60 | **7323**: 325
**0516**: 336 | **6447**: 724 | **7324**: 201
**0519**: 187 | **6448**: 784 | **7325**: 118
**061c**: 109 | **6449**: 137 | **7326**: 247
**062b**: 114 | **6450**: 10 | **7327**: 241
**0630**: 15 | **6451**: 248 | **7328**: 259
**0633**: 190 | **6452**: 906 | **7329**: 1132
**063a**: 37 | **6453**: 2735 | **7330**: 582
**063b**: 5 | **6454**: 592 | **7331**: 288
**19**: 924 | **6455**: 1966 | **7332**: 156
**20**: 405 | **6456**: 1085 | **7930**: 3
**21**: 377 | **6457**: 127 | **7931**: 45
**22**: 705 | **6458**: 561 | **7932**: 38
**24**: 981 | **6459**: 478 | **7933**: 57
**25**: 106 | **6460**: 1654 | **7934**: 306
**27**: 392 | **6461**: 336 | **7935**: 331
**28**: 1100 | **6462**: 937 | **7936**: 7
**29**: 35 | **6463**: 160 | **7937**: 35
**30**: 64 | **6464**: 126 | **7938**: 112
**31**: 37 | **6465**: 2039 | **7939**: 86
**32**: 264 | **6466**: 81 | **7940**: 103
**33**: 526 | **6467**: 828 | **7941**: 41
**36**: 79 | **6468**: 196 | **7942**: 77
**37**: 231 | **6522**: 1199 | **7943**: 20
**5449**: 13 | **6523**: 1 | **7944**: 309
**5450**: 616 | **6524**: 858 | **7945**: 34
**5451**: 123 | **6525**: 2436 | **7946**: 37
**5454**: 45 | **6526**: 166 | **7947**: 152
**5995**: 166 | **6527**: 774 | **7948**: 59
**5996**: 63 | **6528**: 1396 | **7949**: 226
**5997**: 12 | **7314**: 800 | **7950**: 3
**5998**: 109 | **7315**: 1070 | **85**: 984
**5999**: 736 | **7316**: 951 | **88**: 212
**6000**: 127 | **7317**: 227 | **89**: 188
**6001**: 2224 | **7318**: 86 |
**6002**: 640 | **7319**: 98 |
# Bar chart: number of data points recorded in each year
Nutcracker |>
  count(year, name = "n") |>
  ggplot(mapping = aes(x = year, y = n)) +
  geom_col(fill = "orange") +
  ggtitle("Number of datapoints per year") +
  xlab("year") +
  ylab("counts") +
  theme_minimal()
#per month
# Bar chart: number of data points per calendar month, pooled over all years
Nutcracker |>
  count(month, name = "n") |>
  ggplot(mapping = aes(x = month, y = n)) +
  geom_col(fill = "orange") +
  ggtitle("Number of datapoints per month") +
  xlab("month") +
  ylab("counts") +
  theme_minimal()
#per month and year
# Dodged bar chart: data points per month with one bar per year.
# `month` is stored as zero-padded text ("01" ... "12"); turning it into a
# factor with all twelve levels fixes the order of the months on the x axis.
Nutcracker |>
  mutate(month = factor(month, levels = formatC(1:12, width = 2, flag = "0"))) |>
  count(year, month, name = "n") |>
  ggplot(mapping = aes(x = month, y = n, fill = factor(year))) +
  geom_col(position = position_dodge()) +
  ggtitle("Number datapoints per year and month") +
  xlab("month") +
  ylab("counts") +
  labs(fill = "Year") +
  theme_minimal()
About this chunk:
In total 49758 datapoints
Highly variable number of datapoints per individual
Highly variable number of datapoints per year & month
Age classes and time spans
# Age classes
# One row per bird: first and last tracking date, age class at capture and the
# number of data points of that bird. Only birds whose first fix is from 2017
# or later are kept; capture-stage codes are collapsed into three classes.
zeitspanne <- Nutcracker |>
  group_by(id) |>
  summarize(
    start = as.Date(min(datetime)),
    ende = as.Date(max(datetime)),
    alter = first(stage.at.capture), # age class per bird
    n = n() # data points per bird, needed for the bar chart below
  ) |>
  filter(year(start) >= 2017) |>
  mutate(alter = case_when(
    alter %in% c("A", "adult") ~ "adult",
    alter %in% c("J", "juvenile", "sub") ~ "juvenile",
    TRUE ~ "unbekannt"
  )) |>
  arrange(start)
# Plot
# `zeitspanne` has to carry its own `n` column: without it, `aes(y = n)`
# silently resolves to the unrelated global `n` (the number of bird IDs) and
# the bars no longer show data points. The per-bird counts are summed per age
# class so that each class is drawn as a single bar.
zeitspanne |>
  count(alter, wt = n, name = "n") |>
  ggplot(aes(x = alter, y = n, fill = alter)) +
  geom_col(width = 0.6) +
  scale_fill_manual(values = c("juvenile" = "tomato", "adult" = "steelblue", "unbekannt" = "gray")) +
  labs(
    title = "Number of datapoints from juvenile and adult birds",
    x = "Age",
    y = "Number of datapoints",
    fill = "Age class"
  ) +
  theme_minimal()
# Plot with age and timespan
# Timeline per bird: one horizontal segment from the first to the last fix,
# birds ordered by their start date and coloured by age class
ggplot(
  data = zeitspanne,
  mapping = aes(y = reorder(id, start), xmin = start, xmax = ende, colour = alter)
) +
  geom_linerange(linewidth = 1.2) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  scale_colour_manual(values = c("juvenile" = "tomato", "adult" = "steelblue", "unbekannt" = "gray")) +
  ggtitle("Time span of each bird") +
  xlab("Year") +
  ylab("Bird ID") +
  labs(colour = "Age class") +
  theme_minimal()
About this chunk:
more datapoints of adult birds than from juveniles
large differences in time span between birds, some over multiple years, some over a few days
GPS schedules
# Filter years, hours and minutes
# Keep fixes from 2017 onwards and derive the time components from `datetime`.
# Note that this overwrites the original `year` and `month` columns with
# numeric values taken from `datetime`.
Nutcracker_filtered <- Nutcracker |>
  filter(year >= 2017) |>
  mutate(
    hour = hour(datetime),
    minute = minute(datetime),
    year = year(datetime),
    month = month(datetime)
  )
# Loop for plot creation: one faceted histogram per year, in chronological order
for (current_year in sort(unique(Nutcracker_filtered$year))) {
  # Filter data by year
  data_year <- Nutcracker_filtered |>
    filter(year == current_year)
  # create plots
  schedule_plot <- ggplot(data_year, aes(x = hour + minute / 60)) +
    # boundary = 0 aligns the 1-hour bins with whole hours (0-1, ..., 23-24).
    # With the default centring on whole hours, the first and last bar reach
    # beyond the axis limits 0..24 and are dropped from the plot.
    geom_histogram(
      binwidth = 1, boundary = 0,
      fill = "skyblue", color = "black", alpha = 0.7
    ) +
    facet_wrap(~ month, ncol = 3, scales = "free_y") + # Facet by month
    labs(title = paste("Daytime distribution of GPS-Datapoints", current_year),
         x = "Daytime (hour)", y = "number of datapoints") +
    theme_minimal() +
    scale_x_continuous(breaks = seq(0, 24, 5), limits = c(0, 24)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  # show plot (explicit print() is required inside a loop)
  print(schedule_plot)
}
# Differences in GPS schedules
# Compare the sampling interval of three example birds
id_group <- c("5450", "6459", "7944")
# filter by id
# For each selected bird, compute the time in minutes between consecutive
# fixes (data sorted by time within bird) and keep gaps shorter than 180 min.
# The first fix of every bird has no predecessor and is dropped via the NA check.
p <- Nutcracker |>
  filter(id %in% id_group) |>
  arrange(id, datetime) |>
  group_by(id) |>
  mutate(time_diff = as.numeric(difftime(datetime, lag(datetime), units = "mins"))) |>
  ungroup() |>
  filter(!is.na(time_diff), time_diff < 180) |>
  ggplot(aes(x = time_diff)) +
  geom_histogram(binwidth = 5, fill = "tomato", color = "black") +
  facet_wrap(~ id, scales = "free_y") +
  labs(
    title = paste("Time between two datapoints for IDs:", paste(id_group, collapse = ", ")),
    x = "Time (minutes)", y = "Frequency"
  ) +
  theme_minimal()
# show Plot
print(p)
About this chunk:
different schedules between years and months
different timesteps between individuals
Conclusion:
The Nutcracker dataset consists of 49758 GPS locations collected from 115 individuals over multiple years. Each datapoint also carries information about the bird, its body measurements and further attributes (34 variables in total). Due to the nature of the data and different research focuses in previous years, the number of datapoints per individual, season and year is highly variable. Additionally, due to battery restrictions, the time steps between two datapoints, as well as the schedules on which the datapoints were collected, are also variable. These implications have to be kept in mind when working with the data.