This analysis addresses one of the core objectives of the study published in April 2014 by Royal Society Publishing. The study's first hypothesis is that movement characteristics will differ significantly between populations, individuals, migration seasons, and geographical locations. This hypothesis leads to the prediction that populations engaging in long-distance migration will travel faster than those migrating shorter distances.
This code transforms the data provided by MakeoverMonday on data.world on January 21, 2018. It averages each vulture's position by day, determines how far the bird is from the start and end of its migration path, and treats a vulture as migrating if it is more than 13.5 km from both the start and the end of the path. The 13.5 km benchmark comes from a study published in BioOne in 2011. Distances were calculated from the longitude and latitude coordinates using the spherical law of cosines (the geosphere function distCosine). The data was then downloaded to a csv file for use in the final visualization in Tableau, here.
# The data.world package provides easy access to the data
library(data.world)
# Datasets are referenced by their URL or path.
dataset_key <- "https://data.world/makeovermonday/2018-w-4-turkey-vulture-migration-in-north-and-south-america"

# Ask the dataset which tables can be queried with SQL.
tables_qry <- data.world::qry_sql("SELECT * FROM Tables")
tables_df <- data.world::query(tables_qry, dataset = dataset_key)

# Show the available table names.
tables_df$tableName

# The rest of the script reads `sample_df`, so stop here with a clear message
# when the dataset exposes no tables instead of failing later with
# "object 'sample_df' not found".
if (length(tables_df$tableName) == 0) {
  stop("No tables found in dataset: ", dataset_key, call. = FALSE)
}

# Pull every row of the first listed table and display it.
sample_qry <- data.world::qry_sql(
  sprintf("SELECT * FROM `%s`", tables_df$tableName[[1]])
)
sample_df <- data.world::query(sample_qry, dataset = dataset_key)
sample_df
library(ggplot2)
library(lubridate)
library(FSA) #to calculate cumsum
library(geosphere) # to calculate distance between 2 points on earth
library(plyr) #to join distance calc to data
library(dplyr)
library(ggplot2)
library(lubridate)
library(FSA) #to calculate cumsum
library(geosphere) # to calculate distance between 2 points on earth
library(dplyr)
# vulture2: the raw fixes reduced to the columns used downstream, restricted
# to birds flagged "migratory" (deployment "42500-Schaumboch" is excluded),
# with the two identifier columns turned into factors and a calendar `date`
# derived from the local timestamp.
# NOTE(review): as.Date() on a date-time value may convert via UTC rather than
# the study's local zone -- confirm the dates land on the intended day.
vulture2 <- sample_df %>%
  dplyr::select(
    event_id, deployment_id,
    location_long, location_lat,
    study_local_timestamp,
    animal_comments,
    study_site
  ) %>%
  dplyr::filter(
    deployment_id != "42500-Schaumboch",
    animal_comments == "migratory"
  ) %>%
  dplyr::mutate(
    deployment_id = as.factor(deployment_id),
    animal_comments = as.factor(animal_comments),
    date = as.Date(study_local_timestamp)
  )
# vulture3: one row per bird (deployment_id) per day, holding the mean
# latitude and longitude of that day's fixes.
vulture3 <- vulture2 %>%
  group_by(deployment_id, date) %>%
  summarize(
    avg_lat = mean(location_lat),
    avg_long = mean(location_long)
  )

# Sequential row key; later steps use it to join computed distances back on.
# seq_len() gives an empty vector for a zero-row frame, whereas
# seq.int(nrow(x)) would produce c(1, 0) and break the assignment.
vulture3$ID <- seq_len(nrow(vulture3))
# vulture4: for each bird, pair every daily position with the previous row's
# position (lag of 1 within deployment_id) and record the change in latitude,
# longitude, and days elapsed. The first row of each bird has no predecessor,
# so its lagged columns and differences are NA.
# NOTE(review): this presumes rows are already in date order within each
# bird -- confirm that the upstream summary guarantees that ordering.
vulture4 <- vulture3 %>%
  dplyr::group_by(deployment_id) %>%
  dplyr::mutate(
    lat1 = dplyr::lag(avg_lat, 1),
    lat2 = avg_lat,
    dlat = lat2 - lat1,
    long1 = dplyr::lag(avg_long, 1),
    long2 = avg_long,
    dlong = long2 - long1,
    # dates as day counts so the gap between observations is a plain number
    date1 = as.numeric(date),
    day1 = dplyr::lag(date1, 1),
    day2 = date1,
    days_btwn_obs = day2 - day1
  ) %>%
  dplyr::ungroup() %>%
  # A positive latitude change is labelled northbound; zero or negative is
  # labelled southbound; an NA change stays NA.
  dplyr::mutate(
    direction = ifelse(dlat > 0, "northbound", "southbound")
  )
# Start (previous row) and end (current row) coordinates for every daily row.
p <- vulture4 %>%
  dplyr::select(ID, long1, long2, lat1, lat2)

# Rows without a previous position have NA start coordinates. Copy the end
# coordinates into them so the distance for those rows comes out as zero
# instead of NA.
p$long1 <- ifelse(is.na(p$long1), p$long2, p$long1)
p$lat1 <- ifelse(is.na(p$lat1), p$lat2, p$lat1)

# Check: number of start coordinates still missing (expected to be 0).
sum(is.na(p$long1))
sum(is.na(p$lat1))

# distCosine() takes (longitude, latitude) pairs; the result is divided by
# 1000 to express it in km (assumes distCosine() returns metres -- confirm).
p1 <- p[, c("long1", "lat1")]
p2 <- p[, c("long2", "lat2")]
dist <- distCosine(p1, p2) / 1000

# Attach the distance to its row and keep just the key/value pair that is
# joined back onto the daily data.
p <- cbind(p, dist)
p3 <- p[, c("ID", "dist")]
library(plyr) # supplies join()

# speed: the daily rows with their travelled distance attached by row key,
# trimmed to the columns needed for the speed calculations, plus the
# calendar year of each observation.
speed <- plyr::join(vulture4, p3, by = "ID") %>%
  dplyr::select(
    ID, deployment_id, date,
    direction, dist, days_btwn_obs,
    avg_lat, avg_long,
    lat1, lat2, long1, long2, day1, day2
  ) %>%
  dplyr::mutate(year = lubridate::year(date))

# plyr is detached again; the original author notes that leaving it attached
# interferes with the grouping done below.
detach(package:plyr)
# speed2: per bird and calendar year, record the northernmost (apex) and
# southernmost (nadir) daily mean latitude, and convert each row's distance
# into a daily rate by dividing by the days elapsed since the previous row.
speed2 <- speed %>%
  dplyr::group_by(year, deployment_id) %>%
  dplyr::mutate(
    apex = max(avg_lat),
    nadir = min(avg_lat),
    km_per_day = dist / days_btwn_obs
  ) %>%
  dplyr::ungroup()
library(lattice) # one histogram panel per bird

# Distribution of daily distance flown, conditioned on the bird.
# A lattice call only builds a plot object; it is drawn when printed. The
# explicit print() makes the figure appear when the script is run through
# source() as well as at the console.
print(
  histogram(~speed2$km_per_day | speed2$deployment_id,
    type = "count", main = "KM/Day flown by each vulture"
  )
)
# The next steps decide whether a bird is roosting or migrating by asking
# whether its position is more than (at least) one day's flight away from
# both the apex and the nadir; such positions are treated as migratory legs.
# This block prepares the per-bird "one day's flight" thresholds.

# Rows with no computable rate get a rate of zero (this updates speed2
# itself, not just the derived frame).
speed2$km_per_day <- replace(
  speed2$km_per_day, is.na(speed2$km_per_day), 0
)

# speed3: only the rows where the bird actually moved, with two per-bird
# thresholds: the smallest positive daily rate and the first quartile of
# the positive daily rates.
speed3 <- speed2 %>%
  dplyr::filter(km_per_day > 0) %>%
  dplyr::group_by(deployment_id) %>%
  dplyr::mutate(
    min_days_flight = min(km_per_day),
    first_quartile = quantile(km_per_day, 0.25)
  ) %>%
  dplyr::ungroup()
# speed4: the daily rows that sit at their bird-year's apex (max latitude) or
# nadir (min latitude), used to look up the longitude that goes with each
# extreme. The rows are taken from speed2 (all daily rows) rather than speed3:
# apex/nadir were computed over all rows, and speed3 drops days with zero
# movement, so an extreme-latitude day can be missing there, which would make
# the inner merges below silently drop that whole bird-year.
# The == comparisons are exact on purpose: apex/nadir are max()/min() of the
# very same avg_lat values.
speed4 <- speed2 %>%
  dplyr::filter(avg_lat == apex | avg_lat == nadir) %>%
  dplyr::select(
    ID, deployment_id, date,
    avg_long, avg_lat, apex, nadir
  )
speed4$terminus <- ifelse(speed4$avg_lat == speed4$apex, "apex", "nadir")

# Longitude of the apex / nadir row (NA on rows that are not that extreme).
# Tested against the latitude itself rather than `terminus`, so a bird-year
# whose apex equals its nadir still gets both longitudes.
speed4$apex_long <- ifelse(speed4$avg_lat == speed4$apex,
  speed4$avg_long, NA
)
speed4$nadir_long <- ifelse(speed4$avg_lat == speed4$nadir,
  speed4$avg_long, NA
)

# check
sum(is.na(speed4$apex_long))
sum(is.na(speed4$nadir_long))

# One apex row and one nadir row per bird-year. distinct() keeps the first
# matching row when several days tie at the same extreme latitude, so the
# merges below cannot duplicate daily rows.
p.apex <- speed4 %>%
  dplyr::filter(avg_lat == apex) %>%
  dplyr::mutate(year = lubridate::year(date)) %>%
  dplyr::distinct(deployment_id, year, .keep_all = TRUE) %>%
  dplyr::select(ID, deployment_id, year, apex_long, apex)
p.nadir <- speed4 %>%
  dplyr::filter(avg_lat == nadir) %>%
  dplyr::mutate(year = lubridate::year(date)) %>%
  dplyr::distinct(deployment_id, year, .keep_all = TRUE) %>%
  dplyr::select(ID, deployment_id, year, nadir_long, nadir)

# Attach the apex and nadir longitudes to every daily row of speed3.
# Only the keys and the longitude are merged in: carrying the lookup's own
# ID/apex/nadir columns along makes merge() suffix or overwrite the daily
# row's ID (previously speed6$ID ended up holding the nadir row's ID).
speed5 <- merge(
  speed3,
  dplyr::select(p.apex, deployment_id, year, apex_long),
  by = c("deployment_id", "year")
)
speed5 <- merge(
  speed5,
  dplyr::select(p.nadir, deployment_id, year, nadir_long),
  by = c("deployment_id", "year")
)

# speed6: the columns needed for the distance-to-terminus step. The
# apex.x / nadir.x names are kept because later code refers to them.
speed6 <- speed5 %>%
  dplyr::select(
    ID, deployment_id, year, date,
    direction, dist, days_btwn_obs,
    avg_lat, avg_long,
    apex.x = apex, nadir.x = nadir,
    apex_long, nadir_long, km_per_day,
    min_days_flight, first_quartile
  )
# (longitude, latitude) pairs for each row's apex, its nadir, and the bird's
# own position that day, in the column order handed to distCosine().
p.apex.dist <- speed6[, c("apex_long", "apex.x")]
p.nadir.dist <- speed6[, c("nadir_long", "nadir.x")]
p.bird <- speed6[, c("avg_long", "avg_lat")]

# How far the bird is from each end point of its journey; the distCosine()
# result is divided by 1000 to give km.
apex.bird.dist <- distCosine(p.apex.dist, p.bird) / 1000
nadir.bird.dist <- distCosine(p.nadir.dist, p.bird) / 1000

# speed7: speed6 with both distances appended as columns.
speed7 <- cbind(speed6, apex.bird.dist, nadir.bird.dist)
# Label each daily row "migrating" when the bird is farther than a threshold
# from BOTH the apex and the nadir, otherwise "roosting". Three thresholds
# are tried side by side.

# Threshold 1: the bird's smallest positive daily rate.
speed7$min_status <- with(
  speed7,
  ifelse(
    apex.bird.dist > min_days_flight & nadir.bird.dist > min_days_flight,
    "migrating", "roosting"
  )
)

# Threshold 2: the first quartile of the bird's positive daily rates.
speed7$quartile_status <- with(
  speed7,
  ifelse(
    apex.bird.dist > first_quartile & nadir.bird.dist > first_quartile,
    "migrating", "roosting"
  )
)

# Threshold 3: a fixed 13.5 km.
speed7$km_status13.5 <- with(
  speed7,
  ifelse(
    apex.bird.dist > 13.5 & nadir.bird.dist > 13.5,
    "migrating", "roosting"
  )
)

# Row counts per label under each threshold.
table(speed7$min_status)
table(speed7$quartile_status)
table(speed7$km_status13.5)