#
# Life Artina - crawl interpolation v2
# mate.zec@biom.hr
# Aug 2022
#

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readxl)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(sp)
library(crawl)
## crawl 2.2.1 (2018-09-12) 
##  Demos and documentation can be found at our new GitHub repository:
##  https://dsjohnson.github.io/crawl_examples/
## 
## Attaching package: 'crawl'
## 
## The following object is masked from 'package:purrr':
## 
##     flatten
library(track2KBA)
## Registered S3 methods overwritten by 'adehabitatMA':
##   method                       from
##   print.SpatialPixelsDataFrame sp  
##   print.SpatialPixels          sp
# definitions -------------------------------------------------------------

# store needed CRS definitions as variables:

wgs84 <- st_crs("EPSG:4326")
htrs96 <- st_crs("EPSG:3765")


# where are all the files? ------------------------------------------------

# find Yelkouan .pos files by searching the data input folder recursively:

yelkouan_pos_2019 <- list.files(path = "data-input/Shearwater tagging/Yelkouan 2019/",
                                pattern = ".pos",
                                recursive = TRUE,
                                include.dirs = TRUE,
                                full.names = TRUE)
yelkouan_pos_2020 <- list.files(path = "data-input/Shearwater tagging/Yelkouan 2020/",
                                pattern = ".pos",
                                recursive = TRUE,
                                include.dirs = TRUE,
                                full.names = TRUE)
yelkouan_pos_2021 <- list.files(path = "data-input/Shearwater tagging/Yelkouan 2021/",
                                pattern = ".pos",
                                recursive = TRUE,
                                include.dirs = TRUE,
                                full.names = TRUE)


# find Scopoli .pos files

scopoli_pos_2019 <- list.files(path = "data-input/Shearwater tagging/Scopoli 2019/",
                                pattern = ".pos",
                                recursive = TRUE,
                                include.dirs = TRUE,
                                full.names = TRUE)
scopoli_pos_2020 <- list.files(path = "data-input/Shearwater tagging/Scopoli 2020/",
                                pattern = ".pos",
                                recursive = TRUE,
                                include.dirs = TRUE,
                                full.names = TRUE)
scopoli_pos_2021 <- list.files(path = "data-input/Shearwater tagging/Scopoli 2021/",
                                pattern = ".pos",
                                recursive = TRUE,
                                include.dirs = TRUE,
                                full.names = TRUE)


colony_data <- read_excel(path = "data-input/MetaData_tagging_Artina.xlsx")
colony_data_spat <- st_as_sf(colony_data, coords = c("lon_colony", "lat_colony"), crs = wgs84) %>% 
  st_transform(crs = htrs96)
colony_coords_htrs <- st_coordinates(colony_data_spat)
colony_data$colony_X <- colony_coords_htrs[,1]
colony_data$colony_Y <- colony_coords_htrs[,2]


# reading the .pos files --------------------------------------------------

# function that fetches the data from individual files:

getposdata <- function(filelist, ...) {
  posnames <- c("day",  "month",    "year", "hour", "minute",   "second", 
                 "satellites",  "latitude", "longitude",    "altitude", 
                 "time_offset", "accuracy", "voltage")
  poscols <- "iiiiii_innnnnn__"
  list_of_pos_tables <- lapply(filelist, FUN = read_csv, 
                               col_types = poscols, 
                               col_names = posnames,
                               skip = 5)
  for (i in 1:length(filelist)) {
    # get id of tag from filename structure:
    tag_id <- gsub(".*?/(Tag.*?) - (.*?)/.*", "\\1_\\2", filelist[i])
    # get id of colony from filename structure:
    list_of_pos_tables[[i]]$colony_code <- gsub(".*?/(Tag.*?) - (.*?)-.*", "\\2", filelist[i])
    print(tag_id)
    list_of_pos_tables[[i]]$bird_id <- paste0(list_of_pos_tables[[i]]$year, "_",
                                              tag_id)
    }
  postable <- bind_rows(list_of_pos_tables) %>% 
    mutate(dttm = ymd_hms(paste(year, month, day, hour, minute, second, sep = "-")))
  return(postable)  
}


# yelkouan loading and preprocessing --------------------------------------

yelk2019 <- getposdata(yelkouan_pos_2019)
## [1] "Tag17600_Z-9"
## [1] "Tag17604_Z-7"
## [1] "Tag17617_Z-4 (2nd Parent)"
## [1] "Tag17644_Z-13"
## [1] "Tag17652_Z-2"
## [1] "Tag17704_Z-11"
## [1] "Tag17735_Z-3 (RAW DATA MISSING)"
## [1] "Tag40066_Z-14"
## [1] "Tag40069_Z-6"
## [1] "Tag40073_Z-1"
## [1] "Tag40078_Z-3 (2nd Parent)"
## [1] "Tag40086_Z-11 (2nd Parent)"
## [1] "Tag40094_Z-16"
## [1] "Tag40118_Z-2 (2nd Parent)"
## [1] "Tag40133_Z-15"
## [1] "Tag40138_Z-17"
## [1] "Tag40170_Z-12"
## [1] "Tag40177_Z-17 (2nd Parent)"
## [1] "Tag40182_Z-4"
yelk2020 <- getposdata(yelkouan_pos_2020)
## [1] "Tag17600_Z-170"
## [1] "Tag17604_Z-95"
## [1] "Tag17644_Z-106"
## [1] "Tag17677_Z-170 (2nd Parent)"
## [1] "Tag17700_VM-13"
## [1] "Tag17717_VM-8"
## [1] "Tag17724_Z-106 (2nd Parent)"
## [1] "Tag40014_VM-3 (2nd Parent)"
## [1] "Tag40024_Z-178 (2nd Parent)"
## [1] "Tag40039_Z-15 (2nd Parent)"
## [1] "Tag40073_Z-131 (2nd Parent)"
## [1] "Tag40078_Z-178"
## [1] "Tag40086_VM-3"
## [1] "Tag40094_Z-131"
## [1] "Tag40118_Z-175"
## [1] "Tag40133_Z-1 (2nd Parent)"
## [1] "Tag40138_VM-18"
## [1] "Tag40536_VM-8 (2nd Parent)"
## [1] "Tag40615_VM-23"
## [1] "Tag40817_VM-12"
## [1] "Tag40859_Z-179"
## [1] "Tag41108_Z-95 (2nd Parent)"
## [1] "Tag40193_Z-13 (2nd Parent)"
yelk2021 <- getposdata(yelkouan_pos_2021)
## [1] "Tag41220_VR-1"
yelk <- bind_rows(yelk2019, yelk2020, yelk2021) %>% 
  # filter(year != 21) %>% # misunderstanding, still a breeding bird
  filter(latitude != 0) %>% 
  filter(time_offset < 1000)

dupes <- yelk %>% dplyr::select(bird_id, dttm) %>% 
  duplicated()

yelk <- yelk %>% filter(!dupes)

yelk_xy <- st_as_sf(yelk, coords = c("longitude", "latitude"), crs = wgs84)
st_write(yelk_xy, "data-output/yelk_points.gpkg", append = FALSE)
## Deleting layer `yelk_points' using driver `GPKG'
## Writing layer `yelk_points' to data source 
##   `data-output/yelk_points.gpkg' using driver `GPKG'
## Writing 16692 features with 14 fields and geometry type Point.
yelk_xy_proj <- st_transform(yelk_xy, crs = htrs96)


# formatting for track2KBA ------------------------------------------------

# Zaklopatica
yelk_zaklop <- filter(yelk, colony_code == "Z") %>% 
  rename(DateTime = dttm,
         Latitude = latitude,
         Longitude = longitude,
         ID = bird_id)

colony_yelk_zaklop <- filter(colony_data, colony_code == "Z") %>% 
  dplyr::select(Latitude = lat_colony,
                Longitude = lon_colony)

trips_yelk_zaklop <- suppressWarnings(tripSplit(
  dataGroup = yelk_zaklop,
  colony = colony_yelk_zaklop,
  innerBuff = 3,
  returnBuff = 10,
  duration = 1,
  rmNonTrip = FALSE
))
## track 19_Tag17600_Z-901 starts out on trip
## track 19_Tag17600_Z-97 does not return to the colony
## track 19_Tag17604_Z-78 does not return to the colony
## track 19_Tag17617_Z-4 (2nd Parent)6 does not return to the colony
## track 19_Tag17644_Z-1301 starts out on trip
## track 19_Tag17652_Z-201 starts out on trip
## track 19_Tag17704_Z-119 does not return to the colony
## track 19_Tag17735_Z-3 (RAW DATA MISSING)01 starts out on trip
## track 19_Tag40066_Z-1413 does not return to the colony
## track 19_Tag40069_Z-601 starts out on trip
## track 19_Tag40073_Z-117 does not return to the colony
## track 19_Tag40078_Z-3 (2nd Parent)2 does not return to the colony
## track 19_Tag40086_Z-11 (2nd Parent)01 starts out on trip
## track 19_Tag40094_Z-162 does not return to the colony
## track 19_Tag40133_Z-1513 does not return to the colony
## track 19_Tag40138_Z-176 does not return to the colony
## track 19_Tag40170_Z-1213 does not return to the colony
## track 19_Tag40177_Z-17 (2nd Parent)01 starts out on trip
## track 19_Tag40182_Z-401 starts out on trip
## track 20_Tag17600_Z-17013 does not return to the colony
## track 20_Tag17644_Z-10601 starts out on trip
## track 20_Tag17644_Z-1067 does not return to the colony
## track 20_Tag40024_Z-178 (2nd Parent)15 does not return to the colony
## track 20_Tag40073_Z-131 (2nd Parent)01 starts out on trip
## track 20_Tag40073_Z-131 (2nd Parent)18 does not return to the colony
## track 20_Tag40078_Z-1785 does not return to the colony
## track 20_Tag40094_Z-13101 starts out on trip
## track 20_Tag40118_Z-17501 starts out on trip
## track 20_Tag40118_Z-17524 does not return to the colony
## track 20_Tag40133_Z-1 (2nd Parent)01 starts out on trip
## track 20_Tag40193_Z-13 (2nd Parent)01 starts out on trip
## track 20_Tag40859_Z-17901 starts out on trip
## track 20_Tag40859_Z-1793 does not return to the colony
## track 20_Tag41108_Z-95 (2nd Parent)16 does not return to the colony
# trips_yelk_zaklop <- subset(trips_yelk_zaklop, trips_yelk_zaklop$Returns == "Yes")

sumTrips <- tripSummary(trips = trips_yelk_zaklop, colony = colony_yelk_zaklop)
## Warning in tripSummary(trips = trips_yelk_zaklop, colony = colony_yelk_zaklop): Some trips did not return to the specified returnBuffer distance from the
##   colony. The return DateTime given for these trips refers to the last location
##   of the trip, and NOT the actual return time to the colony.
mapTrips(trips = trips_yelk_zaklop, colony = colony_yelk_zaklop)
## Too many individuals to plot. Only 25 IDs will be shown.
## Trips colored by completeness. Indicate colorBy=='trip' to color by
##     trips.

# Veli Maslovnjak
yelk_velmas <- filter(yelk, colony_code == "VM") %>% 
  rename(DateTime = dttm,
         Latitude = latitude,
         Longitude = longitude,
         ID = bird_id)

colony_yelk_velmas <- filter(colony_data, colony_code == "VM") %>% 
  dplyr::select(Latitude = lat_colony,
                Longitude = lon_colony)

trips_yelk_velmas <- suppressWarnings(tripSplit(
  dataGroup = yelk_velmas,
  colony = colony_yelk_velmas,
  innerBuff = 3,
  returnBuff = 10,
  duration = 1,
  rmNonTrip = FALSE
))
## track 20_Tag17700_VM-1301 starts out on trip
## track 20_Tag17700_VM-1316 does not return to the colony
## track 20_Tag17717_VM-801 starts out on trip
## track 20_Tag17717_VM-84 does not return to the colony
## track 20_Tag40014_VM-3 (2nd Parent)01 starts out on trip
## track 20_Tag40014_VM-3 (2nd Parent)19 does not return to the colony
## track 20_Tag40086_VM-301 starts out on trip
## track 20_Tag40138_VM-1814 does not return to the colony
## track 20_Tag40536_VM-8 (2nd Parent)01 starts out on trip
## track 20_Tag40615_VM-2301 starts out on trip
## track 20_Tag40615_VM-2312 does not return to the colony
## track 20_Tag40817_VM-1201 starts out on trip
# trips_yelk_velmas <- subset(trips_yelk_velmas, trips_yelk_velmas$Returns == "Yes")

sumTrips <- tripSummary(trips = trips_yelk_velmas, colony = colony_yelk_velmas)
## Warning in tripSummary(trips = trips_yelk_velmas, colony = colony_yelk_velmas): Some trips did not return to the specified returnBuffer distance from the
##   colony. The return DateTime given for these trips refers to the last location
##   of the trip, and NOT the actual return time to the colony.
mapTrips(trips = trips_yelk_velmas, colony = colony_yelk_velmas)
## Trips colored by completeness. Indicate colorBy=='trip' to color by
##     trips.

# interpolation of yelkouan tracks ----------------------------------------

# merging the trip data into a single table

trips_yelk <- rbind(trips_yelk_zaklop, trips_yelk_velmas)

# adding "activity" variable, setting non-trip locations as "inactive" (activity = 0)

trips_yelk_proj <- st_as_sf(trips_yelk) %>% 
  st_transform(crs = htrs96) %>% 
  mutate(x_proj = st_coordinates(.)[,1],
         y_proj = st_coordinates(.)[,2]) %>% 
  mutate(activity = ifelse(tripID == -1, 0, 1)) %>% 
  mutate(error_semi_major_axis = 50,
         error_semi_minor_axis = 50,
         error_ellipse_orientation = 0)    # adding error parameters


# calculating speed and filtering out more erroneous fixes ----------------

library(purrr)
library(furrr)
## Loading required package: future
plan(multisession)

# f to replace sdafilter() from the deprecated argosfilter package
speedCalc <- function(sftab) {
  coords <- st_coordinates(sftab)
  timediff <- as.difftime(0, units = "secs")
  distdiff <- 0
  for (i in 2:nrow(sftab)) {
    timediff <- append(timediff, sftab$DateTime[i] - sftab$DateTime[i-1])
    distdiff <- append(distdiff, sqrt((coords[i,1]-coords[i-1,1])^2 + (coords[i,2]-coords[i-1,2])^2))
  }
  speed_calc = distdiff / as.numeric(timediff, units = "secs")
  return(speed_calc*3.6)
}

trips_yelk_proj <- trips_yelk_proj %>% 
  arrange(ID, DateTime) %>% 
  group_by(ID) %>% 
  nest() %>% 
  mutate(speed_kmh = future_map(data, speedCalc)) %>% 
  unnest(cols = c(data, speed_kmh)) %>% 
  filter(speed_kmh <= 100)


# creating error columns --------------------------------------------------

trips_yelk_proj <- trips_yelk_proj %>% 
  arrange(DateTime) %>% 
  nest() %>% 
  mutate(data = future_map(data, st_as_sf)) %>% 
  mutate(diag = map(data, ~ argosDiag2Cov(.x$error_semi_major_axis, 
                                          .x$error_semi_minor_axis, 
                                          .x$error_ellipse_orientation)),
         data = map2(data, diag, bind_cols)) %>% 
  select(-diag)


# creating initial parameters ---------------------------------------------

# function init_params() nicked (almost) directly from 
# https://jmlondon.github.io/crawl-workshop/crawl-practical.html
init_params <- function(d) {
  if (any(colnames(d) == "x_proj") && any(colnames(d) == "y_proj")) {
    ret <- list(a = c(d$x_proj[1], 0,
                      d$y_proj[1], 0),
                P = diag(c(10 ^ 2, 10 ^ 2,
                           10 ^ 2, 10 ^ 2)))
  } else if (inherits(d,"sf")) {
    ret <- list(a = c(sf::st_coordinates(d)[[1,1]], 0,
                      sf::st_coordinates(d)[[1,2]], 0))
  }
  ret
} 

# need to verify if this is correct
fix_par = c(NA,    # 
            1,     # 
            NA,    # sigma
            1,    # beta -- auto-correlation parameter
            NA,     # activity -- provided in dataset, therefore should be set to 0?
            NA)    # no idea what this one is 🤷

fitCrawl <- function(d, fix_par) {
  fit <- crawl::crwMLE(mov.model = ~ 1,
                       err.model = list(x =  ~ ln.sd.x - 1,
                                        y =  ~ ln.sd.y - 1,
                                        rho =  ~ error.corr),
                       activity = ~ I(activity),
                       fixPar = fix_par,
                       data = d,
                       method = "Nelder-Mead",    # not sure what this means
                       Time.name = "DateTime",
                       control=list(trace=1, REPORT=1))
  return(fit)
}

# yelk_fit <- trips_yelk_proj %>% 
#   mutate(fit = future_pmap(list(d = data, fix_par = fix_par),
#                            fitCrawl),
#          params = map(fit, tidy_crwFit))
# 
# Error in `mutate()`:
#   ! Problem while computing `fit = future_pmap(list(d = data, fix_par =
#                                                       fix_par), fitCrawl)`.
# ℹ The error occurred in group 1: ID = "19_Tag17600_Z-9".
# Caused by error in `crawl::crwMLE()`:
#   ! 'fixPar' argument is not the right length! The number of parameters in the model is 5