knitr::opts_chunk$set(warning=FALSE, message=FALSE, fig.align = 'center')
options(tidyverse.quiet = TRUE)
library(readxl)
library(janitor)
library(glmmTMB)
library(lubridate)
library(broom.mixed)
library(knitr)
library(ggpubr)
library(sjPlot)
library(sf)
library(leaflet)
library(raster)
library(DHARMa)
library(ggspatial)
library(cowplot)
library(ggpubr)
library(mapview)
theme_set(theme_minimal())

1 Survey design

  • The objective of the study was to quantify spatio-temporal changes in fish density after a massive seabream escape event in July 2014.

  • Surveys were conducted in 14 July 2014, five days after the escape event, and then in August 2014 and September 2014.

  • Density was determined by snorkel-based dive surveys at eight different locations.

  • At each location two sites were surveyed, with six replicates per site, except Gorguel in July (12 replicates) and Escombreras in August and September when no surveys were conducted.

  • Sampling units consisted of 500 m2 transects sampled visually by snorkelers swimming 50 m in a straight line and observing the area within 2.5 m on either side (5 x 100 m2).

  • Snorkelers estimated the density and the size of each fish in the transect.

# get map of Murcia---
murcia <- 
  read_sf('C:/Users/javiera/OneDrive - Cawthron/UA/Consumo/data/CCAA/Comunidades_Autonomas_ETRS89_30N.shp') %>% 
  filter(Texto_Alt == "Murcia")

locations <- st_read("data/coords_gorguel.kml", quiet = T)

esc_d <- 
  read_csv('data/gorguel_density_data_clean.csv', show_col_types = FALSE) %>% 
  mutate(month = fct_relevel(month, c("July","August"))) 
esc_d %>% 
  tabyl(month, localidad) %>% 
  kable(caption = 'Transectos por Localidad y mes') 
Transectos por Localidad y mes
month Atamaría Azohía Cabo Palos Calacortina Escombreras Gorguel Portman Portus
July 12 12 12 12 12 24 12 12
August 12 12 12 12 0 12 12 12
September 12 12 12 12 0 12 12 12

2 Study locations

leaflet(locations) %>%
  setView(lng = -1,
          lat = 37.6,
          zoom = 11) %>%
  addTiles() %>%
  addProviderTiles("Esri.WorldImagery") %>%
  addCircleMarkers()

2.1 Bathymetry

murcia_bathy_cropped <- raster('data/murcia_bathy_cropped.grd')
mapview(murcia_bathy_cropped)@map

2.2 Map version 2

sf_use_s2(FALSE)
Spherical geometry (s2) switched off
murcia <- 
  read_sf('C:/Users/javiera/OneDrive - Cawthron/UA/Consumo/data/CCAA/Comunidades_Autonomas_ETRS89_30N.shp') %>% 
  filter(Texto_Alt == "Murcia") %>% 
  st_transform (4326) %>% 
  st_crop(
    xmin = -1.2,
    xmax = 0.7,
    ymin = 37.5,
    ymax = 37.65
  ) 
although coordinates are longitude/latitude, st_intersection assumes that they are planar

3 Data exploration

3.1 Distance

Locations were at increasing distance, in both direction (E and W), from the farm where the escape was originated.

esc_d %>% 
  distinct(localidad, distance, orientation) %>% 
  arrange(orientation, distance) %>% 
  kable(caption = "Distancia y direccion de cada sitio")
Distancia y direccion de cada sitio
localidad distance orientation
Gorguel 0.7 E
Portman 4.4 E
Atamaría 10.1 E
Cabo Palos 23.7 E
Escombreras 6.4 W
Calacortina 12.6 W
Portus 26.4 W
Azohía 45.3 W

3.2 Density distribution

Fish density data was standardized to number of fish per 100 m2, resulting in continuous variable containing a disproportionately high number of zero values (73.6% of the 276 transects) and the non-zero data was highly overdispersed.

ggarrange(
  ggplot(esc_d, aes(density)) +
  geom_histogram() + labs(subtitle = 'Untrasnformed'),
  ggplot(esc_d, aes(density)) +
  geom_histogram() +
  scale_x_continuous(trans = 'log1p') + labs(subtitle = 'log1p transformed')
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

3.3 Density by Month

ggplot(esc_d, aes(month, density, fill = month)) +
  scale_y_continuous(trans = "log1p", breaks = c(0,10, 50, 250, 1000) ) +
  geom_boxplot(alpha = .4) +
  labs(x = 'Distance (m)' , y = Individuals~per~100~m^2, parse = T) +
  scale_fill_discrete(name = NULL)

3.4 Density by location, month and site

ggplot(esc_d, aes(month, density)) +
  geom_point(alpha = .5, size = 3, aes(color = factor(sitio)), position =     position_jitter(width = .1)) +
  scale_y_continuous(trans = "log1p", expand = c(0, Inf), ) +
  # scale_x_date(date_labels = "%b %d") +
  facet_wrap(~localidad) 

3.5 Density vs distance, month and direccion (E -W)

ggplot(esc_d, aes(distance, density, color = month, group = month)) +
  scale_y_continuous(trans = "log1p") +
  stat_summary(fun.data = mean_se) +
  stat_summary(fun.data = mean_se, geom = 'line') +
  facet_wrap(~orientation, scales = 'free_x') +
  theme(legend.position = 'bottom') +
  labs(x = 'Distance from escape (km)' , y = Number~of~fish~per~100~m^2, parse = T) +
  scale_color_discrete(name = NULL)

3.6 Distance no direction

ggplot(esc_d, aes(distance, density, color = month, group = month)) +
  scale_y_continuous(trans = "log1p", breaks = c(0,10, 50, 250, 1000) ) +
  geom_point(alpha = .4, position = position_jitter(width = .5)) +
  geom_smooth(stat = 'summary', fun.data = mean_se, alpha = .2) +
  theme(legend.position = 'bottom') +
  labs(x = 'Distance from escape (km)' , y = Individuals~per~100~m^2, parse = T) +
  scale_color_discrete(name = NULL)

3.7 Density by Orientation

ggplot(esc_d, aes(month, density, fill = orientation)) +
  scale_y_continuous(trans = "log1p", breaks = c(0,10, 50, 250, 1000) ) +
  geom_boxplot(alpha = .4) +
  labs(x = NULL , y = Individuals~per~100~m^2, parse = T) +
  scale_fill_discrete(name = NULL)

4 Fish density modelling

Fish count data was standardised to number of fish per 100 m2, resulting in continuous variable that was overdispered and contained a large number of zero values. As such, density data was analysed using generalised linear models fitted with Tweedie error distribution which is more robust to overdispersed and zero-rich data than other distribution families (e.g. negative binomial, gamma). Zero-inflation was tested with the testZeroInflation function of the package DHARMa that compares the observed number of zeros with the zeros expected from simulations.

Models were fitted with distance a continuous covariate, month as categorical with three levels (July, August and September), and their interaction (Distance x Month). The main effect of Orientation (W and E) was also included as a categorical fixed effect. Site nested in location was included a random factor to quatify the variability in density at a small spatial scale (100s m). Models were selected based on the AIC values and validated using simulated residulas using the simulateResiduals function in the package DHARMa.

The contribution of fixed and random effects to the performance of the model was calculated using marginal R2 (accounting for fixed effects only) and conditional pseudo-R2 (accounting for fixed and random effects, Nakagawa and Schielzeth, 2013).

4.1 Test for zero-inflation

zi_model <-
  glmmTMB(
    density ~ distance * month + orientation + (1|localidad:sitio),
    zi =  ~ distance + month,
    family = tweedie(link = "log"),
    data = esc_d
  )

testZeroInflation(zi_model)

    DHARMa zero-inflation test via comparison to expected zeros with
    simulation under H0 = fitted model

data:  simulationOutput
ratioObsSim = 0.98653, p-value = 0.816
alternative hypothesis: two.sided

glance(zi_model)

The test showed that the expected distribution of zeros is not larger than the observed values, thus there is no need to incorporate zero-inflation in the model.

4.2 GLMM without zero-inflation

m1 <-
  glmmTMB(
    density ~ distance * month + orientation + (1|localidad:sitio),
    family = tweedie(link = "log"),
    data = esc_d
  )


drop1(m1)
Single term deletions

Model:
density ~ distance * month + orientation + (1 | localidad:sitio)
               Df    AIC
<none>            718.69
orientation     1 716.97
distance:month  2 721.76

The lowest AIC value (716.97) was for the model with the distance and month interaction and no effect of orientation (density ~ distance * month).

All AIC values were lower than that for the zero-inflated model.

4.3 Model validation

final_model <-
  glmmTMB(
    density ~ distance * month  + (1|localidad:sitio),
    family = tweedie(link = "log"),
    data = esc_d
  )

simulationOutput <- simulateResiduals(fittedModel = final_model, plot = F)
plot(simulationOutput)

# testResiduals(final_model)

There are no residual patterns or over-dispersion. The residuals test identified one outlier that could be potentially removed and some mild deviation that is not of concern.

4.4 Model inference

effect component group term estimate std.error statistic p.value
fixed cond NA (Intercept) 3.77 0.72 5.21 0.00
fixed cond NA distance -0.19 0.03 -7.13 0.00
fixed cond NA monthAugust -4.16 0.82 -5.07 0.00
fixed cond NA monthSeptember -5.23 1.06 -4.95 0.00
fixed cond NA distance:monthAugust 0.10 0.04 2.51 0.01
fixed cond NA distance:monthSeptember 0.11 0.06 1.72 0.09
ran_pars cond localidad:sitio sd__(Intercept) 0.78 NA NA NA

The model predicted an average fish density (intercept of the model) of 43 individuals per 100 m2 (10.49 – 178.96 95% CI) at the location of the escape. The density reduction rate varied significantly with sampling month (Distance x month, P<0.05).

Fish density significantly decreased with distance, more specifically by 17% for every km away from the escape location. Fish density was predicted to decrease to 2% and 1% after one and two month of the escape event, respectively

Additionally, there was relatively site-to-site variability in fish density, evidenced by the small standard deviation for the effect of site nested in location, and the small difference between the conditional the R2 value marginal value (0.38 and 0.41, respectively).

5 Fish size distribution

In addition to fish density, divers estimated total fish length of all fish in the transects.

size <- read_csv('data/size_data.csv')
size_raw <- read_csv('data/size_data_raw.csv')

5.1 Histograms of total fish length by month and location


ggplot(size_raw, aes(size, fill = localidad)) +
  geom_histogram(bins = 15) +
  facet_wrap(~month, scales = 'free_y') 

NA

5.2 Mean (±SD) total length by month and locality

ggplot(size_raw, aes(fct_relevel(month,"July","August"), size, color = fct_reorder(loc_dist, distance) , group = fct_reorder(loc_dist, distance) )) +
  stat_summary(fun.data = mean_sdl,   position = position_dodge(width = .7)) +
  scale_color_discrete(name = NULL) +
  labs(x = NULL, y = "Total fish length (cm)" )

5.3 Test for size between month

Length frequencies between months were compared using a randomization Kolmogorov & Smirnov test using the function clus.lf in the fishmethods package.

From package help file:

Length frequency distributions of fishes are commonly tested for differences among groups (e.g., regions, sexes, etc.) using a two-sample Kolmogov-Smirnov test (K-S). Like most statistical tests, the K-S test requires that observations are collected at random and are independent of each other to satisfy assumptions. These basic assumptions are violated when gears (e.g., trawls, haul seines, gillnets, etc.) are used to sample fish because individuals are collected in clusters . In this case, the “haul”, not the individual fish, is the primary sampling unit and statistical comparisons must take this into account.

library(fishmethods)

clus_res <- 
clus.lf(group=size$month,haul=paste(size$transecto,size$toponimia),
        len=size$size, number=size$number, 
        binsize=5,resamples=100)

clus_res$results %>% kable

There were significant differences in fish size frequency distribution between months, except between August and September.

---
title: "Escapes Gorguel"
author: "Javier Atalah"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  html_notebook:
    df_print: paged
    number_section: yes
    theme: default
    toc: yes
    toc_depth: 2
    toc_float:
      collapsed: no
      smooth_scroll: yes
---

![](figures/logo-ua.jpg){width="307"}

```{r setup, warning=FALSE, message=FALSE}
knitr::opts_chunk$set(warning=FALSE, message=FALSE, fig.align = 'center')
options(tidyverse.quiet = TRUE)
library(readxl)
library(janitor)
library(glmmTMB)
library(lubridate)
library(broom.mixed)
library(knitr)
library(ggpubr)
library(sjPlot)
library(sf)
library(leaflet)
library(raster)
library(DHARMa)
library(ggspatial)
library(cowplot)
library(ggpubr)
library(mapview)
theme_set(theme_minimal())
```

# Survey design

-   The objective of the study was to quantify spatio-temporal changes in fish density after a massive seabream escape event in July 2014.

-   Surveys were conducted in 14 July 2014, five days after the escape event, and then in August 2014 and September 2014.

-   Density was determined by snorkel-based dive surveys at eight different locations.

-   At each location two sites were surveyed, with six replicates per site, except Gorguel in July (12 replicates) and Escombreras in August and September when no surveys were conducted.

-   Sampling units consisted of 500 m^2^ transects sampled visually by snorkelers swimming 50 m in a straight line and observing the area within 2.5 m on either side (5 x 100 m^2^).

-   Snorkelers estimated the density and the size of each fish in the transect.

```{r}
# get map of Murcia---
murcia <- 
  read_sf('C:/Users/javiera/OneDrive - Cawthron/UA/Consumo/data/CCAA/Comunidades_Autonomas_ETRS89_30N.shp') %>% 
  filter(Texto_Alt == "Murcia")

locations <- st_read("data/coords_gorguel.kml", quiet = T)

esc_d <- 
  read_csv('data/gorguel_density_data_clean.csv', show_col_types = FALSE) %>% 
  mutate(month = fct_relevel(month, c("July","August"))) 

```

```{r}
esc_d %>% 
  tabyl(month, localidad) %>% 
  kable(caption = 'Transectos por Localidad y mes') 
```

# Study locations

```{r}
leaflet(locations) %>%
  setView(lng = -1,
          lat = 37.6,
          zoom = 11) %>%
  addTiles() %>%
  addProviderTiles("Esri.WorldImagery") %>%
  addCircleMarkers()
```

## Bathymetry

```{r}
murcia_bathy_cropped <- raster('data/murcia_bathy_cropped.grd')
mapview(murcia_bathy_cropped)@map
```

## Map version 2

```{r}
coords <- read_csv('data/coords.csv')
spain <- 
  read_sf('C:/Users/javiera/OneDrive - Cawthron/UA/Consumo/data/CCAA/Comunidades_Autonomas_ETRS89_30N.shp') %>% 
  filter(!Texto%in% c("Canarias","Ceuta","Melilla")) %>% 
  st_transform (4326) 

sf_use_s2(FALSE)
murcia <- 
  read_sf('C:/Users/javiera/OneDrive - Cawthron/UA/Consumo/data/CCAA/Comunidades_Autonomas_ETRS89_30N.shp') %>% 
  filter(Texto_Alt == "Murcia") %>% 
  st_transform (4326) %>% 
  st_crop(
    xmin = -1.2,
    xmax = 0.7,
    ymin = 37.5,
    ymax = 37.65
  ) 

sites_map <- 
ggplot() + 
  geom_sf(data = murcia, fill = 'gray90') +
  geom_point(data = coords, aes(X,Y)) +
  ggrepel::geom_label_repel(data = coords, aes(X,Y, label = localidad), color =1 , size = 2.5) +
  labs(x = NULL, y = NULL) +
  annotation_north_arrow(
    location = "br",
    which_north = "true",
    pad_x = unit(0.05, "in"),
    pad_y = unit(0.2, "in"),
    style = north_arrow_fancy_orienteering
  ) +
  annotation_scale(location = "br", width_hint = 0.15) 


spain_map <- 
  ggplot() + 
  geom_sf(data = spain, fill = 'gray90') +
  labs(x = NULL, y = NULL) +
  # coord_map(xlim = c(166, 179), ylim = c(-47.5, -34)) +
  annotate(
    "rect",
    xmin = -1.2,
    xmax = -0.7,
    ymin = 37.5,
    ymax = 37.7,
    color = 2,
    fill = 'transparent',
    size = 1
  ) +
  theme_void() +
  theme(
    panel.border = element_rect(color = 'gray30', fill= NA)
  ) 

fig_map <- 
  cowplot::ggdraw() +
  draw_plot(sites_map) +
  draw_plot(
    spain_map,
    x = 0.8,
    y = .75,
    width = .2,
    height = .2
  )
fig_map
```

# Data exploration

## Distance

Locations were at increasing distance, in both direction (E and W), from the farm where the escape was originated.

```{r}
esc_d %>% 
  distinct(localidad, distance, orientation) %>% 
  arrange(orientation, distance) %>% 
  kable(caption = "Distancia y direccion de cada sitio"
```

## Density distribution

Fish density data was standardized to number of fish per 100 m2, resulting in continuous variable containing a disproportionately high number of zero values (73.6% of the 276 transects) and the non-zero data was highly overdispersed.

```{r, fig.height=4, fig.width=10}
ggarrange(
  ggplot(esc_d, aes(density)) +
  geom_histogram() + labs(subtitle = 'Untrasnformed'),
  ggplot(esc_d, aes(density)) +
  geom_histogram() +
  scale_x_continuous(trans = 'log1p') + labs(subtitle = 'log1p transformed')
  )
```

## Density by Month

```{r}
ggplot(esc_d, aes(month, density, fill = month)) +
  scale_y_continuous(trans = "log1p", breaks = c(0,10, 50, 250, 1000) ) +
  geom_boxplot(alpha = .4) +
  labs(x = 'Distance (m)' , y = Individuals~per~100~m^2, parse = T) +
  scale_fill_discrete(name = NULL)
```

## Density by location, month and site

```{r}
ggplot(esc_d, aes(month, density)) +
  geom_point(alpha = .5, size = 3, aes(color = factor(sitio)), position =     position_jitter(width = .1)) +
  scale_y_continuous(trans = "log1p", expand = c(0, Inf), ) +
  # scale_x_date(date_labels = "%b %d") +
  facet_wrap(~localidad) 
```

## Density vs distance, month and direccion (E -W)

```{r}
ggplot(esc_d, aes(distance, density, color = month, group = month)) +
  scale_y_continuous(trans = "log1p") +
  stat_summary(fun.data = mean_se) +
  stat_summary(fun.data = mean_se, geom = 'line') +
  facet_wrap(~orientation, scales = 'free_x') +
  theme(legend.position = 'bottom') +
  labs(x = 'Distance from escape (km)' , y = Number~of~fish~per~100~m^2, parse = T) +
  scale_color_discrete(name = NULL)

```

## Distance no direction

```{r}
ggplot(esc_d, aes(distance, density, color = month, group = month)) +
  scale_y_continuous(trans = "log1p", breaks = c(0,10, 50, 250, 1000) ) +
  geom_point(alpha = .4, position = position_jitter(width = .5)) +
  geom_smooth(stat = 'summary', fun.data = mean_se, alpha = .2) +
  theme(legend.position = 'bottom') +
  labs(x = 'Distance from escape (km)' , y = Individuals~per~100~m^2, parse = T) +
  scale_color_discrete(name = NULL)

```

## Density by Orientation

```{r}
ggplot(esc_d, aes(month, density, fill = orientation)) +
  scale_y_continuous(trans = "log1p", breaks = c(0,10, 50, 250, 1000) ) +
  geom_boxplot(alpha = .4) +
  labs(x = NULL , y = Individuals~per~100~m^2, parse = T) +
  scale_fill_discrete(name = NULL)
```

# Fish density modelling

Fish count data was standardised to number of fish per 100 m2, resulting in continuous variable that was overdispered and contained a large number of zero values. As such, density data was analysed using generalised linear models fitted with Tweedie error distribution which is more robust to overdispersed and zero-rich data than other distribution families (e.g. negative binomial, gamma). Zero-inflation was tested with the testZeroInflation function of the package DHARMa that compares the observed number of zeros with the zeros expected from simulations.

Models were fitted with distance a continuous covariate, month as categorical with three levels (July, August and September), and their interaction (Distance x Month). The main effect of Orientation (W and E) was also included as a categorical fixed effect. Site nested in location was included a random factor to quatify the variability in density at a small spatial scale (100s m). Models were selected based on the AIC values and validated using simulated residulas using the simulateResiduals function in the package DHARMa.

The contribution of fixed and random effects to the performance of the model was calculated using marginal R2 (accounting for fixed effects only) and conditional pseudo-R2 (accounting for fixed and random effects, Nakagawa and Schielzeth, 2013).

## Test for zero-inflation

```{r}
zi_model <-
  glmmTMB(
    density ~ distance * month + orientation + (1|localidad:sitio),
    zi =  ~ distance + month,
    family = tweedie(link = "log"),
    data = esc_d
  )

testZeroInflation(zi_model)

glance(zi_model)
```

The test showed that the expected distribution of zeros is not larger than the observed values, thus there is no need to incorporate zero-inflation in the model.

## GLMM without zero-inflation

```{r}
m1 <-
  glmmTMB(
    density ~ distance * month + orientation + (1|localidad:sitio),
    family = tweedie(link = "log"),
    data = esc_d
  )


drop1(m1)

```

The lowest AIC value (716.97) was for the model with the distance and month interaction and no effect of orientation (density \~ distance \* month).

All AIC values were lower than that for the zero-inflated model.

## Model validation

```{r}
final_model <-
  glmmTMB(
    density ~ distance * month  + (1|localidad:sitio),
    family = tweedie(link = "log"),
    data = esc_d
  )

simulationOutput <- simulateResiduals(fittedModel = final_model, plot = F)
plot(simulationOutput)
# testResiduals(final_model)
```

There are no residual patterns or over-dispersion. The residuals test identified one outlier that could be potentially removed and some mild deviation that is not of concern.

## Model inference

```{r}
tidy(final_model) %>% kable(digits = 2)
```

The model predicted an average fish density (intercept of the model) of 43 individuals per 100 m^2^ (10.49 -- 178.96 95% CI) at the location of the escape. The density reduction rate varied significantly with sampling month (Distance x month, P\<0.05).

Fish density significantly decreased with distance, more specifically by 17% for every km away from the escape location. Fish density was predicted to decrease to 2% and 1% after one and two month of the escape event, respectively

Additionally, there was relatively site-to-site variability in fish density, evidenced by the small standard deviation for the effect of site nested in location, and the small difference between the conditional the R^2^ value marginal value (0.38 and 0.41, respectively).

# Fish size distribution

In addition to fish density, divers estimated total fish length of all fish in the transects.

```{r}
size <- read_csv('data/size_data.csv')
size_raw <- read_csv('data/size_data_raw.csv')
```

## Histograms of total fish length by month and location

```{r, fig.width=12}

ggplot(size_raw, aes(size, fill = localidad)) +
  geom_histogram(bins = 15) +
  facet_wrap(~month, scales = 'free_y') 
  
```

## Mean (±SD) total length by month and locality

```{r}
ggplot(size_raw, aes(fct_relevel(month,"July","August"), size, color = fct_reorder(loc_dist, distance) , group = fct_reorder(loc_dist, distance) )) +
  stat_summary(fun.data = mean_sdl,   position = position_dodge(width = .7)) +
  scale_color_discrete(name = NULL) +
  labs(x = NULL, y = "Total fish length (cm)" )

```

## Test for size between month

Length frequencies between months were compared using a randomization Kolmogorov & Smirnov test using the function *clus.lf* in the fishmethods package.

From package help file:

*Length frequency distributions of fishes are commonly tested for differences among groups (e.g., regions, sexes, etc.) using a two-sample Kolmogov-Smirnov test (K-S). Like most statistical tests, the K-S test requires that observations are collected at random and are independent of each other to satisfy assumptions. These basic assumptions are violated when gears (e.g., trawls, haul seines, gillnets, etc.) are used to sample fish because individuals are collected in clusters . In this case, the "haul", not the individual fish, is the primary sampling unit and statistical comparisons must take this into account.*

```{r}
library(fishmethods)

clus_res <- 
clus.lf(group=size$month,haul=paste(size$transecto,size$toponimia),
        len=size$size, number=size$number, 
        binsize=5,resamples=100)

clus_res$results %>% kable
```

There were significant differences in fish size frequency distribution between months, except between August and September.
