01 Load libraries and data
library(tidyverse)
library(ggpubr)
library(vegan)
library(ggord)
library(ggpmisc)
library(readxl)
library(knitr)
library(pairwiseAdonis)
library(benthos)
library(corrplot)
source('theme_javier.R')
theme_set(theme_javier())
# polychaetes data
poli <-
read_excel('data/poliquetos.xlsx', 2) %>%
mutate(Distance = fct_relevel(Distance, "0m" , "50m", "250m", "2000m"),
Farm = fct_recode(Farm, `Farm 1` = "Ext", `Farm 2` = "Int"))
# sediment environmental data
env <-
read_excel('data/poliquetos.xlsx', 1) %>%
select(SampleID, TN, TC, MO, TFS)
02 Plot 12 most abundant families
poli_long <-
poli %>%
gather(taxa, abu, Capitellidae:Nephtyidae)
top_taxa <-
poli_long %>%
group_by(taxa) %>%
summarise_at(vars(abu), funs(sum), na.rm =T) %>%
top_n(12) %>%
dplyr::select(taxa) %>%
as.list()
## Warning: funs() is soft deprecated as of dplyr 0.8.0
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once per session.
## Selecting by abu
poli_long %>%
filter(taxa %in% top_taxa$taxa) %>%
ggplot() +
geom_boxplot(aes(Distance, abu, color = Farm)) +
facet_wrap( ~ taxa, scales = 'free', nrow = 3)

03 nMDS plot
# 04 MDS ordination -------------
mds <- metaMDS( sqrt(sqrt(poli[, -c(1:5)])) , distance = 'bray',)
## Run 0 stress 0.0804792
## Run 1 stress 0.08125617
## Run 2 stress 0.1209863
## Run 3 stress 0.1286267
## Run 4 stress 0.08223417
## Run 5 stress 0.08223417
## Run 6 stress 0.1214672
## Run 7 stress 0.08047792
## ... New best solution
## ... Procrustes: rmse 0.000402019 max resid 0.001375476
## ... Similar to previous best
## Run 8 stress 0.08223417
## Run 9 stress 0.1257958
## Run 10 stress 0.08047789
## ... New best solution
## ... Procrustes: rmse 7.97148e-05 max resid 0.0003084432
## ... Similar to previous best
## Run 11 stress 0.08346721
## Run 12 stress 0.0804528
## ... New best solution
## ... Procrustes: rmse 0.006998189 max resid 0.02245357
## Run 13 stress 0.08042585
## ... New best solution
## ... Procrustes: rmse 0.003082919 max resid 0.01061635
## Run 14 stress 0.1277171
## Run 15 stress 0.1245256
## Run 16 stress 0.081816
## Run 17 stress 0.08331201
## Run 18 stress 0.08223417
## Run 19 stress 0.08223417
## Run 20 stress 0.1246592
## *** No convergence -- monoMDS stopping criteria:
## 19: stress ratio > sratmax
## 1: scale factor of the gradient < sfgrmin
## MDS biplot
ggord(
mds,
grp_in = poli$Distance,
poly = F,
alpha = 1,
ellipse = T,
arrow = 0,
repel = T,
text = .01,
vec_ext = .7
) +
theme_javier()

03 PERMANOVA and SIMPER
permanova <-
adonis(
sqrt(sqrt(poli[,-c(1:5)])) ~ Distance * Farm,
method = 'bray',
permutations = 9999,
data = poli
)
permanova
##
## Call:
## adonis(formula = sqrt(sqrt(poli[, -c(1:5)])) ~ Distance * Farm, data = poli, permutations = 9999, method = "bray")
##
## Permutation: free
## Number of permutations: 9999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Distance 3 2.3760 0.79200 21.9170 0.71424 0.0001 ***
## Farm 1 0.1875 0.18745 5.1873 0.05635 0.0158 *
## Distance:Farm 3 0.1850 0.06166 1.7063 0.05560 0.1471
## Residuals 16 0.5782 0.03614 0.17381
## Total 23 3.3266 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# pairwise comparisons-------------
pairwise.adonis(
sqrt(sqrt(poli[,-c(1:5)])),
poli$Distance)
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0m vs 50m 1 0.9709690 17.913217 0.6417468 0.002 0.012 .
## 2 0m vs 250m 1 1.6477067 58.039808 0.8530272 0.001 0.006 *
## 3 0m vs 2000m 1 1.5316848 33.213406 0.7685903 0.003 0.018 .
## 4 50m vs 250m 1 0.2932122 5.990758 0.3746388 0.004 0.024 .
## 5 50m vs 2000m 1 0.1806801 2.710014 0.2132188 0.020 0.120
## 6 250m vs 2000m 1 0.1277467 3.126717 0.2381949 0.022 0.132
# SIMPER ---------------
simp_distance <- simper(sqrt(sqrt(poli[, -c(1:5)])), poli$Distance, permutations = 999)
simp_Farm <- simper(sqrt(sqrt(poli[, -c(1:5)])), poli$Farm, permutations = 999)
simper_dist_table <-
bind_rows("0m vs. 50m" = simp_distance$`0m_50m` %>% data.frame(),
"0m vs 250m" = simp_distance$`0m_250m` %>% data.frame(),
"0m vs 2000m" = simp_distance$`0m_2000m` %>% data.frame(),
"50m vs 250m" = simp_distance$`50m_250m` %>% data.frame(),
"50m vs. 2000m" = simp_distance$`50m_2000m` %>% data.frame(),
"250m vs 2000m" = simp_distance$`250m_2000m` %>% data.frame(),
.id = "Groups") %>%
dplyr::select(-overall, -ord) %>%
dplyr::filter(cusum<.7) %>%
rename(Family = "species")
kable(simper_dist_table, digits = 2,caption = "SIMPER table among distances")
SIMPER table among distances
| 0m vs. 50m |
Capitellidae |
0.17 |
0.04 |
4.04 |
4.60 |
1.73 |
0.28 |
0.00 |
| 0m vs. 50m |
Spionidae |
0.07 |
0.04 |
1.72 |
0.43 |
1.56 |
0.39 |
0.00 |
| 0m vs. 50m |
Paraonidae |
0.05 |
0.04 |
1.31 |
0.00 |
1.04 |
0.48 |
0.22 |
| 0m vs. 50m |
Dorvilleidae |
0.05 |
0.04 |
1.31 |
1.05 |
0.86 |
0.56 |
0.03 |
| 0m vs. 50m |
Cossuridae |
0.05 |
0.04 |
1.30 |
0.00 |
0.89 |
0.64 |
0.06 |
| 0m vs 250m |
Capitellidae |
0.12 |
0.03 |
3.51 |
4.60 |
1.71 |
0.16 |
0.00 |
| 0m vs 250m |
Spionidae |
0.07 |
0.03 |
2.37 |
0.43 |
2.12 |
0.29 |
0.00 |
| 0m vs 250m |
Paraonidae |
0.10 |
0.01 |
10.71 |
0.00 |
2.49 |
0.38 |
0.00 |
| 0m vs 250m |
Dorvilleidae |
0.04 |
0.03 |
1.20 |
1.05 |
0.73 |
0.48 |
0.52 |
| 0m vs 250m |
Cossuridae |
0.07 |
0.01 |
8.28 |
0.00 |
1.74 |
0.57 |
0.00 |
| 0m vs 250m |
Cirratulidae |
0.08 |
0.01 |
10.42 |
0.00 |
1.87 |
0.66 |
0.00 |
| 0m vs 2000m |
Capitellidae |
0.15 |
0.06 |
2.50 |
4.60 |
1.65 |
0.20 |
0.00 |
| 0m vs 2000m |
Spionidae |
0.07 |
0.04 |
1.70 |
0.43 |
1.76 |
0.31 |
0.00 |
| 0m vs 2000m |
Paraonidae |
0.07 |
0.01 |
6.06 |
0.00 |
1.49 |
0.41 |
0.00 |
| 0m vs 2000m |
Dorvilleidae |
0.05 |
0.04 |
1.23 |
1.05 |
0.22 |
0.51 |
0.04 |
| 0m vs 2000m |
Cossuridae |
0.08 |
0.02 |
5.12 |
0.00 |
1.72 |
0.60 |
0.00 |
| 0m vs 2000m |
Cirratulidae |
0.07 |
0.01 |
6.61 |
0.00 |
1.50 |
0.67 |
0.00 |
| 50m vs 250m |
Capitellidae |
0.01 |
0.01 |
1.26 |
1.73 |
1.71 |
0.14 |
1.00 |
| 50m vs 250m |
Spionidae |
0.02 |
0.01 |
1.59 |
1.56 |
2.12 |
0.24 |
0.95 |
| 50m vs 250m |
Paraonidae |
0.05 |
0.04 |
1.47 |
1.04 |
2.49 |
0.34 |
0.20 |
| 50m vs 250m |
Dorvilleidae |
0.03 |
0.02 |
1.23 |
0.86 |
0.73 |
0.43 |
0.95 |
| 50m vs 250m |
Cossuridae |
0.03 |
0.03 |
1.19 |
0.89 |
1.74 |
0.51 |
0.96 |
| 50m vs 250m |
Cirratulidae |
0.03 |
0.02 |
1.34 |
1.00 |
1.87 |
0.59 |
0.88 |
| 50m vs 250m |
Lumbrineridae |
0.04 |
0.03 |
1.35 |
0.67 |
1.68 |
0.66 |
0.21 |
| 50m vs. 2000m |
Capitellidae |
0.03 |
0.02 |
1.40 |
1.73 |
1.65 |
0.10 |
1.00 |
| 50m vs. 2000m |
Spionidae |
0.02 |
0.01 |
1.17 |
1.56 |
1.76 |
0.19 |
0.98 |
| 50m vs. 2000m |
Paraonidae |
0.04 |
0.03 |
1.21 |
1.04 |
1.49 |
0.28 |
0.98 |
| 50m vs. 2000m |
Dorvilleidae |
0.03 |
0.03 |
1.26 |
0.86 |
0.22 |
0.37 |
0.70 |
| 50m vs. 2000m |
Cossuridae |
0.04 |
0.03 |
1.28 |
0.89 |
1.72 |
0.46 |
0.68 |
| 50m vs. 2000m |
Cirratulidae |
0.02 |
0.03 |
0.90 |
1.00 |
1.50 |
0.54 |
0.98 |
| 50m vs. 2000m |
Lumbrineridae |
0.03 |
0.03 |
1.07 |
0.67 |
1.10 |
0.61 |
0.75 |
| 50m vs. 2000m |
Glyceridae |
0.02 |
0.02 |
0.74 |
0.86 |
0.92 |
0.67 |
0.99 |
| 250m vs 2000m |
Capitellidae |
0.02 |
0.01 |
1.45 |
1.71 |
1.65 |
0.11 |
1.00 |
| 250m vs 2000m |
Spionidae |
0.02 |
0.01 |
1.43 |
2.12 |
1.76 |
0.21 |
1.00 |
| 250m vs 2000m |
Paraonidae |
0.03 |
0.02 |
2.02 |
2.49 |
1.49 |
0.29 |
1.00 |
| 250m vs 2000m |
Dorvilleidae |
0.02 |
0.02 |
0.99 |
0.73 |
0.22 |
0.36 |
0.99 |
| 250m vs 2000m |
Cossuridae |
0.01 |
0.01 |
1.87 |
1.74 |
1.72 |
0.42 |
1.00 |
| 250m vs 2000m |
Cirratulidae |
0.01 |
0.01 |
1.41 |
1.87 |
1.50 |
0.48 |
1.00 |
| 250m vs 2000m |
Lumbrineridae |
0.02 |
0.02 |
1.04 |
1.68 |
1.10 |
0.54 |
1.00 |
| 250m vs 2000m |
Glyceridae |
0.02 |
0.02 |
1.09 |
0.80 |
0.92 |
0.60 |
0.99 |
| 250m vs 2000m |
Pilargidae |
0.03 |
0.02 |
1.25 |
1.12 |
0.60 |
0.65 |
0.95 |
04 PERMDIST Multivariate homogeneity of groups dispersions
dis <- vegdist(sqrt(sqrt(poli[, -c(1:5)])))
beta_disp <-
betadisper(
dis, poli$Distance
)
## Warning in betadisper(dis, poli$Distance): some squared distances are negative
## and changed to zero
beta_disp
##
## Homogeneity of multivariate dispersions
##
## Call: betadisper(d = dis, group = poli$Distance)
##
## No. of Positive Eigenvalues: 13
## No. of Negative Eigenvalues: 10
##
## Average distance to median:
## 0m 50m 250m 2000m
## 0.1598 0.2135 0.1334 0.2161
##
## Eigenvalues for PCoA axes:
## (Showing 8 of 23 eigenvalues)
## PCoA1 PCoA2 PCoA3 PCoA4 PCoA5 PCoA6 PCoA7 PCoA8
## 2.31922 0.47072 0.31873 0.15357 0.11520 0.09259 0.06986 0.05349
anova(beta_disp)
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.030015 0.0100051 1.1843 0.3408
## Residuals 20 0.168968 0.0084484
permutest(beta_disp, pairwise = TRUE, permutations = 999)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 3 0.030015 0.0100051 1.1843 999 0.327
## Residuals 20 0.168968 0.0084484
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## 0m 50m 250m 2000m
## 0m 0.465000 0.381000 0.086
## 50m 0.457807 0.275000 0.968
## 250m 0.369678 0.267179 0.016
## 2000m 0.103298 0.971048 0.014834
plot(beta_disp, main = 'Multivariate homogeneity of groups dispersions')

05 Univariate diversity indices
indices <-
poli %>%
transmute(N = rowSums(poli[, -c(1:5)]),
H = diversity(poli[, -c(1:5)]),
S = specnumber(poli[, -c(1:5)]),
J = H/log(S)) %>%
bind_cols(poli[, c(1:5)]) %>%
write_csv('outputs/univariate_poli_indices.csv')
boxplots <-
indices %>%
gather(index, value, c("N", "S", "H", "J")) %>%
mutate(index = fct_relevel(index, c("N", "S", "J"))) %>%
ggplot(., aes(x = Distance, y = value, fill = Farm)) +
geom_boxplot(alpha = .8) +
facet_wrap(~index,scales = 'free') +
theme_bw()
boxplots
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

indices_dat <-
indices %>%
mutate(N = log(N + 1)) %>%
gather(index, value, c("N", "S", "H", "J")) %>%
group_by(index) %>%
nest() %>%
mutate(
anova = map(.x = data, ~ aov(value ~ Distance * Farm, data = .x)),
anova_tab = map(anova, ~car::Anova(.)),
anova_table = map(anova_tab, broom::tidy),
residuals = map(anova, broom::augment))
anova_table <-
indices_dat %>%
select(index,anova_table) %>%
unnest()
## Warning: `cols` is now required.
## Please use `cols = c(anova_table)`
kable(anova_table,caption = 'Univariate ANOVAs')
Univariate ANOVAs
| N |
Distance |
23.4024283 |
3 |
60.2330129 |
0.0000000 |
| N |
Farm |
3.8537146 |
1 |
29.7559946 |
0.0000529 |
| N |
Distance:Farm |
0.6126238 |
3 |
1.5767670 |
0.2339831 |
| N |
Residuals |
2.0721685 |
16 |
NA |
NA |
| S |
Distance |
268.3333333 |
3 |
65.0505051 |
0.0000000 |
| S |
Farm |
42.6666667 |
1 |
31.0303030 |
0.0000422 |
| S |
Distance:Farm |
6.3333333 |
3 |
1.5353535 |
0.2438255 |
| S |
Residuals |
22.0000000 |
16 |
NA |
NA |
| H |
Distance |
13.6232937 |
3 |
70.7468043 |
0.0000000 |
| H |
Farm |
0.3450180 |
1 |
5.3751139 |
0.0339899 |
| H |
Distance:Farm |
0.5436682 |
3 |
2.8233106 |
0.0719844 |
| H |
Residuals |
1.0270085 |
16 |
NA |
NA |
| J |
Distance |
1.9539446 |
3 |
62.0474568 |
0.0000000 |
| J |
Farm |
0.0020967 |
1 |
0.1997415 |
0.6613150 |
| J |
Distance:Farm |
0.0719965 |
3 |
2.2862456 |
0.1204108 |
| J |
Residuals |
0.1574557 |
15 |
NA |
NA |
06 Polychaete AMBI
poli_long %>%
mutate(COMPLIANT = is_accepted(taxon = taxa)) %>%
group_by(taxa) %>%
distinct(COMPLIANT) %>%
filter(COMPLIANT==FALSE) # two non-compliant taxa
## # A tibble: 2 x 2
## # Groups: taxa [2]
## COMPLIANT taxa
## <lgl> <chr>
## 1 FALSE Paralacydoniidae
## 2 FALSE Sternaspidae
AMBI_poli <-
poli_long %>%
group_by(SampleID) %>%
nest() %>%
mutate(AMBI = map(data, ~ ambi(taxon = .x$taxa, count = .x$abu))) %>%
select(SampleID,AMBI) %>%
unnest() %>%
right_join(indices, by = "SampleID")
## Warning: `data_frame()` is deprecated as of tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## Warning: `cols` is now required.
## Please use `cols = c(AMBI)`
ggplot(AMBI_poli, aes(x = Distance, y = AMBI, fill = Farm)) +
geom_boxplot(alpha = .8)

AMBI_env <- left_join(AMBI_poli, env, by = "SampleID")
# AMBI enviroment relationships ------------
AMBI_env %>%
gather(key, value, TN:TFS) %>%
ggplot(aes(x = value, y = AMBI)) +
geom_point(alpha = .8, aes(color = Farm), size = 5) +
geom_smooth() +
facet_wrap(~key, scales = 'free')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

07 Correlation among env. variables
env_bact <- read_csv('data/env_bact.csv', col_types = cols())
cors <-
env_bact %>%
select(Sandfine:TOC) %>%
cor(method = "pearson")
corrplot(
cors,
type = "upper",
tl.col = "black",
tl.srt = 45,
diag = F,
method = 'circle')

08 Bacterial biotic index
bact_ambi <- read_csv('data/bMBI_data.csv', col_types = cols())
env_bact <- read_csv('data/env_bact.csv', col_types = cols())
bact_env_ambi <- full_join(bact_ambi, env_bact, by = "SampleID")
bact_env_ambi %>%
gather(key, value, Sandfine:TOC) %>%
ggplot(aes(x = value, y = AMBI_bact)) +
geom_point(alpha = .8, size = 5, aes(color = Farm)) +
geom_smooth(method = 'lm') +
stat_poly_eq(
formula = y ~ x,
aes(label = ..rr.label..),
parse = TRUE,
size = 4
) +
labs(y = "Bacterial biotic index") +
facet_wrap( ~ key, scales = 'free_x')
## `geom_smooth()` using formula 'y ~ x'

09 Relationship between bacterial AMBI and polyquete AMBI
bact_poli_ambi <- left_join(AMBI_poli, bact_ambi, by = "SampleID")
ggplot(bact_poli_ambi, aes(x = AMBI, y = AMBI_bact)) +
geom_point(alpha = .8, size = 5, aes(color = Farm)) +
geom_smooth(method = 'lm') +
stat_poly_eq(
formula = y ~ x,
aes(label = ..rr.label..),
parse = TRUE,
size = 4
) +
labs(x = "Polychaete AMBI" , y = "Bacterial biotic index")
## `geom_smooth()` using formula 'y ~ x'

The sample “Int_2000m_1_c” seems to be an outlier affecting the AMBI relationship, as it has an abundance of 82 Capetilidae at 2000 m Distance. If this point is removed the relationship improves quite a bit.
bact_poli_ambi %>%
filter(SampleID !="Int_2000m_1_c") %>%
ggplot(aes(x = AMBI, y = AMBI_bact)) +
geom_point(alpha = .8, size = 5, aes(color = Farm)) +
geom_smooth(method = 'lm') +
stat_poly_eq(
formula = y ~ x,
aes(label = ..rr.label..),
parse = TRUE,
size = 4
) +
labs(x = "Polychaete AMBI" , y = "Bacterial biotic index")
## `geom_smooth()` using formula 'y ~ x'
