Infaunal Biodiversity Analysis

Galway Bay Native Oyster Restoration Project

Author

Aoife Taylor

Published

April 6, 2026

1 Introduction

This analysis investigates patterns in infaunal biodiversity across three habitat zones within the Galway Bay Native Oyster Restoration Project. Since 2021, scallop shell has been deployed annually within the St George’s Bed, Inner Galway Bay, to provide settlement substrate for the European flat oyster (Ostrea edulis). Three habitat zones were delineated for biodiversity assessment:

  • Zone 1 (Z1): Scallop shell mounds (>20 cm relief; early reef structures)
  • Zone 2 (Z2): Scattered scallop shell
  • Zone 3 (Z3): Outside restoration area (control)

Three replicate cores were collected per zone (n = 9 total). This analysis addresses two research questions:

  1. What is the impact of oyster restoration projects on infaunal biodiversity?
  2. Can amphipod or polychaete biodiversity predict total infaunal biodiversity (Rapid Biodiversity Assessment)?

2 Data Import and Preparation

Show code
# load all packages
library(tidyverse)
library(ggplot2)
library(dplyr)
library(tidyr)
library(vegan)
library(knitr)
library(kableExtra)
library(dunn.test)
library(brglm2)
library(lmtest)
library(ggeffects)
library(patchwork)
library(MASS)
library(DHARMa)
library(ggVennDiagram)
library(conflicted)
conflicts_prefer(dplyr::select)
conflicts_prefer(dplyr::filter)

# import infauna data
data <- read_csv(
  file = "infauna_copy(R sheet).csv",
  locale = locale(encoding = "latin1")
)

# remove empty rows and columns
data <- data[rowSums(is.na(data)) != ncol(data),
             colSums(is.na(data)) != nrow(data)]

# remove taxa too small to be retained on 1mm sieve
data <- data %>%
  filter(!grepl("Oligochaeta|Nematoda sp.1|Nemertea sp.1", species))

# standardise taxonomic names
data <- data %>%
  mutate(
    species = case_when(
      species == "aplysia punctata"        ~ "Aplysia punctata",
      species == "gooddallia triangularis"  ~ "Gooddallia triangularis",
      species == "heteroclymene sp.1"      ~ "Heteroclymene sp.1",
      species == "metaphoxus Fultoni"      ~ "Metaphoxus fultoni",
      species == "syllidae sp.1"           ~ "Syllidae sp.1",
      species == "Golfingia  sp.1"         ~ "Golfingia sp.1",
      species == "Nothria conchylega "     ~ "Nothria conchylega",
      species == "Mediomastua sp.1"        ~ "Mediomastus sp.1",
      TRUE ~ species
    ),
    genus = case_when(
      genus == "aplysia"        ~ "Aplysia",
      genus == "gooddallia"     ~ "Gooddallia",
      genus == "heteroclymene"  ~ "Heteroclymene",
      genus == "urothoe"        ~ "Urothoe",
      genus == "Golfingia "     ~ "Golfingia",
      genus == "Spirobranchus " ~ "Spirobranchus",
      genus == "Mediomastua"    ~ "Mediomastus",
      TRUE ~ genus
    ),
    family = case_when(
      family == "maldanidae"    ~ "Maldanidae",
      family == "tellinidae"    ~ "Tellinidae",
      family == "Serpulidae "   ~ "Serpulidae",
      family == "Golfingiidae " ~ "Golfingiidae",
      family == "Eunicidea"     ~ "Eunicidae",
      family == "Nereidinae"    ~ "Nereididae",
      TRUE ~ family
    ),
    order = case_when(
      order == "amphipod"    ~ "Amphipoda",
      order == "Tanaid"      ~ "Tanaidacea",
      order == "ophiuroids"  ~ "Ophiurida",
      order == "gastropod"   ~ "Gastropoda",
      order == "pycnogonids" ~ "Pycnogonida",
      TRUE ~ order
    ),
    phylum = case_when(
      phylum == "Anthropoda" ~ "Arthropoda",
      TRUE ~ phylum
    )
  )

# zone colours — deliberately different from phylum colours
zone_colours <- c("Zone 1" = "#2166AC", "Zone 2" = "#F4A582", "Zone 3" = "#4DAC26")

# phylum colours
phylum_colours <- c(
  "Annelida"      = "#E8732A",
  "Arthropoda"    = "#9B59B6",
  "Mollusca"      = "#1ABC9C",
  "Echinodermata" = "#89CFF0"
)

# zone colours for Z1/Z2/Z3 coding (used in plots before factor relabelling)
zone_colours_z <- c("Z1" = "#2166AC", "Z2" = "#F4A582", "Z3" = "#4DAC26")

# create long format data with zone and sample labels (9 samples)
sample_long <- data %>%
  pivot_longer(
    cols = starts_with("INFA"),
    names_to = "sample",
    values_to = "abundance"
  ) %>%
  mutate(zone = str_extract(sample, "Z[0-9]"))

# per-sample abundance summary (n = 9)
sample_abundance <- sample_long %>%
  group_by(sample, zone) %>%
  summarise(total_abundance = sum(abundance), .groups = "drop")

# total diversity metrics per sample
total_div <- sample_long %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0) %>%
  mutate(
    total_sp_richness = specnumber(dplyr::select(., -sample, -zone)),
    total_shannon     = diversity(as.matrix(dplyr::select(., -sample, -zone)),
                                  index = "shannon"),
    total_abundance   = rowSums(dplyr::select(., -sample, -zone))
  ) %>%
  dplyr::select(sample, zone, total_sp_richness, total_shannon, total_abundance)

# zone-level species matrix for Shapiro-Wilk tests
zone_data <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample_type", values_to = "value") %>%
  mutate(zone = case_when(
    grepl("INFA_Z1", sample_type) ~ "zone1",
    grepl("INFA_Z2", sample_type) ~ "zone2",
    grepl("INFA_Z3", sample_type) ~ "zone3"
  )) %>%
  group_by(phylum, order, family, genus, species, zone) %>%
  summarise(total_value = sum(value), .groups = "drop") %>%
  pivot_wider(names_from = zone, values_from = total_value)

# per-sample phylum summary
sample_phylum <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(
    zone      = str_extract(sample, "Z[0-9]"),
    replicate = str_extract(sample, "[0-9][A-Z]$"),
    zone      = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")),
    phylum    = factor(phylum, levels = c("Annelida", "Arthropoda",
                                          "Mollusca", "Echinodermata"))
  ) %>%
  group_by(sample, zone, replicate, phylum) %>%
  summarise(total = sum(abundance), .groups = "drop")

# species list per zone
species_list <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]"),
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3"))) %>%
  group_by(zone, species) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  filter(total > 0) %>%
  mutate(phylum = data$phylum[match(species, data$species)],
         species = fct_reorder(species, total))

# phylum-level abundance for Shannon/Simpson phylum analyses
phylum_abund <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  group_by(sample, phylum) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = phylum, values_from = abundance, values_fill = 0)

phylum_matrix <- phylum_abund %>%
  dplyr::select(-sample) %>%
  as.matrix()

# import and clean core depth data
core <- read_csv("infauna_copy(cores).csv") %>%
  dplyr::select("SiteID", "EstCoreDepthmm", "VolumeLtr", "species total", "abundance")
core <- core[rowSums(is.na(core)) != ncol(core),
             colSums(is.na(core)) != nrow(core)]
core <- core[-11, ]

core_clean <- core %>%
  dplyr::select(SiteID, EstCoreDepthmm) %>%
  filter(!is.na(SiteID)) %>%
  mutate(EstCoreDepthmm = case_when(
    SiteID == "INFA_Z1_3A" ~ "10",
    TRUE ~ EstCoreDepthmm
  )) %>%
  filter(!grepl("N/A", EstCoreDepthmm)) %>%
  mutate(sample = SiteID, core_depth = as.numeric(EstCoreDepthmm)) %>%
  dplyr::select(sample, core_depth)

div_depth <- total_div %>% left_join(core_clean, by = "sample")

# helper functions for regression tables
get_lm_p  <- function(m) summary(m)$coefficients[2, 4]
get_lm_r2 <- function(m) round(summary(m)$r.squared, 3)
get_lm_f  <- function(m) round(summary(m)$fstatistic[1], 2)

3 Sampling Assessment

3.1 Species Accumulation Curves

Species accumulation curves were constructed using the specaccum function (method = “exact”, vegan package), which calculates the mathematically expected cumulative species richness averaged across all possible orderings of the samples within each zone. The shaded ribbon represents ± 1 standard deviation.

Show code
species_matrix <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(zone, sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

zones <- c("Z1", "Z2", "Z3")
accum_list <- lapply(zones, function(z) {
  mat <- species_matrix %>%
    filter(zone == z) %>%
    dplyr::select(-zone, -sample) %>%
    as.matrix()
  sp <- specaccum(mat, method = "exact")
  data.frame(zone = z, samples = sp$sites, richness = sp$richness, sd = sp$sd)
})
Warning in cor(x > 0): the standard deviation is zero
Warning in cor(x > 0): the standard deviation is zero
Warning in cor(x > 0): the standard deviation is zero
Show code
accum_df <- bind_rows(accum_list)

ggplot(accum_df, aes(x = samples, y = richness, colour = zone, fill = zone)) +
  geom_ribbon(aes(ymin = richness - sd, ymax = richness + sd),
              alpha = 0.15, colour = NA) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:3) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  scale_fill_manual(values = zone_colours_z,
                    labels = c("Zone 1", "Zone 2", "Zone 3")) +
  labs(x = "Number of Samples", y = "Cumulative Species Richness",
       colour = "Zone", fill = "Zone") +
  theme_classic() +
  theme(axis.text = element_text(size = 9, colour = "black"),
        axis.title = element_text(size = 10),
        legend.title = element_text(face = "bold", size = 10),
        legend.text = element_text(size = 9),
        panel.grid = element_blank())

Figure 1: Cumulative species richness plotted against number of samples per zone. Lines show mean expected richness; shaded ribbons show ± 1 standard deviation across all possible sample orderings. Zone 1 = shell mounds; Zone 2 = scattered shell; Zone 3 = control.

Results: All three curves show a continuously rising trajectory with no sign of levelling off at three samples, indicating that species richness had not reached an asymptote by the end of sampling. Zone 3 (control) showed the highest cumulative richness and widest confidence ribbon, indicating greater variability in species composition between samples in the control zone. These results indicate that more sampling would likely detect additional species in all zones and this is acknowledged as a limitation of the current sampling effort.


4 Research Question 1: Impact of Oyster Restoration on Infaunal Biodiversity

4.1 Normality Testing

Shapiro-Wilk tests were applied to species-level abundance data within each zone to determine whether parametric or non-parametric tests were appropriate.

Show code
shapiro_z1 <- shapiro.test(zone_data$zone1)
shapiro_z2 <- shapiro.test(zone_data$zone2)
shapiro_z3 <- shapiro.test(zone_data$zone3)

normality_table <- data.frame(
  Zone    = c("Zone 1", "Zone 2", "Zone 3"),
  W       = round(c(shapiro_z1$statistic, shapiro_z2$statistic,
                    shapiro_z3$statistic), 4),
  p_value = formatC(c(shapiro_z1$p.value, shapiro_z2$p.value,
                      shapiro_z3$p.value), format = "e", digits = 3)
)

kable(normality_table,
      col.names = c("Zone", "W statistic", "p-value"),
      caption = "Table 1. Shapiro-Wilk tests for normality of species-level abundance data per zone.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"), full_width = FALSE)
Table 1. Shapiro-Wilk tests for normality of species-level abundance data per zone.
Zone W statistic p-value
Zone 1 0.3884 9.153e-16
Zone 2 0.3347 2.044e-16
Zone 3 0.4729 1.186e-14

Results: Shapiro-Wilk tests revealed significant departures from normality in all three zones (p < 0.001). All subsequent statistical tests therefore used non-parametric methods.


4.2 Total Abundance per Core

Total abundance was calculated as the sum of all individuals per core sample. Each of the nine cores is treated as one replicate (three per zone). The Kruskal-Wallis test was applied to per-sample totals (n = 9) as the correct unit of replication, avoiding pseudoreplication.

Show code
kw_abund_sample <- kruskal.test(total_abundance ~ zone, data = sample_abundance)

dunn_abund_sample <- dunn.test(sample_abundance$total_abundance,
                                sample_abundance$zone,
                                method = "bonferroni")
  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 6.8796, df = 2, p-value = 0.03

                   Dunn's Pairwise Comparison of x by group                   
                                 (Bonferroni)                                 
Col Mean-│
Row Mean │         Z1         Z2
─────────┼──────────────────────
      Z2 │  -1.197569
         │     0.3466 
         │
      Z3 │  -2.619684  -1.422114
         │     0.0132*    0.2325 

α = 0.05
Reject Ho if p ≤ α/2, where p = Pr(Z ≥ |z|)
Show code
sig_labels <- data.frame(
  zone  = c("Z1", "Z2", "Z3"),
  label = c("a", "ab", "b"),
  y_pos = rep(max(sample_abundance$total_abundance) * 1.1, 3)
)

ggplot(sample_abundance, aes(x = zone, y = total_abundance, fill = zone)) +
  geom_boxplot(outlier.shape = 19, outlier.size = 1.5) +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  geom_text(data = sig_labels,
            aes(x = zone, y = y_pos, label = label),
            inherit.aes = FALSE, size = 5, fontface = "bold") +
  scale_fill_manual(values = zone_colours_z) +
  scale_x_discrete(labels = c("Z1" = "Zone 1", "Z2" = "Zone 2", "Z3" = "Zone 3")) +
  labs(x = "Zone", y = "Total Abundance per Core") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11),
        plot.margin = margin(t = 20, r = 10, b = 10, l = 10))

Figure 2: Total abundance of all infaunal individuals per core sample across zones (n = 3 per zone). Each dot represents one core. Letters indicate statistically homogeneous groups based on Dunn’s post-hoc test with Bonferroni correction (a = Zone 1; ab = Zone 2; b = Zone 3).

Results: A Kruskal-Wallis test revealed a significant difference in total abundance across zones (H = 6.88, df = 2, p = 0.032). Dunn’s post-hoc test with Bonferroni correction indicated that Zone 3 (control) differed significantly from Zone 1 (p = 0.013). Zone 2 was not significantly different from either Zone 1 (p = 0.347) or Zone 3 (p = 0.233), indicating an intermediate pattern. Total abundance was highest in Zone 3 and lowest in Zone 1.


4.3 Generalised Linear Models: Abundance and Species Richness

A Poisson GLM was initially fitted and assessed for overdispersion using DHARMa simulated residual diagnostics. Significant overdispersion was detected, a Negative Binomial GLM was therefore fitted as it includes an additional dispersion parameter accommodating overdispersed count data.

Show code
sample_div <- total_div %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))
# overdispersion check
 # fit Poisson first then test
glm_poisson <- glm(total_abundance ~ zone, 
                   data = sample_div, family = poisson)
glm_poisson

Call:  glm(formula = total_abundance ~ zone, family = poisson, data = sample_div)

Coefficients:
(Intercept)       zoneZ1       zoneZ2  
      4.685       -1.706       -1.030  

Degrees of Freedom: 8 Total (i.e. Null);  6 Residual
Null Deviance:      252.3 
Residual Deviance: 24.78    AIC: 81
Show code
simulateResiduals(glm_poisson, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.9829763 0.04791643 0.4901769 0.03824786 0.3542827 0.9817204 0.9503528 0.7571552 0.006356984
Show code
testDispersion(glm_poisson)


    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 3.3357, p-value = 0.016
alternative hypothesis: two.sided

Results: A Poisson GLM confirmed overdispersion (dispersion test: p = 0.016) with residual deviance (24.78) approximately four times greater than the residual degrees of freedom (6), confirming violation of the Poisson equal mean-variance assumption.

4.3.1 GLM 1: Total Abundance by Zone

Show code
sample_div <- total_div %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))

glm_abund <- glm.nb(total_abundance ~ zone, data = sample_div)
summary(glm_abund)

Call:
glm.nb(formula = total_abundance ~ zone, data = sample_div, init.theta = 29.30877834, 
    link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   4.6852     0.1202  38.976  < 2e-16 ***
zoneZ1       -1.7063     0.2068  -8.250  < 2e-16 ***
zoneZ2       -1.0302     0.1856  -5.551 2.84e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(29.3088) family taken to be 1)

    Null deviance: 89.720  on 8  degrees of freedom
Residual deviance:  9.585  on 6  degrees of freedom
AIC: 76.451

Number of Fisher Scoring iterations: 1

              Theta:  29.3 
          Std. Err.:  24.2 

 2 x log-likelihood:  -68.451 
Show code
simulateResiduals(glm_abund, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.8972407 0.07831351 0.4332401 0.08440482 0.3987502 0.9058376 0.8257076 0.6615099 0.123393
Show code
pred_abund <- ggpredict(glm_abund, terms = "zone") %>%
  as.data.frame() %>%
  mutate(x = factor(x, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")))

ggplot(pred_abund, aes(x = x, y = predicted, colour = x)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  labs(x = "Zone", y = "Predicted Total Abundance") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 3: DHARMa simulated residual diagnostics (upper) and model-predicted total abundance per zone with 95% confidence intervals (lower) from a Negative Binomial GLM. Zone 3 is the reference category.

Results: A Negative Binomial GLM confirmed that total abundance differed significantly across zones. Zone 1 had significantly lower abundance than Zone 3 (β = -1.706, z = -8.250, p < 0.001), with predicted abundance approximately 18% of Zone 3 (e^-1.706 = 0.18), corresponding to approximately 19 individuals per core compared to 108 in Zone 3. Zone 2 also had significantly lower abundance (β = -1.030, z = -5.551, p < 0.001), with predicted abundance approximately 36% of Zone 3. Model assumptions were confirmed using DHARMa diagnostics (KS p = 0.749; dispersion p = 0.752; outlier p = 1.000).

4.3.2 GLM 2: Species Richness by Zone

Show code
glm_rich <- glm.nb(total_sp_richness ~ zone, data = sample_div)
summary(glm_rich)

Call:
glm.nb(formula = total_sp_richness ~ zone, data = sample_div, 
    init.theta = 180.0094501, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   3.3673     0.1155  29.148  < 2e-16 ***
zoneZ1       -1.1337     0.2256  -5.024 5.05e-07 ***
zoneZ2       -0.6373     0.1922  -3.316 0.000914 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(180.0095) family taken to be 1)

    Null deviance: 38.4836  on 8  degrees of freedom
Residual deviance:  8.0991  on 6  degrees of freedom
AIC: 58.327

Number of Fisher Scoring iterations: 1

              Theta:  180 
          Std. Err.:  855 

 2 x log-likelihood:  -50.327 
Show code
simulateResiduals(glm_rich, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.7017479 0.2649475 0.6477857 0.3480546 0.156752 0.9545328 0.6808527 0.855776 0.05934819
Show code
pred_rich <- ggpredict(glm_rich, terms = "zone") %>%
  as.data.frame() %>%
  mutate(x = factor(x, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")))

ggplot(pred_rich, aes(x = x, y = predicted, colour = x)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  labs(x = "Zone", y = "Predicted Species Richness") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 4: DHARMa simulated residual diagnostics (upper) and model-predicted species richness per zone with 95% confidence intervals (lower) from a Negative Binomial GLM. Zone 3 is the reference category.

Results: A Negative Binomial GLM confirmed that species richness differed significantly across zones. Zone 3 had a predicted mean richness of e^3.367 = 29 species per core. Zone 1 had significantly lower richness (β = -1.134, z = -5.024, p < 0.001), at approximately 9 species per core. Zone 2 also had significantly lower richness (β = -0.637, z = -3.316, p < 0.001), at approximately 15 species per core. Notably, the GLM detected significant differences for both Zone 1 and Zone 2 relative to Zone 3, whereas the Kruskal-Wallis only detected a significant difference for Zone 1 vs Zone 3, demonstrating greater sensitivity of the parametric approach. Model assumptions were confirmed using DHARMa diagnostics (KS p = 0.782; dispersion p = 0.584; outlier p = 1.000).


4.4 Diversity Indices

4.4.1 Shannon Diversity Index: Species Level

Shannon diversity (H’) was calculated per core sample at species level. Shannon diversity accounts for both species richness and evenness.

Show code
abund <- dplyr::select(data, contains("INFA"))
shannon <- diversity(t(as.matrix(abund)), index = "shannon")

shannon_df <- data.frame(Sample = colnames(abund), Shannon = shannon) %>%
  mutate(Zone = sub("INFA_(Z\\d+)_.*", "\\1", Sample),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_shannon <- kruskal.test(Shannon ~ Zone, data = shannon_df)

ggplot(shannon_df, aes(x = Zone, y = Shannon, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(shannon_df$Shannon) * 1.05,
           label = paste0("ns (p = ", round(kw_shannon$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Shannon Index (H') — Species Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 5: Shannon diversity index (H’) calculated at species level per core sample (n = 3 per zone). Each dot represents one core. ns = not significant.

Results: A Kruskal-Wallis test revealed no significant difference in Shannon diversity at species level across zones (H = 1.87, df = 2, p = 0.39). Post-hoc testing was not performed as the overall test was non-significant. Species-level diversity was comparable across all three zones despite significant differences in total abundance.


4.4.2 Shannon Diversity Index: Phylum Level

Shannon diversity was additionally calculated at phylum level per core sample, measuring the evenness of individual distribution across the four phyla (Annelida, Arthropoda, Mollusca, Echinodermata).

Show code
shannon_phylum <- diversity(phylum_matrix, index = "shannon")


shannon_phylum_df <- data.frame(
  Sample = phylum_abund$sample,
  Shannon = shannon_phylum
) %>%
  mutate(Zone = str_extract(Sample, "Z[0-9]"),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_shannon_phylum <- kruskal.test(Shannon ~ Zone, data = shannon_phylum_df)

ggplot(shannon_phylum_df, aes(x = Zone, y = Shannon, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(shannon_phylum_df$Shannon) * 1.05,
           label = paste0("ns (p = ", round(kw_shannon_phylum$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Shannon Index (H') — Phylum Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 6: Shannon diversity index (H’) calculated at phylum level per core sample (n = 3 per zone). Each dot represents one core. Shannon diversity at phylum level measures how evenly individuals are distributed across the four phyla within each sample. ns = not significant.

Results: Shannon diversity calculated at phylum level did not differ significantly across zones (H = 1.69, df = 2, p = 0.43). Post-hoc testing was not performed. The evenness of individual distribution across phyla was comparable across all three habitat zones, consistent with the species-level Shannon result.


4.4.3 Simpson Diversity Index: Species Level

Simpson diversity (1-D) was calculated per core sample at species level. The Simpson index measures the probability that two randomly selected individuals belong to different species.

Show code
simpson <- diversity(t(as.matrix(abund)), index = "simpson")

simpson_df <- data.frame(Sample = colnames(abund), Simpson = simpson) %>%
  mutate(Zone = sub("INFA_(Z\\d+)_.*", "\\1", Sample),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_simpson <- kruskal.test(Simpson ~ Zone, data = simpson_df)

ggplot(simpson_df, aes(x = Zone, y = Simpson, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(simpson_df$Simpson) * 1.05,
           label = paste0("ns (p = ", round(kw_simpson$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Simpson Index (1-D) — Species Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 7: Simpson diversity index (1-D) calculated at species level per core sample (n = 3 per zone). Each dot represents one core. ns = not significant.

Results: A Kruskal-Wallis test revealed no significant difference in Simpson diversity at species level across zones (H = 1.16, df = 2, p = 0.56). Post-hoc testing was not performed. Community evenness was comparable across all three zones, consistent with the Shannon diversity result.


4.4.4 Simpson Diversity Index: Phylum Level

Simpson diversity was additionally calculated at phylum level, measuring the probability that two randomly selected individuals belong to different phyla.

Show code
simpson_phylum <- diversity(phylum_matrix, index = "simpson")

simpson_phylum_df <- data.frame(
  Sample = phylum_abund$sample,
  Simpson = simpson_phylum
) %>%
  mutate(Zone = str_extract(Sample, "Z[0-9]"),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_simpson_phylum <- kruskal.test(Simpson ~ Zone, data = simpson_phylum_df)

ggplot(simpson_phylum_df, aes(x = Zone, y = Simpson, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(simpson_phylum_df$Simpson) + 0.02,
           label = paste0("ns (p = ", round(kw_simpson_phylum$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Simpson Index (1-D) — Phylum Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 8: Simpson diversity index (1-D) calculated at phylum level per core sample (n = 3 per zone). Each dot represents one core. The Simpson index at phylum level measures the probability that two randomly selected individuals belong to different phyla. ns = not significant.

Results: Simpson diversity calculated at phylum level did not differ significantly across zones (H = 3.47, df = 2, p = 0.18). Post-hoc testing was not performed. Taken together with the species-level results, all four diversity index analyses confirm that community evenness did not differ significantly between zones at either species or phylum level.

Show code
# one-way ANOVA for phylum-level Shannon diversity
anova_shannon_phylum <- aov(Shannon ~ Zone, data = shannon_phylum_df)
summary(anova_shannon_phylum)
            Df Sum Sq Mean Sq F value Pr(>F)
Zone         2 0.1679 0.08393   0.998  0.423
Residuals    6 0.5047 0.08412               
Show code
# one-way ANOVA for phylum-level Simpson diversity
anova_simpson_phylum <- aov(Simpson ~ Zone, data = simpson_phylum_df)
summary(anova_simpson_phylum)
            Df  Sum Sq Mean Sq F value Pr(>F)
Zone         2 0.06485 0.03243   1.013  0.418
Residuals    6 0.19215 0.03203               

4.5 Phylum Composition: Descriptive

4.5.1 Relative Abundance by Sample and Zone

Show code
phylum_rel <- sample_long %>%
  group_by(sample, zone = str_extract(sample, "Z[0-9]"),
           replicate = str_extract(sample, "[0-9][A-Z]$"), phylum) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  group_by(sample) %>%
  mutate(rel_abundance = total / sum(total) * 100) %>%
  ungroup() %>%
  mutate(
    zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                  labels = c("Zone 1", "Zone 2", "Zone 3")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

ggplot(phylum_rel, aes(x = replicate, y = rel_abundance, fill = phylum)) +
  geom_col(position = "stack", width = 1, colour = "black", linewidth = 0.2) +
  facet_wrap(~ zone, nrow = 1, scales = "free_x") +
  scale_fill_manual(values = phylum_colours) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Relative Abundance (%)", fill = "Phylum") +
  theme_bw() +
  theme(strip.text       = element_text(face = "bold", size = 11),
        strip.background = element_rect(fill = "grey85", colour = NA),
        axis.text        = element_text(size = 9, colour = "black"),
        axis.title.y     = element_text(size = 10),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
      legend.position = "bottom",
        panel.grid       = element_blank()) 

Figure 9: Relative abundance (%) of each phylum per core sample grouped by zone. Each bar represents one core. Note that relative abundance may exaggerate the visual prominence of rare phyla in low-abundance samples.

4.5.2 Total Abundance by Phylum per Sample

Show code
ggplot(sample_phylum, aes(x = replicate, y = total, fill = phylum)) +
  geom_col(position = "stack", width = 1, colour = "black", linewidth = 0.2) +
  facet_wrap(~ zone, nrow = 1, scales = "free_x") +
  scale_fill_manual(values = phylum_colours) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Total Abundance", fill = "Phylum") +
  theme_bw() +
  theme(strip.text       = element_text(face = "bold", size = 11),
        strip.background = element_rect(fill = "grey85", colour = NA),
        axis.text        = element_text(size = 9, colour = "black"),
        axis.title.y     = element_text(size = 10),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
         legend.position = "bottom",
        panel.grid       = element_blank())

Figure 10: Total abundance of individuals per phylum per core sample grouped by zone. Each bar represents one core. Raw abundance is shown to avoid distortion from relative percentage calculations.


4.5.3 Species List by Zone

Show code
ggplot(species_list, aes(x = total, y = species, fill = phylum)) +
  geom_col() +
  facet_wrap(~ zone, ncol = 3, scales = "free_y") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = phylum_colours) +
  labs(x = "Total Abundance", y = "Species", fill = "Phylum") +
  theme_bw() +
  theme(axis.text.y = element_text(size = 10, face = "italic"),
        axis.text.x        = element_text(size = 8),
        axis.title         = element_text(size = 10),
        strip.text         = element_text(face = "bold", size = 12),
        strip.background   = element_rect(fill = "grey85", colour = NA),
        legend.title       = element_text(face = "bold", size = 10),
        legend.text        = element_text(face = "italic", size = 9),
         legend.position = "bottom",
        panel.grid.major.y = element_blank(),
        panel.grid.minor   = element_blank())

Show code
# increase axis text size
axis.text.y = element_text(size = 8, face = "italic")

######uipdatee 

#| fig-width: 8.27
#| fig-height: 11.69
#| out-width: "100%"
#| out-height: "100%"

ggplot(species_list, aes(x = total, y = species, fill = phylum)) +
  geom_col(width = 0.9) + 
  facet_wrap(~ zone, ncol = 3, scales = "free_y") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = phylum_colours) +
  labs(x = "Total Abundance", y = "Species", fill = "Phylum") +
  theme_bw() +
  theme(
    axis.text.y      = element_text(size = 9, face = "italic"),
    axis.text.x      = element_text(size = 10),
    axis.title       = element_text(size = 13, face = "bold"),
    strip.text       = element_text(face = "bold", size = 15),
    strip.background = element_rect(fill = "grey85", colour = NA),
    legend.title     = element_text(face = "bold", size = 11),
    legend.text      = element_text(face = "italic", size = 10),
    legend.position  = "bottom",
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank()
  )

Figure 11: Total abundance of each species per zone, ordered by total abundance within zone. Bars are coloured by phylum. Species absent from a zone are not shown in that panel.


4.5.4 Species Unique to Each Zone

Show code
unique_species <- species_list %>%
  group_by(species) %>%
  summarise(zones_present = n_distinct(zone),
            zone   = paste(zone[total > 0], collapse = ", "),
            total  = sum(total),
            phylum = first(phylum)) %>%
  filter(zones_present == 1) %>%
  arrange(zone, desc(total))

unique_species %>%
  dplyr::select(zone, species, phylum, total) %>%
  arrange(zone, desc(total)) %>%
  rename(Zone = zone, Species = species,
         Phylum = phylum, `Total Abundance` = total) %>%
  kable(caption = "Table 3. Species recorded exclusively in a single habitat zone, ordered by total abundance within zone.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(2, italic = TRUE) %>%
  pack_rows("Zone 1", 1, sum(unique_species$zone == "Zone 1")) %>%
  pack_rows("Zone 2", sum(unique_species$zone == "Zone 1") + 1,
            sum(unique_species$zone %in% c("Zone 1", "Zone 2"))) %>%
  pack_rows("Zone 3", sum(unique_species$zone %in% c("Zone 1", "Zone 2")) + 1,
            nrow(unique_species))
Table 3. Species recorded exclusively in a single habitat zone, ordered by total abundance within zone.
Zone Species Phylum Total Abundance
Zone 1
Zone 1 Microdeutopus damnoniensis Arthropoda 1
Zone 1 Scalibregma inflatum Annelida 1
Zone 2
Zone 2 Mediomastus sp.1 Annelida 4
Zone 2 Dorvilleia sp.1 Annelida 2
Zone 2 Kurtiella bidentata Mollusca 2
Zone 2 Urothoe marina Arthropoda 2
Zone 2 Gari Tellinella Mollusca 1
Zone 2 Golfingia  sp.1 Annelida 1
Zone 2 Hediste diversicolor Annelida 1
Zone 2 Heteroclymene sp.1 Annelida 1
Zone 2 Lucinoma borealis Mollusca 1
Zone 2 Maldanidae sp.1 Annelida 1
Zone 2 Nothria conchylega  Annelida 1
Zone 2 Syllidae sp.1 Annelida 1
Zone 3
Zone 3 Microdeutopus versiculatus Arthropoda 8
Zone 3 Platynereis dumerilii Annelida 5
Zone 3 Dexaminidae spinosa Arthropoda 4
Zone 3 Socarnes erythropthalmus Arthropoda 4
Zone 3 Eurysyllis tuberculata Annelida 3
Zone 3 Leptocheirus pilosus Arthropoda 3
Zone 3 Achelia echinata Arthropoda 2
Zone 3 Autolytus sp.1 Annelida 2
Zone 3 Gooddallia triangularis Mollusca 2
Zone 3 Harmothoe sp.2 Annelida 2
Zone 3 Leptochiton cancellatus Mollusca 2
Zone 3 Metaphoxus pectinatus Arthropoda 2
Zone 3 Pariambus typicus Arthropoda 2
Zone 3 Tanaidacea sp.3 Arthropoda 2
Zone 3 Terebellidae sp.2 Annelida 2
Zone 3 Aplysia punctata Mollusca 1
Zone 3 Chondrochelia savignyi Arthropoda 1
Zone 3 Dexaminidae thea Arthropoda 1
Zone 3 Eteone flava Annelida 1
Zone 3 Fabricia sp.1 Annelida 1
Zone 3 Harpinia crenulata Arthropoda 1
Zone 3 Maldanidae sp.2 Annelida 1
Zone 3 Modiolus modiolus Mollusca 1
Zone 3 Nephtys homburgi Annelida 1
Zone 3 Nicomache sp.1 Annelida 1
Zone 3 Odontosyllis gibba Annelida 1
Zone 3 Sabellidae sp.2 Annelida 1
Zone 3 Tellinidae sp.1 Mollusca 1
Zone 3 Tricolia pullus Mollusca 1

Results: A number of species were recorded exclusively within a single zone. Zone 3 (control) contained the greatest number of zone-specific species, consistent with the higher overall species richness in the control.


4.5.5 Venn Diagram of Shared Species

Show code
z1_species <- species_list %>% filter(zone == "Zone 1", total > 0) %>%
  pull(species) %>% as.character()
z2_species <- species_list %>% filter(zone == "Zone 2", total > 0) %>%
  pull(species) %>% as.character()
z3_species <- species_list %>% filter(zone == "Zone 3", total > 0) %>%
  pull(species) %>% as.character()

ggVennDiagram(
  list("Zone 1" = z1_species, "Zone 2" = z2_species, "Zone 3" = z3_species),
  label_alpha = 0
) +
  scale_fill_gradient(low = "white", high = "#2166AC") +
  labs(fill = "Species Count") +
  theme(legend.title = element_text(face = "bold", size = 10))

Figure 12: Venn diagram showing the number of species shared between and unique to each habitat zone. Numbers in overlapping regions indicate shared species; numbers in non-overlapping regions indicate zone-specific species.


4.6 Community Composition:

4.7 PERMANOVA

A PERMANOVA (adonis2, vegan package) was used to test whether species composition differed significantly across habitat zones, using Bray-Curtis dissimilarity with 999 permutations. Homogeneity of multivariate dispersion was verified using betadisper prior to interpreting the PERMANOVA result.

Show code
perm_matrix <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

zone_vector <- perm_matrix$zone
species_mat <- perm_matrix %>%
  dplyr::select(-sample, -zone) %>%
  as.matrix()

permanova <- adonis2(species_mat ~ zone_vector, method = "bray", permutations = 999)
print(permanova)
Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = species_mat ~ zone_vector, permutations = 999, method = "bray")
         Df SumOfSqs      R2      F Pr(>F)   
Model     2  0.83489 0.37512 1.8009  0.006 **
Residual  6  1.39079 0.62488                 
Total     8  2.22568 1.00000                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Show code
dist_matrix <- vegdist(species_mat, method = "bray")
dispersion  <- betadisper(dist_matrix, zone_vector)
permutest(dispersion)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     2 0.010412 0.0052057 1.6236    999  0.279
Residuals  6 0.019238 0.0032063                     
Show code
## tab 1
permanova_table <- data.frame(
  Source = c("Zone", "Residual", "Total", "betadisper: Zone", "betadisper: Residual"),
  Df     = c(2, 6, 8, 2, 6),
  SS     = c(0.835, 1.391, 2.226, 0.010, 0.019),
  R2     = c(0.375, 0.625, 1.000, "—", "—"),
  F      = c(1.801, "—", "—", 1.624, "—"),
  p      = c("0.006 **", "—", "—", "0.272 ns", "—")
)

kable(permanova_table,
      col.names = c("Source", "df", "SS", "R²", "F", "p-value"),
      caption = "Table X. PERMANOVA results for infaunal species composition across habitat zones based on Bray-Curtis dissimilarity with 999 permutations. betadisper tests homogeneity of multivariate dispersion. SS = sum of squares; R² = proportion of variance explained; ns = not significant.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(3, bold = TRUE) %>%
  pack_rows("PERMANOVA", 1, 3) %>%
  pack_rows("betadisper", 4, 5)
Table X. PERMANOVA results for infaunal species composition across habitat zones based on Bray-Curtis dissimilarity with 999 permutations. betadisper tests homogeneity of multivariate dispersion. SS = sum of squares; R² = proportion of variance explained; ns = not significant.
Source df SS F p-value
PERMANOVA
Zone 2 0.835 0.375 1.801 0.006 **
Residual 6 1.391 0.625
Total 8 2.226 1
betadisper
betadisper: Zone 2 0.010 1.624 0.272 ns
betadisper: Residual 6 0.019
Show code
## tab 2 

permanova_table <- data.frame(
  Test       = c("PERMANOVA", "betadisper"),
  F          = c(1.80, 1.62),
  R2         = c(0.375, "—"),
  p          = c("0.006 **", "0.272 ns")
)

kable(permanova_table,
      col.names = c("Test", "F", "R²", "p-value"),
      caption = "Table X. PERMANOVA and betadisper results for infaunal species composition across habitat zones. PERMANOVA based on Bray-Curtis dissimilarity with 999 permutations.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE)
Table X. PERMANOVA and betadisper results for infaunal species composition across habitat zones. PERMANOVA based on Bray-Curtis dissimilarity with 999 permutations.
Test F p-value
PERMANOVA 1.80 0.375 0.006 **
betadisper 1.62 0.272 ns

Results: PERMANOVA revealed that species composition differed significantly across habitat zones (F = 1.80, R² = 0.375, p = 0.009). Zone explains 37.5% of the variation in species composition, with the remaining 62.5% attributable to within-zone variability between samples. Homogeneity of multivariate dispersion was confirmed (p = 0.287), indicating that the PERMANOVA result reflects genuine differences in community composition rather than differences in within-group variability.

5 nMDS

Show code
# calculate nMDS ordination
set.seed(123)
nmds <- metaMDS(species_mat, distance = "bray", k = 2, trymax = 100)
Wisconsin double standardization
Run 0 stress 0.1296653 
Run 1 stress 0.1296653 
... Procrustes: rmse 2.57544e-05  max resid 5.062835e-05 
... Similar to previous best
Run 2 stress 0.1296653 
... Procrustes: rmse 2.046149e-06  max resid 3.662313e-06 
... Similar to previous best
Run 3 stress 0.1296654 
... Procrustes: rmse 5.769671e-05  max resid 0.0001188607 
... Similar to previous best
Run 4 stress 0.1296653 
... Procrustes: rmse 9.324728e-06  max resid 1.922477e-05 
... Similar to previous best
Run 5 stress 0.1296653 
... Procrustes: rmse 5.86126e-06  max resid 9.805432e-06 
... Similar to previous best
Run 6 stress 0.1783099 
Run 7 stress 0.1577492 
Run 8 stress 0.1296653 
... Procrustes: rmse 1.137076e-06  max resid 2.109404e-06 
... Similar to previous best
Run 9 stress 0.1822321 
Run 10 stress 0.1296653 
... Procrustes: rmse 3.072587e-06  max resid 6.178088e-06 
... Similar to previous best
Run 11 stress 0.1662196 
Run 12 stress 0.1577492 
Run 13 stress 0.1789585 
Run 14 stress 0.1662196 
Run 15 stress 0.2429611 
Run 16 stress 0.1296653 
... Procrustes: rmse 1.055402e-06  max resid 1.571138e-06 
... Similar to previous best
Run 17 stress 0.1710196 
Run 18 stress 0.1296653 
... Procrustes: rmse 7.435892e-06  max resid 1.457155e-05 
... Similar to previous best
Run 19 stress 0.3209238 
Run 20 stress 0.1296653 
... Procrustes: rmse 1.338029e-06  max resid 2.291495e-06 
... Similar to previous best
*** Best solution repeated 10 times
Show code
# extract scores and add zone labels
nmds_scores <- as.data.frame(scores(nmds, display = "sites")) %>%
  mutate(zone = zone_vector,
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

# calculate zone centroids for labels
centroids <- nmds_scores %>%
  group_by(zone) %>%
  summarise(NMDS1 = mean(NMDS1), NMDS2 = mean(NMDS2))

# plot
ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_point(size = 4, alpha = 0.8) +
  stat_ellipse(aes(group = zone), type = "t", level = 0.95,
               linetype = "dashed", linewidth = 0.8, alpha = 0.3,
               geom = "polygon", fill = NA) +
  geom_label(data = centroids, aes(label = zone),
             fontface = "bold", size = 3.5, alpha = 0.8) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Show code
############

library(ggforce)

#| fig-width: 7
#| fig-height: 6

set.seed(123)
nmds <- metaMDS(species_mat, distance = "bray", k = 2, trymax = 100)
Wisconsin double standardization
Run 0 stress 0.1296653 
Run 1 stress 0.1296653 
... Procrustes: rmse 2.57544e-05  max resid 5.062835e-05 
... Similar to previous best
Run 2 stress 0.1296653 
... Procrustes: rmse 2.046149e-06  max resid 3.662313e-06 
... Similar to previous best
Run 3 stress 0.1296654 
... Procrustes: rmse 5.769671e-05  max resid 0.0001188607 
... Similar to previous best
Run 4 stress 0.1296653 
... Procrustes: rmse 9.324728e-06  max resid 1.922477e-05 
... Similar to previous best
Run 5 stress 0.1296653 
... Procrustes: rmse 5.86126e-06  max resid 9.805432e-06 
... Similar to previous best
Run 6 stress 0.1783099 
Run 7 stress 0.1577492 
Run 8 stress 0.1296653 
... Procrustes: rmse 1.137076e-06  max resid 2.109404e-06 
... Similar to previous best
Run 9 stress 0.1822321 
Run 10 stress 0.1296653 
... Procrustes: rmse 3.072587e-06  max resid 6.178088e-06 
... Similar to previous best
Run 11 stress 0.1662196 
Run 12 stress 0.1577492 
Run 13 stress 0.1789585 
Run 14 stress 0.1662196 
Run 15 stress 0.2429611 
Run 16 stress 0.1296653 
... Procrustes: rmse 1.055402e-06  max resid 1.571138e-06 
... Similar to previous best
Run 17 stress 0.1710196 
Run 18 stress 0.1296653 
... Procrustes: rmse 7.435892e-06  max resid 1.457155e-05 
... Similar to previous best
Run 19 stress 0.3209238 
Run 20 stress 0.1296653 
... Procrustes: rmse 1.338029e-06  max resid 2.291495e-06 
... Similar to previous best
*** Best solution repeated 10 times
Show code
nmds_scores <- as.data.frame(scores(nmds, display = "sites")) %>%
  mutate(zone = zone_vector,
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

# calculate centroids for label placement inside ellipses
centroids <- nmds_scores %>%
  group_by(zone) %>%
  summarise(NMDS1 = mean(NMDS1), NMDS2 = mean(NMDS2))

ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_mark_ellipse(aes(group = zone),
                    alpha = 0.15,
                    linewidth = 0.8,
                    con.cap = 0) +
  geom_point(size = 4, alpha = 0.9) +
  geom_text(data = centroids,
            aes(x = NMDS1, y = NMDS2, label = zone, colour = zone),
            fontface = "bold",
            size = 3.5,
            inherit.aes = FALSE) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Show code
####


ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_mark_ellipse(aes(group = zone),
                    alpha = 0.15,
                    linewidth = 0.8,
                    con.cap = 0) +
  geom_point(size = 4, alpha = 0.9) +
  geom_text(data = centroids,
            aes(x = NMDS1, y = NMDS2, label = zone, colour = zone),
            fontface = "bold", size = 3.5,
            inherit.aes = FALSE) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  xlim(-0.9, 0.6) +   # expand x axis
  ylim(-0.6, 0.5) +   # expand y axis
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))


5.1 Phylum Composition: Bias-Reduced Multinomial Logistic Regression

A bias-reduced multinomial logistic regression (brmultinom, brglm2 package) was fitted with phylum composition as the response variable and habitat zone as the predictor. Zone 3 (control) was set as the reference category. A likelihood ratio test (LRT) was used to assess overall model significance.

Show code
mlr_data <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(sample, zone, phylum) %>%
  summarise(count = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = phylum, values_from = count, values_fill = 0) %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))

response_matrix <- mlr_data %>%
  dplyr::select(Annelida, Arthropoda, Mollusca, Echinodermata) %>%
  as.matrix()

mlr_model <- brmultinom(response_matrix ~ zone, data = mlr_data, maxit = 999)
summary(mlr_model)
Call:
brmultinom(formula = response_matrix ~ zone, data = mlr_data, 
    maxit = 999)

Coefficients:
              (Intercept)     zoneZ1      zoneZ2
Arthropoda     -0.7196229 -1.2262873 -1.66865696
Mollusca       -3.0981939 -0.3140535 -0.03730036
Echinodermata  -3.8454083  2.0425990 -1.48731028

Std. Errors:
              (Intercept)    zoneZ1    zoneZ2
Arthropoda      0.1208138 0.4431481 0.3626326
Mollusca        0.3327022 0.9070168 0.5887044
Echinodermata   0.4778803 0.6236551 1.5075411

Residual Deviance: 99.24516 
Log-likelihood: -49.62258 
AIC: 117.2452

Type of estimator: AS_mixed (mixed bias-reducing adjusted score equations)
Number of Fisher Scoring iterations: 4
Show code
mlr_null   <- brmultinom(response_matrix ~ 1, data = mlr_data, maxit = 999)
lrt_result <- lrtest(mlr_null, mlr_model)
print(lrt_result)
Likelihood ratio test

Model 1: response_matrix ~ 1
Model 2: response_matrix ~ zone
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   3 -77.144                         
2   9 -49.623  6 55.044  4.543e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results: The likelihood ratio test confirmed that habitat zone significantly predicted phylum composition (χ² = 55.04, df = 6, p < 0.001). Relative to Zone 3 (control), Arthropoda were less prevalent in both Zone 1 (β = -1.23) and Zone 2 (β = -1.67). Echinodermata showed higher relative abundance in Zone 1 (β = 2.04). Annelida increased in predicted probability from Zone 3 to Zone 2, suggesting increasing polychaete dominance in the restoration zones.

5.1.1 Model Predictions: Phylum Composition by Zone

Show code
preds  <- ggpredict(mlr_model, terms = "zone")
pred_df <- as.data.frame(preds) %>%
  rename(zone = x, phylum = response.level, probability = predicted) %>%
  mutate(
    zone   = factor(zone, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

ggplot(pred_df, aes(x = zone, y = probability,
                     colour = phylum, group = phylum)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_colour_manual(values = phylum_colours) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(0, 1)) +
  labs(x = "Habitat Zone", y = "Predicted Probability",
       caption = "brmultinom; LRT: χ² = 55.04, df = 6, p < 0.001. Zone 3 = reference category.") +
  theme_bw() +
  theme(axis.text        = element_text(size = 9, colour = "black"),
        axis.title       = element_text(size = 10),
        strip.text       = element_text(face = "italic", size = 10),
        strip.background = element_rect(fill = "grey85", colour = NA),
        legend.position  = "none",
        plot.caption     = element_text(size = 8, hjust = 0, colour = "grey40"))

Show code
## graph 2

library(ggforce)

pred_df <- as.data.frame(preds) %>%
  rename(zone = x, phylum = response.level, probability = predicted) %>%
  mutate(
    zone   = factor(zone, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

# create numeric x for smooth curves
pred_df$x_num <- as.numeric(pred_df$zone)

ggplot(pred_df, aes(x = x_num, y = probability,
                     colour = phylum, group = phylum)) +
  geom_smooth(method = "loess", se = FALSE, linewidth = 1.2, span = 1.5) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:3,
                     labels = c("Zone 3", "Zone 1", "Zone 2")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(0, 1)) +
  scale_colour_manual(
    values = phylum_colours,
    labels = c("Annelida", "Arthropoda", "Mollusca", "Echinodermata")
  ) +
  labs(x = "Habitat Zone",
       y = "Predicted Probability",
       colour = "Phylum",
       caption = "brmultinom; LRT: \u03c7\u00b2 = 55.04, df = 6, p < 0.001. Zone 3 = reference category.") +
  theme_bw() +
  theme(axis.text        = element_text(size = 10, colour = "black"),
        axis.title       = element_text(size = 11),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.caption     = element_text(size = 8, hjust = 0, colour = "grey40"))
`geom_smooth()` using formula = 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.

Figure 13: Model-predicted probabilities of each phylum’s relative abundance across habitat zones from a bias-reduced multinomial logistic regression. Zone 3 is the reference category. Each panel shows one phylum.


6 Research Question 2: Rapid Biodiversity Assessment

Can amphipod or polychaete diversity predict total infaunal biodiversity? To avoid autocorrelation, total diversity metrics were calculated excluding the focal predictor group. When amphipod diversity was the predictor, total diversity excluded amphipods, and vice versa for polychaetes.

6.1 Data Preparation

Show code
# amphipod diversity per sample
amph_wide <- sample_long %>%
  dplyr::filter(order == "Amphipoda") %>%
  group_by(sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

amph_mat <- amph_wide %>% dplyr::select(-sample) %>% as.matrix()

amphipod_div <- amph_wide %>%
  mutate(amph_richness = specnumber(amph_mat),
         amph_shannon  = diversity(amph_mat, index = "shannon")) %>%
  dplyr::select(sample, amph_richness, amph_shannon)

# polychaete diversity per sample
poly_wide <- sample_long %>%
  dplyr::filter(order == "Polychaeta") %>%
  group_by(sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

poly_mat <- poly_wide %>% dplyr::select(-sample) %>% as.matrix()

poly_div <- poly_wide %>%
  mutate(poly_richness = specnumber(poly_mat),
         poly_shannon  = diversity(poly_mat, index = "shannon")) %>%
  dplyr::select(sample, poly_richness, poly_shannon)

# total diversity EXCLUDING amphipods
excl_amph_wide <- sample_long %>%
  dplyr::filter(order != "Amphipoda") %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

excl_amph_mat <- excl_amph_wide %>% dplyr::select(-sample, -zone) %>% as.matrix()

total_excl_amph <- excl_amph_wide %>%
  mutate(total_sp_richness_excl_amph = specnumber(excl_amph_mat),
         total_shannon_excl_amph     = diversity(excl_amph_mat, index = "shannon")) %>%
  dplyr::select(sample, zone, total_sp_richness_excl_amph, total_shannon_excl_amph)

# total diversity EXCLUDING polychaetes
excl_poly_wide <- sample_long %>%
  dplyr::filter(order != "Polychaeta") %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

excl_poly_mat <- excl_poly_wide %>% dplyr::select(-sample, -zone) %>% as.matrix()

total_excl_poly <- excl_poly_wide %>%
  mutate(total_sp_richness_excl_poly = specnumber(excl_poly_mat),
         total_shannon_excl_poly     = diversity(excl_poly_mat, index = "shannon")) %>%
  dplyr::select(sample, total_sp_richness_excl_poly, total_shannon_excl_poly)

# join into single dataframe
plot_df <- total_excl_amph %>%
  left_join(amphipod_div,   by = "sample") %>%
  left_join(poly_div,       by = "sample") %>%
  left_join(total_excl_poly, by = "sample") %>%
  mutate(zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

#######

# percentage of total individuals that are amphipods and polychaetes
sample_long %>%
  mutate(group = case_when(
    order == "Amphipoda"  ~ "Amphipoda",
    order == "Polychaeta" ~ "Polychaeta",
    TRUE ~ "Other"
  )) %>%
  group_by(group) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  mutate(percentage = round(total / sum(total) * 100, 1))
# A tibble: 3 × 3
  group      total percentage
  <chr>      <dbl>      <dbl>
1 Amphipoda    105       21  
2 Other         43        8.6
3 Polychaeta   352       70.4
Show code
# percentage of total species that are amphipods and polychaetes
sample_long %>%
  dplyr::select(species, order) %>%
  distinct() %>%
  mutate(group = case_when(
    order == "Amphipoda"  ~ "Amphipoda",
    order == "Polychaeta" ~ "Polychaeta",
    TRUE ~ "Other"
  )) %>%
  group_by(group) %>%
  summarise(n_species = n(), .groups = "drop") %>%
  mutate(percentage = round(n_species / sum(n_species) * 100, 1))
# A tibble: 3 × 3
  group      n_species percentage
  <chr>          <int>      <dbl>
1 Amphipoda         18       25  
2 Other             17       23.6
3 Polychaeta        37       51.4

6.2 Regression Plots

Show code
p1 <- ggplot(plot_df, aes(x = amph_richness,
                           y = total_sp_richness_excl_amph, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.825, p = 0.001", size = 3.5, fontface = "italic") +
  labs(x = "Amphipod Species Richness",
       y = "Total Species Richness (excl. amph.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p2 <- ggplot(plot_df, aes(x = amph_shannon,
                           y = total_shannon_excl_amph, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.678, p = 0.006", size = 3.5, fontface = "italic") +
  labs(x = "Amphipod Shannon Diversity",
       y = "Total Shannon Diversity (excl. amph.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p3 <- ggplot(plot_df, aes(x = poly_richness,
                           y = total_sp_richness_excl_poly, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.499, p = 0.033", size = 3.5, fontface = "italic") +
  labs(x = "Polychaete Species Richness",
       y = "Total Species Richness (excl. poly.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p4 <- ggplot(plot_df, aes(x = poly_shannon,
                           y = total_shannon_excl_poly, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.611, p = 0.013", size = 3.5, fontface = "italic") +
  labs(x = "Polychaete Shannon Diversity",
       y = "Total Shannon Diversity (excl. poly.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

(p1 + p2) / (p3 + p4) +
  plot_annotation(
    title = "Amphipod and Polychaete Diversity as Predictors of Total Infaunal Diversity",
    theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12))
  ) +
  plot_layout(guides = "collect") &
  scale_colour_manual(values = zone_colours) &
  theme(legend.position = "right")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Figure 14: Linear regression plots showing amphipod (top row) and polychaete (bottom row) diversity as predictors of total infaunal diversity. Total diversity metrics exclude the focal predictor group to avoid autocorrelation. Points are coloured by zone. R² and p-values from linear regression are shown in the top right of each panel.

6.3 Statistical Tests

Show code
lm_amph_rich3 <- lm(total_sp_richness_excl_amph ~ amph_richness, data = plot_df)
lm_amph_shan3 <- lm(total_shannon_excl_amph      ~ amph_shannon,  data = plot_df)
lm_poly_rich3 <- lm(total_sp_richness_excl_poly  ~ poly_richness, data = plot_df)
lm_poly_shan3 <- lm(total_shannon_excl_poly       ~ poly_shannon,  data = plot_df)

cor_amph_rich_p <- cor.test(plot_df$amph_richness, plot_df$total_sp_richness_excl_amph, method = "pearson")
cor_amph_shan_p <- cor.test(plot_df$amph_shannon,  plot_df$total_shannon_excl_amph,     method = "pearson")
cor_poly_rich_p <- cor.test(plot_df$poly_richness, plot_df$total_sp_richness_excl_poly, method = "pearson")
cor_poly_shan_p <- cor.test(plot_df$poly_shannon,  plot_df$total_shannon_excl_poly,     method = "pearson")

cor_amph_rich_k <- cor.test(plot_df$amph_richness, plot_df$total_sp_richness_excl_amph, method = "kendall")
Warning in cor.test.default(plot_df$amph_richness,
plot_df$total_sp_richness_excl_amph, : Cannot compute exact p-value with ties
Show code
cor_amph_shan_k <- cor.test(plot_df$amph_shannon,  plot_df$total_shannon_excl_amph,     method = "kendall")
Warning in cor.test.default(plot_df$amph_shannon,
plot_df$total_shannon_excl_amph, : Cannot compute exact p-value with ties
Show code
cor_poly_rich_k <- cor.test(plot_df$poly_richness, plot_df$total_sp_richness_excl_poly, method = "kendall")
Warning in cor.test.default(plot_df$poly_richness,
plot_df$total_sp_richness_excl_poly, : Cannot compute exact p-value with ties
Show code
cor_poly_shan_k <- cor.test(plot_df$poly_shannon,  plot_df$total_shannon_excl_poly,     method = "kendall")


## one table
combined_table <- data.frame(
  Metric = c("Species Richness", "", "Shannon Diversity", ""),
  Relationship = c(
    "Amphipod richness → Total richness (excl. amph.)",
    "Polychaete richness → Total richness (excl. poly.)",
    "Amphipod Shannon → Total Shannon (excl. amph.)",
    "Polychaete Shannon → Total Shannon (excl. poly.)"
  ),
  R2        = c(get_lm_r2(lm_amph_rich3), get_lm_r2(lm_poly_rich3),
                get_lm_r2(lm_amph_shan3), get_lm_r2(lm_poly_shan3)),
  F_stat    = c(get_lm_f(lm_amph_rich3),  get_lm_f(lm_poly_rich3),
                get_lm_f(lm_amph_shan3),  get_lm_f(lm_poly_shan3)),
  LM_p      = round(c(get_lm_p(lm_amph_rich3), get_lm_p(lm_poly_rich3),
                       get_lm_p(lm_amph_shan3), get_lm_p(lm_poly_shan3)), 3),
  Pearson_r = round(c(cor_amph_rich_p$estimate, cor_poly_rich_p$estimate,
                       cor_amph_shan_p$estimate, cor_poly_shan_p$estimate), 3),
  Pearson_p = round(c(cor_amph_rich_p$p.value,  cor_poly_rich_p$p.value,
                       cor_amph_shan_p$p.value,  cor_poly_shan_p$p.value),  3),
  Kendall_t = round(c(cor_amph_rich_k$estimate, cor_poly_rich_k$estimate,
                       cor_amph_shan_k$estimate, cor_poly_shan_k$estimate), 3),
  Kendall_p = round(c(cor_amph_rich_k$p.value,  cor_poly_rich_k$p.value,
                       cor_amph_shan_k$p.value,  cor_poly_shan_k$p.value),  3)
)

kable(combined_table,
      col.names = c("Metric", "Relationship", "R²", "F", "LM p",
                    "Pearson r", "Pearson p", "Kendall τ", "Kendall p"),
      caption = "Table 4. Amphipod and polychaete diversity as predictors of total infaunal diversity. Total diversity excludes the focal predictor group in each model.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(2, bold = TRUE) %>%
  row_spec(1, background = "#E8F4FD") %>%
  row_spec(2, background = "#FDF2E9") %>%
  row_spec(3, background = "#E8F4FD") %>%
  row_spec(4, background = "#FDF2E9") %>%
  pack_rows("Species Richness", 1, 2) %>%
  pack_rows("Shannon Diversity", 3, 4)
Table 4. Amphipod and polychaete diversity as predictors of total infaunal diversity. Total diversity excludes the focal predictor group in each model.
Metric Relationship F LM p Pearson r Pearson p Kendall τ Kendall p
Species Richness
Species Richness Amphipod richness → Total richness (excl. amph.) 0.825 32.97 0.001 0.908 0.001 0.647 0.019
Polychaete richness → Total richness (excl. poly.) 0.499 6.96 0.033 0.706 0.033 0.493 0.072
Shannon Diversity
Shannon Diversity Amphipod Shannon → Total Shannon (excl. amph.) 0.678 14.75 0.006 0.823 0.006 0.667 0.014
Polychaete Shannon → Total Shannon (excl. poly.) 0.611 11.00 0.013 0.782 0.013 0.833 0.001

Results: Amphipod species richness was a significant predictor of total infaunal species richness (R² = 0.825, p = 0.001), confirmed by Kendall’s rank correlation (τ = 0.647, p = 0.019). Amphipod Shannon diversity was also a significant predictor of total Shannon diversity (R² = 0.678, p = 0.006). Polychaete richness (R² = 0.499, p = 0.033) and Shannon diversity (R² = 0.611, p = 0.013) were also significant predictors, though with consistently weaker relationships than amphipods across all three tests.


7 Sampling Bias: Core Penetration Depth

Core penetration depth was assessed as a potential source of sampling bias. One sample (INFA_Z1_3A) had insufficient penetration recorded as “N/A (<10 mm)” and was assigned a depth of 10 mm based on the recorded core volume of 0.0785 litres.

Show code
lm_depth_rich  <- lm(total_sp_richness ~ core_depth, data = div_depth)
lm_depth_shan  <- lm(total_shannon     ~ core_depth, data = div_depth)
lm_depth_abund <- lm(total_abundance   ~ core_depth, data = div_depth)

cor_depth_rich_p  <- cor.test(div_depth$core_depth, div_depth$total_sp_richness, method = "pearson")
cor_depth_shan_p  <- cor.test(div_depth$core_depth, div_depth$total_shannon,     method = "pearson")
cor_depth_abund_p <- cor.test(div_depth$core_depth, div_depth$total_abundance,   method = "pearson")

cor_depth_rich_k  <- cor.test(div_depth$core_depth, div_depth$total_sp_richness, method = "kendall")
Warning in cor.test.default(div_depth$core_depth, div_depth$total_sp_richness,
: Cannot compute exact p-value with ties
Show code
cor_depth_shan_k  <- cor.test(div_depth$core_depth, div_depth$total_shannon,     method = "kendall")
cor_depth_abund_k <- cor.test(div_depth$core_depth, div_depth$total_abundance,   method = "kendall")
Warning in cor.test.default(div_depth$core_depth, div_depth$total_abundance, :
Cannot compute exact p-value with ties
Show code
depth_table <- data.frame(
  Relationship = c("Core depth → Species richness",
                   "Core depth → Shannon diversity",
                   "Core depth → Total abundance"),
  R2        = c(get_lm_r2(lm_depth_rich), get_lm_r2(lm_depth_shan),
                get_lm_r2(lm_depth_abund)),
  F_stat    = c(get_lm_f(lm_depth_rich),  get_lm_f(lm_depth_shan),
                get_lm_f(lm_depth_abund)),
  LM_p      = round(c(get_lm_p(lm_depth_rich), get_lm_p(lm_depth_shan),
                       get_lm_p(lm_depth_abund)), 3),
  Pearson_r = round(c(cor_depth_rich_p$estimate, cor_depth_shan_p$estimate,
                       cor_depth_abund_p$estimate), 3),
  Pearson_p = round(c(cor_depth_rich_p$p.value,  cor_depth_shan_p$p.value,
                       cor_depth_abund_p$p.value),  3),
  Kendall_t = round(c(cor_depth_rich_k$estimate, cor_depth_shan_k$estimate,
                       cor_depth_abund_k$estimate), 3),
  Kendall_p = round(c(cor_depth_rich_k$p.value,  cor_depth_shan_k$p.value,
                       cor_depth_abund_k$p.value),  3)
)

kable(depth_table,
      col.names = c("Relationship", "R²", "F", "LM p-value",
                    "Pearson r", "Pearson p", "Kendall τ", "Kendall p"),
      caption = "Table 6. Core penetration depth as a predictor of infaunal diversity metrics.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE)
Table 6. Core penetration depth as a predictor of infaunal diversity metrics.
Relationship F LM p-value Pearson r Pearson p Kendall τ Kendall p
Core depth → Species richness 0.010 0.07 0.798 0.100 0.798 0.254 0.345
Core depth → Shannon diversity 0.022 0.16 0.705 0.148 0.705 0.167 0.612
Core depth → Total abundance 0.000 0.00 0.982 -0.009 0.982 0.085 0.753
Show code
p_depth1 <- ggplot(div_depth, aes(x = core_depth, y = total_sp_richness,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² = 0.010, p = 0.798)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Species Richness",
       colour = "Zone") +
  theme_classic()

p_depth2 <- ggplot(div_depth, aes(x = core_depth, y = total_shannon,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² = 0.022, p = 0.705)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Shannon Diversity",
       colour = "Zone") +
  theme_classic()

p_depth3 <- ggplot(div_depth, aes(x = core_depth, y = total_abundance,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² < 0.001, p = 0.982)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Abundance",
       colour = "Zone") +
  theme_classic()

(p_depth1 + p_depth2) / (p_depth3 + plot_spacer()) +
  plot_annotation(
    title = "Core Penetration Depth as a Predictor of Infaunal Diversity",
    theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 13))
  ) +
  plot_layout(guides = "collect") &
  theme(legend.position = "right")
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Figure 15: Linear regression plots of core penetration depth against total species richness, Shannon diversity and total abundance. Points are coloured by zone. All relationships were non-significant (ns).

Results: No significant relationship was detected between core penetration depth and total species richness (R² = 0.010, p = 0.798), Shannon diversity (R² = 0.022, p = 0.705) or total abundance (R² < 0.001, p = 0.982). These results indicate that variation in core penetration depth did not constitute a significant source of sampling bias, and all nine samples were retained in subsequent analyses.


8 Summary of Statistical Results

Show code
summary_table <- data.frame(
  Test = c(
    "Shapiro-Wilk (Z1)", "Shapiro-Wilk (Z2)", "Shapiro-Wilk (Z3)",
    "Kruskal-Wallis: Total abundance (per sample)",
    "Dunn's post-hoc: Z1 vs Z2",
    "Dunn's post-hoc: Z1 vs Z3",
    "Dunn's post-hoc: Z2 vs Z3",
    "GLM (NB): Total abundance ~ zone",
    "GLM (NB): Species richness ~ zone",
    "Kruskal-Wallis: Shannon diversity (H') — species level",
    "Kruskal-Wallis: Shannon diversity (H') — phylum level",
    "Kruskal-Wallis: Simpson diversity (1-D) — species level",
    "Kruskal-Wallis: Simpson diversity (1-D) — phylum level",
    "PERMANOVA: Species composition",
    "betadisper: Dispersion homogeneity",
    "brmultinom + LRT: Phylum composition",
    "Core depth → Species richness",
    "Core depth → Shannon diversity",
    "Core depth → Total abundance"
  ),
  Result = c(
    "W = 0.385, p < 0.001",
    "W = 0.447, p < 0.001",
    "W = 0.540, p < 0.001",
    "H = 6.88, df = 2, p = 0.032",
    "p = 0.347 (ns)",
    "p = 0.013",
    "p = 0.233 (ns)",
    "Z1: β = -1.706, p < 0.001; Z2: β = -1.030, p < 0.001",
    "Z1: β = -1.134, p < 0.001; Z2: β = -0.637, p < 0.001",
    "H = 1.87, df = 2, p = 0.39 (ns)",
    "H = 1.69, df = 2, p = 0.43 (ns)",
    "H = 1.16, df = 2, p = 0.56 (ns)",
    "H = 3.47, df = 2, p = 0.18 (ns)",
    "F = 1.80, R² = 0.375, p = 0.009",
    "p = 0.287 (ns — assumption met)",
    "χ² = 55.04, df = 6, p < 0.001",
    "R² = 0.010, p = 0.798 (ns)",
    "R² = 0.022, p = 0.705 (ns)",
    "R² < 0.001, p = 0.982 (ns)"
  ),
  Conclusion = c(
    "Non-normal — non-parametric tests required",
    "Non-normal — non-parametric tests required",
    "Non-normal — non-parametric tests required",
    "Significant difference in abundance across zones",
    "Z1 and Z2 not significantly different",
    "Z3 significantly different from Z1",
    "Z2 not significantly different from Z3",
    "Both restoration zones have significantly lower abundance than control",
    "Both restoration zones have significantly lower richness than control",
    "No significant difference in Shannon diversity",
    "No significant difference in Shannon diversity at phylum level",
    "No significant difference in Simpson diversity",
    "No significant difference in Simpson diversity at phylum level",
    "Species composition differs significantly across zones",
    "Equal dispersion — PERMANOVA result is valid",
    "Phylum composition differs significantly across zones",
    "Core depth not a significant source of sampling bias",
    "Core depth not a significant source of sampling bias",
    "Core depth not a significant source of sampling bias"
  )
)

kable(summary_table,
      col.names = c("Test", "Result", "Conclusion"),
      caption = "Table 7. Summary of all statistical tests performed.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE)
Table 7. Summary of all statistical tests performed.
Test Result Conclusion
Shapiro-Wilk (Z1) W = 0.385, p < 0.001 Non-normal — non-parametric tests required
Shapiro-Wilk (Z2) W = 0.447, p < 0.001 Non-normal — non-parametric tests required
Shapiro-Wilk (Z3) W = 0.540, p < 0.001 Non-normal — non-parametric tests required
Kruskal-Wallis: Total abundance (per sample) H = 6.88, df = 2, p = 0.032 Significant difference in abundance across zones
Dunn's post-hoc: Z1 vs Z2 p = 0.347 (ns) Z1 and Z2 not significantly different
Dunn's post-hoc: Z1 vs Z3 p = 0.013 Z3 significantly different from Z1
Dunn's post-hoc: Z2 vs Z3 p = 0.233 (ns) Z2 not significantly different from Z3
GLM (NB): Total abundance ~ zone Z1: β = -1.706, p < 0.001; Z2: β = -1.030, p < 0.001 Both restoration zones have significantly lower abundance than control
GLM (NB): Species richness ~ zone Z1: β = -1.134, p < 0.001; Z2: β = -0.637, p < 0.001 Both restoration zones have significantly lower richness than control
Kruskal-Wallis: Shannon diversity (H') — species level H = 1.87, df = 2, p = 0.39 (ns) No significant difference in Shannon diversity
Kruskal-Wallis: Shannon diversity (H') — phylum level H = 1.69, df = 2, p = 0.43 (ns) No significant difference in Shannon diversity at phylum level
Kruskal-Wallis: Simpson diversity (1-D) — species level H = 1.16, df = 2, p = 0.56 (ns) No significant difference in Simpson diversity
Kruskal-Wallis: Simpson diversity (1-D) — phylum level H = 3.47, df = 2, p = 0.18 (ns) No significant difference in Simpson diversity at phylum level
PERMANOVA: Species composition F = 1.80, R² = 0.375, p = 0.009 Species composition differs significantly across zones
betadisper: Dispersion homogeneity p = 0.287 (ns — assumption met) Equal dispersion — PERMANOVA result is valid
brmultinom + LRT: Phylum composition χ² = 55.04, df = 6, p < 0.001 Phylum composition differs significantly across zones
Core depth → Species richness R² = 0.010, p = 0.798 (ns) Core depth not a significant source of sampling bias
Core depth → Shannon diversity R² = 0.022, p = 0.705 (ns) Core depth not a significant source of sampling bias
Core depth → Total abundance R² < 0.001, p = 0.982 (ns) Core depth not a significant source of sampling bias

9 species in zone

Show code
# species richness and total abundance per zone
sample_long %>%
  group_by(zone) %>%
  summarise(
    total_abundance = sum(abundance),
    species_richness = n_distinct(species[abundance > 0])
  )
# A tibble: 3 × 3
  zone  total_abundance species_richness
  <chr>           <dbl>            <int>
1 Z1                 59               20
2 Z2                116               33
3 Z3                325               58