Infaunal Biodiversity Analysis

Galway Bay Native Oyster Restoration Project

Author

Aoife Taylor

Published

April 6, 2026

1 Introduction

This analysis investigates patterns in infaunal biodiversity across three habitat zones within the Galway Bay Native Oyster Restoration Project. Since 2021, scallop shell has been deployed annually within the St George’s Bed, Inner Galway Bay, to provide settlement substrate for the European flat oyster (Ostrea edulis). Three habitat zones were delineated for biodiversity assessment:

Zone 1 (Z1): Scallop shell mounds (>20 cm relief; early reef structures)
Zone 2 (Z2): Scattered scallop shell
Zone 3 (Z3): Outside restoration area (control)

Three replicate cores were collected per zone (n = 9 total). This analysis addresses two research questions:

What is the impact of oyster restoration projects on infaunal biodiversity?
Can amphipod or polychaete biodiversity predict total infaunal biodiversity (Rapid Biodiversity Assessment)?

2 Data Import and Preparation

Show code

# load all packages
library(tidyverse)
library(ggplot2)
library(dplyr)
library(tidyr)
library(vegan)
library(knitr)
library(kableExtra)
library(dunn.test)
library(brglm2)
library(lmtest)
library(ggeffects)
library(patchwork)
library(MASS)
library(DHARMa)
library(ggVennDiagram)
library(conflicted)
conflicts_prefer(dplyr::select)
conflicts_prefer(dplyr::filter)

# import infauna data
data <- read_csv(
  file = "infauna_copy(R sheet).csv",
  locale = locale(encoding = "latin1")
)

# remove empty rows and columns
data <- data[rowSums(is.na(data)) != ncol(data),
             colSums(is.na(data)) != nrow(data)]

# remove taxa too small to be retained on 1mm sieve
data <- data %>%
  filter(!grepl("Oligochaeta|Nematoda sp.1|Nemertea sp.1", species))

# standardise taxonomic names
data <- data %>%
  mutate(
    species = case_when(
      species == "aplysia punctata"        ~ "Aplysia punctata",
      species == "gooddallia triangularis"  ~ "Gooddallia triangularis",
      species == "heteroclymene sp.1"      ~ "Heteroclymene sp.1",
      species == "metaphoxus Fultoni"      ~ "Metaphoxus fultoni",
      species == "syllidae sp.1"           ~ "Syllidae sp.1",
      species == "Golfingia  sp.1"         ~ "Golfingia sp.1",
      species == "Nothria conchylega "     ~ "Nothria conchylega",
      species == "Mediomastua sp.1"        ~ "Mediomastus sp.1",
      TRUE ~ species
    ),
    genus = case_when(
      genus == "aplysia"        ~ "Aplysia",
      genus == "gooddallia"     ~ "Gooddallia",
      genus == "heteroclymene"  ~ "Heteroclymene",
      genus == "urothoe"        ~ "Urothoe",
      genus == "Golfingia "     ~ "Golfingia",
      genus == "Spirobranchus " ~ "Spirobranchus",
      genus == "Mediomastua"    ~ "Mediomastus",
      TRUE ~ genus
    ),
    family = case_when(
      family == "maldanidae"    ~ "Maldanidae",
      family == "tellinidae"    ~ "Tellinidae",
      family == "Serpulidae "   ~ "Serpulidae",
      family == "Golfingiidae " ~ "Golfingiidae",
      family == "Eunicidea"     ~ "Eunicidae",
      family == "Nereidinae"    ~ "Nereididae",
      TRUE ~ family
    ),
    order = case_when(
      order == "amphipod"    ~ "Amphipoda",
      order == "Tanaid"      ~ "Tanaidacea",
      order == "ophiuroids"  ~ "Ophiurida",
      order == "gastropod"   ~ "Gastropoda",
      order == "pycnogonids" ~ "Pycnogonida",
      TRUE ~ order
    ),
    phylum = case_when(
      phylum == "Anthropoda" ~ "Arthropoda",
      TRUE ~ phylum
    )
  )

# zone colours — deliberately different from phylum colours
zone_colours <- c("Zone 1" = "#2166AC", "Zone 2" = "#F4A582", "Zone 3" = "#4DAC26")

# phylum colours
phylum_colours <- c(
  "Annelida"      = "#E8732A",
  "Arthropoda"    = "#9B59B6",
  "Mollusca"      = "#1ABC9C",
  "Echinodermata" = "#89CFF0"
)

# zone colours for Z1/Z2/Z3 coding (used in plots before factor relabelling)
zone_colours_z <- c("Z1" = "#2166AC", "Z2" = "#F4A582", "Z3" = "#4DAC26")

# create long format data with zone and sample labels (9 samples)
sample_long <- data %>%
  pivot_longer(
    cols = starts_with("INFA"),
    names_to = "sample",
    values_to = "abundance"
  ) %>%
  mutate(zone = str_extract(sample, "Z[0-9]"))

# per-sample abundance summary (n = 9)
sample_abundance <- sample_long %>%
  group_by(sample, zone) %>%
  summarise(total_abundance = sum(abundance), .groups = "drop")

# total diversity metrics per sample
total_div <- sample_long %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0) %>%
  mutate(
    total_sp_richness = specnumber(dplyr::select(., -sample, -zone)),
    total_shannon     = diversity(as.matrix(dplyr::select(., -sample, -zone)),
                                  index = "shannon"),
    total_abundance   = rowSums(dplyr::select(., -sample, -zone))
  ) %>%
  dplyr::select(sample, zone, total_sp_richness, total_shannon, total_abundance)

# zone-level species matrix for Shapiro-Wilk tests
zone_data <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample_type", values_to = "value") %>%
  mutate(zone = case_when(
    grepl("INFA_Z1", sample_type) ~ "zone1",
    grepl("INFA_Z2", sample_type) ~ "zone2",
    grepl("INFA_Z3", sample_type) ~ "zone3"
  )) %>%
  group_by(phylum, order, family, genus, species, zone) %>%
  summarise(total_value = sum(value), .groups = "drop") %>%
  pivot_wider(names_from = zone, values_from = total_value)

# per-sample phylum summary
sample_phylum <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(
    zone      = str_extract(sample, "Z[0-9]"),
    replicate = str_extract(sample, "[0-9][A-Z]$"),
    zone      = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")),
    phylum    = factor(phylum, levels = c("Annelida", "Arthropoda",
                                          "Mollusca", "Echinodermata"))
  ) %>%
  group_by(sample, zone, replicate, phylum) %>%
  summarise(total = sum(abundance), .groups = "drop")

# species list per zone
species_list <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]"),
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3"))) %>%
  group_by(zone, species) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  filter(total > 0) %>%
  mutate(phylum = data$phylum[match(species, data$species)],
         species = fct_reorder(species, total))

# phylum-level abundance for Shannon/Simpson phylum analyses
phylum_abund <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  group_by(sample, phylum) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = phylum, values_from = abundance, values_fill = 0)

phylum_matrix <- phylum_abund %>%
  dplyr::select(-sample) %>%
  as.matrix()

# import and clean core depth data
core <- read_csv("infauna_copy(cores).csv") %>%
  dplyr::select("SiteID", "EstCoreDepthmm", "VolumeLtr", "species total", "abundance")
core <- core[rowSums(is.na(core)) != ncol(core),
             colSums(is.na(core)) != nrow(core)]
core <- core[-11, ]

core_clean <- core %>%
  dplyr::select(SiteID, EstCoreDepthmm) %>%
  filter(!is.na(SiteID)) %>%
  mutate(EstCoreDepthmm = case_when(
    SiteID == "INFA_Z1_3A" ~ "10",
    TRUE ~ EstCoreDepthmm
  )) %>%
  filter(!grepl("N/A", EstCoreDepthmm)) %>%
  mutate(sample = SiteID, core_depth = as.numeric(EstCoreDepthmm)) %>%
  dplyr::select(sample, core_depth)

div_depth <- total_div %>% left_join(core_clean, by = "sample")

# helper functions for regression tables
get_lm_p  <- function(m) summary(m)$coefficients[2, 4]
get_lm_r2 <- function(m) round(summary(m)$r.squared, 3)
get_lm_f  <- function(m) round(summary(m)$fstatistic[1], 2)

3 Sampling Assessment

3.1 Species Accumulation Curves

Species accumulation curves were constructed using the specaccum function (method = “exact”, vegan package), which calculates the mathematically expected cumulative species richness averaged across all possible orderings of the samples within each zone. The shaded ribbon represents ± 1 standard deviation.

Show code

species_matrix <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(zone, sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

zones <- c("Z1", "Z2", "Z3")
accum_list <- lapply(zones, function(z) {
  mat <- species_matrix %>%
    filter(zone == z) %>%
    dplyr::select(-zone, -sample) %>%
    as.matrix()
  sp <- specaccum(mat, method = "exact")
  data.frame(zone = z, samples = sp$sites, richness = sp$richness, sd = sp$sd)
})

Warning in cor(x > 0): the standard deviation is zero
Warning in cor(x > 0): the standard deviation is zero
Warning in cor(x > 0): the standard deviation is zero

Show code

accum_df <- bind_rows(accum_list)

ggplot(accum_df, aes(x = samples, y = richness, colour = zone, fill = zone)) +
  geom_ribbon(aes(ymin = richness - sd, ymax = richness + sd),
              alpha = 0.15, colour = NA) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:3) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  scale_fill_manual(values = zone_colours_z,
                    labels = c("Zone 1", "Zone 2", "Zone 3")) +
  labs(x = "Number of Samples", y = "Cumulative Species Richness",
       colour = "Zone", fill = "Zone") +
  theme_classic() +
  theme(axis.text = element_text(size = 9, colour = "black"),
        axis.title = element_text(size = 10),
        legend.title = element_text(face = "bold", size = 10),
        legend.text = element_text(size = 9),
        panel.grid = element_blank())

Figure 1: Cumulative species richness plotted against number of samples per zone. Lines show mean expected richness; shaded ribbons show ± 1 standard deviation across all possible sample orderings. Zone 1 = shell mounds; Zone 2 = scattered shell; Zone 3 = control.

Results: All three curves show a continuously rising trajectory with no sign of levelling off at three samples, indicating that species richness had not reached an asymptote by the end of sampling. Zone 3 (control) showed the highest cumulative richness and widest confidence ribbon, indicating greater variability in species composition between samples in the control zone. These results indicate that more sampling would likely detect additional species in all zones and this is acknowledged as a limitation of the current sampling effort.

4 Research Question 1: Impact of Oyster Restoration on Infaunal Biodiversity

4.1 Normality Testing

Shapiro-Wilk tests were applied to species-level abundance data within each zone to determine whether parametric or non-parametric tests were appropriate.

Show code

shapiro_z1 <- shapiro.test(zone_data$zone1)
shapiro_z2 <- shapiro.test(zone_data$zone2)
shapiro_z3 <- shapiro.test(zone_data$zone3)

normality_table <- data.frame(
  Zone    = c("Zone 1", "Zone 2", "Zone 3"),
  W       = round(c(shapiro_z1$statistic, shapiro_z2$statistic,
                    shapiro_z3$statistic), 4),
  p_value = formatC(c(shapiro_z1$p.value, shapiro_z2$p.value,
                      shapiro_z3$p.value), format = "e", digits = 3)
)

kable(normality_table,
      col.names = c("Zone", "W statistic", "p-value"),
      caption = "Table 1. Shapiro-Wilk tests for normality of species-level abundance data per zone.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"), full_width = FALSE)

Table 1. Shapiro-Wilk tests for normality of species-level abundance data per zone.
Zone	W statistic	p-value
Zone 1	0.3884	9.153e-16
Zone 2	0.3347	2.044e-16
Zone 3	0.4729	1.186e-14

Results: Shapiro-Wilk tests revealed significant departures from normality in all three zones (p < 0.001). All subsequent statistical tests therefore used non-parametric methods.

4.2 Total Abundance per Core

Total abundance was calculated as the sum of all individuals per core sample. Each of the nine cores is treated as one replicate (three per zone). The Kruskal-Wallis test was applied to per-sample totals (n = 9) as the correct unit of replication, avoiding pseudoreplication.

Show code

kw_abund_sample <- kruskal.test(total_abundance ~ zone, data = sample_abundance)

dunn_abund_sample <- dunn.test(sample_abundance$total_abundance,
                                sample_abundance$zone,
                                method = "bonferroni")

  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 6.8796, df = 2, p-value = 0.03

                   Dunn's Pairwise Comparison of x by group                   
                                 (Bonferroni)                                 
Col Mean-│
Row Mean │         Z1         Z2
─────────┼──────────────────────
      Z2 │  -1.197569
         │     0.3466 
         │
      Z3 │  -2.619684  -1.422114
         │     0.0132*    0.2325 

α = 0.05
Reject Ho if p ≤ α/2, where p = Pr(Z ≥ |z|)

Show code

sig_labels <- data.frame(
  zone  = c("Z1", "Z2", "Z3"),
  label = c("a", "ab", "b"),
  y_pos = rep(max(sample_abundance$total_abundance) * 1.1, 3)
)

ggplot(sample_abundance, aes(x = zone, y = total_abundance, fill = zone)) +
  geom_boxplot(outlier.shape = 19, outlier.size = 1.5) +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  geom_text(data = sig_labels,
            aes(x = zone, y = y_pos, label = label),
            inherit.aes = FALSE, size = 5, fontface = "bold") +
  scale_fill_manual(values = zone_colours_z) +
  scale_x_discrete(labels = c("Z1" = "Zone 1", "Z2" = "Zone 2", "Z3" = "Zone 3")) +
  labs(x = "Zone", y = "Total Abundance per Core") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11),
        plot.margin = margin(t = 20, r = 10, b = 10, l = 10))

Figure 2: Total abundance of all infaunal individuals per core sample across zones (n = 3 per zone). Each dot represents one core. Letters indicate statistically homogeneous groups based on Dunn’s post-hoc test with Bonferroni correction (a = Zone 1; ab = Zone 2; b = Zone 3).

Results: A Kruskal-Wallis test revealed a significant difference in total abundance across zones (H = 6.88, df = 2, p = 0.032). Dunn’s post-hoc test with Bonferroni correction indicated that Zone 3 (control) differed significantly from Zone 1 (p = 0.013). Zone 2 was not significantly different from either Zone 1 (p = 0.347) or Zone 3 (p = 0.233), indicating an intermediate pattern. Total abundance was highest in Zone 3 and lowest in Zone 1.

4.3 Generalised Linear Models: Abundance and Species Richness

A Poisson GLM was initially fitted and assessed for overdispersion using DHARMa simulated residual diagnostics. Significant overdispersion was detected, a Negative Binomial GLM was therefore fitted as it includes an additional dispersion parameter accommodating overdispersed count data.

Show code

sample_div <- total_div %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))
# overdispersion check
 # fit Poisson first then test
glm_poisson <- glm(total_abundance ~ zone, 
                   data = sample_div, family = poisson)
glm_poisson


Call:  glm(formula = total_abundance ~ zone, family = poisson, data = sample_div)

Coefficients:
(Intercept)       zoneZ1       zoneZ2  
      4.685       -1.706       -1.030  

Degrees of Freedom: 8 Total (i.e. Null);  6 Residual
Null Deviance:      252.3 
Residual Deviance: 24.78    AIC: 81

Show code

simulateResiduals(glm_poisson, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.9829763 0.04791643 0.4901769 0.03824786 0.3542827 0.9817204 0.9503528 0.7571552 0.006356984

Show code

testDispersion(glm_poisson)


    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 3.3357, p-value = 0.016
alternative hypothesis: two.sided

Results: A Poisson GLM confirmed overdispersion (dispersion test: p = 0.016) with residual deviance (24.78) approximately four times greater than the residual degrees of freedom (6), confirming violation of the Poisson equal mean-variance assumption.

4.3.1 GLM 1: Total Abundance by Zone

Show code

sample_div <- total_div %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))

glm_abund <- glm.nb(total_abundance ~ zone, data = sample_div)
summary(glm_abund)


Call:
glm.nb(formula = total_abundance ~ zone, data = sample_div, init.theta = 29.30877834, 
    link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   4.6852     0.1202  38.976  < 2e-16 ***
zoneZ1       -1.7063     0.2068  -8.250  < 2e-16 ***
zoneZ2       -1.0302     0.1856  -5.551 2.84e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(29.3088) family taken to be 1)

    Null deviance: 89.720  on 8  degrees of freedom
Residual deviance:  9.585  on 6  degrees of freedom
AIC: 76.451

Number of Fisher Scoring iterations: 1

              Theta:  29.3 
          Std. Err.:  24.2 

 2 x log-likelihood:  -68.451

Show code

simulateResiduals(glm_abund, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.8972407 0.07831351 0.4332401 0.08440482 0.3987502 0.9058376 0.8257076 0.6615099 0.123393

Show code

pred_abund <- ggpredict(glm_abund, terms = "zone") %>%
  as.data.frame() %>%
  mutate(x = factor(x, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")))

ggplot(pred_abund, aes(x = x, y = predicted, colour = x)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  labs(x = "Zone", y = "Predicted Total Abundance") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 3: DHARMa simulated residual diagnostics (upper) and model-predicted total abundance per zone with 95% confidence intervals (lower) from a Negative Binomial GLM. Zone 3 is the reference category.

Results: A Negative Binomial GLM confirmed that total abundance differed significantly across zones. Zone 1 had significantly lower abundance than Zone 3 (β = -1.706, z = -8.250, p < 0.001), with predicted abundance approximately 18% of Zone 3 (e^-1.706 = 0.18), corresponding to approximately 19 individuals per core compared to 108 in Zone 3. Zone 2 also had significantly lower abundance (β = -1.030, z = -5.551, p < 0.001), with predicted abundance approximately 36% of Zone 3. Model assumptions were confirmed using DHARMa diagnostics (KS p = 0.749; dispersion p = 0.752; outlier p = 1.000).

4.3.2 GLM 2: Species Richness by Zone

Show code

glm_rich <- glm.nb(total_sp_richness ~ zone, data = sample_div)
summary(glm_rich)


Call:
glm.nb(formula = total_sp_richness ~ zone, data = sample_div, 
    init.theta = 180.0094501, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   3.3673     0.1155  29.148  < 2e-16 ***
zoneZ1       -1.1337     0.2256  -5.024 5.05e-07 ***
zoneZ2       -0.6373     0.1922  -3.316 0.000914 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(180.0095) family taken to be 1)

    Null deviance: 38.4836  on 8  degrees of freedom
Residual deviance:  8.0991  on 6  degrees of freedom
AIC: 58.327

Number of Fisher Scoring iterations: 1

              Theta:  180 
          Std. Err.:  855 

 2 x log-likelihood:  -50.327

Show code

simulateResiduals(glm_rich, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.7017479 0.2649475 0.6477857 0.3480546 0.156752 0.9545328 0.6808527 0.855776 0.05934819

Show code

pred_rich <- ggpredict(glm_rich, terms = "zone") %>%
  as.data.frame() %>%
  mutate(x = factor(x, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")))

ggplot(pred_rich, aes(x = x, y = predicted, colour = x)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  labs(x = "Zone", y = "Predicted Species Richness") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 4: DHARMa simulated residual diagnostics (upper) and model-predicted species richness per zone with 95% confidence intervals (lower) from a Negative Binomial GLM. Zone 3 is the reference category.

Results: A Negative Binomial GLM confirmed that species richness differed significantly across zones. Zone 3 had a predicted mean richness of e^3.367 = 29 species per core. Zone 1 had significantly lower richness (β = -1.134, z = -5.024, p < 0.001), at approximately 9 species per core. Zone 2 also had significantly lower richness (β = -0.637, z = -3.316, p < 0.001), at approximately 15 species per core. Notably, the GLM detected significant differences for both Zone 1 and Zone 2 relative to Zone 3, whereas the Kruskal-Wallis only detected a significant difference for Zone 1 vs Zone 3, demonstrating greater sensitivity of the parametric approach. Model assumptions were confirmed using DHARMa diagnostics (KS p = 0.782; dispersion p = 0.584; outlier p = 1.000).

4.4 Diversity Indices

4.4.1 Shannon Diversity Index: Species Level

Shannon diversity (H’) was calculated per core sample at species level. Shannon diversity accounts for both species richness and evenness.

Show code

abund <- dplyr::select(data, contains("INFA"))
shannon <- diversity(t(as.matrix(abund)), index = "shannon")

shannon_df <- data.frame(Sample = colnames(abund), Shannon = shannon) %>%
  mutate(Zone = sub("INFA_(Z\\d+)_.*", "\\1", Sample),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_shannon <- kruskal.test(Shannon ~ Zone, data = shannon_df)

ggplot(shannon_df, aes(x = Zone, y = Shannon, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(shannon_df$Shannon) * 1.05,
           label = paste0("ns (p = ", round(kw_shannon$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Shannon Index (H') — Species Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 5: Shannon diversity index (H’) calculated at species level per core sample (n = 3 per zone). Each dot represents one core. ns = not significant.

Results: A Kruskal-Wallis test revealed no significant difference in Shannon diversity at species level across zones (H = 1.87, df = 2, p = 0.39). Post-hoc testing was not performed as the overall test was non-significant. Species-level diversity was comparable across all three zones despite significant differences in total abundance.

4.4.2 Shannon Diversity Index: Phylum Level

Shannon diversity was additionally calculated at phylum level per core sample, measuring the evenness of individual distribution across the four phyla (Annelida, Arthropoda, Mollusca, Echinodermata).

Show code

shannon_phylum <- diversity(phylum_matrix, index = "shannon")


shannon_phylum_df <- data.frame(
  Sample = phylum_abund$sample,
  Shannon = shannon_phylum
) %>%
  mutate(Zone = str_extract(Sample, "Z[0-9]"),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_shannon_phylum <- kruskal.test(Shannon ~ Zone, data = shannon_phylum_df)

ggplot(shannon_phylum_df, aes(x = Zone, y = Shannon, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(shannon_phylum_df$Shannon) * 1.05,
           label = paste0("ns (p = ", round(kw_shannon_phylum$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Shannon Index (H') — Phylum Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 6: Shannon diversity index (H’) calculated at phylum level per core sample (n = 3 per zone). Each dot represents one core. Shannon diversity at phylum level measures how evenly individuals are distributed across the four phyla within each sample. ns = not significant.

Results: Shannon diversity calculated at phylum level did not differ significantly across zones (H = 1.69, df = 2, p = 0.43). Post-hoc testing was not performed. The evenness of individual distribution across phyla was comparable across all three habitat zones, consistent with the species-level Shannon result.

4.4.3 Simpson Diversity Index: Species Level

Simpson diversity (1-D) was calculated per core sample at species level. The Simpson index measures the probability that two randomly selected individuals belong to different species.

Show code

simpson <- diversity(t(as.matrix(abund)), index = "simpson")

simpson_df <- data.frame(Sample = colnames(abund), Simpson = simpson) %>%
  mutate(Zone = sub("INFA_(Z\\d+)_.*", "\\1", Sample),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_simpson <- kruskal.test(Simpson ~ Zone, data = simpson_df)

ggplot(simpson_df, aes(x = Zone, y = Simpson, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(simpson_df$Simpson) * 1.05,
           label = paste0("ns (p = ", round(kw_simpson$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Simpson Index (1-D) — Species Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 7: Simpson diversity index (1-D) calculated at species level per core sample (n = 3 per zone). Each dot represents one core. ns = not significant.

Results: A Kruskal-Wallis test revealed no significant difference in Simpson diversity at species level across zones (H = 1.16, df = 2, p = 0.56). Post-hoc testing was not performed. Community evenness was comparable across all three zones, consistent with the Shannon diversity result.

4.4.4 Simpson Diversity Index: Phylum Level

Simpson diversity was additionally calculated at phylum level, measuring the probability that two randomly selected individuals belong to different phyla.

Show code

simpson_phylum <- diversity(phylum_matrix, index = "simpson")

simpson_phylum_df <- data.frame(
  Sample = phylum_abund$sample,
  Simpson = simpson_phylum
) %>%
  mutate(Zone = str_extract(Sample, "Z[0-9]"),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_simpson_phylum <- kruskal.test(Simpson ~ Zone, data = simpson_phylum_df)

ggplot(simpson_phylum_df, aes(x = Zone, y = Simpson, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(simpson_phylum_df$Simpson) + 0.02,
           label = paste0("ns (p = ", round(kw_simpson_phylum$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Simpson Index (1-D) — Phylum Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 8: Simpson diversity index (1-D) calculated at phylum level per core sample (n = 3 per zone). Each dot represents one core. The Simpson index at phylum level measures the probability that two randomly selected individuals belong to different phyla. ns = not significant.

Results: Simpson diversity calculated at phylum level did not differ significantly across zones (H = 3.47, df = 2, p = 0.18). Post-hoc testing was not performed. Taken together with the species-level results, all four diversity index analyses confirm that community evenness did not differ significantly between zones at either species or phylum level.

Show code

# one-way ANOVA for phylum-level Shannon diversity
anova_shannon_phylum <- aov(Shannon ~ Zone, data = shannon_phylum_df)
summary(anova_shannon_phylum)

            Df Sum Sq Mean Sq F value Pr(>F)
Zone         2 0.1679 0.08393   0.998  0.423
Residuals    6 0.5047 0.08412

Show code

# one-way ANOVA for phylum-level Simpson diversity
anova_simpson_phylum <- aov(Simpson ~ Zone, data = simpson_phylum_df)
summary(anova_simpson_phylum)

            Df  Sum Sq Mean Sq F value Pr(>F)
Zone         2 0.06485 0.03243   1.013  0.418
Residuals    6 0.19215 0.03203

4.5 Phylum Composition: Descriptive

4.5.1 Relative Abundance by Sample and Zone

Show code

phylum_rel <- sample_long %>%
  group_by(sample, zone = str_extract(sample, "Z[0-9]"),
           replicate = str_extract(sample, "[0-9][A-Z]$"), phylum) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  group_by(sample) %>%
  mutate(rel_abundance = total / sum(total) * 100) %>%
  ungroup() %>%
  mutate(
    zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                  labels = c("Zone 1", "Zone 2", "Zone 3")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

ggplot(phylum_rel, aes(x = replicate, y = rel_abundance, fill = phylum)) +
  geom_col(position = "stack", width = 1, colour = "black", linewidth = 0.2) +
  facet_wrap(~ zone, nrow = 1, scales = "free_x") +
  scale_fill_manual(values = phylum_colours) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Relative Abundance (%)", fill = "Phylum") +
  theme_bw() +
  theme(strip.text       = element_text(face = "bold", size = 11),
        strip.background = element_rect(fill = "grey85", colour = NA),
        axis.text        = element_text(size = 9, colour = "black"),
        axis.title.y     = element_text(size = 10),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
      legend.position = "bottom",
        panel.grid       = element_blank())

Figure 9: Relative abundance (%) of each phylum per core sample grouped by zone. Each bar represents one core. Note that relative abundance may exaggerate the visual prominence of rare phyla in low-abundance samples.

4.5.2 Total Abundance by Phylum per Sample

Show code

ggplot(sample_phylum, aes(x = replicate, y = total, fill = phylum)) +
  geom_col(position = "stack", width = 1, colour = "black", linewidth = 0.2) +
  facet_wrap(~ zone, nrow = 1, scales = "free_x") +
  scale_fill_manual(values = phylum_colours) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Total Abundance", fill = "Phylum") +
  theme_bw() +
  theme(strip.text       = element_text(face = "bold", size = 11),
        strip.background = element_rect(fill = "grey85", colour = NA),
        axis.text        = element_text(size = 9, colour = "black"),
        axis.title.y     = element_text(size = 10),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
         legend.position = "bottom",
        panel.grid       = element_blank())

Figure 10: Total abundance of individuals per phylum per core sample grouped by zone. Each bar represents one core. Raw abundance is shown to avoid distortion from relative percentage calculations.

4.5.3 Species List by Zone

Show code

ggplot(species_list, aes(x = total, y = species, fill = phylum)) +
  geom_col() +
  facet_wrap(~ zone, ncol = 3, scales = "free_y") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = phylum_colours) +
  labs(x = "Total Abundance", y = "Species", fill = "Phylum") +
  theme_bw() +
  theme(axis.text.y = element_text(size = 10, face = "italic"),
        axis.text.x        = element_text(size = 8),
        axis.title         = element_text(size = 10),
        strip.text         = element_text(face = "bold", size = 12),
        strip.background   = element_rect(fill = "grey85", colour = NA),
        legend.title       = element_text(face = "bold", size = 10),
        legend.text        = element_text(face = "italic", size = 9),
         legend.position = "bottom",
        panel.grid.major.y = element_blank(),
        panel.grid.minor   = element_blank())

Show code

# increase axis text size
axis.text.y = element_text(size = 8, face = "italic")

######uipdatee 

#| fig-width: 8.27
#| fig-height: 11.69
#| out-width: "100%"
#| out-height: "100%"

ggplot(species_list, aes(x = total, y = species, fill = phylum)) +
  geom_col(width = 0.9) + 
  facet_wrap(~ zone, ncol = 3, scales = "free_y") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = phylum_colours) +
  labs(x = "Total Abundance", y = "Species", fill = "Phylum") +
  theme_bw() +
  theme(
    axis.text.y      = element_text(size = 9, face = "italic"),
    axis.text.x      = element_text(size = 10),
    axis.title       = element_text(size = 13, face = "bold"),
    strip.text       = element_text(face = "bold", size = 15),
    strip.background = element_rect(fill = "grey85", colour = NA),
    legend.title     = element_text(face = "bold", size = 11),
    legend.text      = element_text(face = "italic", size = 10),
    legend.position  = "bottom",
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank()
  )

Figure 11: Total abundance of each species per zone, ordered by total abundance within zone. Bars are coloured by phylum. Species absent from a zone are not shown in that panel.

4.5.4 Species Unique to Each Zone

Show code

unique_species <- species_list %>%
  group_by(species) %>%
  summarise(zones_present = n_distinct(zone),
            zone   = paste(zone[total > 0], collapse = ", "),
            total  = sum(total),
            phylum = first(phylum)) %>%
  filter(zones_present == 1) %>%
  arrange(zone, desc(total))

unique_species %>%
  dplyr::select(zone, species, phylum, total) %>%
  arrange(zone, desc(total)) %>%
  rename(Zone = zone, Species = species,
         Phylum = phylum, `Total Abundance` = total) %>%
  kable(caption = "Table 3. Species recorded exclusively in a single habitat zone, ordered by total abundance within zone.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(2, italic = TRUE) %>%
  pack_rows("Zone 1", 1, sum(unique_species$zone == "Zone 1")) %>%
  pack_rows("Zone 2", sum(unique_species$zone == "Zone 1") + 1,
            sum(unique_species$zone %in% c("Zone 1", "Zone 2"))) %>%
  pack_rows("Zone 3", sum(unique_species$zone %in% c("Zone 1", "Zone 2")) + 1,
            nrow(unique_species))

Table 3. Species recorded exclusively in a single habitat zone, ordered by total abundance within zone.
Zone	Species	Phylum	Total Abundance
Zone 1
Zone 1	Microdeutopus damnoniensis	Arthropoda	1
Zone 1	Scalibregma inflatum	Annelida	1
Zone 2
Zone 2	Mediomastus sp.1	Annelida	4
Zone 2	Dorvilleia sp.1	Annelida	2
Zone 2	Kurtiella bidentata	Mollusca	2
Zone 2	Urothoe marina	Arthropoda	2
Zone 2	Gari Tellinella	Mollusca	1
Zone 2	Golfingia sp.1	Annelida	1
Zone 2	Hediste diversicolor	Annelida	1
Zone 2	Heteroclymene sp.1	Annelida	1
Zone 2	Lucinoma borealis	Mollusca	1
Zone 2	Maldanidae sp.1	Annelida	1
Zone 2	Nothria conchylega	Annelida	1
Zone 2	Syllidae sp.1	Annelida	1
Zone 3
Zone 3	Microdeutopus versiculatus	Arthropoda	8
Zone 3	Platynereis dumerilii	Annelida	5
Zone 3	Dexaminidae spinosa	Arthropoda	4
Zone 3	Socarnes erythropthalmus	Arthropoda	4
Zone 3	Eurysyllis tuberculata	Annelida	3
Zone 3	Leptocheirus pilosus	Arthropoda	3
Zone 3	Achelia echinata	Arthropoda	2
Zone 3	Autolytus sp.1	Annelida	2
Zone 3	Gooddallia triangularis	Mollusca	2
Zone 3	Harmothoe sp.2	Annelida	2
Zone 3	Leptochiton cancellatus	Mollusca	2
Zone 3	Metaphoxus pectinatus	Arthropoda	2
Zone 3	Pariambus typicus	Arthropoda	2
Zone 3	Tanaidacea sp.3	Arthropoda	2
Zone 3	Terebellidae sp.2	Annelida	2
Zone 3	Aplysia punctata	Mollusca	1
Zone 3	Chondrochelia savignyi	Arthropoda	1
Zone 3	Dexaminidae thea	Arthropoda	1
Zone 3	Eteone flava	Annelida	1
Zone 3	Fabricia sp.1	Annelida	1
Zone 3	Harpinia crenulata	Arthropoda	1
Zone 3	Maldanidae sp.2	Annelida	1
Zone 3	Modiolus modiolus	Mollusca	1
Zone 3	Nephtys homburgi	Annelida	1
Zone 3	Nicomache sp.1	Annelida	1
Zone 3	Odontosyllis gibba	Annelida	1
Zone 3	Sabellidae sp.2	Annelida	1
Zone 3	Tellinidae sp.1	Mollusca	1
Zone 3	Tricolia pullus	Mollusca	1

Results: A number of species were recorded exclusively within a single zone. Zone 3 (control) contained the greatest number of zone-specific species, consistent with the higher overall species richness in the control.

4.5.5 Venn Diagram of Shared Species

Show code

z1_species <- species_list %>% filter(zone == "Zone 1", total > 0) %>%
  pull(species) %>% as.character()
z2_species <- species_list %>% filter(zone == "Zone 2", total > 0) %>%
  pull(species) %>% as.character()
z3_species <- species_list %>% filter(zone == "Zone 3", total > 0) %>%
  pull(species) %>% as.character()

ggVennDiagram(
  list("Zone 1" = z1_species, "Zone 2" = z2_species, "Zone 3" = z3_species),
  label_alpha = 0
) +
  scale_fill_gradient(low = "white", high = "#2166AC") +
  labs(fill = "Species Count") +
  theme(legend.title = element_text(face = "bold", size = 10))

Figure 12: Venn diagram showing the number of species shared between and unique to each habitat zone. Numbers in overlapping regions indicate shared species; numbers in non-overlapping regions indicate zone-specific species.

4.6 Community Composition:

4.7 PERMANOVA

A PERMANOVA (adonis2, vegan package) was used to test whether species composition differed significantly across habitat zones, using Bray-Curtis dissimilarity with 999 permutations. Homogeneity of multivariate dispersion was verified using betadisper prior to interpreting the PERMANOVA result.

Show code

perm_matrix <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

zone_vector <- perm_matrix$zone
species_mat <- perm_matrix %>%
  dplyr::select(-sample, -zone) %>%
  as.matrix()

permanova <- adonis2(species_mat ~ zone_vector, method = "bray", permutations = 999)
print(permanova)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = species_mat ~ zone_vector, permutations = 999, method = "bray")
         Df SumOfSqs      R2      F Pr(>F)   
Model     2  0.83489 0.37512 1.8009  0.006 **
Residual  6  1.39079 0.62488                 
Total     8  2.22568 1.00000                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show code

dist_matrix <- vegdist(species_mat, method = "bray")
dispersion  <- betadisper(dist_matrix, zone_vector)
permutest(dispersion)


Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     2 0.010412 0.0052057 1.6236    999  0.279
Residuals  6 0.019238 0.0032063

Show code

## tab 1
permanova_table <- data.frame(
  Source = c("Zone", "Residual", "Total", "betadisper: Zone", "betadisper: Residual"),
  Df     = c(2, 6, 8, 2, 6),
  SS     = c(0.835, 1.391, 2.226, 0.010, 0.019),
  R2     = c(0.375, 0.625, 1.000, "—", "—"),
  F      = c(1.801, "—", "—", 1.624, "—"),
  p      = c("0.006 **", "—", "—", "0.272 ns", "—")
)

kable(permanova_table,
      col.names = c("Source", "df", "SS", "R²", "F", "p-value"),
      caption = "Table X. PERMANOVA results for infaunal species composition across habitat zones based on Bray-Curtis dissimilarity with 999 permutations. betadisper tests homogeneity of multivariate dispersion. SS = sum of squares; R² = proportion of variance explained; ns = not significant.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(3, bold = TRUE) %>%
  pack_rows("PERMANOVA", 1, 3) %>%
  pack_rows("betadisper", 4, 5)

Table X. PERMANOVA results for infaunal species composition across habitat zones based on Bray-Curtis dissimilarity with 999 permutations. betadisper tests homogeneity of multivariate dispersion. SS = sum of squares; R² = proportion of variance explained; ns = not significant.
Source	df	SS	R²	F	p-value
PERMANOVA
Zone	2	0.835	0.375	1.801	0.006 **
Residual	6	1.391	0.625	—	—
Total	8	2.226	1	—	—
betadisper
betadisper: Zone	2	0.010	—	1.624	0.272 ns
betadisper: Residual	6	0.019	—	—	—

Show code

## tab 2 

permanova_table <- data.frame(
  Test       = c("PERMANOVA", "betadisper"),
  F          = c(1.80, 1.62),
  R2         = c(0.375, "—"),
  p          = c("0.006 **", "0.272 ns")
)

kable(permanova_table,
      col.names = c("Test", "F", "R²", "p-value"),
      caption = "Table X. PERMANOVA and betadisper results for infaunal species composition across habitat zones. PERMANOVA based on Bray-Curtis dissimilarity with 999 permutations.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE)

Table X. PERMANOVA and betadisper results for infaunal species composition across habitat zones. PERMANOVA based on Bray-Curtis dissimilarity with 999 permutations.
Test	F	R²	p-value
PERMANOVA	1.80	0.375	0.006 **
betadisper	1.62	—	0.272 ns

Results: PERMANOVA revealed that species composition differed significantly across habitat zones (F = 1.80, R² = 0.375, p = 0.009). Zone explains 37.5% of the variation in species composition, with the remaining 62.5% attributable to within-zone variability between samples. Homogeneity of multivariate dispersion was confirmed (p = 0.287), indicating that the PERMANOVA result reflects genuine differences in community composition rather than differences in within-group variability.

5 nMDS

Show code

# calculate nMDS ordination
set.seed(123)
nmds <- metaMDS(species_mat, distance = "bray", k = 2, trymax = 100)

Wisconsin double standardization
Run 0 stress 0.1296653 
Run 1 stress 0.1296653 
... Procrustes: rmse 2.57544e-05  max resid 5.062835e-05 
... Similar to previous best
Run 2 stress 0.1296653 
... Procrustes: rmse 2.046149e-06  max resid 3.662313e-06 
... Similar to previous best
Run 3 stress 0.1296654 
... Procrustes: rmse 5.769671e-05  max resid 0.0001188607 
... Similar to previous best
Run 4 stress 0.1296653 
... Procrustes: rmse 9.324728e-06  max resid 1.922477e-05 
... Similar to previous best
Run 5 stress 0.1296653 
... Procrustes: rmse 5.86126e-06  max resid 9.805432e-06 
... Similar to previous best
Run 6 stress 0.1783099 
Run 7 stress 0.1577492 
Run 8 stress 0.1296653 
... Procrustes: rmse 1.137076e-06  max resid 2.109404e-06 
... Similar to previous best
Run 9 stress 0.1822321 
Run 10 stress 0.1296653 
... Procrustes: rmse 3.072587e-06  max resid 6.178088e-06 
... Similar to previous best
Run 11 stress 0.1662196 
Run 12 stress 0.1577492 
Run 13 stress 0.1789585 
Run 14 stress 0.1662196 
Run 15 stress 0.2429611 
Run 16 stress 0.1296653 
... Procrustes: rmse 1.055402e-06  max resid 1.571138e-06 
... Similar to previous best
Run 17 stress 0.1710196 
Run 18 stress 0.1296653 
... Procrustes: rmse 7.435892e-06  max resid 1.457155e-05 
... Similar to previous best
Run 19 stress 0.3209238 
Run 20 stress 0.1296653 
... Procrustes: rmse 1.338029e-06  max resid 2.291495e-06 
... Similar to previous best
*** Best solution repeated 10 times

Show code

# extract scores and add zone labels
nmds_scores <- as.data.frame(scores(nmds, display = "sites")) %>%
  mutate(zone = zone_vector,
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

# calculate zone centroids for labels
centroids <- nmds_scores %>%
  group_by(zone) %>%
  summarise(NMDS1 = mean(NMDS1), NMDS2 = mean(NMDS2))

# plot
ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_point(size = 4, alpha = 0.8) +
  stat_ellipse(aes(group = zone), type = "t", level = 0.95,
               linetype = "dashed", linewidth = 0.8, alpha = 0.3,
               geom = "polygon", fill = NA) +
  geom_label(data = centroids, aes(label = zone),
             fontface = "bold", size = 3.5, alpha = 0.8) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Show code

############

library(ggforce)

#| fig-width: 7
#| fig-height: 6

set.seed(123)
nmds <- metaMDS(species_mat, distance = "bray", k = 2, trymax = 100)

Wisconsin double standardization
Run 0 stress 0.1296653 
Run 1 stress 0.1296653 
... Procrustes: rmse 2.57544e-05  max resid 5.062835e-05 
... Similar to previous best
Run 2 stress 0.1296653 
... Procrustes: rmse 2.046149e-06  max resid 3.662313e-06 
... Similar to previous best
Run 3 stress 0.1296654 
... Procrustes: rmse 5.769671e-05  max resid 0.0001188607 
... Similar to previous best
Run 4 stress 0.1296653 
... Procrustes: rmse 9.324728e-06  max resid 1.922477e-05 
... Similar to previous best
Run 5 stress 0.1296653 
... Procrustes: rmse 5.86126e-06  max resid 9.805432e-06 
... Similar to previous best
Run 6 stress 0.1783099 
Run 7 stress 0.1577492 
Run 8 stress 0.1296653 
... Procrustes: rmse 1.137076e-06  max resid 2.109404e-06 
... Similar to previous best
Run 9 stress 0.1822321 
Run 10 stress 0.1296653 
... Procrustes: rmse 3.072587e-06  max resid 6.178088e-06 
... Similar to previous best
Run 11 stress 0.1662196 
Run 12 stress 0.1577492 
Run 13 stress 0.1789585 
Run 14 stress 0.1662196 
Run 15 stress 0.2429611 
Run 16 stress 0.1296653 
... Procrustes: rmse 1.055402e-06  max resid 1.571138e-06 
... Similar to previous best
Run 17 stress 0.1710196 
Run 18 stress 0.1296653 
... Procrustes: rmse 7.435892e-06  max resid 1.457155e-05 
... Similar to previous best
Run 19 stress 0.3209238 
Run 20 stress 0.1296653 
... Procrustes: rmse 1.338029e-06  max resid 2.291495e-06 
... Similar to previous best
*** Best solution repeated 10 times

Show code

nmds_scores <- as.data.frame(scores(nmds, display = "sites")) %>%
  mutate(zone = zone_vector,
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

# calculate centroids for label placement inside ellipses
centroids <- nmds_scores %>%
  group_by(zone) %>%
  summarise(NMDS1 = mean(NMDS1), NMDS2 = mean(NMDS2))

ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_mark_ellipse(aes(group = zone),
                    alpha = 0.15,
                    linewidth = 0.8,
                    con.cap = 0) +
  geom_point(size = 4, alpha = 0.9) +
  geom_text(data = centroids,
            aes(x = NMDS1, y = NMDS2, label = zone, colour = zone),
            fontface = "bold",
            size = 3.5,
            inherit.aes = FALSE) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Show code

####


ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_mark_ellipse(aes(group = zone),
                    alpha = 0.15,
                    linewidth = 0.8,
                    con.cap = 0) +
  geom_point(size = 4, alpha = 0.9) +
  geom_text(data = centroids,
            aes(x = NMDS1, y = NMDS2, label = zone, colour = zone),
            fontface = "bold", size = 3.5,
            inherit.aes = FALSE) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  xlim(-0.9, 0.6) +   # expand x axis
  ylim(-0.6, 0.5) +   # expand y axis
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

5.1 Phylum Composition: Bias-Reduced Multinomial Logistic Regression

A bias-reduced multinomial logistic regression (brmultinom, brglm2 package) was fitted with phylum composition as the response variable and habitat zone as the predictor. Zone 3 (control) was set as the reference category. A likelihood ratio test (LRT) was used to assess overall model significance.

Show code

mlr_data <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(sample, zone, phylum) %>%
  summarise(count = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = phylum, values_from = count, values_fill = 0) %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))

response_matrix <- mlr_data %>%
  dplyr::select(Annelida, Arthropoda, Mollusca, Echinodermata) %>%
  as.matrix()

mlr_model <- brmultinom(response_matrix ~ zone, data = mlr_data, maxit = 999)
summary(mlr_model)

Call:
brmultinom(formula = response_matrix ~ zone, data = mlr_data, 
    maxit = 999)

Coefficients:
              (Intercept)     zoneZ1      zoneZ2
Arthropoda     -0.7196229 -1.2262873 -1.66865696
Mollusca       -3.0981939 -0.3140535 -0.03730036
Echinodermata  -3.8454083  2.0425990 -1.48731028

Std. Errors:
              (Intercept)    zoneZ1    zoneZ2
Arthropoda      0.1208138 0.4431481 0.3626326
Mollusca        0.3327022 0.9070168 0.5887044
Echinodermata   0.4778803 0.6236551 1.5075411

Residual Deviance: 99.24516 
Log-likelihood: -49.62258 
AIC: 117.2452

Type of estimator: AS_mixed (mixed bias-reducing adjusted score equations)
Number of Fisher Scoring iterations: 4

Show code

mlr_null   <- brmultinom(response_matrix ~ 1, data = mlr_data, maxit = 999)
lrt_result <- lrtest(mlr_null, mlr_model)
print(lrt_result)

Likelihood ratio test

Model 1: response_matrix ~ 1
Model 2: response_matrix ~ zone
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   3 -77.144                         
2   9 -49.623  6 55.044  4.543e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results: The likelihood ratio test confirmed that habitat zone significantly predicted phylum composition (χ² = 55.04, df = 6, p < 0.001). Relative to Zone 3 (control), Arthropoda were less prevalent in both Zone 1 (β = -1.23) and Zone 2 (β = -1.67). Echinodermata showed higher relative abundance in Zone 1 (β = 2.04). Annelida increased in predicted probability from Zone 3 to Zone 2, suggesting increasing polychaete dominance in the restoration zones.

5.1.1 Model Predictions: Phylum Composition by Zone

Show code

preds  <- ggpredict(mlr_model, terms = "zone")
pred_df <- as.data.frame(preds) %>%
  rename(zone = x, phylum = response.level, probability = predicted) %>%
  mutate(
    zone   = factor(zone, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

ggplot(pred_df, aes(x = zone, y = probability,
                     colour = phylum, group = phylum)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_colour_manual(values = phylum_colours) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(0, 1)) +
  labs(x = "Habitat Zone", y = "Predicted Probability",
       caption = "brmultinom; LRT: χ² = 55.04, df = 6, p < 0.001. Zone 3 = reference category.") +
  theme_bw() +
  theme(axis.text        = element_text(size = 9, colour = "black"),
        axis.title       = element_text(size = 10),
        strip.text       = element_text(face = "italic", size = 10),
        strip.background = element_rect(fill = "grey85", colour = NA),
        legend.position  = "none",
        plot.caption     = element_text(size = 8, hjust = 0, colour = "grey40"))

Show code

## graph 2

library(ggforce)

pred_df <- as.data.frame(preds) %>%
  rename(zone = x, phylum = response.level, probability = predicted) %>%
  mutate(
    zone   = factor(zone, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

# create numeric x for smooth curves
pred_df$x_num <- as.numeric(pred_df$zone)

ggplot(pred_df, aes(x = x_num, y = probability,
                     colour = phylum, group = phylum)) +
  geom_smooth(method = "loess", se = FALSE, linewidth = 1.2, span = 1.5) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:3,
                     labels = c("Zone 3", "Zone 1", "Zone 2")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(0, 1)) +
  scale_colour_manual(
    values = phylum_colours,
    labels = c("Annelida", "Arthropoda", "Mollusca", "Echinodermata")
  ) +
  labs(x = "Habitat Zone",
       y = "Predicted Probability",
       colour = "Phylum",
       caption = "brmultinom; LRT: \u03c7\u00b2 = 55.04, df = 6, p < 0.001. Zone 3 = reference category.") +
  theme_bw() +
  theme(axis.text        = element_text(size = 10, colour = "black"),
        axis.title       = element_text(size = 11),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.caption     = element_text(size = 8, hjust = 0, colour = "grey40"))

`geom_smooth()` using formula = 'y ~ x'

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.

Figure 13: Model-predicted probabilities of each phylum’s relative abundance across habitat zones from a bias-reduced multinomial logistic regression. Zone 3 is the reference category. Each panel shows one phylum.

6 Research Question 2: Rapid Biodiversity Assessment

Can amphipod or polychaete diversity predict total infaunal biodiversity? To avoid autocorrelation, total diversity metrics were calculated excluding the focal predictor group. When amphipod diversity was the predictor, total diversity excluded amphipods, and vice versa for polychaetes.

6.1 Data Preparation

Show code

# amphipod diversity per sample
amph_wide <- sample_long %>%
  dplyr::filter(order == "Amphipoda") %>%
  group_by(sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

amph_mat <- amph_wide %>% dplyr::select(-sample) %>% as.matrix()

amphipod_div <- amph_wide %>%
  mutate(amph_richness = specnumber(amph_mat),
         amph_shannon  = diversity(amph_mat, index = "shannon")) %>%
  dplyr::select(sample, amph_richness, amph_shannon)

# polychaete diversity per sample
poly_wide <- sample_long %>%
  dplyr::filter(order == "Polychaeta") %>%
  group_by(sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

poly_mat <- poly_wide %>% dplyr::select(-sample) %>% as.matrix()

poly_div <- poly_wide %>%
  mutate(poly_richness = specnumber(poly_mat),
         poly_shannon  = diversity(poly_mat, index = "shannon")) %>%
  dplyr::select(sample, poly_richness, poly_shannon)

# total diversity EXCLUDING amphipods
excl_amph_wide <- sample_long %>%
  dplyr::filter(order != "Amphipoda") %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

excl_amph_mat <- excl_amph_wide %>% dplyr::select(-sample, -zone) %>% as.matrix()

total_excl_amph <- excl_amph_wide %>%
  mutate(total_sp_richness_excl_amph = specnumber(excl_amph_mat),
         total_shannon_excl_amph     = diversity(excl_amph_mat, index = "shannon")) %>%
  dplyr::select(sample, zone, total_sp_richness_excl_amph, total_shannon_excl_amph)

# total diversity EXCLUDING polychaetes
excl_poly_wide <- sample_long %>%
  dplyr::filter(order != "Polychaeta") %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

excl_poly_mat <- excl_poly_wide %>% dplyr::select(-sample, -zone) %>% as.matrix()

total_excl_poly <- excl_poly_wide %>%
  mutate(total_sp_richness_excl_poly = specnumber(excl_poly_mat),
         total_shannon_excl_poly     = diversity(excl_poly_mat, index = "shannon")) %>%
  dplyr::select(sample, total_sp_richness_excl_poly, total_shannon_excl_poly)

# join into single dataframe
plot_df <- total_excl_amph %>%
  left_join(amphipod_div,   by = "sample") %>%
  left_join(poly_div,       by = "sample") %>%
  left_join(total_excl_poly, by = "sample") %>%
  mutate(zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

#######

# percentage of total individuals that are amphipods and polychaetes
sample_long %>%
  mutate(group = case_when(
    order == "Amphipoda"  ~ "Amphipoda",
    order == "Polychaeta" ~ "Polychaeta",
    TRUE ~ "Other"
  )) %>%
  group_by(group) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  mutate(percentage = round(total / sum(total) * 100, 1))

# A tibble: 3 × 3
  group      total percentage
  <chr>      <dbl>      <dbl>
1 Amphipoda    105       21  
2 Other         43        8.6
3 Polychaeta   352       70.4

Show code

# percentage of total species that are amphipods and polychaetes
sample_long %>%
  dplyr::select(species, order) %>%
  distinct() %>%
  mutate(group = case_when(
    order == "Amphipoda"  ~ "Amphipoda",
    order == "Polychaeta" ~ "Polychaeta",
    TRUE ~ "Other"
  )) %>%
  group_by(group) %>%
  summarise(n_species = n(), .groups = "drop") %>%
  mutate(percentage = round(n_species / sum(n_species) * 100, 1))

# A tibble: 3 × 3
  group      n_species percentage
  <chr>          <int>      <dbl>
1 Amphipoda         18       25  
2 Other             17       23.6
3 Polychaeta        37       51.4

6.2 Regression Plots

Show code

p1 <- ggplot(plot_df, aes(x = amph_richness,
                           y = total_sp_richness_excl_amph, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.825, p = 0.001", size = 3.5, fontface = "italic") +
  labs(x = "Amphipod Species Richness",
       y = "Total Species Richness (excl. amph.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p2 <- ggplot(plot_df, aes(x = amph_shannon,
                           y = total_shannon_excl_amph, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.678, p = 0.006", size = 3.5, fontface = "italic") +
  labs(x = "Amphipod Shannon Diversity",
       y = "Total Shannon Diversity (excl. amph.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p3 <- ggplot(plot_df, aes(x = poly_richness,
                           y = total_sp_richness_excl_poly, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.499, p = 0.033", size = 3.5, fontface = "italic") +
  labs(x = "Polychaete Species Richness",
       y = "Total Species Richness (excl. poly.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p4 <- ggplot(plot_df, aes(x = poly_shannon,
                           y = total_shannon_excl_poly, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.611, p = 0.013", size = 3.5, fontface = "italic") +
  labs(x = "Polychaete Shannon Diversity",
       y = "Total Shannon Diversity (excl. poly.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

(p1 + p2) / (p3 + p4) +
  plot_annotation(
    title = "Amphipod and Polychaete Diversity as Predictors of Total Infaunal Diversity",
    theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12))
  ) +
  plot_layout(guides = "collect") &
  scale_colour_manual(values = zone_colours) &
  theme(legend.position = "right")

Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Figure 14: Linear regression plots showing amphipod (top row) and polychaete (bottom row) diversity as predictors of total infaunal diversity. Total diversity metrics exclude the focal predictor group to avoid autocorrelation. Points are coloured by zone. R² and p-values from linear regression are shown in the top right of each panel.

6.3 Statistical Tests

Show code

lm_amph_rich3 <- lm(total_sp_richness_excl_amph ~ amph_richness, data = plot_df)
lm_amph_shan3 <- lm(total_shannon_excl_amph      ~ amph_shannon,  data = plot_df)
lm_poly_rich3 <- lm(total_sp_richness_excl_poly  ~ poly_richness, data = plot_df)
lm_poly_shan3 <- lm(total_shannon_excl_poly       ~ poly_shannon,  data = plot_df)

cor_amph_rich_p <- cor.test(plot_df$amph_richness, plot_df$total_sp_richness_excl_amph, method = "pearson")
cor_amph_shan_p <- cor.test(plot_df$amph_shannon,  plot_df$total_shannon_excl_amph,     method = "pearson")
cor_poly_rich_p <- cor.test(plot_df$poly_richness, plot_df$total_sp_richness_excl_poly, method = "pearson")
cor_poly_shan_p <- cor.test(plot_df$poly_shannon,  plot_df$total_shannon_excl_poly,     method = "pearson")

cor_amph_rich_k <- cor.test(plot_df$amph_richness, plot_df$total_sp_richness_excl_amph, method = "kendall")

Warning in cor.test.default(plot_df$amph_richness,
plot_df$total_sp_richness_excl_amph, : Cannot compute exact p-value with ties

Show code

cor_amph_shan_k <- cor.test(plot_df$amph_shannon,  plot_df$total_shannon_excl_amph,     method = "kendall")

Warning in cor.test.default(plot_df$amph_shannon,
plot_df$total_shannon_excl_amph, : Cannot compute exact p-value with ties

Show code

cor_poly_rich_k <- cor.test(plot_df$poly_richness, plot_df$total_sp_richness_excl_poly, method = "kendall")

Warning in cor.test.default(plot_df$poly_richness,
plot_df$total_sp_richness_excl_poly, : Cannot compute exact p-value with ties

Show code

cor_poly_shan_k <- cor.test(plot_df$poly_shannon,  plot_df$total_shannon_excl_poly,     method = "kendall")


## one table
combined_table <- data.frame(
  Metric = c("Species Richness", "", "Shannon Diversity", ""),
  Relationship = c(
    "Amphipod richness → Total richness (excl. amph.)",
    "Polychaete richness → Total richness (excl. poly.)",
    "Amphipod Shannon → Total Shannon (excl. amph.)",
    "Polychaete Shannon → Total Shannon (excl. poly.)"
  ),
  R2        = c(get_lm_r2(lm_amph_rich3), get_lm_r2(lm_poly_rich3),
                get_lm_r2(lm_amph_shan3), get_lm_r2(lm_poly_shan3)),
  F_stat    = c(get_lm_f(lm_amph_rich3),  get_lm_f(lm_poly_rich3),
                get_lm_f(lm_amph_shan3),  get_lm_f(lm_poly_shan3)),
  LM_p      = round(c(get_lm_p(lm_amph_rich3), get_lm_p(lm_poly_rich3),
                       get_lm_p(lm_amph_shan3), get_lm_p(lm_poly_shan3)), 3),
  Pearson_r = round(c(cor_amph_rich_p$estimate, cor_poly_rich_p$estimate,
                       cor_amph_shan_p$estimate, cor_poly_shan_p$estimate), 3),
  Pearson_p = round(c(cor_amph_rich_p$p.value,  cor_poly_rich_p$p.value,
                       cor_amph_shan_p$p.value,  cor_poly_shan_p$p.value),  3),
  Kendall_t = round(c(cor_amph_rich_k$estimate, cor_poly_rich_k$estimate,
                       cor_amph_shan_k$estimate, cor_poly_shan_k$estimate), 3),
  Kendall_p = round(c(cor_amph_rich_k$p.value,  cor_poly_rich_k$p.value,
                       cor_amph_shan_k$p.value,  cor_poly_shan_k$p.value),  3)
)

kable(combined_table,
      col.names = c("Metric", "Relationship", "R²", "F", "LM p",
                    "Pearson r", "Pearson p", "Kendall τ", "Kendall p"),
      caption = "Table 4. Amphipod and polychaete diversity as predictors of total infaunal diversity. Total diversity excludes the focal predictor group in each model.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(2, bold = TRUE) %>%
  row_spec(1, background = "#E8F4FD") %>%
  row_spec(2, background = "#FDF2E9") %>%
  row_spec(3, background = "#E8F4FD") %>%
  row_spec(4, background = "#FDF2E9") %>%
  pack_rows("Species Richness", 1, 2) %>%
  pack_rows("Shannon Diversity", 3, 4)

Table 4. Amphipod and polychaete diversity as predictors of total infaunal diversity. Total diversity excludes the focal predictor group in each model.
Metric	Relationship	R²	F	LM p	Pearson r	Pearson p	Kendall τ	Kendall p
Species Richness
Species Richness	Amphipod richness → Total richness (excl. amph.)	0.825	32.97	0.001	0.908	0.001	0.647	0.019
	Polychaete richness → Total richness (excl. poly.)	0.499	6.96	0.033	0.706	0.033	0.493	0.072
Shannon Diversity
Shannon Diversity	Amphipod Shannon → Total Shannon (excl. amph.)	0.678	14.75	0.006	0.823	0.006	0.667	0.014
	Polychaete Shannon → Total Shannon (excl. poly.)	0.611	11.00	0.013	0.782	0.013	0.833	0.001

Results: Amphipod species richness was a significant predictor of total infaunal species richness (R² = 0.825, p = 0.001), confirmed by Kendall’s rank correlation (τ = 0.647, p = 0.019). Amphipod Shannon diversity was also a significant predictor of total Shannon diversity (R² = 0.678, p = 0.006). Polychaete richness (R² = 0.499, p = 0.033) and Shannon diversity (R² = 0.611, p = 0.013) were also significant predictors, though with consistently weaker relationships than amphipods across all three tests.

7 Sampling Bias: Core Penetration Depth

Core penetration depth was assessed as a potential source of sampling bias. One sample (INFA_Z1_3A) had insufficient penetration recorded as “N/A (<10 mm)” and was assigned a depth of 10 mm based on the recorded core volume of 0.0785 litres.

Show code

lm_depth_rich  <- lm(total_sp_richness ~ core_depth, data = div_depth)
lm_depth_shan  <- lm(total_shannon     ~ core_depth, data = div_depth)
lm_depth_abund <- lm(total_abundance   ~ core_depth, data = div_depth)

cor_depth_rich_p  <- cor.test(div_depth$core_depth, div_depth$total_sp_richness, method = "pearson")
cor_depth_shan_p  <- cor.test(div_depth$core_depth, div_depth$total_shannon,     method = "pearson")
cor_depth_abund_p <- cor.test(div_depth$core_depth, div_depth$total_abundance,   method = "pearson")

cor_depth_rich_k  <- cor.test(div_depth$core_depth, div_depth$total_sp_richness, method = "kendall")

Warning in cor.test.default(div_depth$core_depth, div_depth$total_sp_richness,
: Cannot compute exact p-value with ties

Show code

cor_depth_shan_k  <- cor.test(div_depth$core_depth, div_depth$total_shannon,     method = "kendall")
cor_depth_abund_k <- cor.test(div_depth$core_depth, div_depth$total_abundance,   method = "kendall")

Warning in cor.test.default(div_depth$core_depth, div_depth$total_abundance, :
Cannot compute exact p-value with ties

Show code

depth_table <- data.frame(
  Relationship = c("Core depth → Species richness",
                   "Core depth → Shannon diversity",
                   "Core depth → Total abundance"),
  R2        = c(get_lm_r2(lm_depth_rich), get_lm_r2(lm_depth_shan),
                get_lm_r2(lm_depth_abund)),
  F_stat    = c(get_lm_f(lm_depth_rich),  get_lm_f(lm_depth_shan),
                get_lm_f(lm_depth_abund)),
  LM_p      = round(c(get_lm_p(lm_depth_rich), get_lm_p(lm_depth_shan),
                       get_lm_p(lm_depth_abund)), 3),
  Pearson_r = round(c(cor_depth_rich_p$estimate, cor_depth_shan_p$estimate,
                       cor_depth_abund_p$estimate), 3),
  Pearson_p = round(c(cor_depth_rich_p$p.value,  cor_depth_shan_p$p.value,
                       cor_depth_abund_p$p.value),  3),
  Kendall_t = round(c(cor_depth_rich_k$estimate, cor_depth_shan_k$estimate,
                       cor_depth_abund_k$estimate), 3),
  Kendall_p = round(c(cor_depth_rich_k$p.value,  cor_depth_shan_k$p.value,
                       cor_depth_abund_k$p.value),  3)
)

kable(depth_table,
      col.names = c("Relationship", "R²", "F", "LM p-value",
                    "Pearson r", "Pearson p", "Kendall τ", "Kendall p"),
      caption = "Table 6. Core penetration depth as a predictor of infaunal diversity metrics.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE)

Table 6. Core penetration depth as a predictor of infaunal diversity metrics.
Relationship	R²	F	LM p-value	Pearson r	Pearson p	Kendall τ	Kendall p
Core depth → Species richness	0.010	0.07	0.798	0.100	0.798	0.254	0.345
Core depth → Shannon diversity	0.022	0.16	0.705	0.148	0.705	0.167	0.612
Core depth → Total abundance	0.000	0.00	0.982	-0.009	0.982	0.085	0.753

Show code

p_depth1 <- ggplot(div_depth, aes(x = core_depth, y = total_sp_richness,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² = 0.010, p = 0.798)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Species Richness",
       colour = "Zone") +
  theme_classic()

p_depth2 <- ggplot(div_depth, aes(x = core_depth, y = total_shannon,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² = 0.022, p = 0.705)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Shannon Diversity",
       colour = "Zone") +
  theme_classic()

p_depth3 <- ggplot(div_depth, aes(x = core_depth, y = total_abundance,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² < 0.001, p = 0.982)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Abundance",
       colour = "Zone") +
  theme_classic()

(p_depth1 + p_depth2) / (p_depth3 + plot_spacer()) +
  plot_annotation(
    title = "Core Penetration Depth as a Predictor of Infaunal Diversity",
    theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 13))
  ) +
  plot_layout(guides = "collect") &
  theme(legend.position = "right")

`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Figure 15: Linear regression plots of core penetration depth against total species richness, Shannon diversity and total abundance. Points are coloured by zone. All relationships were non-significant (ns).

Results: No significant relationship was detected between core penetration depth and total species richness (R² = 0.010, p = 0.798), Shannon diversity (R² = 0.022, p = 0.705) or total abundance (R² < 0.001, p = 0.982). These results indicate that variation in core penetration depth did not constitute a significant source of sampling bias, and all nine samples were retained in subsequent analyses.

8 Summary of Statistical Results

Show code

summary_table <- data.frame(
  Test = c(
    "Shapiro-Wilk (Z1)", "Shapiro-Wilk (Z2)", "Shapiro-Wilk (Z3)",
    "Kruskal-Wallis: Total abundance (per sample)",
    "Dunn's post-hoc: Z1 vs Z2",
    "Dunn's post-hoc: Z1 vs Z3",
    "Dunn's post-hoc: Z2 vs Z3",
    "GLM (NB): Total abundance ~ zone",
    "GLM (NB): Species richness ~ zone",
    "Kruskal-Wallis: Shannon diversity (H') — species level",
    "Kruskal-Wallis: Shannon diversity (H') — phylum level",
    "Kruskal-Wallis: Simpson diversity (1-D) — species level",
    "Kruskal-Wallis: Simpson diversity (1-D) — phylum level",
    "PERMANOVA: Species composition",
    "betadisper: Dispersion homogeneity",
    "brmultinom + LRT: Phylum composition",
    "Core depth → Species richness",
    "Core depth → Shannon diversity",
    "Core depth → Total abundance"
  ),
  Result = c(
    "W = 0.385, p < 0.001",
    "W = 0.447, p < 0.001",
    "W = 0.540, p < 0.001",
    "H = 6.88, df = 2, p = 0.032",
    "p = 0.347 (ns)",
    "p = 0.013",
    "p = 0.233 (ns)",
    "Z1: β = -1.706, p < 0.001; Z2: β = -1.030, p < 0.001",
    "Z1: β = -1.134, p < 0.001; Z2: β = -0.637, p < 0.001",
    "H = 1.87, df = 2, p = 0.39 (ns)",
    "H = 1.69, df = 2, p = 0.43 (ns)",
    "H = 1.16, df = 2, p = 0.56 (ns)",
    "H = 3.47, df = 2, p = 0.18 (ns)",
    "F = 1.80, R² = 0.375, p = 0.009",
    "p = 0.287 (ns — assumption met)",
    "χ² = 55.04, df = 6, p < 0.001",
    "R² = 0.010, p = 0.798 (ns)",
    "R² = 0.022, p = 0.705 (ns)",
    "R² < 0.001, p = 0.982 (ns)"
  ),
  Conclusion = c(
    "Non-normal — non-parametric tests required",
    "Non-normal — non-parametric tests required",
    "Non-normal — non-parametric tests required",
    "Significant difference in abundance across zones",
    "Z1 and Z2 not significantly different",
    "Z3 significantly different from Z1",
    "Z2 not significantly different from Z3",
    "Both restoration zones have significantly lower abundance than control",
    "Both restoration zones have significantly lower richness than control",
    "No significant difference in Shannon diversity",
    "No significant difference in Shannon diversity at phylum level",
    "No significant difference in Simpson diversity",
    "No significant difference in Simpson diversity at phylum level",
    "Species composition differs significantly across zones",
    "Equal dispersion — PERMANOVA result is valid",
    "Phylum composition differs significantly across zones",
    "Core depth not a significant source of sampling bias",
    "Core depth not a significant source of sampling bias",
    "Core depth not a significant source of sampling bias"
  )
)

kable(summary_table,
      col.names = c("Test", "Result", "Conclusion"),
      caption = "Table 7. Summary of all statistical tests performed.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE)

Table 7. Summary of all statistical tests performed.
Test	Result	Conclusion
Shapiro-Wilk (Z1)	W = 0.385, p < 0.001	Non-normal — non-parametric tests required
Shapiro-Wilk (Z2)	W = 0.447, p < 0.001	Non-normal — non-parametric tests required
Shapiro-Wilk (Z3)	W = 0.540, p < 0.001	Non-normal — non-parametric tests required
Kruskal-Wallis: Total abundance (per sample)	H = 6.88, df = 2, p = 0.032	Significant difference in abundance across zones
Dunn's post-hoc: Z1 vs Z2	p = 0.347 (ns)	Z1 and Z2 not significantly different
Dunn's post-hoc: Z1 vs Z3	p = 0.013	Z3 significantly different from Z1
Dunn's post-hoc: Z2 vs Z3	p = 0.233 (ns)	Z2 not significantly different from Z3
GLM (NB): Total abundance ~ zone	Z1: β = -1.706, p < 0.001; Z2: β = -1.030, p < 0.001	Both restoration zones have significantly lower abundance than control
GLM (NB): Species richness ~ zone	Z1: β = -1.134, p < 0.001; Z2: β = -0.637, p < 0.001	Both restoration zones have significantly lower richness than control
Kruskal-Wallis: Shannon diversity (H') — species level	H = 1.87, df = 2, p = 0.39 (ns)	No significant difference in Shannon diversity
Kruskal-Wallis: Shannon diversity (H') — phylum level	H = 1.69, df = 2, p = 0.43 (ns)	No significant difference in Shannon diversity at phylum level
Kruskal-Wallis: Simpson diversity (1-D) — species level	H = 1.16, df = 2, p = 0.56 (ns)	No significant difference in Simpson diversity
Kruskal-Wallis: Simpson diversity (1-D) — phylum level	H = 3.47, df = 2, p = 0.18 (ns)	No significant difference in Simpson diversity at phylum level
PERMANOVA: Species composition	F = 1.80, R² = 0.375, p = 0.009	Species composition differs significantly across zones
betadisper: Dispersion homogeneity	p = 0.287 (ns — assumption met)	Equal dispersion — PERMANOVA result is valid
brmultinom + LRT: Phylum composition	χ² = 55.04, df = 6, p < 0.001	Phylum composition differs significantly across zones
Core depth → Species richness	R² = 0.010, p = 0.798 (ns)	Core depth not a significant source of sampling bias
Core depth → Shannon diversity	R² = 0.022, p = 0.705 (ns)	Core depth not a significant source of sampling bias
Core depth → Total abundance	R² < 0.001, p = 0.982 (ns)	Core depth not a significant source of sampling bias

9 species in zone

Show code

# species richness and total abundance per zone
sample_long %>%
  group_by(zone) %>%
  summarise(
    total_abundance = sum(abundance),
    species_richness = n_distinct(species[abundance > 0])
  )

# A tibble: 3 × 3
  zone  total_abundance species_richness
  <chr>           <dbl>            <int>
1 Z1                 59               20
2 Z2                116               33
3 Z3                325               58