Infaunal Biodiversity Analysis

Galway Bay Native Oyster Restoration Project

Author

Aoife Taylor

Published

April 6, 2026

1 Introduction

This analysis investigates patterns in infaunal biodiversity across three habitat zones within the Galway Bay Native Oyster Restoration Project. Since 2017, scallop shell has been deployed within the St George’s Bed, Inner Galway Bay, to provide settlement substrate for the European flat oyster (Ostrea edulis). Three habitat zones were delineated for biodiversity assessment:

Zone 1 (Z1): Scallop shell mounds (>20 cm relief; early reef structures)
Zone 2 (Z2): Scattered scallop shell
Zone 3 (Z3): Unrestored control area

Three replicate cores were collected per zone (n = 9 total). This analysis addresses two research questions:

What is the impact of oyster restoration projects on infaunal biodiversity?
Can amphipod or polychaete biodiversity predict total infaunal biodiversity (Rapid Biodiversity Assessment)?

2 Data Import and Preparation

Show code

# load all packages
library(tidyverse)
library(ggplot2)
library(dplyr)
library(tidyr)
library(vegan)
library(knitr)
library(kableExtra)
library(dunn.test)
library(brglm2)
library(lmtest)
library(ggeffects)
library(patchwork)
library(MASS)
library(DHARMa)
library(ggVennDiagram)
library(ggforce)
library(conflicted)
conflicts_prefer(dplyr::select)
conflicts_prefer(dplyr::filter)

# import infauna data
data <- read_csv(
  file = "infauna_copy(R sheet).csv",
  locale = locale(encoding = "latin1")
)

# remove empty rows and columns
data <- data[rowSums(is.na(data)) != ncol(data),
             colSums(is.na(data)) != nrow(data)]

# remove taxa too small to be retained on 1mm sieve
data <- data %>%
  filter(!grepl("Oligochaeta|Nematoda sp.1|Nemertea sp.1", species))

# standardise taxonomic names
data <- data %>%
  mutate(
    species = case_when(
      species == "aplysia punctata"        ~ "Aplysia punctata",
      species == "gooddallia triangularis"  ~ "Gooddallia triangularis",
      species == "heteroclymene sp.1"      ~ "Heteroclymene sp.1",
      species == "metaphoxus Fultoni"      ~ "Metaphoxus fultoni",
      species == "syllidae sp.1"           ~ "Syllidae sp.1",
      species == "Golfingia  sp.1"         ~ "Golfingia sp.1",
      species == "Nothria conchylega "     ~ "Nothria conchylega",
      species == "Mediomastua sp.1"        ~ "Mediomastus sp.1",
      TRUE ~ species
    ),
    genus = case_when(
      genus == "aplysia"        ~ "Aplysia",
      genus == "gooddallia"     ~ "Gooddallia",
      genus == "heteroclymene"  ~ "Heteroclymene",
      genus == "urothoe"        ~ "Urothoe",
      genus == "Golfingia "     ~ "Golfingia",
      genus == "Spirobranchus " ~ "Spirobranchus",
      genus == "Mediomastua"    ~ "Mediomastus",
      TRUE ~ genus
    ),
    family = case_when(
      family == "maldanidae"    ~ "Maldanidae",
      family == "tellinidae"    ~ "Tellinidae",
      family == "Serpulidae "   ~ "Serpulidae",
      family == "Golfingiidae " ~ "Golfingiidae",
      family == "Eunicidea"     ~ "Eunicidae",
      family == "Nereidinae"    ~ "Nereididae",
      TRUE ~ family
    ),
    order = case_when(
      order == "amphipod"    ~ "Amphipoda",
      order == "Tanaid"      ~ "Tanaidacea",
      order == "ophiuroids"  ~ "Ophiurida",
      order == "gastropod"   ~ "Gastropoda",
      order == "pycnogonids" ~ "Pycnogonida",
      TRUE ~ order
    ),
    phylum = case_when(
      phylum == "Anthropoda" ~ "Arthropoda",
      TRUE ~ phylum
    )
  )

# zone colours
zone_colours <- c("Zone 1" = "#2166AC", "Zone 2" = "#F4A582", "Zone 3" = "#4DAC26")
zone_colours_z <- c("Z1" = "#2166AC", "Z2" = "#F4A582", "Z3" = "#4DAC26")

# phylum colours
phylum_colours <- c(
  "Annelida"      = "#E8732A",
  "Arthropoda"    = "#9B59B6",
  "Mollusca"      = "#1ABC9C",
  "Echinodermata" = "#89CFF0"
)

# create long format data
sample_long <- data %>%
  pivot_longer(
    cols = starts_with("INFA"),
    names_to = "sample",
    values_to = "abundance"
  ) %>%
  mutate(zone = str_extract(sample, "Z[0-9]"))

# per-sample abundance summary
sample_abundance <- sample_long %>%
  group_by(sample, zone) %>%
  summarise(total_abundance = sum(abundance), .groups = "drop")

# total diversity metrics per sample
total_div <- sample_long %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0) %>%
  mutate(
    total_sp_richness = specnumber(dplyr::select(., -sample, -zone)),
    total_shannon     = diversity(as.matrix(dplyr::select(., -sample, -zone)),
                                  index = "shannon"),
    total_abundance   = rowSums(dplyr::select(., -sample, -zone))
  ) %>%
  dplyr::select(sample, zone, total_sp_richness, total_shannon, total_abundance)

# zone-level species matrix for Shapiro-Wilk tests
zone_data <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample_type", values_to = "value") %>%
  mutate(zone = case_when(
    grepl("INFA_Z1", sample_type) ~ "zone1",
    grepl("INFA_Z2", sample_type) ~ "zone2",
    grepl("INFA_Z3", sample_type) ~ "zone3"
  )) %>%
  group_by(phylum, order, family, genus, species, zone) %>%
  summarise(total_value = sum(value), .groups = "drop") %>%
  pivot_wider(names_from = zone, values_from = total_value)

# per-sample phylum summary
sample_phylum <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(
    zone      = str_extract(sample, "Z[0-9]"),
    replicate = str_extract(sample, "[0-9][A-Z]$"),
    zone      = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")),
    phylum    = factor(phylum, levels = c("Annelida", "Arthropoda",
                                          "Mollusca", "Echinodermata"))
  ) %>%
  group_by(sample, zone, replicate, phylum) %>%
  summarise(total = sum(abundance), .groups = "drop")

# species list per zone
species_list <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]"),
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3"))) %>%
  group_by(zone, species) %>%
  summarise(total = sum(abundance), .groups = "drop") %>%
  filter(total > 0) %>%
  mutate(phylum = data$phylum[match(species, data$species)],
         species = fct_reorder(species, total))

# phylum-level abundance
phylum_abund <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  group_by(sample, phylum) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = phylum, values_from = abundance, values_fill = 0)

phylum_matrix <- phylum_abund %>%
  dplyr::select(-sample) %>%
  as.matrix()

# import and clean core depth data
core <- read_csv("infauna_copy(cores).csv") %>%
  dplyr::select("SiteID", "EstCoreDepthmm", "VolumeLtr", "species total", "abundance")
core <- core[rowSums(is.na(core)) != ncol(core),
             colSums(is.na(core)) != nrow(core)]
core <- core[-11, ]

core_clean <- core %>%
  dplyr::select(SiteID, EstCoreDepthmm) %>%
  filter(!is.na(SiteID)) %>%
  mutate(EstCoreDepthmm = case_when(
    SiteID == "INFA_Z1_3A" ~ "10",
    TRUE ~ EstCoreDepthmm
  )) %>%
  filter(!grepl("N/A", EstCoreDepthmm)) %>%
  mutate(sample = SiteID, core_depth = as.numeric(EstCoreDepthmm)) %>%
  dplyr::select(sample, core_depth)

div_depth <- total_div %>% left_join(core_clean, by = "sample")

# helper functions for regression tables
get_lm_p  <- function(m) summary(m)$coefficients[2, 4]
get_lm_r2 <- function(m) round(summary(m)$r.squared, 3)
get_lm_f  <- function(m) round(summary(m)$fstatistic[1], 2)

3 Sampling Assessment

3.1 Species Accumulation Curves

Species accumulation curves were constructed using the specaccum function (method = “exact”, vegan package), which calculates the mathematically expected cumulative species richness averaged across all possible orderings of the samples within each zone. The shaded ribbon represents ± 1 standard deviation.

Show code

species_matrix <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(zone, sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

zones <- c("Z1", "Z2", "Z3")
accum_list <- lapply(zones, function(z) {
  mat <- species_matrix %>%
    filter(zone == z) %>%
    dplyr::select(-zone, -sample) %>%
    as.matrix()
  sp <- specaccum(mat, method = "exact")
  data.frame(zone = z, samples = sp$sites, richness = sp$richness, sd = sp$sd)
})

Warning in cor(x > 0): the standard deviation is zero
Warning in cor(x > 0): the standard deviation is zero
Warning in cor(x > 0): the standard deviation is zero

Show code

accum_df <- bind_rows(accum_list)

ggplot(accum_df, aes(x = samples, y = richness, colour = zone, fill = zone)) +
  geom_ribbon(aes(ymin = richness - sd, ymax = richness + sd),
              alpha = 0.15, colour = NA) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:3) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  scale_fill_manual(values = zone_colours_z,
                    labels = c("Zone 1", "Zone 2", "Zone 3")) +
  labs(x = "Number of Samples", y = "Cumulative Species Richness",
       colour = "Zone", fill = "Zone") +
  theme_classic() +
  theme(axis.text = element_text(size = 9, colour = "black"),
        axis.title = element_text(size = 10),
        legend.title = element_text(face = "bold", size = 10),
        legend.text = element_text(size = 9),
        panel.grid = element_blank())

Figure 15: Cumulative species richness plotted against number of samples per zone. Lines show mean expected richness; shaded ribbons show ± 1 standard deviation across all possible sample orderings. Zone 1 = shell mounds (blue); Zone 2 = scattered shell (peach); Zone 3 = control (green).

Results: All three curves show a continuously rising trajectory with no sign of levelling off at three samples, indicating that species richness had not reached an asymptote by the end of sampling. Zone 3 (control) showed the highest cumulative richness across all sample sizes. These results indicate that more sampling would likely detect additional species in all zones.

4 Research Question 1: Impact of Oyster Restoration on Infaunal Biodiversity

4.1 Normality Testing

Shapiro-Wilk tests were applied to species-level abundance data within each zone to determine whether parametric or non-parametric tests were appropriate.

Show code

shapiro_z1 <- shapiro.test(zone_data$zone1)
shapiro_z2 <- shapiro.test(zone_data$zone2)
shapiro_z3 <- shapiro.test(zone_data$zone3)

normality_table <- data.frame(
  Zone    = c("Zone 1", "Zone 2", "Zone 3"),
  W       = round(c(shapiro_z1$statistic, shapiro_z2$statistic,
                    shapiro_z3$statistic), 4),
  p_value = formatC(c(shapiro_z1$p.value, shapiro_z2$p.value,
                      shapiro_z3$p.value), format = "e", digits = 3)
)

kable(normality_table,
      col.names = c("Zone", "W statistic", "p-value"),
      caption = "Table 3. Shapiro-Wilk tests for normality of species-level abundance data per zone.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"), full_width = FALSE)

Table 3. Shapiro-Wilk tests for normality of species-level abundance data per zone.
Zone	W statistic	p-value
Zone 1	0.3884	9.153e-16
Zone 2	0.3347	2.044e-16
Zone 3	0.4729	1.186e-14

Results: Shapiro-Wilk tests revealed significant departures from normality in all three zones (p < 0.001). All subsequent statistical tests therefore used non-parametric methods.

4.2 Phylum Composition

4.2.1 Total Abundance by Phylum per Sample

Show code

ggplot(sample_phylum, aes(x = replicate, y = total, fill = phylum)) +
  geom_col(position = "stack", width = 1, colour = "black", linewidth = 0.2) +
  facet_wrap(~ zone, nrow = 1, scales = "free_x") +
  scale_fill_manual(values = phylum_colours) +
  scale_y_continuous(expand = c(0, 0)) +
  labs(x = NULL, y = "Total Abundance", fill = "Phylum") +
  theme_bw() +
  theme(strip.text       = element_text(face = "bold", size = 11),
        strip.background = element_rect(fill = "grey85", colour = NA),
        axis.text        = element_text(size = 9, colour = "black"),
        axis.title.y     = element_text(size = 10),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
        legend.position  = "bottom",
        panel.grid       = element_blank())

Figure 4: Total abundance of individuals per phylum for each core sample, grouped by zone. Each bar represents a single core (e.g., Zone 1: core 2A is the leftmost bar), with colours indicating the relative contribution of each phylum.

4.2.2 Species Composition by Zone

Show code

ggplot(species_list, aes(x = total, y = species, fill = phylum)) +
  geom_col(width = 0.9) +
  facet_wrap(~ zone, ncol = 3, scales = "free_y") +
  scale_x_continuous(expand = c(0, 0)) +
  scale_fill_manual(values = phylum_colours) +
  labs(x = "Total Abundance", y = "Species", fill = "Phylum") +
  theme_bw() +
  theme(
    axis.text.y        = element_text(size = 9, face = "italic"),
    axis.text.x        = element_text(size = 10),
    axis.title         = element_text(size = 13, face = "bold"),
    strip.text         = element_text(face = "bold", size = 15),
    strip.background   = element_rect(fill = "grey85", colour = NA),
    legend.title       = element_text(face = "bold", size = 11),
    legend.text        = element_text(face = "italic", size = 10),
    legend.position    = "bottom",
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank()
  )

Figure 5: Total abundance of each species per zone, ordered by total abundance within zones. Bars are coloured by phylum to illustrate differences in taxonomic composition.

4.2.3 Species Unique to Each Zone

Show code

unique_species <- species_list %>%
  group_by(species) %>%
  summarise(zones_present = n_distinct(zone),
            zone   = paste(zone[total > 0], collapse = ", "),
            total  = sum(total),
            phylum = first(phylum)) %>%
  filter(zones_present == 1) %>%
  arrange(zone, desc(total))

unique_species %>%
  dplyr::select(zone, species, phylum, total) %>%
  arrange(zone, desc(total)) %>%
  rename(Zone = zone, Species = species,
         Phylum = phylum, `Total Abundance` = total) %>%
  kable(caption = "Table 2. Species recorded exclusively in a single habitat zone, ordered by total abundance within zone.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(2, italic = TRUE) %>%
  pack_rows("Zone 1", 1, sum(unique_species$zone == "Zone 1")) %>%
  pack_rows("Zone 2", sum(unique_species$zone == "Zone 1") + 1,
            sum(unique_species$zone %in% c("Zone 1", "Zone 2"))) %>%
  pack_rows("Zone 3", sum(unique_species$zone %in% c("Zone 1", "Zone 2")) + 1,
            nrow(unique_species))

Table 2. Species recorded exclusively in a single habitat zone, ordered by total abundance within zone.
Zone	Species	Phylum	Total Abundance
Zone 1
Zone 1	Microdeutopus damnoniensis	Arthropoda	1
Zone 1	Scalibregma inflatum	Annelida	1
Zone 2
Zone 2	Mediomastus sp.1	Annelida	4
Zone 2	Dorvilleia sp.1	Annelida	2
Zone 2	Kurtiella bidentata	Mollusca	2
Zone 2	Urothoe marina	Arthropoda	2
Zone 2	Gari Tellinella	Mollusca	1
Zone 2	Golfingia sp.1	Annelida	1
Zone 2	Hediste diversicolor	Annelida	1
Zone 2	Heteroclymene sp.1	Annelida	1
Zone 2	Lucinoma borealis	Mollusca	1
Zone 2	Maldanidae sp.1	Annelida	1
Zone 2	Nothria conchylega	Annelida	1
Zone 2	Syllidae sp.1	Annelida	1
Zone 3
Zone 3	Microdeutopus versiculatus	Arthropoda	8
Zone 3	Platynereis dumerilii	Annelida	5
Zone 3	Dexaminidae spinosa	Arthropoda	4
Zone 3	Socarnes erythropthalmus	Arthropoda	4
Zone 3	Eurysyllis tuberculata	Annelida	3
Zone 3	Leptocheirus pilosus	Arthropoda	3
Zone 3	Achelia echinata	Arthropoda	2
Zone 3	Autolytus sp.1	Annelida	2
Zone 3	Gooddallia triangularis	Mollusca	2
Zone 3	Harmothoe sp.2	Annelida	2
Zone 3	Leptochiton cancellatus	Mollusca	2
Zone 3	Metaphoxus pectinatus	Arthropoda	2
Zone 3	Pariambus typicus	Arthropoda	2
Zone 3	Tanaidacea sp.3	Arthropoda	2
Zone 3	Terebellidae sp.2	Annelida	2
Zone 3	Aplysia punctata	Mollusca	1
Zone 3	Chondrochelia savignyi	Arthropoda	1
Zone 3	Dexaminidae thea	Arthropoda	1
Zone 3	Eteone flava	Annelida	1
Zone 3	Fabricia sp.1	Annelida	1
Zone 3	Harpinia crenulata	Arthropoda	1
Zone 3	Maldanidae sp.2	Annelida	1
Zone 3	Modiolus modiolus	Mollusca	1
Zone 3	Nephtys homburgi	Annelida	1
Zone 3	Nicomache sp.1	Annelida	1
Zone 3	Odontosyllis gibba	Annelida	1
Zone 3	Sabellidae sp.2	Annelida	1
Zone 3	Tellinidae sp.1	Mollusca	1
Zone 3	Tricolia pullus	Mollusca	1

Results: Zone 3 (control) contained the greatest number of zone-specific species, consistent with its higher overall species richness.

4.2.4 Venn Diagram of Shared Species

Show code

z1_species <- species_list %>% filter(zone == "Zone 1", total > 0) %>%
  pull(species) %>% as.character()
z2_species <- species_list %>% filter(zone == "Zone 2", total > 0) %>%
  pull(species) %>% as.character()
z3_species <- species_list %>% filter(zone == "Zone 3", total > 0) %>%
  pull(species) %>% as.character()

ggVennDiagram(
  list("Zone 1" = z1_species, "Zone 2" = z2_species, "Zone 3" = z3_species),
  label_alpha = 0
) +
  scale_fill_gradient(low = "white", high = "#2166AC") +
  labs(fill = "Species Count") +
  theme(legend.title = element_text(face = "bold", size = 10))

Figure 6: Venn diagram showing the number of species shared between and unique to each habitat zone. Numbers in overlapping regions indicate shared species; numbers in non-overlapping regions indicate zone-specific species.

4.3 Total Abundance per Core

Total abundance was calculated as the sum of all individuals per core sample. The Kruskal-Wallis test was applied to per-sample totals (n = 9) as the correct unit of replication.

Show code

kw_abund_sample <- kruskal.test(total_abundance ~ zone, data = sample_abundance)

dunn_abund_sample <- dunn.test(sample_abundance$total_abundance,
                                sample_abundance$zone,
                                method = "bonferroni")

  Kruskal-Wallis rank sum test

data: x and group
Kruskal-Wallis chi-squared = 6.8796, df = 2, p-value = 0.03

                   Dunn's Pairwise Comparison of x by group                   
                                 (Bonferroni)                                 
Col Mean-│
Row Mean │         Z1         Z2
─────────┼──────────────────────
      Z2 │  -1.197569
         │     0.3466 
         │
      Z3 │  -2.619684  -1.422114
         │     0.0132*    0.2325 

α = 0.05
Reject Ho if p ≤ α/2, where p = Pr(Z ≥ |z|)

Show code

sig_labels <- data.frame(
  zone  = c("Z1", "Z2", "Z3"),
  label = c("a", "ab", "b"),
  y_pos = rep(max(sample_abundance$total_abundance) * 1.1, 3)
)

ggplot(sample_abundance, aes(x = zone, y = total_abundance, fill = zone)) +
  geom_boxplot(outlier.shape = 19, outlier.size = 1.5) +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  geom_text(data = sig_labels,
            aes(x = zone, y = y_pos, label = label),
            inherit.aes = FALSE, size = 5, fontface = "bold") +
  scale_fill_manual(values = zone_colours_z) +
  scale_x_discrete(labels = c("Z1" = "Zone 1", "Z2" = "Zone 2", "Z3" = "Zone 3")) +
  labs(x = "Zone", y = "Total Abundance per Core") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11),
        plot.margin = margin(t = 20, r = 10, b = 10, l = 10))

Figure 13: Total abundance of all infaunal individuals per core sample across zones (n = 3 per zone). Each dot represents one core. Letters indicate statistically homogeneous groups based on Dunn’s post-hoc test with Bonferroni correction (a = Zone 1; ab = Zone 2; b = Zone 3).

Results: A Kruskal-Wallis test revealed a significant difference in total abundance across zones (H = 6.88, df = 2, p = 0.032). Zone 3 differed significantly from Zone 1 (p = 0.013). Zone 2 was not significantly different from either Zone 1 (p = 0.347) or Zone 3 (p = 0.233).

4.4 Generalised Linear Models: Abundance and Species Richness

A Poisson GLM was initially fitted and assessed for overdispersion. Significant overdispersion was detected; a Negative Binomial GLM was therefore fitted.

Show code

sample_div <- total_div %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))

glm_poisson <- glm(total_abundance ~ zone, data = sample_div, family = poisson)
simulateResiduals(glm_poisson, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.9829763 0.04791643 0.4901769 0.03824786 0.3542827 0.9817204 0.9503528 0.7571552 0.006356984

Show code

testDispersion(glm_poisson)


    DHARMa nonparametric dispersion test via sd of residuals fitted vs.
    simulated

data:  simulationOutput
dispersion = 3.3357, p-value = 0.016
alternative hypothesis: two.sided

Results: A Poisson GLM confirmed overdispersion (dispersion test: p = 0.016), confirming violation of the Poisson equal mean-variance assumption. A Negative Binomial GLM was therefore fitted.

4.4.1 GLM 1: Total Abundance by Zone

Show code

glm_abund <- glm.nb(total_abundance ~ zone, data = sample_div)
summary(glm_abund)


Call:
glm.nb(formula = total_abundance ~ zone, data = sample_div, init.theta = 29.30877834, 
    link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   4.6852     0.1202  38.976  < 2e-16 ***
zoneZ1       -1.7063     0.2068  -8.250  < 2e-16 ***
zoneZ2       -1.0302     0.1856  -5.551 2.84e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(29.3088) family taken to be 1)

    Null deviance: 89.720  on 8  degrees of freedom
Residual deviance:  9.585  on 6  degrees of freedom
AIC: 76.451

Number of Fisher Scoring iterations: 1

              Theta:  29.3 
          Std. Err.:  24.2 

 2 x log-likelihood:  -68.451

Show code

simulateResiduals(glm_abund, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.8972407 0.07831351 0.4332401 0.08440482 0.3987502 0.9058376 0.8257076 0.6615099 0.123393

Show code

pred_abund <- ggpredict(glm_abund, terms = "zone") %>%
  as.data.frame() %>%
  mutate(x = factor(x, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")))

ggplot(pred_abund, aes(x = x, y = predicted, colour = x)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  labs(x = "Zone", y = "Predicted Total Abundance") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 9: Model-predicted total abundance per zone with 95% confidence intervals from a Negative Binomial GLM. Zone 3 is the reference category.

Results: Zone 1 had significantly lower abundance than Zone 3 (β = -1.706, z = -8.250, p < 0.001), corresponding to approximately 19 individuals per core compared to 108 in Zone 3. Zone 2 also had significantly lower abundance (β = -1.030, z = -5.551, p < 0.001), at approximately 36% of Zone 3. DHARMa diagnostics confirmed adequate model fit (KS p = 0.749; dispersion p = 0.752; outlier p = 1.000).

4.4.2 GLM 2: Species Richness by Zone

Show code

glm_rich <- glm.nb(total_sp_richness ~ zone, data = sample_div)
summary(glm_rich)


Call:
glm.nb(formula = total_sp_richness ~ zone, data = sample_div, 
    init.theta = 180.0094501, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)   3.3673     0.1155  29.148  < 2e-16 ***
zoneZ1       -1.1337     0.2256  -5.024 5.05e-07 ***
zoneZ2       -0.6373     0.1922  -3.316 0.000914 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(180.0095) family taken to be 1)

    Null deviance: 38.4836  on 8  degrees of freedom
Residual deviance:  8.0991  on 6  degrees of freedom
AIC: 58.327

Number of Fisher Scoring iterations: 1

              Theta:  180 
          Std. Err.:  855 

 2 x log-likelihood:  -50.327

Show code

simulateResiduals(glm_rich, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.7017479 0.2649475 0.6477857 0.3480546 0.156752 0.9545328 0.6808527 0.855776 0.05934819

Show code

pred_rich <- ggpredict(glm_rich, terms = "zone") %>%
  as.data.frame() %>%
  mutate(x = factor(x, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")))

ggplot(pred_rich, aes(x = x, y = predicted, colour = x)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0.2, linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  labs(x = "Zone", y = "Predicted Species Richness") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 10: Model-predicted species richness per zone with 95% confidence intervals from a Negative Binomial GLM. Zone 3 is the reference category.

Results: Zone 3 had a predicted mean richness of 29 species per core. Zone 1 had significantly lower richness (β = -1.134, z = -5.024, p < 0.001), at approximately 9 species per core. Zone 2 also had significantly lower richness (β = -0.637, z = -3.316, p < 0.001), at approximately 15 species per core. DHARMa diagnostics confirmed adequate model fit (KS p = 0.782; dispersion p = 0.584; outlier p = 1.000).

Show code

glm_table <- data.frame(
  Response  = c("Total abundance", "", "",
                "Species richness", "", ""),
  Predictor = c("Intercept (Z3)", "Zone 1", "Zone 2",
                "Intercept (Z3)", "Zone 1", "Zone 2"),
  beta      = c(4.685, -1.706, -1.030, 3.367, -1.134, -0.637),
  SE        = c(0.120, 0.207, 0.186, 0.116, 0.226, 0.192),
  z         = c(38.976, -8.250, -5.551, 29.148, -5.024, -3.316),
  p         = c("< 0.001", "< 0.001", "< 0.001",
                "< 0.001", "< 0.001", "0.001")
)

4.5 Diversity Indices

4.5.1 Shannon Diversity Index: Species Level

Show code

abund <- dplyr::select(data, contains("INFA"))
shannon <- diversity(t(as.matrix(abund)), index = "shannon")

shannon_df <- data.frame(Sample = colnames(abund), Shannon = shannon) %>%
  mutate(Zone = sub("INFA_(Z\\d+)_.*", "\\1", Sample),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_shannon <- kruskal.test(Shannon ~ Zone, data = shannon_df)

ggplot(shannon_df, aes(x = Zone, y = Shannon, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(shannon_df$Shannon) * 1.05,
           label = paste0("ns (p = ", round(kw_shannon$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Shannon Index (H') — Species Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 11: Shannon–Wiener diversity index (H’) at species level per zone (n = 3 per zone). ns = not significant (p = 0.39). Each dot represents one core.

Results: No significant difference in Shannon diversity at species level across zones (H = 1.87, df = 2, p = 0.39).

4.5.2 Simpson Diversity Index: Species Level

Show code

simpson <- diversity(t(as.matrix(abund)), index = "simpson")

simpson_df <- data.frame(Sample = colnames(abund), Simpson = simpson) %>%
  mutate(Zone = sub("INFA_(Z\\d+)_.*", "\\1", Sample),
         Zone = factor(Zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

kw_simpson <- kruskal.test(Simpson ~ Zone, data = simpson_df)

ggplot(simpson_df, aes(x = Zone, y = Simpson, fill = Zone)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, size = 3, alpha = 0.7) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = 2, y = max(simpson_df$Simpson) * 1.05,
           label = paste0("ns (p = ", round(kw_simpson$p.value, 2), ")"),
           size = 3.5, fontface = "italic") +
  labs(x = "Zone", y = "Simpson Index (1-D) — Species Level") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 12: Simpson diversity index (1-D) at species level per zone (n = 3 per zone). ns = not significant (p = 0.56). Each dot represents one core.

Results: No significant difference in Simpson diversity at species level across zones (H = 1.16, df = 2, p = 0.56). Community evenness was comparable across all three zones.

4.6 Community Composition: PERMANOVA and nMDS

A PERMANOVA (adonis2, vegan package) was used to test whether species composition differed significantly across habitat zones, using Bray-Curtis dissimilarity with 999 permutations. Homogeneity of multivariate dispersion was verified using betadisper prior to interpreting the PERMANOVA result.

Show code

perm_matrix <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

zone_vector <- perm_matrix$zone
species_mat <- perm_matrix %>%
  dplyr::select(-sample, -zone) %>%
  as.matrix()

permanova <- adonis2(species_mat ~ zone_vector, method = "bray", permutations = 999)
print(permanova)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = species_mat ~ zone_vector, permutations = 999, method = "bray")
         Df SumOfSqs      R2      F Pr(>F)   
Model     2  0.83489 0.37512 1.8009   0.01 **
Residual  6  1.39079 0.62488                 
Total     8  2.22568 1.00000                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show code

dist_matrix <- vegdist(species_mat, method = "bray")
dispersion  <- betadisper(dist_matrix, zone_vector)
permutest(dispersion)


Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)
Groups     2 0.010412 0.0052057 1.6236    999  0.263
Residuals  6 0.019238 0.0032063

Show code

permanova_table <- data.frame(
  Test = c("PERMANOVA", "betadisper"),
  F    = c(1.80, 1.62),
  R2   = c(0.375, "—"),
  p    = c("0.009 *", "0.287 ns")
)

kable(permanova_table,
      col.names = c("Test", "F", "R²", "p-value"),
      caption = "Table 1. PERMANOVA and betadisper results for infaunal species composition across habitat zones. PERMANOVA based on Bray-Curtis dissimilarity with 999 permutations.") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE)

Table 1. PERMANOVA and betadisper results for infaunal species composition across habitat zones. PERMANOVA based on Bray-Curtis dissimilarity with 999 permutations.
Test	F	R²	p-value
PERMANOVA	1.80	0.375	0.009 *
betadisper	1.62	—	0.287 ns

Results: PERMANOVA revealed that species composition differed significantly across habitat zones (F = 1.80, R² = 0.375, p = 0.009). Zone explains 37.5% of the variation in species composition. Homogeneity of multivariate dispersion was confirmed (p = 0.287), indicating that the PERMANOVA result reflects genuine compositional differences between zones.

4.6.1 nMDS Ordination

Show code

set.seed(123)
nmds <- metaMDS(species_mat, distance = "bray", k = 2, trymax = 100)

Wisconsin double standardization
Run 0 stress 0.1296653 
Run 1 stress 0.1296653 
... Procrustes: rmse 2.57544e-05  max resid 5.062835e-05 
... Similar to previous best
Run 2 stress 0.1296653 
... Procrustes: rmse 2.046149e-06  max resid 3.662313e-06 
... Similar to previous best
Run 3 stress 0.1296654 
... Procrustes: rmse 5.769671e-05  max resid 0.0001188607 
... Similar to previous best
Run 4 stress 0.1296653 
... Procrustes: rmse 9.324728e-06  max resid 1.922477e-05 
... Similar to previous best
Run 5 stress 0.1296653 
... Procrustes: rmse 5.86126e-06  max resid 9.805432e-06 
... Similar to previous best
Run 6 stress 0.1783099 
Run 7 stress 0.1577492 
Run 8 stress 0.1296653 
... Procrustes: rmse 1.137076e-06  max resid 2.109404e-06 
... Similar to previous best
Run 9 stress 0.1822321 
Run 10 stress 0.1296653 
... Procrustes: rmse 3.072587e-06  max resid 6.178088e-06 
... Similar to previous best
Run 11 stress 0.1662196 
Run 12 stress 0.1577492 
Run 13 stress 0.1789585 
Run 14 stress 0.1662196 
Run 15 stress 0.2429611 
Run 16 stress 0.1296653 
... Procrustes: rmse 1.055402e-06  max resid 1.571138e-06 
... Similar to previous best
Run 17 stress 0.1710196 
Run 18 stress 0.1296653 
... Procrustes: rmse 7.435892e-06  max resid 1.457155e-05 
... Similar to previous best
Run 19 stress 0.3209238 
Run 20 stress 0.1296653 
... Procrustes: rmse 1.338029e-06  max resid 2.291495e-06 
... Similar to previous best
*** Best solution repeated 10 times

Show code

nmds_scores <- as.data.frame(scores(nmds, display = "sites")) %>%
  mutate(zone = zone_vector,
         zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

centroids <- nmds_scores %>%
  group_by(zone) %>%
  summarise(NMDS1 = mean(NMDS1), NMDS2 = mean(NMDS2))

ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_point(size = 4, alpha = 0.8) +
  stat_ellipse(aes(group = zone), type = "t", level = 0.95,
               linetype = "dashed", linewidth = 0.8, alpha = 0.3,
               geom = "polygon", fill = NA) +
  geom_label(data = centroids, aes(label = zone),
             fontface = "bold", size = 3.5, alpha = 0.8) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Show code

ggplot(nmds_scores, aes(x = NMDS1, y = NMDS2, colour = zone, fill = zone)) +
  geom_mark_ellipse(aes(group = zone),
                    alpha = 0.15,
                    linewidth = 0.8,
                    con.cap = 0) +
  geom_point(size = 4, alpha = 0.9) +
  geom_text(data = centroids,
            aes(x = NMDS1, y = NMDS2, label = zone, colour = zone),
            fontface = "bold", size = 3.5,
            inherit.aes = FALSE) +
  scale_colour_manual(values = zone_colours) +
  scale_fill_manual(values = zone_colours) +
  xlim(-0.9, 0.6) +
  ylim(-0.6, 0.5) +
  annotate("text", x = Inf, y = -Inf, hjust = 1.1, vjust = -0.5,
           label = paste0("Stress = ", round(nmds$stress, 3)),
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "nMDS1", y = "nMDS2") +
  theme_classic() +
  theme(legend.position = "none",
        axis.text  = element_text(size = 10, colour = "black"),
        axis.title = element_text(size = 11))

Figure 7: Non-metric multidimensional scaling (nMDS) ordination of infaunal community composition across habitat zones based on Bray-Curtis dissimilarity (stress = 0.13). Each point represents one core sample (n = 3 per zone). Ellipses are drawn around zone groupings for visualisation purposes only and do not represent statistical confidence regions given the small sample size (n = 3 per zone).

4.7 Phylum Composition: Bias-Reduced Multinomial Logistic Regression

A bias-reduced multinomial logistic regression (brmultinom, brglm2 package) was fitted with phylum composition as the response variable and habitat zone as the predictor. Zone 3 (control) was set as the reference category. A likelihood ratio test (LRT) was used to assess overall model significance.

Show code

mlr_data <- data %>%
  pivot_longer(cols = starts_with("INFA"),
               names_to = "sample", values_to = "abundance") %>%
  mutate(zone = str_extract(sample, "Z[0-9]")) %>%
  group_by(sample, zone, phylum) %>%
  summarise(count = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = phylum, values_from = count, values_fill = 0) %>%
  mutate(zone = factor(zone, levels = c("Z3", "Z1", "Z2")))

response_matrix <- mlr_data %>%
  dplyr::select(Annelida, Arthropoda, Mollusca, Echinodermata) %>%
  as.matrix()

mlr_model <- brmultinom(response_matrix ~ zone, data = mlr_data, maxit = 999)
summary(mlr_model)

Call:
brmultinom(formula = response_matrix ~ zone, data = mlr_data, 
    maxit = 999)

Coefficients:
              (Intercept)     zoneZ1      zoneZ2
Arthropoda     -0.7196229 -1.2262873 -1.66865696
Mollusca       -3.0981939 -0.3140535 -0.03730036
Echinodermata  -3.8454083  2.0425990 -1.48731028

Std. Errors:
              (Intercept)    zoneZ1    zoneZ2
Arthropoda      0.1208138 0.4431481 0.3626326
Mollusca        0.3327022 0.9070168 0.5887044
Echinodermata   0.4778803 0.6236551 1.5075411

Residual Deviance: 99.24516 
Log-likelihood: -49.62258 
AIC: 117.2452

Type of estimator: AS_mixed (mixed bias-reducing adjusted score equations)
Number of Fisher Scoring iterations: 4

Show code

mlr_null   <- brmultinom(response_matrix ~ 1, data = mlr_data, maxit = 999)
lrt_result <- lrtest(mlr_null, mlr_model)
print(lrt_result)

Likelihood ratio test

Model 1: response_matrix ~ 1
Model 2: response_matrix ~ zone
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   3 -77.144                         
2   9 -49.623  6 55.044  4.543e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Results: The likelihood ratio test confirmed that habitat zone significantly predicted phylum composition (χ² = 55.04, df = 6, p < 0.001). Relative to Zone 3, Arthropoda were less prevalent in both Zone 1 (β = -1.23) and Zone 2 (β = -1.67). Echinodermata showed higher relative abundance in Zone 1 (β = 2.04). Annelida increased in predicted probability from Zone 3 to Zone 2, indicating increasing polychaete dominance in the restoration zones.

4.7.1 Model Predictions: Phylum Composition by Zone

Show code

preds  <- ggpredict(mlr_model, terms = "zone")
pred_df <- as.data.frame(preds) %>%
  rename(zone = x, phylum = response.level, probability = predicted) %>%
  mutate(
    zone   = factor(zone, levels = c("Z3", "Z1", "Z2"),
                    labels = c("Zone 3", "Zone 1", "Zone 2")),
    phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
                                        "Mollusca", "Echinodermata"))
  )

pred_df$x_num <- as.numeric(pred_df$zone)

ggplot(pred_df, aes(x = x_num, y = probability,
                     colour = phylum, group = phylum)) +
  geom_smooth(method = "loess", se = FALSE, linewidth = 1.2, span = 1.5) +
  geom_point(size = 3) +
  scale_x_continuous(breaks = 1:3,
                     labels = c("Zone 3", "Zone 1", "Zone 2")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                     limits = c(0, 1)) +
  scale_colour_manual(
    values = phylum_colours,
    labels = c("Annelida", "Arthropoda", "Mollusca", "Echinodermata")
  ) +
  labs(x = "Habitat Zone",
       y = "Predicted Probability",
       colour = "Phylum",
       caption = "brmultinom; LRT: χ² = 55.04, df = 6, p < 0.001. Zone 3 = reference category.") +
  theme_bw() +
  theme(axis.text        = element_text(size = 10, colour = "black"),
        axis.title       = element_text(size = 11),
        legend.title     = element_text(face = "bold", size = 10),
        legend.text      = element_text(face = "italic", size = 9),
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        plot.caption     = element_text(size = 8, hjust = 0, colour = "grey40"))

`geom_smooth()` using formula = 'y ~ x'

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.

Figure 8: Model-predicted probabilities of each phylum’s relative abundance across habitat zones from a bias-reduced multinomial logistic regression. Zone 3 is the reference category.

5 Research Question 2: Rapid Biodiversity Assessment

Can amphipod or polychaete diversity predict total infaunal biodiversity? To avoid autocorrelation, total diversity metrics were calculated excluding the focal predictor group.

5.1 Data Preparation

Show code

# amphipod diversity per sample
amph_wide <- sample_long %>%
  dplyr::filter(order == "Amphipoda") %>%
  group_by(sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

amph_mat <- amph_wide %>% dplyr::select(-sample) %>% as.matrix()

amphipod_div <- amph_wide %>%
  mutate(amph_richness = specnumber(amph_mat),
         amph_shannon  = diversity(amph_mat, index = "shannon")) %>%
  dplyr::select(sample, amph_richness, amph_shannon)

# polychaete diversity per sample
poly_wide <- sample_long %>%
  dplyr::filter(order == "Polychaeta") %>%
  group_by(sample, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

poly_mat <- poly_wide %>% dplyr::select(-sample) %>% as.matrix()

poly_div <- poly_wide %>%
  mutate(poly_richness = specnumber(poly_mat),
         poly_shannon  = diversity(poly_mat, index = "shannon")) %>%
  dplyr::select(sample, poly_richness, poly_shannon)

# total diversity EXCLUDING amphipods
excl_amph_wide <- sample_long %>%
  dplyr::filter(order != "Amphipoda") %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

excl_amph_mat <- excl_amph_wide %>% dplyr::select(-sample, -zone) %>% as.matrix()

total_excl_amph <- excl_amph_wide %>%
  mutate(total_sp_richness_excl_amph = specnumber(excl_amph_mat),
         total_shannon_excl_amph     = diversity(excl_amph_mat, index = "shannon")) %>%
  dplyr::select(sample, zone, total_sp_richness_excl_amph, total_shannon_excl_amph)

# total diversity EXCLUDING polychaetes
excl_poly_wide <- sample_long %>%
  dplyr::filter(order != "Polychaeta") %>%
  group_by(sample, zone, species) %>%
  summarise(abundance = sum(abundance), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = abundance, values_fill = 0)

excl_poly_mat <- excl_poly_wide %>% dplyr::select(-sample, -zone) %>% as.matrix()

total_excl_poly <- excl_poly_wide %>%
  mutate(total_sp_richness_excl_poly = specnumber(excl_poly_mat),
         total_shannon_excl_poly     = diversity(excl_poly_mat, index = "shannon")) %>%
  dplyr::select(sample, total_sp_richness_excl_poly, total_shannon_excl_poly)

# join into single dataframe
plot_df <- total_excl_amph %>%
  left_join(amphipod_div,    by = "sample") %>%
  left_join(poly_div,        by = "sample") %>%
  left_join(total_excl_poly, by = "sample") %>%
  mutate(zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
                       labels = c("Zone 1", "Zone 2", "Zone 3")))

5.2 Regression Plots

Show code

p1 <- ggplot(plot_df, aes(x = amph_richness,
                           y = total_sp_richness_excl_amph, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.825, p = 0.001", size = 3.5, fontface = "italic") +
  labs(x = "Amphipod Species Richness",
       y = "Total Species Richness (excl. amph.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p2 <- ggplot(plot_df, aes(x = amph_shannon,
                           y = total_shannon_excl_amph, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.678, p = 0.006", size = 3.5, fontface = "italic") +
  labs(x = "Amphipod Shannon Diversity",
       y = "Total Shannon Diversity (excl. amph.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p3 <- ggplot(plot_df, aes(x = poly_richness,
                           y = total_sp_richness_excl_poly, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.499, p = 0.033", size = 3.5, fontface = "italic") +
  labs(x = "Polychaete Species Richness",
       y = "Total Species Richness (excl. poly.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

p4 <- ggplot(plot_df, aes(x = poly_shannon,
                           y = total_shannon_excl_poly, colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "R² = 0.611, p = 0.013", size = 3.5, fontface = "italic") +
  labs(x = "Polychaete Shannon Diversity",
       y = "Total Shannon Diversity (excl. poly.)", colour = "Zone") +
  theme_classic() + theme(legend.position = "none")

(p1 + p2) / (p3 + p4) +
  plot_annotation(
    title = "Amphipod and Polychaete Diversity as Predictors of Total Infaunal Diversity",
    theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12))
  ) +
  plot_layout(guides = "collect") &
  scale_colour_manual(values = zone_colours) &
  theme(legend.position = "right")

Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Figure 14: Linear regression plots showing amphipod (top row) and polychaete (bottom row) diversity as predictors of total infaunal diversity. Total diversity metrics exclude the focal predictor group to avoid autocorrelation. Points are coloured by zone. R² and p-values shown in top right of each panel.

5.3 Statistical Tests

Show code

lm_amph_rich3 <- lm(total_sp_richness_excl_amph ~ amph_richness, data = plot_df)
lm_amph_shan3 <- lm(total_shannon_excl_amph      ~ amph_shannon,  data = plot_df)
lm_poly_rich3 <- lm(total_sp_richness_excl_poly  ~ poly_richness, data = plot_df)
lm_poly_shan3 <- lm(total_shannon_excl_poly       ~ poly_shannon,  data = plot_df)

cor_amph_rich_k <- cor.test(plot_df$amph_richness, plot_df$total_sp_richness_excl_amph, method = "kendall")

Warning in cor.test.default(plot_df$amph_richness,
plot_df$total_sp_richness_excl_amph, : Cannot compute exact p-value with ties

Show code

cor_amph_shan_k <- cor.test(plot_df$amph_shannon,  plot_df$total_shannon_excl_amph,     method = "kendall")

Warning in cor.test.default(plot_df$amph_shannon,
plot_df$total_shannon_excl_amph, : Cannot compute exact p-value with ties

Show code

cor_poly_rich_k <- cor.test(plot_df$poly_richness, plot_df$total_sp_richness_excl_poly, method = "kendall")

Warning in cor.test.default(plot_df$poly_richness,
plot_df$total_sp_richness_excl_poly, : Cannot compute exact p-value with ties

Show code

cor_poly_shan_k <- cor.test(plot_df$poly_shannon,  plot_df$total_shannon_excl_poly,     method = "kendall")

combined_table <- data.frame(
  Relationship = c(
    "Amphipod richness → Total richness (excl. amph.)",
    "Polychaete richness → Total richness (excl. poly.)",
    "Amphipod Shannon → Total Shannon (excl. amph.)",
    "Polychaete Shannon → Total Shannon (excl. poly.)"
  ),
  R2        = c(get_lm_r2(lm_amph_rich3), get_lm_r2(lm_poly_rich3),
                get_lm_r2(lm_amph_shan3), get_lm_r2(lm_poly_shan3)),
  LM_p      = round(c(get_lm_p(lm_amph_rich3), get_lm_p(lm_poly_rich3),
                       get_lm_p(lm_amph_shan3), get_lm_p(lm_poly_shan3)), 3),
  Kendall_t = round(c(cor_amph_rich_k$estimate, cor_poly_rich_k$estimate,
                       cor_amph_shan_k$estimate, cor_poly_shan_k$estimate), 3),
  Kendall_p = round(c(cor_amph_rich_k$p.value,  cor_poly_rich_k$p.value,
                       cor_amph_shan_k$p.value,  cor_poly_shan_k$p.value),  3),
  Conclusion = c("Significant", "Partially significant", "Significant", "Significant")
)

kable(combined_table,
      col.names = c("Relationship", "R²", "LM p", "Kendall τ", "Kendall p", "Conclusion"),
      caption = "Table 4. Amphipod and polychaete diversity as predictors of total infaunal diversity. Total diversity excludes the focal predictor group in each model.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  column_spec(1, bold = TRUE) %>%
  pack_rows("Species Richness", 1, 2) %>%
  pack_rows("Shannon Diversity", 3, 4)

Table 4. Amphipod and polychaete diversity as predictors of total infaunal diversity. Total diversity excludes the focal predictor group in each model.
Relationship	R²	LM p	Kendall τ	Kendall p	Conclusion
Species Richness
Amphipod richness → Total richness (excl. amph.)	0.825	0.001	0.647	0.019	Significant
Polychaete richness → Total richness (excl. poly.)	0.499	0.033	0.493	0.072	Partially significant
Shannon Diversity
Amphipod Shannon → Total Shannon (excl. amph.)	0.678	0.006	0.667	0.014	Significant
Polychaete Shannon → Total Shannon (excl. poly.)	0.611	0.013	0.833	0.001	Significant

Results: Amphipod species richness was a significant predictor of total infaunal species richness (R² = 0.825, p = 0.001), confirmed by Kendall’s rank correlation (τ = 0.647, p = 0.019). Amphipod Shannon diversity was also a significant predictor of total Shannon diversity (R² = 0.678, p = 0.006). Polychaete richness and Shannon diversity were also significant predictors, though with consistently weaker relationships than amphipods across all metrics.

6 Sampling Bias: Core Penetration Depth

Core penetration depth was assessed as a potential source of sampling bias. One sample (INFA_Z1_3A) had insufficient penetration recorded as “N/A (<10 mm)” and was assigned a depth of 10 mm based on the recorded core volume.

Show code

lm_depth_rich  <- lm(total_sp_richness ~ core_depth, data = div_depth)
lm_depth_shan  <- lm(total_shannon     ~ core_depth, data = div_depth)
lm_depth_abund <- lm(total_abundance   ~ core_depth, data = div_depth)

cor_depth_rich_k  <- cor.test(div_depth$core_depth, div_depth$total_sp_richness, method = "kendall")

Warning in cor.test.default(div_depth$core_depth, div_depth$total_sp_richness,
: Cannot compute exact p-value with ties

Show code

cor_depth_shan_k  <- cor.test(div_depth$core_depth, div_depth$total_shannon,     method = "kendall")
cor_depth_abund_k <- cor.test(div_depth$core_depth, div_depth$total_abundance,   method = "kendall")

Warning in cor.test.default(div_depth$core_depth, div_depth$total_abundance, :
Cannot compute exact p-value with ties

Show code

p_depth1 <- ggplot(div_depth, aes(x = core_depth, y = total_sp_richness,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² = 0.010, p = 0.798)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Species Richness",
       colour = "Zone") +
  theme_classic()

p_depth2 <- ggplot(div_depth, aes(x = core_depth, y = total_shannon,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² = 0.022, p = 0.705)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Shannon Diversity",
       colour = "Zone") +
  theme_classic()

p_depth3 <- ggplot(div_depth, aes(x = core_depth, y = total_abundance,
                                   colour = zone)) +
  geom_point(size = 3) +
  geom_smooth(method = "lm", se = TRUE, colour = "black", linewidth = 0.8) +
  scale_colour_manual(values = zone_colours_z,
                      labels = c("Zone 1", "Zone 2", "Zone 3")) +
  annotate("text", x = Inf, y = Inf, hjust = 1.1, vjust = 1.5,
           label = "ns (R² < 0.001, p = 0.982)",
           size = 3.5, fontface = "italic", colour = "grey40") +
  labs(x = "Core Penetration Depth (mm)", y = "Total Abundance",
       colour = "Zone") +
  theme_classic()

(p_depth1 + p_depth2) / (p_depth3 + plot_spacer()) +
  plot_annotation(
    title = "Core Penetration Depth as a Predictor of Infaunal Diversity",
    theme = theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 13))
  ) +
  plot_layout(guides = "collect") &
  theme(legend.position = "right")

`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Figure 16: Linear regression plots of core penetration depth against total species richness, Shannon–Wiener diversity and total abundance. All relationships were non-significant (ns).

Results: No significant relationship was detected between core penetration depth and total species richness (R² = 0.010, p = 0.798), Shannon diversity (R² = 0.022, p = 0.705) or total abundance (R² < 0.001, p = 0.982). Variation in core penetration depth did not constitute a significant source of sampling bias.

7 Summary of Statistical Results

Show code

summary_table <- data.frame(
  Test = c(
    "Shapiro-Wilk (Z1)", "Shapiro-Wilk (Z2)", "Shapiro-Wilk (Z3)",
    "Kruskal-Wallis: Total abundance",
    "Dunn's post-hoc: Z1 vs Z2",
    "Dunn's post-hoc: Z1 vs Z3",
    "Dunn's post-hoc: Z2 vs Z3",
    "GLM (NB): Total abundance ~ zone",
    "GLM (NB): Species richness ~ zone",
    "Kruskal-Wallis: Shannon diversity (H') — species level",
    "Kruskal-Wallis: Simpson diversity (1-D) — species level",
    "PERMANOVA: Species composition",
    "betadisper: Dispersion homogeneity",
    "brmultinom + LRT: Phylum composition",
    "Core depth → Species richness",
    "Core depth → Shannon diversity",
    "Core depth → Total abundance"
  ),
  Result = c(
    "W = 0.385, p < 0.001",
    "W = 0.447, p < 0.001",
    "W = 0.540, p < 0.001",
    "H = 6.88, df = 2, p = 0.032",
    "p = 0.347 (ns)",
    "p = 0.013",
    "p = 0.233 (ns)",
    "Z1: β = -1.706, p < 0.001; Z2: β = -1.030, p < 0.001",
    "Z1: β = -1.134, p < 0.001; Z2: β = -0.637, p < 0.001",
    "H = 1.87, df = 2, p = 0.39 (ns)",
    "H = 1.16, df = 2, p = 0.56 (ns)",
    "F = 1.80, R² = 0.375, p = 0.009",
    "p = 0.287 (ns — assumption met)",
    "χ² = 55.04, df = 6, p < 0.001",
    "R² = 0.010, p = 0.798 (ns)",
    "R² = 0.022, p = 0.705 (ns)",
    "R² < 0.001, p = 0.982 (ns)"
  ),
  Conclusion = c(
    "Non-normal — non-parametric tests required",
    "Non-normal — non-parametric tests required",
    "Non-normal — non-parametric tests required",
    "Significant difference in abundance across zones",
    "Z1 and Z2 not significantly different",
    "Z3 significantly different from Z1",
    "Z2 not significantly different from Z3",
    "Both restoration zones have significantly lower abundance than control",
    "Both restoration zones have significantly lower richness than control",
    "No significant difference in Shannon diversity",
    "No significant difference in Simpson diversity",
    "Species composition differs significantly across zones",
    "Equal dispersion — PERMANOVA result is valid",
    "Phylum composition differs significantly across zones",
    "Core depth not a significant source of bias",
    "Core depth not a significant source of bias",
    "Core depth not a significant source of bias"
  )
)

kable(summary_table,
      col.names = c("Test", "Result", "Conclusion"),
      caption = "Table 7. Summary of all statistical tests performed.") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = TRUE) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE)

Table 7. Summary of all statistical tests performed.
Test	Result	Conclusion
Shapiro-Wilk (Z1)	W = 0.385, p < 0.001	Non-normal — non-parametric tests required
Shapiro-Wilk (Z2)	W = 0.447, p < 0.001	Non-normal — non-parametric tests required
Shapiro-Wilk (Z3)	W = 0.540, p < 0.001	Non-normal — non-parametric tests required
Kruskal-Wallis: Total abundance	H = 6.88, df = 2, p = 0.032	Significant difference in abundance across zones
Dunn's post-hoc: Z1 vs Z2	p = 0.347 (ns)	Z1 and Z2 not significantly different
Dunn's post-hoc: Z1 vs Z3	p = 0.013	Z3 significantly different from Z1
Dunn's post-hoc: Z2 vs Z3	p = 0.233 (ns)	Z2 not significantly different from Z3
GLM (NB): Total abundance ~ zone	Z1: β = -1.706, p < 0.001; Z2: β = -1.030, p < 0.001	Both restoration zones have significantly lower abundance than control
GLM (NB): Species richness ~ zone	Z1: β = -1.134, p < 0.001; Z2: β = -0.637, p < 0.001	Both restoration zones have significantly lower richness than control
Kruskal-Wallis: Shannon diversity (H') — species level	H = 1.87, df = 2, p = 0.39 (ns)	No significant difference in Shannon diversity
Kruskal-Wallis: Simpson diversity (1-D) — species level	H = 1.16, df = 2, p = 0.56 (ns)	No significant difference in Simpson diversity
PERMANOVA: Species composition	F = 1.80, R² = 0.375, p = 0.009	Species composition differs significantly across zones
betadisper: Dispersion homogeneity	p = 0.287 (ns — assumption met)	Equal dispersion — PERMANOVA result is valid
brmultinom + LRT: Phylum composition	χ² = 55.04, df = 6, p < 0.001	Phylum composition differs significantly across zones
Core depth → Species richness	R² = 0.010, p = 0.798 (ns)	Core depth not a significant source of bias
Core depth → Shannon diversity	R² = 0.022, p = 0.705 (ns)	Core depth not a significant source of bias
Core depth → Total abundance	R² < 0.001, p = 0.982 (ns)	Core depth not a significant source of bias