# load all packages
library(tidyverse)
library(ggplot2)
library(dplyr)
library(tidyr)
library(vegan)
library(knitr)
library(kableExtra)
library(dunn.test)
library(brglm2)
library(lmtest)
library(ggeffects)
library(patchwork)
library(MASS)
library(DHARMa)
library(ggVennDiagram)
library(conflicted)
conflicts_prefer(dplyr::select)
conflicts_prefer(dplyr::filter)
# import infauna data
data <- read_csv(
file = "infauna_copy(R sheet).csv",
locale = locale(encoding = "latin1")
)
# remove empty rows and columns
data <- data[rowSums(is.na(data)) != ncol(data),
colSums(is.na(data)) != nrow(data)]
# remove taxa too small to be retained on 1mm sieve
data <- data %>%
filter(!grepl("Oligochaeta|Nematoda sp.1|Nemertea sp.1", species))
# standardise taxonomic names
data <- data %>%
mutate(
species = case_when(
species == "aplysia punctata" ~ "Aplysia punctata",
species == "gooddallia triangularis" ~ "Gooddallia triangularis",
species == "heteroclymene sp.1" ~ "Heteroclymene sp.1",
species == "metaphoxus Fultoni" ~ "Metaphoxus fultoni",
species == "syllidae sp.1" ~ "Syllidae sp.1",
species == "Golfingia sp.1" ~ "Golfingia sp.1",
species == "Nothria conchylega " ~ "Nothria conchylega",
species == "Mediomastua sp.1" ~ "Mediomastus sp.1",
TRUE ~ species
),
genus = case_when(
genus == "aplysia" ~ "Aplysia",
genus == "gooddallia" ~ "Gooddallia",
genus == "heteroclymene" ~ "Heteroclymene",
genus == "urothoe" ~ "Urothoe",
genus == "Golfingia " ~ "Golfingia",
genus == "Spirobranchus " ~ "Spirobranchus",
genus == "Mediomastua" ~ "Mediomastus",
TRUE ~ genus
),
family = case_when(
family == "maldanidae" ~ "Maldanidae",
family == "tellinidae" ~ "Tellinidae",
family == "Serpulidae " ~ "Serpulidae",
family == "Golfingiidae " ~ "Golfingiidae",
family == "Eunicidea" ~ "Eunicidae",
family == "Nereidinae" ~ "Nereididae",
TRUE ~ family
),
order = case_when(
order == "amphipod" ~ "Amphipoda",
order == "Tanaid" ~ "Tanaidacea",
order == "ophiuroids" ~ "Ophiurida",
order == "gastropod" ~ "Gastropoda",
order == "pycnogonids" ~ "Pycnogonida",
TRUE ~ order
),
phylum = case_when(
phylum == "Anthropoda" ~ "Arthropoda",
TRUE ~ phylum
)
)
# zone colours — deliberately different from phylum colours
zone_colours <- c("Zone 1" = "#2166AC", "Zone 2" = "#F4A582", "Zone 3" = "#4DAC26")
# phylum colours
phylum_colours <- c(
"Annelida" = "#E8732A",
"Arthropoda" = "#9B59B6",
"Mollusca" = "#1ABC9C",
"Echinodermata" = "#89CFF0"
)
# zone colours for Z1/Z2/Z3 coding (used in plots before factor relabelling)
zone_colours_z <- c("Z1" = "#2166AC", "Z2" = "#F4A582", "Z3" = "#4DAC26")
# create long format data with zone and sample labels (9 samples)
sample_long <- data %>%
pivot_longer(
cols = starts_with("INFA"),
names_to = "sample",
values_to = "abundance"
) %>%
mutate(zone = str_extract(sample, "Z[0-9]"))
# per-sample abundance summary (n = 9)
sample_abundance <- sample_long %>%
group_by(sample, zone) %>%
summarise(total_abundance = sum(abundance), .groups = "drop")
# total diversity metrics per sample
total_div <- sample_long %>%
group_by(sample, zone, species) %>%
summarise(abundance = sum(abundance), .groups = "drop") %>%
pivot_wider(names_from = species, values_from = abundance, values_fill = 0) %>%
mutate(
total_sp_richness = specnumber(dplyr::select(., -sample, -zone)),
total_shannon = diversity(as.matrix(dplyr::select(., -sample, -zone)),
index = "shannon"),
total_abundance = rowSums(dplyr::select(., -sample, -zone))
) %>%
dplyr::select(sample, zone, total_sp_richness, total_shannon, total_abundance)
# zone-level species matrix for Shapiro-Wilk tests
zone_data <- data %>%
pivot_longer(cols = starts_with("INFA"),
names_to = "sample_type", values_to = "value") %>%
mutate(zone = case_when(
grepl("INFA_Z1", sample_type) ~ "zone1",
grepl("INFA_Z2", sample_type) ~ "zone2",
grepl("INFA_Z3", sample_type) ~ "zone3"
)) %>%
group_by(phylum, order, family, genus, species, zone) %>%
summarise(total_value = sum(value), .groups = "drop") %>%
pivot_wider(names_from = zone, values_from = total_value)
# per-sample phylum summary
sample_phylum <- data %>%
pivot_longer(cols = starts_with("INFA"),
names_to = "sample", values_to = "abundance") %>%
mutate(
zone = str_extract(sample, "Z[0-9]"),
replicate = str_extract(sample, "[0-9][A-Z]$"),
zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
labels = c("Zone 1", "Zone 2", "Zone 3")),
phylum = factor(phylum, levels = c("Annelida", "Arthropoda",
"Mollusca", "Echinodermata"))
) %>%
group_by(sample, zone, replicate, phylum) %>%
summarise(total = sum(abundance), .groups = "drop")
# species list per zone
species_list <- data %>%
pivot_longer(cols = starts_with("INFA"),
names_to = "sample", values_to = "abundance") %>%
mutate(zone = str_extract(sample, "Z[0-9]"),
zone = factor(zone, levels = c("Z1", "Z2", "Z3"),
labels = c("Zone 1", "Zone 2", "Zone 3"))) %>%
group_by(zone, species) %>%
summarise(total = sum(abundance), .groups = "drop") %>%
filter(total > 0) %>%
mutate(phylum = data$phylum[match(species, data$species)],
species = fct_reorder(species, total))
# phylum-level abundance for Shannon/Simpson phylum analyses
phylum_abund <- data %>%
pivot_longer(cols = starts_with("INFA"),
names_to = "sample", values_to = "abundance") %>%
group_by(sample, phylum) %>%
summarise(abundance = sum(abundance), .groups = "drop") %>%
pivot_wider(names_from = phylum, values_from = abundance, values_fill = 0)
phylum_matrix <- phylum_abund %>%
dplyr::select(-sample) %>%
as.matrix()
# import and clean core depth data
core <- read_csv("infauna_copy(cores).csv") %>%
dplyr::select("SiteID", "EstCoreDepthmm", "VolumeLtr", "species total", "abundance")
core <- core[rowSums(is.na(core)) != ncol(core),
colSums(is.na(core)) != nrow(core)]
core <- core[-11, ]
core_clean <- core %>%
dplyr::select(SiteID, EstCoreDepthmm) %>%
filter(!is.na(SiteID)) %>%
mutate(EstCoreDepthmm = case_when(
SiteID == "INFA_Z1_3A" ~ "10",
TRUE ~ EstCoreDepthmm
)) %>%
filter(!grepl("N/A", EstCoreDepthmm)) %>%
mutate(sample = SiteID, core_depth = as.numeric(EstCoreDepthmm)) %>%
dplyr::select(sample, core_depth)
div_depth <- total_div %>% left_join(core_clean, by = "sample")
# helper functions for regression tables
get_lm_p <- function(m) summary(m)$coefficients[2, 4]
get_lm_r2 <- function(m) round(summary(m)$r.squared, 3)
get_lm_f <- function(m) round(summary(m)$fstatistic[1], 2)