bomba

introduction

Results

firstly using the Using the whole community data, a single environmental parameter that best explains variance in alpha diversity over time was found to be Nitrate_Nitrate

# packages 
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.2.3
Warning: package 'ggplot2' was built under R version 4.2.3
Warning: package 'tibble' was built under R version 4.2.3
Warning: package 'tidyr' was built under R version 4.2.3
Warning: package 'readr' was built under R version 4.2.3
Warning: package 'purrr' was built under R version 4.2.3
Warning: package 'dplyr' was built under R version 4.2.3
Warning: package 'stringr' was built under R version 4.2.3
Warning: package 'forcats' was built under R version 4.2.3
Warning: package 'lubridate' was built under R version 4.2.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
Loading required package: permute
Warning: package 'permute' was built under R version 4.2.3
Loading required package: lattice
This is vegan 2.6-8
library(ggplot2)
library(dplyr)
library(readr)
library(patchwork)
library(zoo)
Warning: package 'zoo' was built under R version 4.2.3

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
asv <- read.csv("WEC_ASV_data.csv", row.names = 1)
env <- read.csv("WEC_environmental_data.csv")
tax <- read.csv("WEC_taxonomic_data.csv")

# change wec to columns 
asvv <- t(asv)

#Convert to Relative Abundance
asv_rel <- asvv / rowSums(asvv)

# Calculate Shannon Diversity
alpha_div <- diversity(asv_rel, index = "shannon")

# add shannon to the env table 
env$shannon <- alpha_div[match(env$Sample, rownames(asvv))]

# date to character
env$Date <- as.character(env$Date)
env$Date <- as.Date(env$Date)
env$Year <- format(env$Date, "%Y")

# shannion over time YEAR
ggplot(env, aes(x = Year, y = shannon)) +
  geom_line() +
  geom_point() +
  theme_minimal() +
  labs(title = "Shannon Diversity Over Time",
       x = "Date", y = "Shannon Index")

# seeing whioch varuiable is best
env_numeric <- env %>%
  select(where(is.numeric)) %>%
  select(-shannon) 

# Remove numeric variables with more than 25% missing values
env_numeric <- env_numeric %>%
  select(where(~ mean(!is.na(.)) > 0.75))


model_results <- map_dfr(names(env_numeric), function(var) {
  x <- env_numeric[[var]]
  
  # Skip variables with all NA or zero variance
  if (all(is.na(x)) || var(x, na.rm = TRUE) == 0) return(NULL)
  
  model <- lm(env$shannon ~ x)
  model_summary <- summary(model)
  
  tibble(
    variable = var,
    r_squared = model_summary$r.squared,
    p_value = coef(model_summary)[2, 4]
  )
})

# View top predictors NITRATE_nitrate 
model_results %>%
  arrange(desc(r_squared)) %>%
  head(20)
# A tibble: 20 × 3
   variable                   r_squared  p_value
   <chr>                          <dbl>    <dbl>
 1 NITRATE_NITRITE               0.394  8.93e-29
 2 PHOSPHATE                     0.362  1.08e-25
 3 SILICATE                      0.346  6.21e-24
 4 maximum.weekly.temperature    0.335  3.76e-24
 5 average.weekly.temperature    0.284  6.39e-20
 6 average.weekly.river.flow     0.190  2.80e-13
 7 average.weekly.wave.height    0.183  7.94e-13
 8 AMMONIA                       0.177  5.09e-11
 9 average.weekly.wind.speed     0.157  7.18e-11
10 max.weekly.wave.height        0.152  1.00e-10
11 temp_C                        0.138  1.00e- 8
12 max.daily.river.flow          0.134  1.60e- 9
13 maximum.wind.speed            0.0939 6.33e- 7
14 Chl_10m                       0.0891 1.78e- 6
15 salinity_PSU                  0.0835 3.22e- 6
16 Chl_0m                        0.0500 4.19e- 4
17 average.weekly.pressure       0.0235 1.48e- 2
18 total.rainfall.for.week       0.0235 1.45e- 2
19 Density_kg.m3                 0.0170 3.89e- 2
20 NITRITE                       0.0152 5.19e- 2

To identify the environmental variable that best explains variation in alpha diversity (Shannon Index) across the time series, individual linear models were fitted for each environmental predictor. The results revealed that NITRATE_NITRITE was the strongest single predictor, explaining approximately 39.4% of the variance in alpha diversity (R² = 0.394, p < 2.2e-16). Other top contributors included PHOSPHATE (R² = 0.362), SILICATE (R² = 0.346), and maximum weekly temperature (R² = 0.335), all of which were highly statistically significant. These findings highlight the importance of both nutrient availability and thermal conditions in shaping microbial community diversity in the Western English Channel.

Next is a optimized multiple parameter regression that explains variance in alpha diversity over time.

# Use multiple parameter regression to create an optimised model that explains variance in alpha diversity over time 
model_full <- lm(shannon ~ PHOSPHATE * season + NITRATE_NITRITE + temp_C + salinity_PSU, data = env)
summary(model_full)

Call:
lm(formula = shannon ~ PHOSPHATE * season + NITRATE_NITRITE + 
    temp_C + salinity_PSU, data = env)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.07458 -0.16190  0.01951  0.21766  0.57568 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)             4.416329   8.798962   0.502    0.616  
PHOSPHATE               0.113172   0.662849   0.171    0.865  
seasonspring           -0.435993   0.209894  -2.077    0.039 *
seasonsummer           -0.242125   0.209464  -1.156    0.249  
seasonwinter            0.272782   0.395345   0.690    0.491  
NITRATE_NITRITE         0.044213   0.032975   1.341    0.181  
temp_C                 -0.007486   0.018794  -0.398    0.691  
salinity_PSU            0.001631   0.247976   0.007    0.995  
PHOSPHATE:seasonspring  0.426070   0.561581   0.759    0.449  
PHOSPHATE:seasonsummer  0.343239   0.815900   0.421    0.674  
PHOSPHATE:seasonwinter -0.274581   0.846884  -0.324    0.746  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3095 on 206 degrees of freedom
  (47 observations deleted due to missingness)
Multiple R-squared:  0.4855,    Adjusted R-squared:  0.4605 
F-statistic: 19.44 on 10 and 206 DF,  p-value: < 2.2e-16
# Define colors for each season
season_colors <- c(
  "Spring" = "#66C2A5",
  "Summer" = "#FC9272",
  "Autumn" = "#FDAE6B",
  "Winter" = "#6BAED6"
)


# Capitalize the first letter of each season
env$season <- stringr::str_to_title(env$season)

# Then rerun the plot
ggplot(env, aes(x = NITRATE_NITRITE, y = shannon, color = season)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, size = 1.2) +
  facet_wrap(~season) +
  scale_color_manual(values = season_colors) +
  labs(
    title = "Seasonal Relationship Between Nitrate Concentration and Shannon Diversity",
    x = "Nitrate + Nitrite Concentration (µM)",
    y = "Shannon Diversity Index",
    color = "Season"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.line = element_line(color = "black"),
    panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
    strip.text = element_text(face = "bold", size = 12),
    legend.position = "none"
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 14 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 14 rows containing missing values or values outside the scale range
(`geom_point()`).

# Get R² for each season
r2_by_season <- env %>%
  group_by(season) %>%
  summarise(
    r_squared = summary(lm(shannon ~ NITRATE_NITRITE, data = cur_data()))$r.squared
  )
Warning: There was 1 warning in `summarise()`.
ℹ In argument: `r_squared = summary(lm(shannon ~ NITRATE_NITRITE, data =
  cur_data()))$r.squared`.
ℹ In group 1: `season = "Autumn"`.
Caused by warning:
! `cur_data()` was deprecated in dplyr 1.1.0.
ℹ Please use `pick()` instead.
# View the results
print(r2_by_season)
# A tibble: 4 × 2
  season r_squared
  <chr>      <dbl>
1 Autumn    0.119 
2 Spring    0.275 
3 Summer    0.0398
4 Winter    0.0350

To explore whether the relationship between nitrate concentrations and alpha diversity varied seasonally, the data were visualized using a faceted regression plot and analyzed with separate linear models for each season. The resulting figure shows clear differences in the strength of association across seasons. In spring, nitrate concentrations explained approximately 27.5% of the variance in Shannon diversity (R² = 0.275), indicating a moderate positive relationship. In autumn, the relationship was weaker (R² = 0.119), while in summer and winter, nitrate had minimal explanatory power (R² = 0.040 and R² = 0.035, respectively). These findings suggest that the influence of nitrate on microbial alpha diversity is strongest in spring, potentially due to seasonal nutrient availability or community shifts, and is negligible in other seasons. The faceted plot visually supports this conclusion, with the regression line in spring showing a noticeably stronger slope compared to the flatter trends in other panels.

Lastly a taxanomic plot of Magnetospiraceae and their alpha diversity and relative abundance over time. Using Fourier analysis to determine if there is a seasonal pattern in relative abundance Magnetospiraceae.

# Subset taxonomy to just Magnetospiraceae
magneto_only <- subset(tax, Family == "Magnetospiraceae")

# Extract list of ASV IDs for this family
magneto_asvs <- magneto_only %>%
  pull(ASV)  # Replace 'ASV' with the correct column name for ASV IDs if different

# Subset ASV table to only those ASVs
asv_magneto <- asv_rel[, colnames(asv_rel) %in% magneto_asvs]


# Calculate Relative Abundance & Diversity
# Total relative abundance of Magnetospiraceae per sample
env$magneto_abund <- rowSums(asv_magneto, na.rm = TRUE)

# Shannon diversity (within-Magnetospiraceae)
env$magneto_alpha <- vegan::diversity(asv_magneto, index = "shannon")


# Plotting Time Series
# Ensure Date is correctly formatted
env$Date <- as.Date(env$Date)

# Set darker custom season colors
env$season_color <- case_when(
  env$season == "Winter" ~ "#6BAED6",   # blue
  env$season == "Spring" ~ "#66C2A5",   # green
  env$season == "Summer" ~ "#FC9272",   # red
  env$season == "Autumn" ~ "#FDAE6B"    # orange
)

# Create shading blocks based on season
season_blocks <- env %>%
  select(Date, season_color) %>%
  distinct() %>%
  arrange(Date) %>%
  mutate(Date_end = lead(Date, default = last(Date)))

# Plot 1: Relative Abundance
p1 <- ggplot() +
  geom_rect(data = season_blocks, aes(xmin = Date, xmax = Date_end,
                                      ymin = -Inf, ymax = Inf, fill = season_color), alpha = 0.25) +
  geom_line(data = env, aes(x = Date, y = magneto_abund), color = "black", size = 0.8) +
  geom_smooth(data = env, aes(x = Date, y = magneto_abund), method = "loess", se = FALSE,
              color = "red", linetype = "dashed") +
  scale_fill_identity(
    guide = "legend", name = "Season",
    labels = c("Winter", "Spring", "Summer", "Autumn"),
    breaks = c("#6BAED6", "#66C2A5", "#FC9272", "#FDAE6B")
  ) +
  theme_minimal() +
  theme(
    axis.line = element_line(color = "black", size = 1),
    legend.position = "top",
    plot.title = element_text(face = "bold")
  ) +
  labs(
    title = "Magnetospiraceae Relative Abundance",
    x = NULL, y = "Relative Abundance"
  )
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
# Plot 2: Alpha Diversity
p2 <- ggplot() +
  geom_rect(data = season_blocks, aes(xmin = Date, xmax = Date_end,
                                      ymin = -Inf, ymax = Inf, fill = season_color), alpha = 0.25) +
  geom_line(data = env, aes(x = Date, y = magneto_alpha), color = "black", size = 0.8) +
  geom_smooth(data = env, aes(x = Date, y = magneto_alpha), method = "loess", se = FALSE,
              color = "red", linetype = "dashed") +
  scale_fill_identity() +
  theme_minimal() +
  theme(
    axis.line = element_line(color = "black", size = 1),
    legend.position = "none",
    plot.title = element_text(face = "bold")
  ) +
  labs(
    title = "Magnetospiraceae Alpha Diversity (Shannon)",
    x = "Date", y = "Shannon Diversity",
    caption = "Shaded areas represent seasons: Winter (blue), Spring (green), Summer (red), Autumn (orange).\nDashed red lines indicate LOESS-smoothed trends."
  )

# Combine vertically
p1 / p2
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

Temporal patterns in Magnetospiraceae relative abundance and alpha diversity in the Western English Channel (2012–2018). This figure displays seasonal dynamics in both the relative abundance (top panel) and Shannon diversity (bottom panel) of ASVs classified under the family Magnetospiraceae. Each point represents a weekly sample, and shaded backgrounds indicate seasons: Winter (blue), Spring (green), Summer (red), and Autumn (orange). Dashed red lines show LOESS-smoothed trends to highlight overall temporal patterns.

Peaks in relative abundance appear to occur regularly in late spring and summer, suggesting that environmental conditions during these months may favor the growth of Magnetospiraceae. These peaks coincide with increases in alpha diversity, indicating that not only are these microbes more abundant in these periods, but there may also be a greater variety of ASVs within the family. Conversely, diversity and abundance tend to drop during winter, implying potential environmental constraints such as temperature or light limitation.

# Ensure Date is Date type
env$Date <- as.Date(env$Date)

# Filter and prepare Magnetospiraceae abundance
magneto_df <- env %>%
  select(Date, magneto_abund) %>%
  filter(!is.na(Date) & !is.na(magneto_abund))

# Create complete weekly sequence
weekly_seq <- seq(min(magneto_df$Date), max(magneto_df$Date), by = "week")

# Merge and interpolate
z <- zoo(magneto_df$magneto_abund, order.by = magneto_df$Date)
z_full <- merge(z, zoo(, weekly_seq), all = TRUE)
z_interp <- na.approx(z_full)

# FFT
magneto_ts <- as.numeric(z_interp)
fft_result <- abs(fft(magneto_ts))

# Prepare for ggplot
fft_df <- data.frame(
  Frequency = seq_along(fft_result) / length(fft_result),
  Amplitude = fft_result
)

# Plot using ggplot2
ggplot(fft_df[1:30, ], aes(x = Frequency, y = Amplitude)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Fourier Transform: Magnetospiraceae Relative Abundance",
    x = "Frequency (cycles/sample)",
    y = "Amplitude"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.line.x = element_line(size = 1.2, color = "black"),
    axis.line.y = element_line(size = 1.2, color = "black"),
    panel.grid = element_blank()
  )

Fourier analysis of the interpolated weekly relative abundance of Magnetospiraceae revealed a distinct periodic signal in the microbial community. The dominant frequency occurred at approximately 0.019 cycles/sample, corresponding to an annual cycle (∼52 weeks), suggesting a strong seasonal pattern in the abundance of this family. Several smaller peaks at higher frequencies were also observed, indicating the presence of potential sub-seasonal fluctuations. These findings support the presence of regular temporal dynamics, likely driven by seasonal environmental changes in the Western English Channel.

REFERNCES