Soil Vegetation Analysis 2026

Libraries

Show Code

# --- 1. CORE DATA MANIPULATION & TIDYING ---
library(tidyverse)    # Essential: dplyr, ggplot2, tidyr, readr, purrr, stringr
library(lubridate)    # Date/time handling (Year/Month parsing)
library(reshape2)     # Necessary for melt() in correlation heatmaps
library(scales)       # Advanced axis scaling for plots 
library(ggplot2)
library(dplyr)
library(readr)
library(stringr)
library(patchwork)
library(magick)

# --- 2. BAYESIAN MODELING ---
library(brms)         # Bayesian Multilevel Models 
library(tidybayes)    
library(broom.mixed)  

# --- 3. TREND DETECTION & RESTREND ---
library(Kendall)      # Mann-Kendall trend tests
library(trend)        # sens.slope() calculation

# --- 4. SPATIAL & REMOTE SENSING DATA ---
library(sf)           # Vector data 
library(terra)        # Raster handling 
library(usdm)         # VIF analysis for multicollinearity 

# --- 5. MULTIVARIATE ANALYSIS---
library(vegan)        # RDA, PERMANOVA, and Bray-Curtis
library(FactoMineR)   # PCA for environmental variable reduction
library(factoextra)   # Clean visualization of PCA/Eigenvalues
library(betapart)      # Beta diversity partitioning (Jaccard/Sorensen)
library(entropart)     # Hill numbers (DivPart, DivProfile) and MetaCommunity
library(iNEXT.3D)      # 3D rarefaction and diversity estimation
library(iNEXT.beta3D)  # Beta diversity rarefaction

# --- 6. STATISTICAL DIAGNOSTICS ---
library(car)          # ANOVA and Levene’s test
library(rstatix)      # Pipe-friendly statistical tests
library(lmtest)       # Heteroscedasticity testing

# --- 7. VISUALIsATION & REPORTING ---
library(patchwork)    # Combining multiple plots 
library(GGally)       # ggpairs for initial data exploration
library(knitr)        # kable() for table generation
library(kableExtra)   # Styling kable tables for PDF/HTML reports

This document contains analyses of data collected from the 10 x 10 km study area. Soil samples were collected from each subplot (100m2) to create a composite sample for every plot (1000m2).

Rangeland plants data were collected along North-South and East-West 28-metres transects cutting across each plot. At every 2 metres along the transect, the nearest annual and perennial grasses, forbs, shrubs and woody species were recorded.

Trees height and diametre at breast height, and shrubs height, length and width and species names were recorded in each subplot in every plot. Point count transects for bird monitoring were 1 km long cutting across each cluster (1km2).

Acoustic recorders were placed at the centre of each cluster.

For the rangeland plants, birds from the point count monitoring and birds from the BirdNet derived avian species, I used the package enropart to calculate the diversity using Hill’s numbers. Vegetation structural variables (shrub volume per hectare, tree volume per hectare, percentage cover per hectare, tree basal area), were calculated within the tidyverse framework. I used Kaleidoscope to compute the acoustic indices.

All variables were aggregated at the cluster level.

Soil Data

Show Code

getwd()

[1] "C:/Users/n1227824/OneDrive - Nottingham Trent University/R/R_ldsf.rs.acoustics/scripts"

Show Code

Soil_Data <- read_csv("data/soil.data.csv")
head(Soil_Data)

# A tibble: 6 × 30
  SSN    Job.No Study Site  cluster  plot depth_std depth_top depth_bottom    pH
  <chr>  <chr>  <chr> <chr>   <dbl> <dbl> <chr>         <dbl>        <dbl> <dbl>
1 WA090… ICR-2… NTU … ENAR…       1     1 TOP              NA           NA  6.86
2 WA090… ICR-2… NTU … ENAR…       1     1 SUB              NA           NA  6.97
3 WA090… ICR-2… NTU … ENAR…       1     2 TOP              NA           NA  6.77
4 WA090… ICR-2… NTU … ENAR…       1     2 SUB              NA           NA  6.96
5 WA090… ICR-2… NTU … ENAR…       1     3 TOP              NA           NA  6.67
6 WA090… ICR-2… NTU … ENAR…       1     3 SUB              NA           NA  6.95
# ℹ 20 more variables: `SOC (%)` <dbl>, `TN (%)` <dbl>, `EC  (uS/cm)` <dbl>,
#   `m3.P (mg/kg)` <dbl>, `m3.Al (mg/kg)` <dbl>, `m3.B (mg/kg)` <dbl>,
#   `m3.Ca (mg/kg)` <dbl>, `m3.Fe (mg/kg)` <dbl>, `m3.K (mg/kg)` <dbl>,
#   `m3.Mg (mg/kg)` <dbl>, `m3.Na (mg/kg)` <dbl>, `CEC (cmolc/kg)` <dbl>,
#   `ExAc (cmolc/kg)` <dbl>, PSI <dbl>, `Clay (%)` <dbl>, `Silt (%)` <dbl>,
#   `Sand (%)` <dbl>, Soil.Texture.Interpretation <chr>, Interpretation <lgl>,
#   ...30 <chr>

Show Code

# Select and rename relevant columns
soil_data <- Soil_Data %>%
  dplyr::select(-c(1:4, 30, 29, 28)) %>%
  rename(
    EC = `EC  (uS/cm)`, P = `m3.P (mg/kg)`,      Al = `m3.Al (mg/kg)`, 
    B = `m3.B (mg/kg)`, Ca = `m3.Ca (mg/kg)`,    Fe = `m3.Fe (mg/kg)`, 
    K = `m3.K (mg/kg)`, Mg = `m3.Mg (mg/kg)`,    Na = `m3.Na (mg/kg)`,  
    CEC = `CEC (cmolc/kg)`, ExAc = `ExAc (cmolc/kg)`, TN = `TN (%)`,  
    SOC = `SOC (%)`,  Clay = `Clay (%)`,  Sand = `Sand (%)`, Silt = `Silt (%)`) 

soil_data$cluster <- as.factor(soil_data$cluster)  
soil_data <- soil_data %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0)))  


# Compute the mean for each plot within each cluster
soil_data <- soil_data %>%
  group_by(cluster, plot) %>%
  summarise(across(where(is.numeric), mean, na.rm = TRUE))  

 head (soil_data)

# A tibble: 6 × 22
# Groups:   cluster [1]
  cluster  plot depth_top depth_bottom    pH   SOC     TN    EC     P    Al
  <fct>   <dbl>     <dbl>        <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
1 1           1         0        10     6.89  1.28 0.12    123.  20.1  852.
2 1           2         0        12.7   6.9   1.21 0.117   120.  24.1  860.
3 1           3         0         0     6.81  1.18 0.125   123.  41.2  850.
4 1           4         0        16.5   6.86  1.22 0.12    120.  24.6  856.
5 1           5         0         8.33  7.10  0.96 0.0967  125.  27.0  846.
6 1           6         0        11     6.6   1.07 0.12    105.  28.3  873.
# ℹ 12 more variables: B <dbl>, Ca <dbl>, Fe <dbl>, K <dbl>, Mg <dbl>,
#   Na <dbl>, CEC <dbl>, ExAc <dbl>, PSI <dbl>, Clay <dbl>, Silt <dbl>,
#   Sand <dbl>

Soil pca

To identify the main factors influencing soil properties, I used Principal Component Analysis (PCA) to transform the original 18, correlated variables into a smaller set of uncorrelated Principal Components (PCs), which capture the maximum variance in the data (Souza, 2025). I carried out the PCA using the PCA() function from the FactoMineR package in R (Husson et al., 2024). I determined the significance of each principal component by examining its eigenvalue; components with eigenvalues greater than one were considered meaningful. I examined variable loadings and correlation structures to interpret the ecological significance of each axis.

Show Code

soil.data <- soil_data[,-c(1:4)]

# Perform PCA
pc.results.soil <- PCA(soil.data, scale.unit = TRUE, graph = FALSE)

# Extract eigenvalues
eigenvalues <- pc.results.soil$eig[,1]  
head(eigenvalues)

   comp 1    comp 2    comp 3    comp 4    comp 5    comp 6 
7.5175646 4.4735839 1.9603027 1.2654116 1.0108506 0.4796236

Show Code

# Correlation circle - variable contribution to PCs
fviz_pca_var(pc.results.soil,  col.var = "contrib",gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),  repel = TRUE, title = "", 
             legend.title = list(color = "Contribution Level")) +
              labs(color = "Contribution")

Show Code

# Extract correlation of variables with PCs
correlations <- pc.results.soil$var$coord
head(correlations)

          Dim.1      Dim.2       Dim.3       Dim.4       Dim.5
pH   0.89288765 -0.3198878  0.17780786 -0.10981891 -0.11333255
SOC  0.25929878  0.9409967  0.01981635 -0.01406242  0.08483381
TN   0.05406606  0.9729491 -0.02929538 -0.01689972  0.12645926
EC   0.59203055  0.7034661  0.10441728  0.22294499 -0.17805452
P    0.44298990  0.5502939  0.47522877  0.25119190 -0.07212982
Al  -0.65468650  0.2344754 -0.52590976  0.14447660  0.40048924

Show Code

# Extract PCA scores and cluster info
pca_scores <- data.frame(
  PC1 = pc.results.soil$ind$coord[, 1], PC2 = pc.results.soil$ind$coord[, 2],
  Cluster = as.factor(soil_data$cluster))

# Calculate mean PC1 and PC2 per cluster
cluster_means <- pca_scores %>%
  group_by(Cluster) %>%
  summarise(
    Mean_PC1 = mean(PC1),  Mean_PC2 = mean(PC2))

soil.pc<- cluster_means %>% rename(soil.pc1= Mean_PC1, soil.pc2= Mean_PC2)
head (soil.pc)

# A tibble: 6 × 3
  Cluster soil.pc1 soil.pc2
  <fct>      <dbl>    <dbl>
1 1          0.872   -1.67 
2 2          1.02    -1.04 
3 3          1.66     1.92 
4 4         -2.26     0.904
5 5         -3.06    -0.350
6 6          2.72    -2.01

✅ Interpretation: T#The eigenvalues represent the amount of variance captured
by each principal component (PC).The first five components have eigenvalues greater than 1, suggesting they retain meaningful information.

✅ Interpretation: Cumulative variance: PC1 explains 41.8% of the total variance. PC2 adds 24.9%, bringing the cumulative variance to 66.7% #Variable Contributions to Principal Components (Correlation with PCs)

✅ Interpretation: PC1:These properties are strongly affected by fertilisers especially NPK Strong positive correlations: CEC (0.98), Ca (0.98), Mg (0.95), B (0.84), pH (0.89). High values of these components show high use of the fertilisers and lime. Further, the soils showed strong negative correlation: Al (-0.65), Sand (-0.54), ExAc (-0.51), Fe (-0.49). Suggests PC1 represents cation exchange capacity (CEC) and soil fertility.

✅ Interpretation: PC2:High PC2: Probably in conservation areas and unfarmed hilly areas (due to organic matter accumulation).Strongest positive correlations: TN (0.97), SOC (0.94),EC (0.70), Sand (0.62). Negative correlations Clay (-0.69), Na (-0.60) Indicates PC2 captures organic matter and soil texture gradients.

Soil spatial statistics

To test for multivariate differences in overall soil composition among the 16 clusters, I conducted a permutational multivariate analysis of variance (PERMANOVA) with Euclidean distances, based on standardised soil variables (adonis2 function, vegan package, Oksanen et al., 2025). I identified which specific soil variables exhibited significant spatial variation by applying Kruskal-Wallis tests to each variable individually, using a non-parametric approach due to violations of normality. I subsequently tested for spatial differences in the extracted scores of the first two principal components (PC1 and PC2) among clusters using one-way ANOVA.

Show Code

# Shapiro-Wilk test for all raw soil variables
soil_normality <- soil_data %>%
  pivot_longer(cols = c(pH, SOC, TN, EC, P, Al, B, Ca, Fe, K, Mg, Na, CEC, ExAc, PSI, Clay, Silt, Sand), 
               names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  shapiro_test(Value)

# PERMANOVA Does the overall soil change between clusters?
soil_matrix <- soil_data %>% ungroup() %>% dplyr:: select(-cluster, -plot)
adonis2(soil_matrix ~ cluster, data = soil_data)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = soil_matrix ~ cluster, data = soil_data)
          Df SumOfSqs      R2      F Pr(>F)    
Model     15   1.9487 0.59707 14.028  0.001 ***
Residual 142   1.3151 0.40293                  
Total    157   3.2638 1.00000                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Kruskal-Wallis Which specific nutrients or textures are different?
soil_kruskal <- soil_data %>%
  pivot_longer(
    cols = c(pH, SOC, TN, EC, P, Al, B, Ca, Fe, K, Mg, Na, CEC, ExAc, PSI, Clay, Silt, Sand),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  group_by(Variable) %>%
  kruskal_test(Value ~ cluster) %>%
  add_significance() %>%
  filter(p < 0.05) 


soil_kruskal %>%
  kable(caption = "Significant Soil Property Differences Across Clusters") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%", height = "400px")

Significant Soil Property Differences Across Clusters
Variable	.y.	n	statistic	df	p	method	p.signif
Al	Value	158	97.01431	15	0.00e+00	Kruskal-Wallis	****
B	Value	158	71.90914	15	0.00e+00	Kruskal-Wallis	****
CEC	Value	158	94.06721	15	0.00e+00	Kruskal-Wallis	****
Ca	Value	158	94.12593	15	0.00e+00	Kruskal-Wallis	****
Clay	Value	158	76.87263	15	0.00e+00	Kruskal-Wallis	****
EC	Value	158	47.05928	15	3.60e-05	Kruskal-Wallis	****
ExAc	Value	158	44.76172	15	8.35e-05	Kruskal-Wallis	****
Fe	Value	158	39.87837	15	4.73e-04	Kruskal-Wallis	***
K	Value	158	86.76481	15	0.00e+00	Kruskal-Wallis	****
Mg	Value	158	93.31948	15	0.00e+00	Kruskal-Wallis	****
Na	Value	158	85.33213	15	0.00e+00	Kruskal-Wallis	****
P	Value	158	44.24040	15	1.01e-04	Kruskal-Wallis	***
PSI	Value	158	52.21129	15	5.20e-06	Kruskal-Wallis	****
SOC	Value	158	47.75408	15	2.79e-05	Kruskal-Wallis	****
Sand	Value	158	77.46504	15	0.00e+00	Kruskal-Wallis	****
Silt	Value	158	35.38149	15	2.17e-03	Kruskal-Wallis	**
TN	Value	158	53.98273	15	2.60e-06	Kruskal-Wallis	****
pH	Value	158	105.81762	15	0.00e+00	Kruskal-Wallis	****

Show Code

#  PCA Test (PC1 & PC2 acrosee the landscape) ---
summary(aov(PC1 ~ Cluster, data = pca_scores))

             Df Sum Sq Mean Sq F value Pr(>F)    
Cluster      15  733.2   48.88   15.27 <2e-16 ***
Residuals   142  454.6    3.20                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov(PC2 ~ Cluster, data = pca_scores))

             Df Sum Sq Mean Sq F value   Pr(>F)    
Cluster      15  237.9  15.859   4.802 1.83e-07 ***
Residuals   142  468.9   3.302                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

✅ Interpretation:

The PERMANOVA revealed that overall soil composition differed significantly among the 16 clusters (F = 14.03, p = 0.001). The cluster grouping explained 59.7% of the multivariate variance in soil properties. Kruskal-Wallis tests on individual soil variables confirmed that all 18 measured variables exhibited statistically significant spatial variation (all p < 0.01).

Plants Data

Rangeland Plants metacommunity

Following Marcon et al. (2025) I used the entropart R package to create a MetaCommunity object. These objects integrated abundance data for rangeland plant species—including perennial and annual grasses, shrubs, and trees recorded along 28-m transects—as well as avian community data derived from both traditional point counts and BirdNET automated detections. In this study, each metacommunity was defined as the total collection of species recorded at the landscape level across 16 clusters, with each cluster treated as a local community. These metacommunities served as the computational basis for partitioning diversity, as it allows for the simultaneous calculation of local α (alpha), between-site β (beta) and total landscape γ (gamma) diversity while weighting each community according to its sample size (Marcon et al., 2025).

Show Code

# Read CSV file
rangelands.data <- read_csv("data/rl.data.csv")

head(rangelands.data)

# A tibble: 6 × 163
  year.month lifeform    species    p1    p2    p3    p4    p5    p6    p7    p8
  <chr>      <chr>       <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 01-Apr-24  Perennial … Aristi…     0     0     0     0     0     0     0     0
2 01-Apr-24  Perennial … Bothri…     0     0     0     0     0     1     0     0
3 01-Apr-24  Perennial … Cynodo…    28    25    30    29    30     5     1    26
4 01-Apr-24  Perennial … Eragro…     0     5     0     0     0    21     1     0
5 01-Apr-24  Perennial … N           0     0     0     1     0     3    10     0
6 01-Apr-24  Perennial … Panicu…     0     0     0     0     0     0    11     0
# ℹ 152 more variables: p9 <dbl>, p10 <dbl>, p11 <dbl>, p12 <dbl>, p13 <dbl>,
#   p14 <dbl>, p15 <dbl>, p16 <dbl>, p17 <dbl>, p18 <dbl>, p19 <dbl>,
#   p20 <dbl>, p21 <dbl>, p22 <dbl>, p23 <dbl>, p24 <dbl>, p25 <dbl>,
#   p26 <dbl>, p27 <dbl>, p28 <dbl>, p29 <dbl>, p30 <dbl>, p31 <dbl>,
#   p32 <dbl>, p33 <dbl>, p34 <dbl>, p35 <dbl>, p36 <dbl>, p37 <dbl>,
#   p38 <dbl>, p39 <dbl>, p40 <dbl>, p41 <dbl>, p42 <dbl>, p43 <dbl>,
#   p44 <dbl>, p45 <dbl>, p46 <dbl>, p47 <dbl>, p48 <dbl>, p49 <dbl>, …

Show Code

rangelands.data <- rangelands.data %>%
  #filter(lifeform != "Shrub") %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>%
filter(!species %in% c("N", "NA"))

rangelands.data %>% 
   dplyr:: select( -year.month, -lifeform)%>%
  #select(!species &!lifeform ) 


  pivot_longer(cols= !species, names_to = "plot", values_to = "freq") %>%
  # filter(rowSums(across(where(is.numeric)))!=0) %>%
  pivot_wider(names_from = species, values_from = freq, values_fn = sum) %>%
  replace(is.na(.), 0) %>% 
  mutate(cluster=paste0(rep(LETTERS[3], 10), rep(1:16, each = 10))) %>% 
  relocate(cluster) %>% 
  pivot_longer(!cluster &!plot, names_to = "species", values_to = "freq") %>% 
  group_by(cluster, species) %>% 
  summarise_if(is.numeric, ~ sum(.)) %>% 
  pivot_wider(names_from = cluster, values_from = freq)%>% 
  relocate(C10:C16, .after = C9)->rangelands.data.MC

mc.rangelands<-MetaCommunity(rangelands.data.MC)
summary(mc.rangelands)

Meta-community (class 'MetaCommunity') made of 35797 individuals in 16 
communities and 132 species. 

Its sample coverage is 0.999804470055454 

Community weights are: 
    C1     C2     C3     C4     C5     C6     C7     C8     C9    C10    C11 
0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 
   C12    C13    C14    C15    C16 
0.0625 0.0625 0.0625 0.0625 0.0625 
Community sample numbers of individuals are: 
  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14  C15  C16 
2286 1869 2352 2321 2046 2100 2648 2105 2294 2557 2452 2009 2127 2614 1509 2508 
Community sample coverages are: 
       C1        C2        C3        C4        C5        C6        C7        C8 
0.9978155 0.9989322 0.9970245 0.9974171 0.9975586 0.9980961 0.9996241 0.9943024 
       C9       C10       C11       C12       C13       C14       C15       C16 
0.9965153 0.9949181 0.9979635 0.9995052 0.9967125 0.9965582 0.9980225 0.9984073

✅ Interpretation: Analysis of the plant community across the 16 clusters revealed significant spatial structuring. The analysis identified a total of 132 species distributed across 16 local communities, with an exceptionally high estimated sample coverage (0.9998), indicating that the survey nearly captured the full extent of the species richness present in the study area.

Rangeland Plants Community Alpha Diversity

I then evaluated the local community structure by calculating theα-diversity for each of the 16 clusters using Hill numbers (q = 0, 1, and 2) via the DivPart function of entropart with the “Best” estimator (Marcon et al., 2025). This provided measures of species richness (q = 0), Shannon-effective diversity (q = 1), and Simpson-effective diversity (q = 2). Diversity profiles were generated using the DivProfile function (entropart package) to visualise the transition from richness to dominance-weighted diversity (Marcon et al., 2025). Additionally, I calculated community evenness as the ratio of Simpson diversity to richness (q2 / q0).

Note: This data frame contains grasses, shrubs and trees collected along the 28 metres transect in each plot

Show Code

#all species in rangeland form

# Alpha diversity for all(q=0,1,2)
alpha.rangelands.0 <- DivPart(q=0, mc.rangelands, Correction = "Best")
summary(alpha.rangelands.0)

HCDT diversity partitioning of order 0 of metaCommunity mc.rangelands

Alpha diversity of communities: 
 C1  C2  C3  C4  C5  C6  C7  C8  C9 C10 C11 C12 C13 C14 C15 C16 
 55  45  50  61  57  50  56  65  47  63  52  50  44  55  48  55 
Total alpha diversity of the communities: 
[1] 53.3125
Beta diversity of the communities: 
    None 
2.475967 
Gamma diversity of the metacommunity: 
None 
 132

Show Code

alpha.rangelands.0$CommunityAlphaDiversities

 C1  C2  C3  C4  C5  C6  C7  C8  C9 C10 C11 C12 C13 C14 C15 C16 
 55  45  50  61  57  50  56  65  47  63  52  50  44  55  48  55

Show Code

alpha.rangelands.1 <- DivPart(q=1, mc.rangelands, Correction = "Best")
summary(alpha.rangelands.1)

HCDT diversity partitioning of order 1 of metaCommunity mc.rangelands

Alpha diversity of communities: 
      C1       C2       C3       C4       C5       C6       C7       C8 
17.63735 12.90104 14.27879 21.82798 20.95318 12.45692 22.71278 18.77973 
      C9      C10      C11      C12      C13      C14      C15      C16 
10.71013 18.63753 17.69965 19.80972 11.77773 15.18085 17.12073 18.86585 
Total alpha diversity of the communities: 
[1] 16.55762
Beta diversity of the communities: 
    None 
1.398844 
Gamma diversity of the metacommunity: 
    None 
23.16152

Show Code

alpha.rangelands.1$CommunityAlphaDiversities

      C1       C2       C3       C4       C5       C6       C7       C8 
17.63735 12.90104 14.27879 21.82798 20.95318 12.45692 22.71278 18.77973 
      C9      C10      C11      C12      C13      C14      C15      C16 
10.71013 18.63753 17.69965 19.80972 11.77773 15.18085 17.12073 18.86585

Show Code

alpha.rangelands.2 <- DivPart(q=2, mc.rangelands, Correction = "Best")
summary(alpha.rangelands.2)

HCDT diversity partitioning of order 2 of metaCommunity mc.rangelands

Alpha diversity of communities: 
       C1        C2        C3        C4        C5        C6        C7        C8 
 8.586473  6.057753  7.667439 11.938755 10.382129  5.226332 14.109708 10.163111 
       C9       C10       C11       C12       C13       C14       C15       C16 
 6.149157 10.222547 11.008562 13.225724  6.249617  7.351418  9.184408 10.778188 
Total alpha diversity of the communities: 
[1] 8.52314
Beta diversity of the communities: 
    None 
1.181552 
Gamma diversity of the metacommunity: 
    None 
10.07053

Show Code

alpha.rangelands.2$CommunityAlphaDiversities

       C1        C2        C3        C4        C5        C6        C7        C8 
 8.586473  6.057753  7.667439 11.938755 10.382129  5.226332 14.109708 10.163111 
       C9       C10       C11       C12       C13       C14       C15       C16 
 6.149157 10.222547 11.008562 13.225724  6.249617  7.351418  9.184408 10.778188

Show Code

data.frame(alpha.rangelands.0$CommunityAlphaDiversities,alpha.rangelands.1$CommunityAlphaDiversitie,alpha.rangelands.2$CommunityAlphaDiversities)->alphas.rangelands

colnames(alphas.rangelands) <- c("q0", "q1", "q2")

# View the result
head(alphas.rangelands)

   q0       q1        q2
C1 55 17.63735  8.586473
C2 45 12.90104  6.057753
C3 50 14.27879  7.667439
C4 61 21.82798 11.938755
C5 57 20.95318 10.382129
C6 50 12.45692  5.226332

Show Code

alphas.rangelands %>%
  rownames_to_column() %>%
  rename(cluster = rowname) %>%
  mutate(cluster = factor(cluster)) %>%
  #bind_rows(tibble(cluster = "C15", q0s = 0, q1s = 0, q2s = 0, D.eve = 0)) %>%
  mutate(cluster = fct_relevel(cluster, paste0("C", 1:16))) %>%
  arrange(cluster) -> alphas.rangelands

head(alphas.rangelands)

  cluster q0       q1        q2
1      C1 55 17.63735  8.586473
2      C2 45 12.90104  6.057753
3      C3 50 14.27879  7.667439
4      C4 61 21.82798 11.938755
5      C5 57 20.95318 10.382129
6      C6 50 12.45692  5.226332

Rangeland Plants Spatial Statistics

To test for significant spatial differences in community composition, a PERMANOVA was performed on the plot-level plant species abundance matrix (n = 160) and at the point station level for birds (n = 96) using Bray–Curtis dissimilarity. Spatial variation in alpha diversity across the 16 clusters was evaluated using one-way ANOVA (stats package).

Show Code

rangelands_plots <- rangelands.data %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>%
  filter(!species %in% c("N", "NA")) %>%
  dplyr::select(-year.month, -lifeform) %>%
  pivot_longer(cols = !species, names_to = "plot", values_to = "freq") %>%
  group_by(plot, species) %>%
  summarise(freq = sum(freq, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = freq, values_fill = 0) %>%
  # Apply the cluster grouping (10 plots per cluster)
  mutate(cluster = paste0("C", rep(1:16, each = 10))) %>%
  relocate(cluster, plot)

#  META-COMMUNITY & ALPHA DIVERSITY at plot level

mc_data <- rangelands_plots %>%
  ungroup() %>%
  dplyr:: select(-cluster) %>%
  column_to_rownames("plot") %>%
  t() %>%
  as.data.frame()

mc.plots <- MetaCommunity(mc_data)

#Hill Numbers (q=0, 1, 2) per plot
q0_vals <- DivPart(q=0, mc.plots, Correction = "Best")$CommunityAlphaDiversities
q1_vals <- DivPart(q=1, mc.plots, Correction = "Best")$CommunityAlphaDiversities
q2_vals <- DivPart(q=2, mc.plots, Correction = "Best")$CommunityAlphaDiversities

plot_diversity <- rangelands_plots %>%
  dplyr:: select(cluster, plot) %>%
  mutate(q0 = q0_vals, q1 = q1_vals, q2 = q2_vals)

# PERMANOVA (Community Composition) ---
species_matrix <- rangelands_plots %>% ungroup() %>% dplyr:: select(-cluster, -plot)

permanova_result <- adonis2(species_matrix ~ cluster, 
                             data = rangelands_plots, 
                             method = "bray", 
                             permutations = 999)
print(permanova_result)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = species_matrix ~ cluster, data = rangelands_plots, permutations = 999, method = "bray")
          Df SumOfSqs      R2      F Pr(>F)    
Model     15    6.630 0.21198 2.5824  0.001 ***
Residual 144   24.647 0.78802                  
Total    159   31.277 1.00000                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Normality of Residuals (Shapiro)
plot_diversity_long <- plot_diversity %>%
  pivot_longer(cols = c(q0, q1, q2), names_to = "q_order", values_to = "value")

shapiro_results <- plot_diversity_long %>%
  group_by(q_order) %>%
  shapiro_test(value)

#  Homogeneity of Variance (Levene's Test for q1)
levene_q1 <-leveneTest(q1 ~ cluster, data = plot_diversity)

print(shapiro_results)

# A tibble: 3 × 4
  q_order variable statistic       p
  <chr>   <chr>        <dbl>   <dbl>
1 q0      value        0.990 0.325  
2 q1      value        0.978 0.0109 
3 q2      value        0.969 0.00122

Show Code

print(levene_q1)

Levene's Test for Homogeneity of Variance (center = median)
       Df F value Pr(>F)
group  15  0.8086 0.6667
      144

Show Code

# ANOVA MODELS ---
aov_q0 <- aov(q0 ~ cluster, data = plot_diversity)
aov_q1 <- aov(q1 ~ cluster, data = plot_diversity)
aov_q2 <- aov(q2 ~ cluster, data = plot_diversity)


summary(aov_q0)

             Df Sum Sq Mean Sq F value  Pr(>F)   
cluster      15    976   65.05   2.184 0.00929 **
Residuals   144   4288   29.78                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov_q1)

             Df Sum Sq Mean Sq F value   Pr(>F)    
cluster      15    511   34.07   4.157 2.58e-06 ***
Residuals   144   1180    8.20                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov_q2)

             Df Sum Sq Mean Sq F value   Pr(>F)    
cluster      15  351.5  23.431    4.86 1.37e-07 ***
Residuals   144  694.3   4.821                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Run Kruskal-Wallis for all diversity orders to confirm the ANOVA test since q1 and q2 didnt meet the assumption of normality
kruskal_q0 <- kruskal.test(q0 ~ cluster, data = plot_diversity)
kruskal_q1 <- kruskal.test(q1 ~ cluster, data = plot_diversity)
kruskal_q2 <- kruskal.test(q2 ~ cluster, data = plot_diversity)

list(q0 = kruskal_q0, q1 = kruskal_q1, q2 = kruskal_q2)

$q0

    Kruskal-Wallis rank sum test

data:  q0 by cluster
Kruskal-Wallis chi-squared = 30.776, df = 15, p-value = 0.009411


$q1

    Kruskal-Wallis rank sum test

data:  q1 by cluster
Kruskal-Wallis chi-squared = 49.428, df = 15, p-value = 1.493e-05


$q2

    Kruskal-Wallis rank sum test

data:  q2 by cluster
Kruskal-Wallis chi-squared = 54.44, df = 15, p-value = 2.217e-06

Show Code

#ANOVA AND KRUSKAL RESULTS ARE CONSISTENT

✅ Interpretation: One-way ANOVA revealed significant spatial variation in alpha diversity among the 16 clusters across all Hill-number orders (Table 4). Differences were significant for q = 0 (p = 0.009), q = 1 (p < 0.001), and q = 2 (p < 0.001). The strength of the cluster effect increased with q, indicating that dominant‑species diversity varied more strongly across the landscape than the species richness.

Rangeland Plants diversity profiles

Diversity profiles were generated using the DivProfile function (entropart package) to visualise the transition from richness to dominance-weighted diversity (Marcon et al., 2025).

Show Code

dp.rangelands<-DivProfile(seq(0,2,1), mc.rangelands, Correction = "Best", NumberOfSimulations = 10)

Show Code

plot(dp.rangelands)

Rangeland Plants beta diversity

I quantified species turnover (β-diversity) using abundance-based Bray–Curtis dissimilarity via the betapart package(Baselga et al., 2025). The beta diversity analysis, based on the abundance-weighted Bray-Curtis dissimilarity index, provides quantitative evidence of the spatial variation in species composition and abundance structure. The total dissimilarity (β BRAY) is divided into two components: β BRAY.BAL, representing balanced species turnover or replacement, and βBRAY.GRA, representing differences due to species richness and nestedness.

$beta.BRAY.BAL
[1] 0.8182445

$beta.BRAY.GRA
[1] 0.1694376

$beta.BRAY
[1] 0.9876821

✅ Interpretation: The extremely high total dissimilarity (β.BRAY = 0.988) indicates near-complete differentiation in community structure across clusters. This was overwhelmingly driven by species turnover (β.BRAY.BAL = 0.818), with a minor contribution from nestedness resulting from abundance gradients (β.BRAY.GRA = 0.169). The statistical significance of this compositional differentiation was confirmed by PERMANOVA (F₁₅,₁₄₄ = 2.58, p < 0.001; R² = 0.212), which demonstrated that spatial grouping explained a substantial portion of community variation. These results demonstrate that strong species replacement and pronounced abundance gradients jointly shape landscape-level variation, resulting in a highly heterogeneous rangeland environment despite the shared presence of common dominant taxa.

Rangeland Plants Species rank curves

The species rank abundance curves is important to characterise structural shifts in plant communities across the landscape’s 16 clusters.

Show Code

rangelands.data.MC %>% 
  arrange(desc(rowSums(across(where(is.numeric))))) %>% 
  print(n=96)

Show Code

rangelands.data.MC %>% 
  arrange(desc(rowSums(across(where(is.numeric))))) %>% 
  pivot_longer(!species, names_to = "cluster", values_to = "freq") %>% 
  filter(freq!=0) %>% 
  group_by(cluster) %>% 
  mutate(rank = rank(desc(freq))) %>% 
  ggplot(aes(x=rank, y=log(freq+1)))+
  geom_line() + 
  geom_point()+
  theme_bw() +
  facet_wrap(.~ cluster)

Shrub volume,cover,biomass (PLOT LEVEL)


Metric	Description	Formula	Units

Total Cover	The sum of the crown area of all shrubs	A=∑π×(length/2)×(width/2)	m²

Total Volume	The sum of the estimated volume of all shrubs	V=∑1/3×A×height	m³

Show Code

shrubsv <- read_csv("data/raw.data.shrub.csv")

shrubsv <- shrubsv %>% 
  mutate(
    Cluster = as.character(Cluster),
    Plot = as.character(Plot)
  )

# Create full grid of all 160 plots
all_plots_grid <- expand_grid(
  Cluster = as.character(1:16), 
  Plot = as.character(1:10)
)

# Calculate individual shrub metrics
shrub_df <- shrubsv %>%
  mutate(
    crown_area_m2 = pi * (shlength / 2) * (shwidth / 2),
    volume_m3 = (1/3) * crown_area_m2 * shheight
  )

#  Plot-Level Summary (n=160)
shrub_plot_summary <- shrub_df %>%
  group_by(Cluster, Plot) %>%
  summarise(
    n_shrubs = n(),
    total_cover_m2 = sum(crown_area_m2, na.rm = TRUE),
    total_vol_m3 = sum(volume_m3, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
 
  right_join(all_plots_grid, by = c("Cluster", "Plot")) %>%
  mutate(across(where(is.numeric), ~replace_na(., 0))) %>%
  mutate(
    cover_percent.sh = (total_cover_m2 / 1000) * 100,
    volume_ha.sh = total_vol_m3 * 10,
    density_ha.sh = n_shrubs * 10
  )

# Cluster-Level Summary (n=16)
shrub.cluster_summary <- shrub_plot_summary %>%
  group_by(Cluster) %>%
  summarise(
    SH.C = mean(cover_percent.sh),
    SH.V = mean(volume_ha.sh),
    .groups = 'drop'
  ) %>%
  arrange(as.integer(Cluster))

shrub.volume.cover<- shrub.cluster_summary %>% 
 dplyr::select(Cluster, SH.C, SH.V)

head(shrub.volume.cover)

# A tibble: 6 × 3
  Cluster   SH.C   SH.V
  <chr>    <dbl>  <dbl>
1 1        8.97   591. 
2 2        4.87   339. 
3 3       51.5   3298. 
4 4        9.89   655. 
5 5        8.85   460. 
6 6        0.242   10.3

Tree volume, cover, biomass (PLOT LEVEL)


Metric	Description	Formula	Units

Total Basal Area	The sum of the cross-sectional area of all tree trunks	A=∑[π×(dbh_cm/100)²/4 ]	m²

Total Volume	The sum of the estimated volume of all trees	V=∑A×height×0.5	m³

Total Cover	The sum of the estimated canopy area of all trees	A=∑π×[(dbh_cm/100)]/2)	m²

Show Code

treesv <- read_csv("data/raw.data.trees.csv")
treesv <- treesv %>% mutate(Cluster = as.character(Cluster))


all_clusters <- tibble(Cluster = as.character(1:16))
tree_df_prepared <- treesv %>%
  mutate(
    dbh_cm = trcircumf / pi,
    plot_num = as.integer(Plot),
    cluster_num = as.integer(Cluster),
    Cluster = as.character(Cluster),
    Plot = as.character(Plot)
  )

# Calculate metrics per tree
tree_metrics <- tree_df_prepared %>%
  mutate(
    basal_area_m2 = (pi * (dbh_cm / 100)^2) / 4,
    volume_m3 = basal_area_m2 * trheight * 0.5,
    crown_cover_m2 = pi * ((dbh_cm / 100) / 2)^2
  )

# Summarise metrics per cluster 
tree_cluster_summary <- tree_metrics %>%
  group_by(Cluster) %>%
  summarise(
    total_basal_area.tr = sum(basal_area_m2, na.rm = TRUE),
    total_volume.tr = sum(volume_m3, na.rm = TRUE),
    total_cover.tr = sum(crown_cover_m2, na.rm = TRUE),
    n_trees = n(),
    .groups = 'drop'
  ) %>%
  mutate(
    basal_area_ha.tr = total_basal_area.tr / 100,
    volume_ha.tr = total_volume.tr / 100,
    density_ha.tr = n_trees / 100,
    cover_percent.tr = (total_cover.tr / 1e6) * 100
  )

# Join with full cluster list and replace NA with 0
tree.cluster_summary <- all_clusters %>%
  left_join(tree_cluster_summary, by = "Cluster") %>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))


tree_reps <- tree_metrics %>%
  group_by(Cluster, Plot) %>%
  summarise(
    total_ba = sum(basal_area_m2, na.rm = TRUE),
    total_vol = sum(volume_m3, na.rm = TRUE),
    total_cov = sum(crown_cover_m2, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(
    basal_area_ha.tr = total_ba * 10,  #
    volume_ha.tr = total_vol * 10,
    cover_percent.tr = (total_cov / 1000) * 100
  ) %>%
  right_join(expand_grid(Cluster = as.character(1:16), Plot = as.character(1:10)), 
             by = c("Cluster", "Plot")) %>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))


# Calculate Mean and Standard Error for each cluster
tree.cluster_summary <- tree_reps %>%
  group_by(Cluster) %>%
  summarise(
    # Mean values per hectare
    Mean_BA_ha = mean(basal_area_ha.tr, na.rm = TRUE),
    Mean_Vol_ha = mean(volume_ha.tr, na.rm = TRUE),
    Mean_Cov_pct = mean(cover_percent.tr, na.rm = TRUE),
    
    # Standard Error 
    SE_BA_ha = sd(basal_area_ha.tr, na.rm = TRUE) / sqrt(n()),
    SE_Vol_ha = sd(volume_ha.tr, na.rm = TRUE) / sqrt(n()),
    SE_Cov_pct = sd(cover_percent.tr, na.rm = TRUE) / sqrt(n()),
    
    .groups = 'drop'
  ) %>%
  arrange(as.numeric(Cluster))

head(tree.cluster_summary)

# A tibble: 6 × 7
  Cluster Mean_BA_ha Mean_Vol_ha Mean_Cov_pct SE_BA_ha SE_Vol_ha SE_Cov_pct
  <chr>        <dbl>       <dbl>        <dbl>    <dbl>     <dbl>      <dbl>
1 1          6.50        24.4       0.0650     3.43     14.6      0.0343   
2 2          1.08         2.16      0.0108     0.523     1.02     0.00523  
3 3          2.29         6.83      0.0229     0.691     2.56     0.00691  
4 4          1.71         5.29      0.0171     1.01      2.96     0.0101   
5 5          5.87        12.1       0.0587     2.66      5.57     0.0266   
6 6          0.00796      0.0139    0.0000796  0.00538   0.00925  0.0000538

Show Code

colnames(tree.cluster_summary)

[1] "Cluster"      "Mean_BA_ha"   "Mean_Vol_ha"  "Mean_Cov_pct" "SE_BA_ha"    
[6] "SE_Vol_ha"    "SE_Cov_pct"

Show Code

tree.cluster_summary<-  tree.cluster_summary %>% dplyr:: select(Cluster,  Mean_BA_ha, Mean_Vol_ha, Mean_Cov_pct)
tree.vol.cov.area <-  tree.cluster_summary %>% rename(TR.A= Mean_BA_ha, TR.V=Mean_Vol_ha, TR.C = Mean_Cov_pct)
head (tree.vol.cov.area)

# A tibble: 6 × 4
  Cluster    TR.A    TR.V      TR.C
  <chr>     <dbl>   <dbl>     <dbl>
1 1       6.50    24.4    0.0650   
2 2       1.08     2.16   0.0108   
3 3       2.29     6.83   0.0229   
4 4       1.71     5.29   0.0171   
5 5       5.87    12.1    0.0587   
6 6       0.00796  0.0139 0.0000796

Shrub and tree statistics at plot level

I used non-parametric Kruskal-Wallis tests to assess spatial differences in shrub and tree cover and volume across 16 clusters, following a significant deviation from normality in the data (Shapiro-Wilk, p < 0.001). The distributions were strongly right-skewed and contained zero values corresponding to plots without woody vegetation. I defined statistical significance for all models at α = 0.05.

Show Code

#TEST ASSUMPTIONS
shrub_norm <- shapiro_test(residuals(lm(volume_ha.sh ~ Cluster, data = shrub_plot_summary)))

tree_norm <- shapiro_test(residuals(lm(volume_ha.tr ~ Cluster, data = tree_reps)))

shrub_var <- leveneTest(volume_ha.sh ~ Cluster, data = shrub_plot_summary)
tree_var <- leveneTest(volume_ha.tr ~ Cluster, data = tree_reps)

print(shrub_norm)

# A tibble: 1 × 3
  variable                                                    statistic  p.value
  <chr>                                                           <dbl>    <dbl>
1 residuals(lm(volume_ha.sh ~ Cluster, data = shrub_plot_sum…     0.273 7.88e-25

Show Code

print(tree_norm)

# A tibble: 1 × 3
  variable                                                statistic  p.value
  <chr>                                                       <dbl>    <dbl>
1 residuals(lm(volume_ha.tr ~ Cluster, data = tree_reps))     0.563 6.31e-20

Show Code

#  KRUSKAL-WALLIS

# --- SHRUBS: Volume and Cover ---
shrub_stats <- shrub_plot_summary %>%
  dplyr:: select(Cluster, cover_percent.sh, volume_ha.sh) %>%
  pivot_longer(cols = c(cover_percent.sh, volume_ha.sh), names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  kruskal_test(Value ~ Cluster) %>%
  add_significance()

# --- TREES: Basal Area, Volume, and Cover ---
tree_stats <- tree_reps %>%
  dplyr:: select(Cluster, basal_area_ha.tr, volume_ha.tr, cover_percent.tr) %>%
  pivot_longer(cols = c(basal_area_ha.tr, volume_ha.tr, cover_percent.tr), 
               names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  kruskal_test(Value ~ Cluster) %>%
  add_significance()

# Combine results
woody_spatial_stats <- bind_rows(shrub_stats, tree_stats)
print(woody_spatial_stats)

# A tibble: 5 × 8
  Variable         .y.       n statistic    df          p method        p.signif
  <chr>            <chr> <int>     <dbl> <int>      <dbl> <chr>         <chr>   
1 cover_percent.sh Value   160      53.8    15 0.00000287 Kruskal-Wall… ****    
2 volume_ha.sh     Value   160      54.1    15 0.00000254 Kruskal-Wall… ****    
3 basal_area_ha.tr Value   160      45.6    15 0.0000616  Kruskal-Wall… ****    
4 cover_percent.tr Value   160      45.6    15 0.0000616  Kruskal-Wall… ****    
5 volume_ha.tr     Value   160      45.2    15 0.0000708  Kruskal-Wall… ****

✅ Interpretation: The Kruskal-Wallis tests revealed statistically significant spatial variation across all five measured woody vegetation metrics: shrub metrics (percentage cover and stem volume per hectare) and tree metrics (basal area per hectare, stem volume per hectare, and percentage cover) (p < 0.001) (Table 5).

Point count

Birds metacommunity and alpha diversity

Same diversity measures as above. Metacommunity, alpha diversity, beta and gamma diverisity.

Show Code

birds.data <- read_csv("data/point.counts.csv") %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>%
  filter(!species %in% c("N", "NA"))%>%
  dplyr:: select(-season)
head(birds.data)

# A tibble: 6 × 97
  species         p1    p2    p3    p4    p5    p6    p7    p8    p9   p10   p11
  <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Abyssinian …     0     0     0     0     0     0     0     0     0     0     0
2 African Gra…     0     0     0     0     0     0     0     0     0     0     0
3 African Gre…     0     1     0     0     0     0     0     0     0     0     0
4 African Pie…     1     1     0     0     1     0     0     0     0     0     0
5 African Pip…     0     0     0     0     0     0     0     0     0     0     0
6 African Swi…     0     0     0     0     0     0     0     0     0     0     0
# ℹ 85 more variables: p12 <dbl>, p13 <dbl>, p14 <dbl>, p15 <dbl>, p16 <dbl>,
#   p17 <dbl>, p18 <dbl>, p19 <dbl>, p20 <dbl>, p21 <dbl>, p22 <dbl>,
#   p23 <dbl>, p24 <dbl>, p25 <dbl>, p26 <dbl>, p27 <dbl>, p28 <dbl>,
#   p29 <dbl>, p30 <dbl>, p31 <dbl>, p32 <dbl>, p33 <dbl>, p34 <dbl>,
#   p35 <dbl>, p36 <dbl>, p37 <dbl>, p38 <dbl>, p39 <dbl>, p40 <dbl>,
#   p41 <dbl>, p42 <dbl>, p43 <dbl>, p44 <dbl>, p45 <dbl>, p46 <dbl>,
#   p47 <dbl>, p48 <dbl>, p49 <dbl>, p50 <dbl>, p51 <dbl>, p52 <dbl>, …

Show Code

birds.data.MC <- birds.data %>% 
pivot_longer(cols= !species, names_to = "cluster", values_to = "freq") %>%
  # filter(rowSums(across(where(is.numeric)))!=0) %>%
  pivot_wider(names_from = species, values_from = freq, values_fn = sum) %>%
  replace(is.na(.), 0) %>% 
  mutate(cluster=paste0(rep(LETTERS[3], 6), rep(1:16, each = 6))) %>% #for 6 point count in each cluster
  relocate(cluster) %>% 
  pivot_longer(!cluster, names_to = "species", values_to = "freq") %>% 
  group_by(cluster, species) %>% 
  summarise_if(is.numeric, ~ sum(.)) %>% 
  pivot_wider(names_from = cluster, values_from = freq)%>% 
  relocate(C10:C16, .after = C9)


mc.birds <- MetaCommunity(birds.data.MC)
summary(mc.birds)

Meta-community (class 'MetaCommunity') made of 14335 individuals in 16 
communities and 265 species. 

Its sample coverage is 0.997419167541811 

Community weights are: 
    C1     C2     C3     C4     C5     C6     C7     C8     C9    C10    C11 
0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 
   C12    C13    C14    C15    C16 
0.0625 0.0625 0.0625 0.0625 0.0625 
Community sample numbers of individuals are: 
  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14  C15  C16 
 924  942  870 1042  789  806  877 1010  947 1073  540  702  795  791  999 1228 
Community sample coverages are: 
       C1        C2        C3        C4        C5        C6        C7        C8 
0.9459155 0.9469642 0.9644207 0.9530211 0.9531630 0.9529121 0.9635587 0.9653700 
       C9       C10       C11       C12       C13       C14       C15       C16 
0.9599044 0.9590369 0.9279492 0.9473745 0.9497487 0.9646624 0.9650010 0.9698843

Show Code

alpha.birds.0 <- DivPart(q=0, mc.birds, Correction = "Best")
alpha.birds.1 <- DivPart(q=1, mc.birds, Correction = "Best")
alpha.birds.2 <- DivPart(q=2, mc.birds, Correction = "Best")


alphas.birds <- data.frame(
  q0b = alpha.birds.0$CommunityAlphaDiversities,
  q1b = alpha.birds.1$CommunityAlphaDiversities,
  q2b = alpha.birds.2$CommunityAlphaDiversities
) %>%
  rownames_to_column("cluster") %>%
  mutate(D.eveb = q2b / q0b,
         cluster = factor(cluster, levels = paste0("C", 1:16))) %>%
  arrange(cluster)

 head(alphas.birds)

  cluster q0b      q1b      q2b    D.eveb
1      C1 138 71.06855 45.20682 0.3275857
2      C2 141 68.15027 42.95914 0.3046748
3      C3 117 61.92999 39.16891 0.3347770
4      C4 137 61.93041 40.05031 0.2923381
5      C5 118 63.86513 44.72455 0.3790216
6      C6 117 59.57104 39.46756 0.3373296

Show Code

 dp.birds<-DivProfile(seq(0,2,1), mc.birds, Correction = "Best", NumberOfSimulations = 10)

Show Code

plot(dp.birds)

Birds spatial statistics

Show Code

birds_stations <- birds.data %>%
  # pivot_longer 
  pivot_longer(cols = !species, names_to = "station", values_to = "freq") %>%
  group_by(station, species) %>%
  summarise(freq = sum(freq, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = freq, values_fill = 0) %>%
  # Assign 16 clusters with 6 stations each (ensure your data rows match 16 * 6 = 96)
  mutate(cluster = factor(paste0("C", rep(1:16, each = 6)), 
                          levels = paste0("C", 1:16))) %>%
  relocate(cluster, station)

#  Meta-Community & Alpha Diversity at Station Level
# Transpose for entropart: species as rows, stations as columns
mc_birds_stations <- birds_stations %>%
  ungroup() %>%
  dplyr:: select(-cluster) %>%
  column_to_rownames("station") %>%
  t() %>%
  as.data.frame() %>%
  MetaCommunity()

# Hill Numbers (q=0, 1, 2) per station
q0_birds <- DivPart(q=0, mc_birds_stations, Correction = "Best")$CommunityAlphaDiversities
q1_birds <- DivPart(q=1, mc_birds_stations, Correction = "Best")$CommunityAlphaDiversities
q2_birds <- DivPart(q=2, mc_birds_stations, Correction = "Best")$CommunityAlphaDiversities

# Create diversity dataframe for testing
birds_div_stats <- birds_stations %>%
  dplyr:: select(cluster, station) %>%
  mutate(q0.b = q0_birds, q1.b = q1_birds, q2.b = q2_birds)

# PERMANOVA (Community Composition)
# Does the species mix differ significantly between clusters?
bird_species_matrix <- birds_stations %>% ungroup() %>% dplyr:: select(-cluster, -station)

birds_permanova <- adonis2(bird_species_matrix ~ cluster, 
                           data = birds_stations, 
                           method = "bray", 
                           permutations = 999)
print(birds_permanova)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = bird_species_matrix ~ cluster, data = birds_stations, permutations = 999, method = "bray")
         Df SumOfSqs      R2      F Pr(>F)    
Model    15   4.5142 0.36384 3.0503  0.001 ***
Residual 80   7.8929 0.63616                  
Total    95  12.4071 1.00000                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Normality and Variance Tests
birds_shapiro <- birds_div_stats %>%
  pivot_longer(cols = c(q0.b, q1.b, q2.b), names_to = "q_order", values_to = "value") %>%
  group_by(q_order) %>%
  shapiro_test(value)

levene_birds <- leveneTest(q1.b ~ cluster, data = birds_div_stats)

print(birds_shapiro)

# A tibble: 3 × 4
  q_order variable statistic      p
  <chr>   <chr>        <dbl>  <dbl>
1 q0.b    value        0.984 0.306 
2 q1.b    value        0.981 0.164 
3 q2.b    value        0.976 0.0771

Show Code

print(levene_birds)

Levene's Test for Homogeneity of Variance (center = median)
      Df F value  Pr(>F)  
group 15  2.0879 0.01873 *
      80                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# ANOVA and Kruskal-Wallis
# Running both to ensure consistency, check if normality is violated
kruskal_birds <- list(
  q0 = kruskal.test(q0.b ~ cluster, data = birds_div_stats),
  q1 = kruskal.test(q1.b ~ cluster, data = birds_div_stats),
  q2 = kruskal.test(q2.b ~ cluster, data = birds_div_stats)
)

print(kruskal_birds)

$q0

    Kruskal-Wallis rank sum test

data:  q0.b by cluster
Kruskal-Wallis chi-squared = 28.721, df = 15, p-value = 0.01747


$q1

    Kruskal-Wallis rank sum test

data:  q1.b by cluster
Kruskal-Wallis chi-squared = 17.637, df = 15, p-value = 0.2823


$q2

    Kruskal-Wallis rank sum test

data:  q2.b by cluster
Kruskal-Wallis chi-squared = 20.733, df = 15, p-value = 0.1456

Show Code

# ANOVA MODELS ---
aov_q0.b <- aov(q0.b ~ cluster, data =birds_div_stats)
aov_q1.b <- aov(q1.b ~ cluster, data = birds_div_stats)
aov_q2.b <- aov(q2.b ~ cluster, data = birds_div_stats)


summary(aov_q0.b)

            Df Sum Sq Mean Sq F value  Pr(>F)   
cluster     15   1922  128.10    2.64 0.00276 **
Residuals   80   3882   48.53                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov_q1.b)

            Df Sum Sq Mean Sq F value Pr(>F)
cluster     15  544.8   36.32   1.469  0.137
Residuals   80 1977.7   24.72

Show Code

summary(aov_q2.b)

            Df Sum Sq Mean Sq F value Pr(>F)  
cluster     15  385.7   25.71   1.594 0.0942 .
Residuals   80 1290.8   16.14                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

✅ Interpretation: One-way ANOVA showed that species richness (q = 0) differed significantly among the 16 clusters, whereas Shannon diversity (q = 1) and Simpson diversity (q = 2) did not show significant spatial differences (Table 6).

Birds beta diversity

Show Code

birds.data.MC %>% 
  pivot_longer(cols = !species, names_to = "cluster", values_to = "freq") %>% 
  mutate(freq = as.integer(freq)) %>% 
  pivot_wider(names_from = cluster, values_from = freq, values_fn = sum) -> MC.beta.birds

MC.beta_freq_birds <- MC.beta.birds %>%
  mutate(across(where(is.numeric), ~ ifelse(. > 0, 1, 0)))

# Jaccard Pairwise Dissimilarity
birds_beta_jac <- beta.pair(t(MC.beta_freq_birds[,-1]), index.family = "jac")

# Abundance-based Beta Diversity Partitioning (Bray-Curtis)
beta.part.birds <- beta.multi.abund(t(MC.beta.birds[,-1]), index.family = "bray")
print(beta.part.birds)

$beta.BRAY.BAL
[1] 0.7058388

$beta.BRAY.GRA
[1] 0.0634806

$beta.BRAY
[1] 0.7693194

Show Code

#  SPECIES RANK ABUNDANCE CURVES ---
birds.data.MC %>% 
  arrange(desc(rowSums(across(where(is.numeric))))) %>% 
  print(n = 5)

# A tibble: 265 × 17
  species         C1    C2    C3    C4    C5    C6    C7    C8    C9   C10   C11
  <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Common Bulb…    40    55    74    72    40    18    56    57    59    60    45
2 Ring-necked…    54    46    35    42    39    36    39    41    46    49    19
3 Emerald-spo…    21    40    48    39    21    16    41    40    67    36    10
4 Slate-color…    34    41    31    32    27    38    34    43    57    21    16
5 Northern Fi…    60    37    22    48    22    55    41    39    28    57    20
# ℹ 260 more rows
# ℹ 5 more variables: C12 <dbl>, C13 <dbl>, C14 <dbl>, C15 <dbl>, C16 <dbl>

Show Code

# Generate the Rank-Abundance (Whittaker) Plot
birds.data.MC %>% 
  pivot_longer(!species, names_to = "cluster", values_to = "freq") %>% 
  filter(freq > 0) %>% 
  group_by(cluster) %>% 
  # Create ranks within each cluster
  mutate(rank = rank(desc(freq), ties.method = "first")) %>% 
  ggplot(aes(x = rank, y = log(freq + 1), color = cluster)) +
  geom_line(size = 1) + 
  geom_point(size = 2) +
  labs(title = "Bird Species Rank-Abundance Curves per Cluster",
       subtitle = "Log-transformed frequency vs. Species Rank",
       x = "Rank (Most to least abundant)",
       y = "Log(Frequency + 1)") +
  theme_bw() +
  facet_wrap(~ cluster) +
  theme(legend.position = "none")

Remote sensing

Summary of the remote sensing section in a flow chart image

Remote sensing with lag

Show Code

rs.data.monthly <- read_csv("data/S2_CHIRPS_Per_Tile_Monthly_2019_2026.csv") %>%
  dplyr::select(fid, year, month, precip, NDVI, SAVI, BSI) %>%
  mutate(across(c(NDVI, SAVI, BSI), ~na_if(., -9999))) %>%
  arrange(fid, year, month)
indices <- c("NDVI", "SAVI", "BSI")

add_stars <- function(p) {
  if (p <= 0.001) return("***")
  else if (p <= 0.01) return("**")
  else if (p <= 0.05) return("*")
  else return("")
}

#  Mann-Kendall & Sen's slope analysis (Landscape Level)
mk_results_landscape <- lapply(indices, function(idx) {
  df <- na.omit(data.frame(
    values =rs.data.monthly[[idx]],
    years = rs.data.monthly$year + (rs.data.monthly$month - 1)/12
  ))
  
  mk  <- MannKendall(df$values)
  sen <- sens.slope(df$values)
  
  data.frame(
    Index    = idx,
    Tau      = as.numeric(mk$tau),
    Pval     = as.numeric(mk$sl),
    SenSlope = as.numeric(sen$estimates)
  )
}) %>% bind_rows()

mk_results_landscape$Signif <- sapply(mk_results_landscape$Pval, add_stars)

head (mk_results_landscape)

  Index         Tau       Pval      SenSlope Signif
1  NDVI -0.03348902 0.06410569 -1.531667e-05       
2  SAVI -0.03349334 0.06407139 -2.296460e-05       
3   BSI  0.01162767 0.52035522  3.365126e-06

Show Code

# CORRELATION ANALYSIS
cor_matrix <- cor(rs.data.monthly[,indices], use="pairwise.complete.obs")
melted_cor <- melt(cor_matrix)

# RAINFALL LAGS
rs.data.monthly <- rs.data.monthly %>%
  group_by(fid) %>%
  mutate(  precip_lag1 = lag(precip,1),   precip_lag2 = lag(precip,2)) %>%
  ungroup()

#  BAYESIAN MIXED MODELS
rs.model.data <- rs.data.monthly %>%
  drop_na(NDVI,SAVI,BSI,precip,precip_lag1,precip_lag2) %>%
  mutate(across(starts_with("precip"),  ~as.numeric(scale(.)),.names="{.col}_s"))

fit_NDVI <- brm(NDVI ~ precip_s + precip_lag1_s + precip_lag2_s + (1|fid),
  data = rs.model.data, cores = 4,  iter = 2000,  control = list(adapt_delta = 0.95))

fit_SAVI <- brm(SAVI ~ precip_s + precip_lag1_s + precip_lag2_s + (1|fid),
  data = rs.model.data,  cores = 4,  iter = 2000)

fit_BSI <- brm(BSI ~ precip_s + precip_lag1_s + precip_lag2_s + (1|fid),
  data = rs.model.data,  cores = 4,  iter = 2000)
summary(fit_NDVI)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: NDVI ~ precip_s + precip_lag1_s + precip_lag2_s + (1 | fid) 
   Data: rs.model.data (Number of observations: 1330) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~fid (Number of levels: 16) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.01      0.01     0.00     0.02 1.01     1164     1212

Regression Coefficients:
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept         0.45      0.00     0.45     0.46 1.00     2075     2547
precip_s          0.02      0.00     0.01     0.03 1.00     4202     3237
precip_lag1_s    -0.01      0.00    -0.02    -0.00 1.00     4116     3454
precip_lag2_s     0.03      0.00     0.03     0.04 1.00     6906     2979

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.12      0.00     0.12     0.13 1.00     4375     2935

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Show Code

summary(fit_SAVI)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: SAVI ~ precip_s + precip_lag1_s + precip_lag2_s + (1 | fid) 
   Data: rs.model.data (Number of observations: 1330) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~fid (Number of levels: 16) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.02      0.01     0.00     0.04 1.00     1117     1190

Regression Coefficients:
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept         0.68      0.01     0.67     0.70 1.00     2814     2432
precip_s          0.03      0.01     0.02     0.05 1.00     4527     3012
precip_lag1_s    -0.02      0.01    -0.03    -0.00 1.00     4471     2751
precip_lag2_s     0.05      0.01     0.04     0.06 1.00     7829     2389

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.18      0.00     0.18     0.19 1.00     5872     2897

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Show Code

summary(fit_BSI)

 Family: gaussian 
  Links: mu = identity; sigma = identity 
Formula: BSI ~ precip_s + precip_lag1_s + precip_lag2_s + (1 | fid) 
   Data: rs.model.data (Number of observations: 1330) 
  Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
         total post-warmup draws = 4000

Multilevel Hyperparameters:
~fid (Number of levels: 16) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.02      0.00     0.01     0.03 1.00     1418     1698

Regression Coefficients:
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept        -0.02      0.00    -0.03    -0.02 1.00     1237     1627
precip_s         -0.01      0.00    -0.02    -0.01 1.00     6774     2994
precip_lag1_s     0.01      0.00     0.01     0.02 1.00     6695     3329
precip_lag2_s    -0.03      0.00    -0.03    -0.02 1.00     5136     2201

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.07      0.00     0.06     0.07 1.00     5529     2898

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).

Show Code

# RAIN LAG RESPONSE
p1 <- plot(conditional_effects(fit_NDVI,"precip_lag2_s"),plot=FALSE)[[1]] +
  labs(x="Lag 2 Rainfall",y="NDVI")+theme_minimal()

p2 <- plot(conditional_effects(fit_SAVI,"precip_lag2_s"),plot=FALSE)[[1]] +
  labs(x="Lag 2 Rainfall",y="SAVI")+theme_minimal()

p3 <- plot(conditional_effects(fit_BSI,"precip_lag2_s"),plot=FALSE)[[1]] +
  labs(x="Lag 2 Rainfall",y="BSI")+theme_minimal()

combined_marginal_plot <- (p1|p2|p3)
print(combined_marginal_plot)

Show Code

#  RESTREND RESIDUAL EXTRACTION
rs.model.data <- rs.model.data %>%
  mutate(
    resid_NDVI = residuals(fit_NDVI)[,"Estimate"],
    resid_SAVI = residuals(fit_SAVI)[,"Estimate"],
    resid_BSI  = residuals(fit_BSI)[,"Estimate"]
  )

# We average the Bayesian residuals across all 16 tiles for each month
rs.resid_landscape <- rs.model.data %>%
  dplyr:: select(year, month, resid_NDVI, resid_SAVI, resid_BSI) %>%
  group_by(year, month) %>%
  summarise(
    across(starts_with("resid_"), ~mean(., na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  arrange(year, month) # Ensure chronological order

indices_resid <- c("resid_NDVI", "resid_SAVI", "resid_BSI")

#SIGNIFICANCE STAR FUNCTION (Clean Version)
add_stars <- function(p) {
  if (p <= 0.001) return("***")
  else if (p <= 0.01) return("**")
  else if (p <= 0.05) return("*")
  else return("")
}

#  MANN-KENDALL ON LANDSCAPE RESIDUALS
mk_resid_results <- lapply(indices_resid, function(idx) {
  
  # Extract values for the landscape average
  vals <- na.omit(rs.resid_landscape[[idx]])
  
  mk  <- MannKendall(vals)
  sen <- sens.slope(vals)
  
  data.frame(
    Index = gsub("resid_", "", idx),
    Tau = as.numeric(mk$tau),
    Pval = as.numeric(mk$sl),
    SenSlope = as.numeric(sen$estimates),
    Significance = add_stars(mk$sl)
  )
}) %>% bind_rows()

#  OUTPUT CLEAN TABLE
kable(mk_resid_results, 
      digits = 5,
      caption = "Table X: Rainfall-Adjusted Residual Trends (RESTREND) for the Overall Landscape")

Table X: Rainfall-Adjusted Residual Trends (RESTREND) for the Overall Landscape
Index	Tau	Pval	SenSlope	Significance
NDVI	-0.41480	0.00000	-0.00297	***
SAVI	-0.41365	0.00000	-0.00445	***
BSI	0.00631	0.93534	0.00001

Show Code

# --- . MONTHLY VEGETATION TREND VISUALISATION ---
rs.model.data <- rs.model.data %>%
  mutate(time = year + (month-1)/12)

monthly_long <- rs.model.data %>%
  select(time, NDVI, SAVI, BSI) %>%
  pivot_longer(-time, names_to="Index", values_to="Value") %>%
  drop_na()

sen_slopes <- monthly_long %>%
  group_by(Index) %>%
  summarise(SenSlope = sens.slope(Value)$estimates, .groups="drop") %>%
  mutate(Label = paste0(Index, " (Sen=", signif(SenSlope, 3), ")"))

monthly_long <- left_join(monthly_long, sen_slopes, by="Index")

combined_plot <- ggplot(monthly_long, aes(time, Value, color=Label)) +
  geom_line(alpha=0.8) +
  geom_smooth(method="lm", se=FALSE, linetype="dashed") +
  scale_color_manual(values=c("darkgreen", "darkblue", "darkred")) +
  theme_minimal() + # Fixed: removed double ++
  theme(
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    axis.line = element_line(color = "black")
  ) + # Fixed: added missing +
  labs(x="Year", y="Index Value", color="") +
  scale_x_continuous(breaks = 2019:2026, labels = 2019:2026)

print(combined_plot)

Show Code

#  RESTREND RESIDUAL TREND PLOT ---
residual_long <- rs.model.data %>%
  select(time, resid_NDVI, resid_SAVI, resid_BSI) %>%
  pivot_longer(-time, names_to="Index", values_to="Value") %>%
  mutate(Index = gsub("resid_", "", Index)) %>%
  drop_na()

sen_resid <- residual_long %>%
  group_by(Index) %>%
  summarise(SenSlope = sens.slope(Value)$estimates, .groups="drop") %>%
  mutate(Label = paste0(Index, " (Sen=", signif(SenSlope, 3), ")"))

residual_long <- left_join(residual_long, sen_resid, by="Index")

residual_plot <- ggplot(residual_long, aes(time, Value, color=Label)) +
  geom_line(alpha=0.8) +
  geom_smooth(method="lm", se=FALSE, linetype="dashed") +
  scale_color_manual(values=c("darkgreen", "darkblue", "darkred")) +
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    axis.line = element_line(color = "black")
  ) +
  labs(x="Year", y="Residual Index", color="") +
  scale_x_continuous(breaks = 2019:2026, labels = 2019:2026)

residual_plot

Show Code

head(rs.model.data)

# A tibble: 6 × 16
    fid  year month precip  NDVI  SAVI      BSI precip_lag1 precip_lag2 precip_s
  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <dbl>       <dbl>    <dbl>
1     1  2019     3   81.2 0.514 0.771 0.0114          29.6        35.1    0.136
2     1  2019     4  171.  0.403 0.604 0.129           81.2        29.6    0.194
3     1  2019     5   95.5 0.405 0.608 0.126          171.         81.2    0.145
4     1  2019     6  176.  0.474 0.711 0.0347          95.5       171.     0.197
5     1  2019     7   51.7 0.572 0.857 0.000496       176.         95.5    0.117
6     1  2019     8  106.  0.375 0.563 0.124           51.7       176.     0.152
# ℹ 6 more variables: precip_lag1_s <dbl>, precip_lag2_s <dbl>,
#   resid_NDVI <dbl>, resid_SAVI <dbl>, resid_BSI <dbl>, time <dbl>

Show Code

#  SPATIAL TILE ANALYSIS
spatial_tile_results <- rs.model.data %>%
  group_by(fid) %>%
  summarise(
    NDVI_Slope = sens.slope(resid_NDVI)$estimates,
    NDVI_Pval  = MannKendall(resid_NDVI)$sl,
    SAVI_Slope = sens.slope(resid_SAVI)$estimates,
    SAVI_Pval  = MannKendall(resid_SAVI)$sl,
    BSI_Slope  = sens.slope(resid_BSI)$estimates,
    BSI_Pval   = MannKendall(resid_BSI)$sl,
    .groups="drop"
  )
spatial_tile_results

# A tibble: 16 × 7
     fid NDVI_Slope     NDVI_Pval SAVI_Slope     SAVI_Pval  BSI_Slope BSI_Pval
   <dbl>      <dbl>         <dbl>      <dbl>         <dbl>      <dbl>    <dbl>
 1     1   -0.00300 0.000000218     -0.00453 0.000000184    0.0000952   0.813 
 2     2   -0.00319 0.0000000245    -0.00482 0.0000000294   0.000303    0.413 
 3     3   -0.00320 0.000000169     -0.00486 0.000000137    0.000215    0.555 
 4     4   -0.00313 0.0000000113    -0.00471 0.0000000103   0.000257    0.333 
 5     5   -0.00311 0.0000000108    -0.00468 0.00000000894  0.0000679   0.801 
 6     6   -0.00241 0.00000287      -0.00364 0.00000310    -0.000630    0.0344
 7     7   -0.00309 0.0000000108    -0.00464 0.0000000118  -0.0000379   0.887 
 8     8   -0.00297 0.0000000268    -0.00449 0.0000000187  -0.0000178   0.944 
 9     9   -0.00309 0.000000137     -0.00456 0.000000209    0.000206    0.577 
10    10   -0.00324 0.0000000214    -0.00486 0.0000000178   0.000132    0.604 
11    11   -0.00320 0.000000188     -0.00478 0.000000261    0.000150    0.662 
12    12   -0.00304 0.0000000257    -0.00451 0.0000000351  -0.0000478   0.900 
13    13   -0.00303 0.0000000307    -0.00456 0.0000000294   0.000109    0.807 
14    14   -0.00292 0.000000162     -0.00436 0.000000131   -0.0000573   0.919 
15    15   -0.00317 0.00000000153   -0.00470 0.00000000146  0.000169    0.464 
16    16   -0.00273 0.000000444     -0.00412 0.000000426   -0.000245    0.509

Show Code

#  SUMMARY STATISTICS
summary_stats <- spatial_tile_results %>%
  mutate(
    Trend_Status = case_when(
      NDVI_Slope < 0 & NDVI_Pval <= 0.05 ~ "Degrading",
      NDVI_Slope > 0 & NDVI_Pval <= 0.05 ~ "Improving",
      TRUE ~ "Stable"
    )
  ) %>%
  count(Trend_Status) %>%
  mutate(Percentage = 100*n/sum(n))

avg_slope <- mean(spatial_tile_results$NDVI_Slope,na.rm=TRUE)
annual_change_rate <- avg_slope*12

r2_ndvi <- bayes_R2(fit_NDVI)
r2_bsi  <- bayes_R2(fit_BSI)
r2_savi <- bayes_R2(fit_SAVI)
r2_ndvi

     Estimate  Est.Error       Q2.5     Q97.5
R2 0.09722818 0.01499669 0.06874767 0.1274214

Show Code

r2_bsi

    Estimate  Est.Error      Q2.5    Q97.5
R2 0.2000514 0.01828188 0.1641947 0.235359

Show Code

r2_savi

     Estimate  Est.Error      Q2.5   Q97.5
R2 0.09751322 0.01489215 0.0688776 0.12752

Show Code

print(summary_stats)

# A tibble: 1 × 3
  Trend_Status     n Percentage
  <chr>        <int>      <dbl>
1 Degrading       16        100

Show Code

print(paste("Average Annual Residual NDVI Change:",round(annual_change_rate,4)))

[1] "Average Annual Residual NDVI Change: -0.0364"

Show Code

print(paste("NDVI Model R2:",round(r2_ndvi[1],3),"| BSI Model R2:",round(r2_bsi[1],3)))

[1] "NDVI Model R2: 0.097 | BSI Model R2: 0.2"

Show Code

#tile level measures for export to QGIS

spatial_means_csv <- rs.model.data %>%
group_by(fid) %>%
summarise(Mean_NDVI = mean(NDVI, na.rm = TRUE), Mean_SAVI = mean(SAVI, na.rm = TRUE),  Mean_BSI  = mean(BSI,  na.rm = TRUE),   .groups = "drop") %>%
  rename(Cluster = fid)

write_csv(  spatial_means_csv,"LDSF_Spatial_Means_2019_2026.csv")


spatial_trends_csv <- spatial_tile_results %>%
  dplyr::select(fid, R.S.NDVI = NDVI_Slope, R.S.BSI  = BSI_Slope, R.S.SAVI = SAVI_Slope
  )

write_csv(
  spatial_trends_csv,
  "LDSF_RESTREND_Slopes_2019_2026.csv"
)


# --- PREPARE DATA FOR HEAT MAP ---
#  pivot both slopes and p-values to long format
plot_slopes <- spatial_tile_results %>%
  select(fid, contains("slope")) %>%
  pivot_longer(-fid, names_to = "Index", values_to = "SenSlope") %>%
  mutate(Index = gsub("_slope", "", Index))

plot_pvals <- spatial_tile_results %>%
  select(fid, contains("_p")) %>%
  pivot_longer(-fid, names_to = "Index", values_to = "Pval") %>%
  mutate(Index = gsub("_p", "", Index))

# Join them and add grid logic + star labels
plot_data <- left_join(plot_slopes, plot_pvals, by = c("fid", "Index")) %>%
  mutate(
    plot_row = floor((fid - 1) / 4) + 1,
    plot_col = ((fid - 1) %% 4) + 1,
    # Logic for stars: Significant if p <= 0.05
    Stars = ifelse(Pval <= 0.05, "*", ""),
    # Label to show inside tile (FID and Stars)
    Label = paste0(fid, Stars)
  )


head(spatial_tile_results)

# A tibble: 6 × 7
    fid NDVI_Slope    NDVI_Pval SAVI_Slope     SAVI_Pval  BSI_Slope BSI_Pval
  <dbl>      <dbl>        <dbl>      <dbl>         <dbl>      <dbl>    <dbl>
1     1   -0.00300 0.000000218    -0.00453 0.000000184    0.0000952   0.813 
2     2   -0.00319 0.0000000245   -0.00482 0.0000000294   0.000303    0.413 
3     3   -0.00320 0.000000169    -0.00486 0.000000137    0.000215    0.555 
4     4   -0.00313 0.0000000113   -0.00471 0.0000000103   0.000257    0.333 
5     5   -0.00311 0.0000000108   -0.00468 0.00000000894  0.0000679   0.801 
6     6   -0.00241 0.00000287     -0.00364 0.00000310    -0.000630    0.0344

Remote Sensing Time Series Analysis for environmental monitoring, to evaluate vegetation health and soil characteristics across 16 (tiles) from 2019 to 2026, using monthly satellite-derived data. I focus on; NDVI (Normalized Difference Vegetation Index): Measures vegetation “greenness” or health. SAVI (Soil Adjusted Vegetation Index): Similar to NDVI but corrects for soil brightness in areas with sparse vegetation. BSI (Bare Soil Index): Used to identify areas with little to no vegetation (bare soil). Statistical Trend Analysis Mann-Kendall Test: Determines if there is a statistically significant monotonic upward or downward trend. Sen’s Slope: Estimates the magnitude of that trend. RESTREND Analysis (RESTREND stands for Residual Trend Analysis). It performs a linear regression between the indices and rainfall (CHIRPS data). It calculates the Residuals (the difference between what the vegetation should look like based on rain vs. what it actually looks like). If the residuals show a downward trend, it suggests land degradation is occurring due to human activity or other factors independent of rainfall. ##Vegetation Structure

Show Code

ldsf_raw <- read_csv("data/Mara_Vegstructure_2025(in).csv") %>%
  mutate(vegstructure = str_replace_all(vegstructure, "_", " ")) %>%
  mutate(vegstructure = str_to_sentence(vegstructure)) %>%
  mutate(vegstructure = factor(vegstructure, levels = c(
    "Grassland", "Wooded grassland", "Shrubland", 
    "Woodland", "Bushland", "Cropland"
  )))


structure_summary <- ldsf_raw %>%
  group_by(vegstructure) %>%
  summarise(Count = n(), .groups = "drop") %>%
  mutate(Percentage = (Count / sum(Count)) * 100) 

#  Bar Chart (Panel A)
p1 <- ggplot(structure_summary, aes(x = reorder(vegstructure, -Count), y = Count, fill = vegstructure)) +
  geom_bar(stat = "identity", color = "white", width = 0.7) +
  geom_text(aes(label = paste0(round(Percentage, 1), "%")), 
            vjust = -0.5, size = 3.5, fontface = "bold") +
  scale_fill_viridis_d(option = "viridis", direction = -1) + 
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none",
    axis.title = element_text(size = 11, family = "calibri"), 
    axis.text = element_text(size = 10, color = "black"),
    axis.line.x = element_line(color = "black")
  ) +
  labs(x = "Land Cover Classification", y = "Number of Plots")

#  Load the QGIS Map (Panel B)
qgis_map <- image_read("Veg structure.png")
qgis_map_grob <- grid::rasterGrob(qgis_map)

#  Combine and Save Final Figure
combined_figure <- p1 + wrap_elements(qgis_map_grob) + 
  plot_layout(widths = c(1, 1.2)) + 
  plot_annotation(tag_levels = 'A')

ggsave("Figure1_Final_Submission.png", combined_figure, 
       width = 14, height = 6, dpi = 300, bg = "white")

Acoustics

Indices

I calculated a suite of standard acoustic indices using Kaleidoscope Pro software (Version 5.8.1; Wildlife Acoustics, Inc.), which provides specialised tools for processing and analysing environmental sound recordings (Bradfer-Lawrence et al., 2019; Greenhalgh et al., 2021; Hyland et al., 2023). I conducted the acoustic indices analysis using the Non-Bat Analysis Mode with default parameter settings to maintain methodological consistency and ensure replicability of results. I batch-processed the audio recordings, segmenting larger files to improve the granularity of the analysis. The software simultaneously calculated the following indices: the Acoustic Complexity Index (ACI), Acoustic Diversity Index (ADI), Acoustic Evenness Index (AEI), Bioacoustics Index (BI), Normalised Difference Soundscape Index (NDSI), and Entropy Index (H).

Show Code

#acoustic indices
acousticindex <- read_csv("data/acousticindex.csv")
head (acousticindex)

# A tibble: 6 × 16
  FOLDER   `IN FILE` CHANNEL OFFSET DURATION DATE  TIME   HOUR  MEAN    SD    SH
  <chr>    <chr>       <dbl>  <dbl>    <dbl> <chr> <tim> <dbl> <dbl> <dbl> <dbl>
1 "Cluste… SMU10315…       0      0       58 10/0… 13:02    21 4741. 4801. 0.771
2 "Cluste… SMU10315…       0      0       58 10/0… 33:02    19 6034. 5882. 0.860
3 "Cluste… SMU10315…       0      0       58 10/0… 03:02    20 6203. 5681. 0.862
4 "Cluste… SMU10315…       0      0       58 10/0… 43:02    18 4211. 3722. 0.847
5 "Cluste… SMU10315…       0      0       58 10/0… 53:02    18 4338. 3406. 0.799
6 "Cluste… SMU10315…       0      0       58 10/0… 23:02    21 5223. 5060. 0.795
# ℹ 5 more variables: NDSI <dbl>, ACI <dbl>, ADI <dbl>, AEI <dbl>, BI <dbl>

Show Code

acoustic.index <- acousticindex %>%
  mutate(
    cluster = paste0("c", parse_number(FOLDER)),
    date_parsed = dmy(DATE), 
    season = format(date_parsed, "%B %Y")
  ) %>%
  dplyr:: select(season, cluster, SH, NDSI, ACI, ADI, AEI, BI)

head(acoustic.index)

# A tibble: 6 × 8
  season     cluster    SH    NDSI   ACI   ADI    AEI    BI
  <chr>      <chr>   <dbl>   <dbl> <dbl> <dbl>  <dbl> <dbl>
1 April 2024 c10     0.771  0.812   151.  2.02 0.357  101. 
2 April 2024 c10     0.860 -0.0681  153.  2.28 0.111   77.2
3 April 2024 c10     0.862  0.547   153.  2.29 0.0980  71.9
4 April 2024 c10     0.847 -0.310   159.  2.30 0       15.6
5 April 2024 c10     0.799  0.0690  156.  2.29 0.0741  52.8
6 April 2024 c10     0.795  0.852   151.  2.10 0.308   90.2

Show Code

# Calculate mean values by cluster
acoustic_cluster_means <-  acoustic.index %>%
  group_by(cluster) %>%
  summarise(
    SH. = mean(SH, na.rm = TRUE),
    NDSI. = mean(NDSI, na.rm = TRUE),
    ACI. = mean(ACI, na.rm = TRUE),
    ADI. = mean(ADI, na.rm = TRUE),
    AEI. = mean(as.numeric(AEI), na.rm = TRUE),  
    BI. = mean(BI, na.rm = TRUE),
    .groups = 'drop'
  )
head(acoustic_cluster_means)

# A tibble: 6 × 7
  cluster   SH. NDSI.  ACI.  ADI.  AEI.   BI.
  <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 c1      0.674 0.267  168.  1.56 0.525  55.4
2 c10     0.806 0.617  155.  2.17 0.164  53.4
3 c11     0.786 0.586  166.  1.99 0.274  42.0
4 c12     0.823 0.716  159.  2.19 0.154  47.1
5 c13     0.835 0.663  159.  2.17 0.172  46.6
6 c14     0.820 0.792  166.  2.15 0.198  59.4

Birdnet

To identify avian vocalisations within the acoustic recordings, I employed an automated analysis workflow using BirdNET (v2.4.0; Kahl et al., 2023). BirdNET is a deep neural network developed for the automated detection and classification of bird vocalisations, drawing on extensive global reference libraries (Kahl et al., 2021). The model can identify more than 6,000 bird species worldwide based on diagnostic spectral features in the audio signal (Kahl et al., 2023). To ensure comparability across data types, the BirdNET acoustic detections were grouped and summarised by season, cluster, and species to determine the total detection frequency per species. Subsequently, community diversity metrics were calculated following the same procedures used for the vegetation and manual point-count datasets. This consistent application of Hill numbers (q0, q1, q2) and beta diversity partitioning enabled a direct comparison of community structure and spatial heterogeneity across all three survey methods.

Show Code

birdnet_data <- read_csv("data/BirdNET_CombinedTable.csv")
head(birdnet_data)

# A tibble: 6 × 14
  `Start (s)` `End (s)` `Scientific name`   `Common name` Confidence File    lat
        <dbl>     <dbl> <chr>               <chr>              <dbl> <chr> <dbl>
1          11        14 Apus melba          Alpine Swift       0.692 "E:\…    -1
2          44        47 Apus melba          Alpine Swift       0.596 "E:\…    -1
3           3         6 Apus melba          Alpine Swift       0.513 "E:\…    -1
4          18        21 Apus melba          Alpine Swift       0.532 "E:\…    -1
5          25        28 Apus melba          Alpine Swift       0.512 "E:\…    -1
6           6         9 Chalcomitra senega… Scarlet-ches…      0.642 "E:\…    -1
# ℹ 7 more variables: lon <dbl>, week <dbl>, overlap <dbl>, sensitivity <dbl>,
#   min_conf <dbl>, species_list <lgl>, model <chr>

Show Code

birdnet_processed <- birdnet_data %>%
  filter(Confidence > 0.75) %>%#Higher confidence thresholds (0.7-0.8) will reduce the need for manual verification, while potentially missing some rare or poorly-recorded species.
 dplyr::  select(species = `Common name`, File) %>%
  separate(File, 
           into = c("Drive", "Project", "Category", "SeasonFolder",
                    "ClusterFolder", "SubFolder", "FileName"),
           sep = "\\\\", remove = FALSE) %>%
  mutate(
    cluster = paste0("c", str_extract(ClusterFolder, "\\d+")), 
    season = paste(
      str_extract(SeasonFolder, "^[A-Za-z]{3}"),
      str_extract(SeasonFolder, "\\d{2}(?=\\.)|\\d{2}$")
    )
  ) %>%
 dplyr::  select(species, cluster, season)
#group_by(species) %>%   filter(n() <= 10) %>%   ungroup() #Requiring a species to be detected a minimum number of times (e.g., ten detections) before including it in results can help reduce misidentifications. Confidence thresholds around 0.5 provide a balance between precision and recall at the assemblage level, while requiring a minimum number of detections per species reduces false positives from sporadic misidentifications. ##With a threshold of 0.5, the total number of species is 427. Adding a filter to remove the species detected less than 10 times reduces the species to 230.
head(birdnet_processed)

# A tibble: 6 × 3
  species                 cluster season
  <chr>                   <chr>   <chr> 
1 Scarlet-chested Sunbird c1      Oct 25
2 Scarlet-chested Sunbird c1      Oct 25
3 Scarlet-chested Sunbird c1      Oct 25
4 Scarlet-chested Sunbird c1      Oct 25
5 Scarlet-chested Sunbird c1      Oct 25
6 Scarlet-chested Sunbird c1      Oct 25

Show Code

#n_distinct(birdnet_processed$species)
#View(distinct(birdnet_processed, species))



# Convert the long detection list into a wide Community Matrix for meta analysis
birdnet_matrix <- birdnet_processed %>%
  group_by(season, species, cluster) %>%
  summarise(detections = n(), .groups = "drop") %>%
pivot_wider(names_from = cluster, values_from = detections, values_fill = 0) %>%
arrange(season, species)
head (birdnet_matrix)

# A tibble: 6 × 18
  season species       c11   c12   c16    c3    c4    c8    c2   c10   c14    c6
  <chr>  <chr>       <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 Apr 24 Abyssinian…    86   118   216     9     4   294     0     0     0     0
2 Apr 24 African Cr…     0     1     0     0     0     0     0     0     0     0
3 Apr 24 African Fi…     0     0     0     0     0     0     3     0     0     0
4 Apr 24 African Pa…     0     0     0     0     0     1     0     0     0     0
5 Apr 24 African Sc…     2     4   106     2     4   292    24     4    33     2
6 Apr 24 African Wo…     1     1    17     2     0     0     0    10     1    25
# ℹ 6 more variables: c9 <int>, c5 <int>, c1 <int>, c13 <int>, c7 <int>,
#   c15 <int>

Show Code

birdnet_MC_input <- birdnet_processed %>%
  group_by(cluster, species) %>%
  summarise(freq = n(), .groups = "drop") %>%
  pivot_wider(names_from = cluster, values_from = freq, values_fill = 0) %>%
  dplyr:: select(-species)%>%
  relocate(paste0("c", 1:16))
head(birdnet_MC_input)

# A tibble: 6 × 16
     c1    c2    c3    c4    c5    c6    c7    c8    c9   c10   c11   c12   c13
  <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1     9  2218   590   847   179   190  1635  4007     0  1257   957   365   310
2     3    70     2    13     0     0    44    20    96    82     0     0     0
3    43     8     0    91     0     8     5    44    21     0     0     1     0
4     9     0     0     0     0     5     0     0     0     0     0     0     0
5     2  1572  1753  2618   843   105  4317  4684     0   774   695   745   518
6    31   136    95   437    52    28   143   419    51   129    59   264   101
# ℹ 3 more variables: c14 <int>, c15 <int>, c16 <int>

Show Code

mc.birdnet <- MetaCommunity(birdnet_MC_input)
summary(mc.birdnet)

Meta-community (class 'MetaCommunity') made of 94927 individuals in 16 
communities and 284 species. 

Its sample coverage is 0.999357404945198 

Community weights are: 
    c1     c2     c3     c4     c5     c6     c7     c8     c9    c10    c11 
0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 
   c12    c13    c14    c15    c16 
0.0625 0.0625 0.0625 0.0625 0.0625 
Community sample numbers of individuals are: 
   c1    c2    c3    c4    c5    c6    c7    c8    c9   c10   c11   c12   c13 
 3771  8325  5973  6195  2936  3481  9412 11128  3134  4515  5722  5403  4730 
  c14   c15   c16 
 7587  2802  9813 
Community sample coverages are: 
       c1        c2        c3        c4        c5        c6        c7        c8 
0.9949632 0.9978381 0.9973216 0.9959649 0.9935307 0.9962663 0.9974504 0.9980232 
       c9       c10       c11       c12       c13       c14       c15       c16 
0.9933005 0.9966789 0.9961558 0.9964841 0.9955610 0.9963099 0.9925084 0.9989811

Show Code

alpha.birdnet.0 <- DivPart(q=0, mc.birdnet, Correction = "Best")
alpha.birdnet.1 <- DivPart(q=1, mc.birdnet, Correction = "Best")
alpha.birdnet.2 <- DivPart(q=2, mc.birdnet, Correction = "Best")


alphas.birdnet <- data.frame(
  q0.birdnett = alpha.birdnet.0$CommunityAlphaDiversities,
  q1.birdnett= alpha.birdnet.1$CommunityAlphaDiversities,
  q2.birdnett= alpha.birdnet.2$CommunityAlphaDiversities
) %>%
  rownames_to_column("cluster") %>%
  mutate(
    D..birdnett = q2.birdnett / q0.birdnett,
    cluster = factor(cluster, levels = paste0("c", 1:16))
  ) %>%
  arrange(cluster)

head(alphas.birdnet)

  cluster q0.birdnett q1.birdnett q2.birdnett D..birdnett
1      c1          78    12.24472    6.555346  0.08404290
2      c2          85    13.90944    7.508378  0.08833386
3      c3          66    10.88466    6.781391  0.10274835
4      c4          93    10.19266    4.591689  0.04937301
5      c5          66    14.50354    7.694577  0.11658450
6      c6          75    11.10612    4.855278  0.06473704

Show Code

dp.birdnet<-DivProfile(seq(0,2,1), mc.birdnet, Correction = "Best", NumberOfSimulations = 10)

Show Code

plot(dp.birdnet)

Show Code

MC.beta.birdnet <- birdnet_processed %>%
  group_by(species, cluster) %>%
  summarise(freq = n(), .groups = "drop") %>%
  pivot_wider(names_from = cluster, values_from = freq, values_fill = 0) %>%
  relocate(paste0("c", 1:16), .after = species)


MC.beta_freq_birdnet <- MC.beta.birdnet %>%
  mutate(across(where(is.numeric), ~ ifelse(. > 0, 1, 0)))

birdnet_beta_jac <- beta.pair(t(MC.beta_freq_birdnet[,-1]), index.family = "jac")

beta.part.birdnet <- beta.multi.abund(t(MC.beta.birdnet[,-1]), index.family = "bray")

head(beta.part.birdnet)

$beta.BRAY.BAL
[1] 0.7900691

$beta.BRAY.GRA
[1] 0.07577612

$beta.BRAY
[1] 0.8658452

✅ Interpretation: The analysis of acoustic data across the study sites revealed substantial spatial heterogeneity in both BirdNET’s classification performance and the resulting avian community structures. The BirdNET analysis comprised 94,927 detections across 16 communities, each with equal sample coverage weighting (Wi = 0.0625). A total of 284 species were detected. Overall sample coverage was exceptionally high (0.999), indicating that the sampling effort captured nearly all species present in the regional pool. Individual community coverage values ranged from 0.992 to 0.998, consistently demonstrating near-complete inventory at each site. Total Bray–Curtis dissimilarity was 0.866, partitioned into a balanced variation component (turnover = 0.790) and an abundance gradient component (nestedness = 0.076). This pronounced dominance of turnover over nestedness reveals that compositional differences between communities are driven almost entirely by species replacement rather than differences in species richness or abundance of shared species. The results confirm a highly diverse, well-sampled assemblage in which BirdNET performed reliably, with distinct species assemblages characterising each of the 16 recording locations.

Combined dataset

These data set comprises 11 acoustic metrics: Bioacoustic Index (BI), Normalised Difference Soundscape Index (NDSI), Acoustic Diversity Index (ADI), Acoustic Evenness Index (AEI), Acoustic Complexity Index (ACI), Acoustic Entropy (SH), ultrasonic metrics, and BirdNET-derived avian diversity metrics (q0, q1, q2, D). I initially compiled 17 environmental predictors spanning vegetation structural variables (shrub volume per hectare, tree volume per hectare, percentage cover per hectare, tree basal area), rangeland plant species diversity metrics (calculated using Hill numbers: q0, q1, q2, and evenness D), bird community diversity metrics (Hill’s numbers derived from manual point counts), soil health components (PC1 and PC2), and residual slope values of remotely sensed vegetation indices (Normalized Difference Vegetation Index [NDVI], Soil Adjusted Vegetation Index [SAVI], and Bare Soil Index [BSI]).

Show Code

# source https://www.sthda.com/english/wiki/ggally-r-package-extension-to-ggplot2-for-correlation-matrix-and-survival-plots-r-software-and-data-visualization
# Create the dataset
# https://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/
# #https://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/
#https://www.sthda.com/english/articles/38-regression-model-validation/157-cross-validation-essentials-in-r/
#https://www.sthda.com/english/articles/38-regression-model-validation/158-regression-model-accuracy-metrics-r-square-aic-bic-cp-and-more/
# https://www.sthda.com/english/articles/40-regression-analysis/168-multiple-linear-regression-in-r/
#https://www.sthda.com/english/articles/39-regression-model-diagnostics/160-multicollinearity-essentials-and-vif-in-r/


# Combine all data sources
big.dataset <- data.frame(soil.pc, alphas.rangelands, shrub.volume.cover, 
                          tree.vol.cov.area, spatial_trends_csv, 
                          alphas.birds, alphas.birdnet, acoustic_cluster_means) %>%
  dplyr::select(-matches("^cluster", ignore.case = TRUE), cluster) %>%
  relocate(cluster)

print(colnames(big.dataset))

 [1] "cluster"     "soil.pc1"    "soil.pc2"    "q0"          "q1"         
 [6] "q2"          "SH.C"        "SH.V"        "TR.A"        "TR.V"       
[11] "TR.C"        "fid"         "R.S.NDVI"    "R.S.BSI"     "R.S.SAVI"   
[16] "q0b"         "q1b"         "q2b"         "D.eveb"      "q0.birdnett"
[21] "q1.birdnett" "q2.birdnett" "D..birdnett" "SH."         "NDSI."      
[26] "ACI."        "ADI."        "AEI."        "BI."

Correlation matrix

I visualised exploratory relationships among variables using scatterplot matrices implemented with the ggpairs function (GGally package, Schloerke et al., 2025) facilitating assessment of distributional properties, non-linear patterns, outliers, and preliminary multicollinearity.

Show Code

# Correlation matrix
big.dataset %>% 
  dplyr::select(where(is.numeric)) %>%
  ggpairs(
  big.dataset %>% 
    dplyr::select(where(is.numeric)), upper = list(continuous = GGally::wrap("cor", size = 2)), 
    lower = list(continuous = GGally::wrap("points", size = 0.3)),  
    diag = list(continuous = GGally::wrap("densityDiag", size = 0.3))) +
    theme(text = element_text(size = 6))

Scaling

Prior to analysis, I standardised all predictor and response variables to zero mean and unit variance using the scale() function to ensure comparability of coefficients and to meet assumptions of multivariate analyses. I applied scaling separately to each group using the scale() function, which centres each variable by subtracting its mean and divides by its standard deviation, resulting in variables with mean = 0 and standard deviation = 1.

Show Code

rangelands_cols   <- c("q0", "q1", "q2")
veg_struct_cols   <- c("SH.C", "SH.V", "TR.A", "TR.V", "TR.C")
remote_cols       <- c("R.S.SAVI", "R.S.NDVI","R.S.BSI")
birds_div_cols    <- c("q0b", "q1b", "q2b", "D.eveb")
soil_cols         <- c("soil.pc1", "soil.pc2")
acoustic_cols     <- c("NDSI.", "ADI.", "ACI.", "SH.", "BI.")
birdnet_cols      <- c("q0.birdnett", "q1.birdnett", "q2.birdnett", "D..birdnett")

# --- GROUP 1: ACOUSTICS ---
acoustics_sc <- as.data.frame(scale(big.dataset[, acoustic_cols]))

# --- GROUP 2: BIRDNET ---
birdnet_sc <- as.data.frame(scale(big.dataset[, birdnet_cols]))

# --- GROUP 3: RANGELAND DIVERSITY ---
rangelands_sc <- as.data.frame(scale(big.dataset[, rangelands_cols]))

# --- GROUP 4: VEGETATION STRUCTURE ---
veg_struct_sc <- as.data.frame(scale(big.dataset[, veg_struct_cols]))

# --- GROUP 5: REMOTE SENSING(VERY HIGH COLLINIALITY) ---
remote_sc <- as.data.frame(scale(big.dataset[, remote_cols]))

# --- GROUP 6: SOIL ---
soil_sc <- as.data.frame(scale(big.dataset[, soil_cols]))
# --- GROUP 7: BIRD DIVERSITY (POINT COUNT) ---
birds_sc <- as.data.frame(scale(big.dataset[, birds_div_cols]))

VIF

I evaluated multicollinearity among the 16 remaining predictors using the Variance Inflation Factor (VIF) with the vifstep function in the usdmpackage (Naimi, 2023). I grouped variables a priori into ecological meaningful categories to guide variable selection. I applied a threshold of VIF < 3 for variable retention. Within each group, I sequentially removed variables with VIF > 3 using a stepwise procedure (vifstep) until all remaining variables met the threshold (Zuur et al., 2010). For the remote sensing variables, which exhibited extreme multicollinearity (VIF > 10 for all three indices), I conducted principal component analysis (PCA) using prcomp() with scaling to reduce dimensionality. We retained the first principal component (remote_pc1), which explained 97.1% of the variance, for subsequent analyses.

After within-group variable selection, I combined all retained predictors into a single environmental dataset. I reassessed global multicollinearity across all variables using car::vif() (Fox & Weisberg, 2019) and applied a second VIF stepwise selection (threshold = 3) to remove any remaining collinear variables across groups. This process yielded a final set of environmental predictors with acceptable multicollinearity: rangeland plant richness (q0), tree volume per hectare, soil properties principal component 1, and the remote sensing composite (remote_pc1).

Show Code

# VIF: INDIVIDUAL GROUP PROCESSING
# --- GROUP 3: RANGELAND DIVERSITY ---
vif_range <- vifstep(rangelands_sc, th = 3)
rangelands_final <- exclude(rangelands_sc, vif_range)
colnames( rangelands_final)

[1] "q0" "q2"

Show Code

# --- GROUP 4: VEGETATION STRUCTURE ---
vif_veg <- vifstep(veg_struct_sc, th = 3)
veg_struct_final <- exclude(veg_struct_sc, vif_veg)
colnames(veg_struct_final)

[1] "SH.C" "TR.V"

Show Code

# --- GROUP 5: REMOTE SENSING(VERY HIGH COLLINIALITY) ---
model_vif <- lm(seq_len(nrow(remote_sc)) ~ ., data = remote_sc)
print(vif(model_vif))

 R.S.SAVI  R.S.NDVI   R.S.BSI 
77.066638 78.464652  7.913983

Show Code

remote_pca_model <- prcomp(big.dataset[, remote_cols], scale. = TRUE)
summary(remote_pca_model)

Importance of components:
                          PC1     PC2     PC3
Standard deviation     1.7049 0.29452 0.08122
Proportion of Variance 0.9689 0.02891 0.00220
Cumulative Proportion  0.9689 0.99780 1.00000

Show Code

remote_final <- as.data.frame(remote_pca_model$x[, 1, drop = FALSE])

colnames(remote_final) <- "remote_pc1"
print(remote_pca_model$rotation[, 1])

  R.S.SAVI   R.S.NDVI    R.S.BSI 
-0.5813151 -0.5815597  0.5690879

Show Code

# --- GROUP 6: SOIL ---
soil_final <- soil_sc


# --- GROUP 7: BIRD DIVERSITY (POINT COUNT) ---
vif_birds <- vifstep(birds_sc, th = 3)
birds_final <- exclude(birds_sc, vif_birds)
colnames(birds_final)

[1] "q1b"    "D.eveb"

Show Code

env.selected <-  cbind(rangelands_final,veg_struct_final, remote_final, soil_final, birds_final)

#GLOBAL VIF
global_vif_model <- lm(seq_len(nrow(env.selected)) ~ ., data = env.selected)
global_vif_results <- car::vif(global_vif_model)

print(sort(global_vif_results, decreasing = TRUE))

       q1b remote_pc1   soil.pc2   soil.pc1         q2       SH.C         q0 
  9.760141   7.585565   6.663145   5.150411   5.124420   5.121028   2.634473 
    D.eveb       TR.V 
  2.566150   2.549289

Show Code

# If any cross-group VIFs are still > 3, remove them
env.final_vif <- vifstep(env.selected, th = 3)
env.final_vars <- exclude(env.selected, env.final_vif)


env.final_vars <- env.final_vars %>% 
 dplyr:: select( -SH.C, -D.eveb)

colnames(env.final_vars)

[1] "q0"         "TR.V"       "remote_pc1" "soil.pc1"

##RDA To test the overarching hypothesis that environmental gradients explain variation in both acoustic patterns and bird communities, I performed redundancy analysis (RDA) using the vegan package (Oksanen et al., 2025). I constructed a combined response matrix comprising all five acoustic indices and four BirdNET metrics, reflecting the integrated nature of soundscape ecology where acoustic patterns and bird communities represent different facets of the same ecological system. I used the final set of environmental predictors selected through the VIF procedure as explanatory variables. I assessed model significance using permutation tests with 999 permutations under the reduced model (Legendre & Legendre, 2012). I calculated adjusted R² to account for the number of predictors relative to sample size (Peres-Neto et al., 2006) and evaluated axis-specific significance using forward tests with 999 permutations.

To provide context and facilitate comparison with univariate analyses, I also conducted separate RDA analyses for acoustic indices and BirdNET metrics individually. I constructed two separate RDA models:

BirdNET model: Scaled BirdNET-derived diversity metrics (q0.birdnett, q1.birdnett, q2.birdnett, D..birdnett) as response variables
Acoustic model: Scaled acoustic indices (NDSI., ADI., ACI., SH., BI.) as response variables

I used the final set of environmental predictors selected through the VIF procedure as explanatory variables in both models. I assessed model significance using permutation tests with 999 permutations under the reduced model (Legendre & Legendre, 2012). I calculated adjusted R² values to account for the number of predictors relative to sample size (Peres-Neto et al., 2006). I evaluated axis-specific significance using forward tests with 999 permutations to identify which constrained ordination axes explained significant portions of the variance.

Show Code

# 
acoustics.all <- cbind(acoustics_sc, birdnet_sc)

# PRIMARY RDA: Combined analysis
rda_combined <- rda(acoustics.all ~ ., data = as.data.frame(env.final_vars))


cat("\n========== PRIMARY RDA: COMBINED ACOUSTIC + BIRDNET ==========\n")


========== PRIMARY RDA: COMBINED ACOUSTIC + BIRDNET ==========

Show Code

print(summary(rda_combined))


Call:
rda(formula = acoustics.all ~ q0 + TR.V + remote_pc1 + soil.pc1,      data = as.data.frame(env.final_vars)) 

Partitioning of variance:
              Inertia Proportion
Total           9.000     1.0000
Constrained     4.199     0.4666
Unconstrained   4.801     0.5334

Eigenvalues, and their contribution to the variance 

Importance of components:
                        RDA1   RDA2    RDA3    RDA4    PC1    PC2     PC3
Eigenvalue            2.1933 1.3241 0.41895 0.26311 2.1840 1.4219 0.56767
Proportion Explained  0.2437 0.1471 0.04655 0.02923 0.2427 0.1580 0.06307
Cumulative Proportion 0.2437 0.3908 0.43737 0.46660 0.7093 0.8673 0.93033
                          PC4     PC5      PC6      PC7       PC8       PC9
Eigenvalue            0.40186 0.16416 0.036584 0.016328 0.0058286 0.0022828
Proportion Explained  0.04465 0.01824 0.004065 0.001814 0.0006476 0.0002536
Cumulative Proportion 0.97498 0.99322 0.997285 0.999099 0.9997464 1.0000000

Accumulated constrained eigenvalues
Importance of components:
                        RDA1   RDA2    RDA3    RDA4
Eigenvalue            2.1933 1.3241 0.41895 0.26311
Proportion Explained  0.5223 0.3153 0.09976 0.06265
Cumulative Proportion 0.5223 0.8376 0.93735 1.00000

Show Code

# Global significance
anova_combined <- anova(rda_combined, permutations = 999)
print(anova_combined)

Permutation test for rda under reduced model
Permutation: free
Number of permutations: 999

Model: rda(formula = acoustics.all ~ q0 + TR.V + remote_pc1 + soil.pc1, data = as.data.frame(env.final_vars))
         Df Variance      F Pr(>F)  
Model     4   4.1994 2.4056  0.015 *
Residual 11   4.8006                
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Adjusted R²
adj_r2_combined <- RsquareAdj(rda_combined)
print(adj_r2_combined)

$r.squared
[1] 0.466604

$adj.r.squared
[1] 0.2726418

Show Code

# Axis significance
anova_axis_combined <- anova(rda_combined, by = "axis", permutations = 999)
print(anova_axis_combined)

Permutation test for rda under reduced model
Forward tests for axes
Permutation: free
Number of permutations: 999

Model: rda(formula = acoustics.all ~ q0 + TR.V + remote_pc1 + soil.pc1, data = as.data.frame(env.final_vars))
         Df Variance      F Pr(>F)  
RDA1      1   2.1933 5.0256  0.033 *
RDA2      1   1.3241 3.0341  0.194  
RDA3      1   0.4189 0.9600  0.742  
RDA4      1   0.2631 0.6029  0.742  
Residual 11   4.8006                
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# SUPPLEMENTARY RDA: Separate analyses to understand patterns
rda_acoustic <- rda(acoustics_sc ~ ., data = env.final_vars)
rda_birdnet <- rda(birdnet_sc ~ ., data = env.final_vars)
cat("\n========== SUPPLEMENTARY: ACOUSTIC-ONLY RDA ==========\n")


========== SUPPLEMENTARY: ACOUSTIC-ONLY RDA ==========

Show Code

anova(rda_acoustic, permutations = 999)

Permutation test for rda under reduced model
Permutation: free
Number of permutations: 999

Model: rda(formula = acoustics_sc ~ q0 + TR.V + remote_pc1 + soil.pc1, data = env.final_vars)
         Df Variance      F Pr(>F)  
Model     4   2.6344 3.0624  0.027 *
Residual 11   2.3656                
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

RsquareAdj(rda_acoustic)

$r.squared
[1] 0.5268751

$adj.r.squared
[1] 0.3548297

Show Code

cat("\n========== SUPPLEMENTARY: BIRDNET-ONLY RDA ==========\n")


========== SUPPLEMENTARY: BIRDNET-ONLY RDA ==========

Show Code

anova(rda_birdnet, permutations = 999)

Permutation test for rda under reduced model
Permutation: free
Number of permutations: 999

Model: rda(formula = birdnet_sc ~ q0 + TR.V + remote_pc1 + soil.pc1, data = env.final_vars)
         Df Variance      F Pr(>F)
Model     4   1.5651 1.7676  0.164
Residual 11   2.4349

Show Code

RsquareAdj(rda_birdnet)

$r.squared
[1] 0.3912652

$adj.r.squared
[1] 0.1699071

Show Code

# RDA BIPLOT (Using combined model)

site_scores <- as.data.frame(vegan::scores(rda_combined, display = "sites", scaling = 2))
env_scores <- as.data.frame(vegan::scores(rda_combined, display = "bp", scaling = 2))
env_scores$Variable <- rownames(env_scores)

# Extract variance explained
var_exp <- summary(rda_combined)$cont$importance[2, 1:2] * 100

# Create biplot
rda_plot <- ggplot2::ggplot() +
  ggplot2::geom_point(data = site_scores, 
                      ggplot2::aes(x = RDA1, y = RDA2),
                      colour = "grey40", alpha = 0.7, size = 3) +
  ggplot2::geom_segment(data = env_scores, 
                        ggplot2::aes(x = 0, y = 0, xend = RDA1, yend = RDA2),
                        arrow = ggplot2::arrow(length = grid::unit(0.25, "cm")),
                        colour = "darkblue", linewidth = 1) +
  ggplot2::geom_text(data = env_scores, 
                     ggplot2::aes(x = RDA1, y = RDA2, label = Variable),
                     colour = "darkblue", fontface = "bold", vjust = -0.6, size = 4) +
  ggplot2::coord_fixed() +
  ggplot2::labs(
    title = "Redundancy Analysis: Environmental Drivers of Acoustic and Bird Communities",
    x = paste0("RDA1 (", round(var_exp[1], 1), "%)"),
    y = paste0("RDA2 (", round(var_exp[2], 1), "%)")
  ) +
  ggplot2::theme_bw(base_size = 14) +
  ggplot2::theme(
    panel.grid = ggplot2::element_blank(),
    plot.title = ggplot2::element_text(hjust = 0.5, face = "bold", size = 12)
  )

print(rda_plot)

Regression

To complement the multivariate analyses and identify specific environmental drivers of individual acoustic and BirdNET metrics, I fitted separate multiple linear regression models using the lm() function. I modeled each of the nine response variables (five acoustic indices and four BirdNET-derived metrics) against the four retained environmental predictors. I assessed model assumptions through residual diagnostics, including normality of residuals (Shapiro-Wilk test), homoscedasticity (Breusch-Pagan test), and identification of influential observations (Cook’s distance threshold 4/n). I evaluated model fit using R², adjusted R², Akaike Information Criterion (AIC), and Bayesian Information Criterion (BIC).

Show Code

#  REGRESSION MODELS WITH VIF-SELECTED VARIABLES
#================================================================
#("Model: Y ~ q0 + TR.V + soil.pc1 + remote_pc1"+)

model_data <- cbind(birdnet_sc, env.final_vars, acoustics_sc)
colnames(model_data)

 [1] "q0.birdnett" "q1.birdnett" "q2.birdnett" "D..birdnett" "q0"         
 [6] "TR.V"        "remote_pc1"  "soil.pc1"    "NDSI."       "ADI."       
[11] "ACI."        "SH."         "BI."

Show Code

# ----- Acoustic indices models -----
model_NDSI <- lm(NDSI. ~ q0 + TR.V + soil.pc1+   remote_pc1, data = model_data)
model_ADI <- lm(ADI. ~ q0 + TR.V + soil.pc1 +  remote_pc1, data = model_data)
model_ACI <- lm(ACI. ~ q0 + TR.V + soil.pc1 +  remote_pc1, data = model_data)
model_SH <- lm(SH. ~ q0 + TR.V + soil.pc1 +  remote_pc1, data = model_data)
model_BI <- lm(BI. ~ q0 + TR.V + soil.pc1 +  remote_pc1, data = model_data)

# ----- BirdNET metrics models -----
model_q0_birdnet <- lm(q0.birdnett ~ q0 + TR.V + soil.pc1 +  remote_pc1, data = model_data)
model_q1_birdnet <- lm(q1.birdnett ~ q0 + TR.V + soil.pc1 +remote_pc1, data = model_data)
model_q2_birdnet <- lm(q2.birdnett ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)
model_D_birdnet <- lm(D..birdnett ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)

# Print summaries with titles
cat("\n--- SUMMARY: NDSI MODEL ---\n"); print(summary(model_NDSI))


--- SUMMARY: NDSI MODEL ---


Call:
lm(formula = NDSI. ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8547 -0.1292  0.2087  0.3162  1.0357 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -2.628e-16  1.962e-01   0.000  1.00000   
q0           3.966e-01  2.425e-01   1.635  0.13029   
TR.V        -7.723e-01  2.321e-01  -3.328  0.00674 **
soil.pc1     5.174e-02  2.276e-01   0.227  0.82435   
remote_pc1  -4.937e-02  1.286e-01  -0.384  0.70830   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7849 on 11 degrees of freedom
Multiple R-squared:  0.5482,    Adjusted R-squared:  0.384 
F-statistic: 3.337 on 4 and 11 DF,  p-value: 0.05078

Show Code

cat("\n--- SUMMARY: ADI MODEL ---\n"); print(summary(model_ADI))


--- SUMMARY: ADI MODEL ---


Call:
lm(formula = ADI. ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.81236 -0.45170 -0.01213  0.21270  1.52385 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept)  3.018e-17  1.805e-01   0.000  1.00000   
q0           5.865e-01  2.232e-01   2.628  0.02348 * 
TR.V        -8.718e-01  2.135e-01  -4.083  0.00181 **
soil.pc1     2.403e-01  2.094e-01   1.147  0.27558   
remote_pc1   1.916e-01  1.183e-01   1.620  0.13356   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7222 on 11 degrees of freedom
Multiple R-squared:  0.6176,    Adjusted R-squared:  0.4785 
F-statistic: 4.441 on 4 and 11 DF,  p-value: 0.02225

Show Code

cat("\n--- SUMMARY: ACI MODEL ---\n"); print(summary(model_ACI))


--- SUMMARY: ACI MODEL ---


Call:
lm(formula = ACI. ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.9131 -0.4469 -0.1199  0.1718  1.7580 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -1.595e-15  1.900e-01   0.000  1.00000   
q0          -3.359e-01  2.349e-01  -1.430  0.18047   
TR.V         7.404e-01  2.247e-01   3.295  0.00715 **
soil.pc1     9.374e-02  2.204e-01   0.425  0.67883   
remote_pc1  -3.023e-01  1.245e-01  -2.428  0.03350 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7601 on 11 degrees of freedom
Multiple R-squared:  0.5763,    Adjusted R-squared:  0.4223 
F-statistic: 3.741 on 4 and 11 DF,  p-value: 0.03705

Show Code

cat("\n--- SUMMARY: SH MODEL ---\n"); print(summary(model_SH))


--- SUMMARY: SH MODEL ---


Call:
lm(formula = SH. ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.01087 -0.44815 -0.00509  0.25308  1.60255 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -2.843e-16  1.999e-01   0.000  1.00000   
q0           4.531e-01  2.471e-01   1.834  0.09388 . 
TR.V        -8.061e-01  2.364e-01  -3.409  0.00583 **
soil.pc1    -7.892e-03  2.319e-01  -0.034  0.97346   
remote_pc1   1.598e-01  1.310e-01   1.220  0.24805   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7997 on 11 degrees of freedom
Multiple R-squared:  0.531, Adjusted R-squared:  0.3605 
F-statistic: 3.114 on 4 and 11 DF,  p-value: 0.0609

Show Code

cat("\n--- SUMMARY: BI MODEL ---\n"); print(summary(model_BI))


--- SUMMARY: BI MODEL ---


Call:
lm(formula = BI. ~ q0 + TR.V + soil.pc1 + remote_pc1, data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.49700 -0.44978  0.06804  0.51651  1.54311 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.396e-16  2.333e-01   0.000    1.000
q0           1.403e-01  2.884e-01   0.486    0.636
TR.V        -1.700e-01  2.759e-01  -0.616    0.550
soil.pc1     2.283e-01  2.706e-01   0.843    0.417
remote_pc1  -2.684e-01  1.529e-01  -1.756    0.107

Residual standard error: 0.9333 on 11 degrees of freedom
Multiple R-squared:  0.3612,    Adjusted R-squared:  0.1289 
F-statistic: 1.555 on 4 and 11 DF,  p-value: 0.2539

Show Code

cat("\n--- SUMMARY: BIRDNET q0 (Richness) ---\n"); print(summary(model_q0_birdnet))


--- SUMMARY: BIRDNET q0 (Richness) ---


Call:
lm(formula = q0.birdnett ~ q0 + TR.V + soil.pc1 + remote_pc1, 
    data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.67978 -0.35957  0.00847  0.49330  1.80338 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept) -1.413e-16  2.393e-01   0.000   1.0000  
q0          -1.041e-01  2.958e-01  -0.352   0.7314  
TR.V        -3.126e-01  2.830e-01  -1.105   0.2929  
soil.pc1    -5.133e-01  2.776e-01  -1.849   0.0915 .
remote_pc1   5.608e-03  1.568e-01   0.036   0.9721  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9573 on 11 degrees of freedom
Multiple R-squared:  0.328, Adjusted R-squared:  0.08361 
F-statistic: 1.342 on 4 and 11 DF,  p-value: 0.315

Show Code

cat("\n--- SUMMARY: BIRDNET q1 (Shannon) ---\n"); print(summary(model_q1_birdnet))


--- SUMMARY: BIRDNET q1 (Shannon) ---


Call:
lm(formula = q1.birdnett ~ q0 + TR.V + soil.pc1 + remote_pc1, 
    data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.04028 -0.42991 -0.08651  0.33725  1.61605 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  9.910e-17  2.197e-01   0.000   1.0000  
q0          -6.901e-01  2.716e-01  -2.541   0.0274 *
TR.V         3.315e-02  2.598e-01   0.128   0.9008  
soil.pc1    -2.077e-01  2.548e-01  -0.815   0.4323  
remote_pc1   8.840e-02  1.439e-01   0.614   0.5516  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8788 on 11 degrees of freedom
Multiple R-squared:  0.4337,    Adjusted R-squared:  0.2277 
F-statistic: 2.106 on 4 and 11 DF,  p-value: 0.1484

Show Code

cat("\n--- SUMMARY: BIRDNET q2 (Simpson) ---\n"); print(summary(model_q2_birdnet))


--- SUMMARY: BIRDNET q2 (Simpson) ---


Call:
lm(formula = q2.birdnett ~ q0 + TR.V + soil.pc1 + remote_pc1, 
    data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.1109 -0.4535 -0.1989  0.4520  1.7052 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  4.820e-17  2.258e-01   0.000   1.0000  
q0          -6.030e-01  2.791e-01  -2.160   0.0537 .
TR.V         7.394e-03  2.670e-01   0.028   0.9784  
soil.pc1    -1.139e-01  2.619e-01  -0.435   0.6720  
remote_pc1   1.533e-01  1.479e-01   1.036   0.3223  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9032 on 11 degrees of freedom
Multiple R-squared:  0.4018,    Adjusted R-squared:  0.1842 
F-statistic: 1.847 on 4 and 11 DF,  p-value: 0.1902

Show Code

cat("\n--- SUMMARY: BIRDNET D (Berger-Parker) ---\n"); print(summary(model_D_birdnet))


--- SUMMARY: BIRDNET D (Berger-Parker) ---


Call:
lm(formula = D..birdnett ~ q0 + TR.V + soil.pc1 + remote_pc1, 
    data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2253 -0.4658 -0.1772  0.3440  1.5590 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  8.334e-17  2.258e-01   0.000   1.0000  
q0          -5.389e-01  2.791e-01  -1.931   0.0797 .
TR.V         8.334e-02  2.671e-01   0.312   0.7608  
soil.pc1     7.293e-02  2.619e-01   0.278   0.7858  
remote_pc1   1.909e-01  1.480e-01   1.290   0.2234  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9033 on 11 degrees of freedom
Multiple R-squared:  0.4017,    Adjusted R-squared:  0.1841 
F-statistic: 1.846 on 4 and 11 DF,  p-value: 0.1904

Show Code

model_list <- list(NDSI = model_NDSI, ADI = model_ADI,ACI = model_ACI,  SH = model_SH,  BI = model_BI,q0_birdnet = model_q0_birdnet,q1_birdnet = model_q1_birdnet,q2_birdnet = model_q2_birdnet,D_birdnet = model_D_birdnet)

# Extract coefficients
coef_table <- do.call(rbind, lapply(names(model_list), function(name) {
  tidy(model_list[[name]]) %>%
    mutate(Response = name) %>%
    dplyr::select(Response, term, estimate, std.error, statistic, p.value)
}))


cat("\n--- SUMMARY: coef_table ---\n"); print(summary(coef_table))


--- SUMMARY: coef_table ---

   Response             term              estimate          std.error     
 Length:45          Length:45          Min.   :-0.87175   Min.   :0.1183  
 Class :character   Class :character   1st Qu.:-0.20773   1st Qu.:0.1962  
 Mode  :character   Mode  :character   Median : 0.00000   Median :0.2319  
                                       Mean   :-0.06112   Mean   :0.2222  
                                       3rd Qu.: 0.09374   3rd Qu.:0.2619  
                                       Max.   : 0.74039   Max.   :0.2958  
   statistic          p.value        
 Min.   :-4.0830   Min.   :0.001811  
 1st Qu.:-0.8151   1st Qu.:0.106888  
 Median : 0.0000   Median :0.550316  
 Mean   :-0.2127   Mean   :0.509151  
 3rd Qu.: 0.4865   3rd Qu.:0.972109  
 Max.   : 3.2947   Max.   :1.000000

Show Code

# Extract model fit statistics
fit_table <- do.call(rbind, lapply(names(model_list), function(name) {
  glance(model_list[[name]]) %>%
    mutate(Response = name) %>%
   dplyr:: select(Response, r.squared, adj.r.squared, sigma, statistic, p.value, AIC, BIC)
}))
print(fit_table)

# A tibble: 9 × 8
  Response   r.squared adj.r.squared sigma statistic p.value   AIC   BIC
  <chr>          <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl> <dbl>
1 NDSI           0.548        0.384  0.785      3.34  0.0508  43.7  48.3
2 ADI            0.618        0.478  0.722      4.44  0.0222  41.0  45.6
3 ACI            0.576        0.422  0.760      3.74  0.0371  42.6  47.3
4 SH             0.531        0.361  0.800      3.11  0.0609  44.3  48.9
5 BI             0.361        0.129  0.933      1.56  0.254   49.2  53.8
6 q0_birdnet     0.328        0.0836 0.957      1.34  0.315   50.0  54.6
7 q1_birdnet     0.434        0.228  0.879      2.11  0.148   47.3  51.9
8 q2_birdnet     0.402        0.184  0.903      1.85  0.190   48.2  52.8
9 D_birdnet      0.402        0.184  0.903      1.85  0.190   48.2  52.8

Show Code

# Create the detailed table with significance stars
detailed_coef_table <- do.call(rbind, lapply(names(model_list), function(name) {
  tidy(model_list[[name]]) %>%
    mutate(Response = name) %>%
    # Add significance column
    mutate(significance = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01  ~ "**",
      p.value < 0.05  ~ "*",
      p.value < 0.1   ~ ".",
      TRUE            ~ "ns"
    )) %>%
    # Round numbers for clean reporting
    mutate(
      estimate  = round(estimate, 3),
      std.error = round(std.error, 3),
      statistic = round(statistic, 2),
      p.value   = round(p.value, 4)
    ) %>%
    dplyr::select(Response, term, estimate, std.error, statistic, p.value, significance)
}))
detailed_coef_table

# A tibble: 45 × 7
   Response term        estimate std.error statistic p.value significance
   <chr>    <chr>          <dbl>     <dbl>     <dbl>   <dbl> <chr>       
 1 NDSI     (Intercept)    0         0.196      0     1      ns          
 2 NDSI     q0             0.397     0.243      1.64  0.130  ns          
 3 NDSI     TR.V          -0.772     0.232     -3.33  0.0067 **          
 4 NDSI     soil.pc1       0.052     0.228      0.23  0.824  ns          
 5 NDSI     remote_pc1    -0.049     0.129     -0.38  0.708  ns          
 6 ADI      (Intercept)    0         0.181      0     1      ns          
 7 ADI      q0             0.587     0.223      2.63  0.0235 *           
 8 ADI      TR.V          -0.872     0.214     -4.08  0.0018 **          
 9 ADI      soil.pc1       0.24      0.209      1.15  0.276  ns          
10 ADI      remote_pc1     0.192     0.118      1.62  0.134  ns          
# ℹ 35 more rows

Show Code

# 2. Print the full table
cat("\n--- DETAILED COEFFICIENT TABLE (ALL MODELS) ---\n")


--- DETAILED COEFFICIENT TABLE (ALL MODELS) ---

Show Code

print(as.data.frame(detailed_coef_table))

     Response        term estimate std.error statistic p.value significance
1        NDSI (Intercept)    0.000     0.196      0.00  1.0000           ns
2        NDSI          q0    0.397     0.243      1.64  0.1303           ns
3        NDSI        TR.V   -0.772     0.232     -3.33  0.0067           **
4        NDSI    soil.pc1    0.052     0.228      0.23  0.8243           ns
5        NDSI  remote_pc1   -0.049     0.129     -0.38  0.7083           ns
6         ADI (Intercept)    0.000     0.181      0.00  1.0000           ns
7         ADI          q0    0.587     0.223      2.63  0.0235            *
8         ADI        TR.V   -0.872     0.214     -4.08  0.0018           **
9         ADI    soil.pc1    0.240     0.209      1.15  0.2756           ns
10        ADI  remote_pc1    0.192     0.118      1.62  0.1336           ns
11        ACI (Intercept)    0.000     0.190      0.00  1.0000           ns
12        ACI          q0   -0.336     0.235     -1.43  0.1805           ns
13        ACI        TR.V    0.740     0.225      3.29  0.0071           **
14        ACI    soil.pc1    0.094     0.220      0.43  0.6788           ns
15        ACI  remote_pc1   -0.302     0.125     -2.43  0.0335            *
16         SH (Intercept)    0.000     0.200      0.00  1.0000           ns
17         SH          q0    0.453     0.247      1.83  0.0939            .
18         SH        TR.V   -0.806     0.236     -3.41  0.0058           **
19         SH    soil.pc1   -0.008     0.232     -0.03  0.9735           ns
20         SH  remote_pc1    0.160     0.131      1.22  0.2481           ns
21         BI (Intercept)    0.000     0.233      0.00  1.0000           ns
22         BI          q0    0.140     0.288      0.49  0.6362           ns
23         BI        TR.V   -0.170     0.276     -0.62  0.5503           ns
24         BI    soil.pc1    0.228     0.271      0.84  0.4169           ns
25         BI  remote_pc1   -0.268     0.153     -1.76  0.1069           ns
26 q0_birdnet (Intercept)    0.000     0.239      0.00  1.0000           ns
27 q0_birdnet          q0   -0.104     0.296     -0.35  0.7314           ns
28 q0_birdnet        TR.V   -0.313     0.283     -1.10  0.2929           ns
29 q0_birdnet    soil.pc1   -0.513     0.278     -1.85  0.0915            .
30 q0_birdnet  remote_pc1    0.006     0.157      0.04  0.9721           ns
31 q1_birdnet (Intercept)    0.000     0.220      0.00  1.0000           ns
32 q1_birdnet          q0   -0.690     0.272     -2.54  0.0274            *
33 q1_birdnet        TR.V    0.033     0.260      0.13  0.9008           ns
34 q1_birdnet    soil.pc1   -0.208     0.255     -0.82  0.4323           ns
35 q1_birdnet  remote_pc1    0.088     0.144      0.61  0.5516           ns
36 q2_birdnet (Intercept)    0.000     0.226      0.00  1.0000           ns
37 q2_birdnet          q0   -0.603     0.279     -2.16  0.0537            .
38 q2_birdnet        TR.V    0.007     0.267      0.03  0.9784           ns
39 q2_birdnet    soil.pc1   -0.114     0.262     -0.43  0.6720           ns
40 q2_birdnet  remote_pc1    0.153     0.148      1.04  0.3223           ns
41  D_birdnet (Intercept)    0.000     0.226      0.00  1.0000           ns
42  D_birdnet          q0   -0.539     0.279     -1.93  0.0797            .
43  D_birdnet        TR.V    0.083     0.267      0.31  0.7608           ns
44  D_birdnet    soil.pc1    0.073     0.262      0.28  0.7858           ns
45  D_birdnet  remote_pc1    0.191     0.148      1.29  0.2234           ns

Show Code

# 3. Create a pivot table for "Quick Look" at Significance
# This shows you which predictors are actually driving which acoustic indices
sig_summary <- detailed_coef_table %>%
  filter(term != "(Intercept)") %>%
  dplyr::select(Response, term, significance) %>%
  tidyr::pivot_wider(names_from = term, values_from = significance)

cat("\n--- SIGNIFICANCE SUMMARY MATRIX ---\n")


--- SIGNIFICANCE SUMMARY MATRIX ---

Show Code

print(sig_summary)

# A tibble: 9 × 5
  Response   q0    TR.V  soil.pc1 remote_pc1
  <chr>      <chr> <chr> <chr>    <chr>     
1 NDSI       ns    **    ns       ns        
2 ADI        *     **    ns       ns        
3 ACI        ns    **    ns       *         
4 SH         .     **    ns       ns        
5 BI         ns    ns    ns       ns        
6 q0_birdnet ns    ns    .        ns        
7 q1_birdnet *     ns    ns       ns        
8 q2_birdnet .     ns    ns       ns        
9 D_birdnet  .     ns    ns       ns

Show Code

# 4. Final Fit Table (Rounded)
final_fit_summary <- fit_table %>%
  mutate(across(where(is.numeric), ~round(., 3)))

cat("\n--- MODEL FIT SUMMARY (R2 & AIC) ---\n")


--- MODEL FIT SUMMARY (R2 & AIC) ---

Show Code

print(final_fit_summary)

# A tibble: 9 × 8
  Response   r.squared adj.r.squared sigma statistic p.value   AIC   BIC
  <chr>          <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl> <dbl>
1 NDSI           0.548         0.384 0.785      3.34   0.051  43.7  48.3
2 ADI            0.618         0.478 0.722      4.44   0.022  41.0  45.6
3 ACI            0.576         0.422 0.76       3.74   0.037  42.6  47.3
4 SH             0.531         0.361 0.8        3.11   0.061  44.3  48.9
5 BI             0.361         0.129 0.933      1.56   0.254  49.2  53.8
6 q0_birdnet     0.328         0.084 0.957      1.34   0.315  50.0  54.6
7 q1_birdnet     0.434         0.228 0.879      2.11   0.148  47.3  51.9
8 q2_birdnet     0.402         0.184 0.903      1.85   0.19   48.2  52.8
9 D_birdnet      0.402         0.184 0.903      1.85   0.19   48.2  52.8

Final Correlation visualisation

Show Code

#("Model: Y ~ q0 + TR.V + soil.pc2 + remote_pc1")

#model_data <- cbind(final_env, final_acoustic)
colnames(model_data)

 [1] "q0.birdnett" "q1.birdnett" "q2.birdnett" "D..birdnett" "q0"         
 [6] "TR.V"        "remote_pc1"  "soil.pc1"    "NDSI."       "ADI."       
[11] "ACI."        "SH."         "BI."

Show Code

cor_data <- model_data %>%
  ungroup() %>%
  dplyr::select(BI.,  NDSI., ADI., ACI.,  q1.birdnett, q0.birdnett, q2.birdnett,D..birdnett, TR.V, soil.pc1, q0, remote_pc1) %>%
  drop_na()

manual_order <- c("BI.",  "NDSI.", "ADI.", "ACI.", "q0.birdnett", "q1.birdnett",
 "q2.birdnett", "D..birdnett",  "TR.V",   "soil.pc1", "q0", "remote_pc1")



cor_data <- model_data %>%
  ungroup() %>%
  dplyr::select(all_of(manual_order)) %>%
  drop_na()


my_sig_circles <- function(data, mapping, ...) {
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)
  
  # Calculate correlation and p-value
  ct <- cor.test(x, y, use = "complete.obs")
  corr_val <- ct$estimate
  p_val <- ct$p.value
  
  # Logic: Only plot if p < 0.05
  if (p_val < 0.10) {
    # Assign labels based on p-value thresholds
    stars <- ifelse(p_val < 0.001, "***", 
             ifelse(p_val < 0.01, "**", 
             ifelse(p_val < 0.05, "*", "."))) # Dot for p < 0.10
    
    ggplot(data = data.frame(x = 1, y = 1), aes(x = x, y = y)) +
      geom_point(aes(color = corr_val), size = 9) + 
      scale_color_gradient2(low = "#B2182B", mid = "white", high = "#2166AC", 
                            midpoint = 0, limits = c(-1, 1)) +
      geom_text(label = paste0(round(corr_val, 2), "\n", stars),
                size = 2.5, fontface = "bold", color = "black") +
      theme_void() +
      theme(legend.position = "none")
  } else {
    ggplot() + theme_void()
  }
}

ggpairs(cor_data,
        upper = list(continuous = my_sig_circles),
        diag = "blank", 
        lower = "blank") +
 ggplot2:: theme_minimal() +
  theme(
    strip.text.x = element_text(size = 7, face = "bold", angle = 45, hjust = 0),
    strip.text.y = element_text(size = 7, face = "bold",angle = 0),
    panel.border = element_rect(colour = "grey95", fill = NA),
    panel.grid.major = element_blank()
  )