Soil Vegetation Analysis 2026

Libraries

Show Code

# --- 1. CORE DATA MANIPULATION ---
library(tidyverse)    # Includes dplyr, ggplot2, tidyr, readr, stringr, purrr
library(lubridate)    # Date/time parsing (dmy)
library(reshape2)     # For melt() function in correlation heatmaps
library(broom)        # For tidy() and glance() model outputs
library(scales)       # For number_format() and axis scaling

# --- 2. SPATIAL & REMOTE SENSING ---
library(sf)           # Vector data handling
library(terra)        # Raster data handling
library(Kendall)      # Mann-Kendall trend tests
library(trend)        # Sens.slope() calculation

# --- 3. BIODIVERSITY & COMMUNITY ECOLOGY ---
library(vegan)         # RDA, PERMANOVA (adonis2), and Bray-Curtis
library(betapart)      # Beta diversity partitioning (Jaccard/Sorensen)
library(entropart)     # Hill numbers (DivPart, DivProfile) and MetaCommunity
library(iNEXT.3D)      # 3D rarefaction and diversity estimation
library(iNEXT.beta3D)  # Beta diversity rarefaction

# --- 4. MULTIVARIATE ANALYSIS ---
library(FactoMineR)    # Principal Component Analysis (PCA)
library(factoextra)    # Visualization (fviz_pca_var) and eigenvalue extraction

# --- 5. STATISTICAL MODELING & DIAGNOSTICS ---
library(usdm)          # VIF analysis (vifstep)
library(car)           # Levene's test and ANOVA Type II/III
library(rstatix)       # Shapiro-Wilk and Kruskal-Wallis (pipe-friendly)
library(lmtest)        # Breusch-Pagan test (bptest) for heteroscedasticity

# --- 6. VISUALIZATION & REPORTING ---
library(GGally)        # ggpairs for correlation matrices
library(patchwork)     # Combining multiple ggplot objects
library(gridExtra)     # Grid-based plot layouts
library(grid)          # Required for unit() and arrow() functions
library(knitr)         # kable() for clean table outputs in RMarkdown
library(kableExtra)

Soil Data

Show Code

getwd()

[1] "C:/Users/n1227824/OneDrive - Nottingham Trent University/R/R_ldsf.rs.acoustics/scripts"

Show Code

Soil_Data <- read_csv("data/soil.data.csv")
head(Soil_Data)

# A tibble: 6 × 30
  SSN    Job.No Study Site  cluster  plot depth_std depth_top depth_bottom    pH
  <chr>  <chr>  <chr> <chr>   <dbl> <dbl> <chr>         <dbl>        <dbl> <dbl>
1 WA090… ICR-2… NTU … ENAR…       1     1 TOP              NA           NA  6.86
2 WA090… ICR-2… NTU … ENAR…       1     1 SUB              NA           NA  6.97
3 WA090… ICR-2… NTU … ENAR…       1     2 TOP              NA           NA  6.77
4 WA090… ICR-2… NTU … ENAR…       1     2 SUB              NA           NA  6.96
5 WA090… ICR-2… NTU … ENAR…       1     3 TOP              NA           NA  6.67
6 WA090… ICR-2… NTU … ENAR…       1     3 SUB              NA           NA  6.95
# ℹ 20 more variables: `SOC (%)` <dbl>, `TN (%)` <dbl>, `EC  (uS/cm)` <dbl>,
#   `m3.P (mg/kg)` <dbl>, `m3.Al (mg/kg)` <dbl>, `m3.B (mg/kg)` <dbl>,
#   `m3.Ca (mg/kg)` <dbl>, `m3.Fe (mg/kg)` <dbl>, `m3.K (mg/kg)` <dbl>,
#   `m3.Mg (mg/kg)` <dbl>, `m3.Na (mg/kg)` <dbl>, `CEC (cmolc/kg)` <dbl>,
#   `ExAc (cmolc/kg)` <dbl>, PSI <dbl>, `Clay (%)` <dbl>, `Silt (%)` <dbl>,
#   `Sand (%)` <dbl>, Soil.Texture.Interpretation <chr>, Interpretation <lgl>,
#   ...30 <chr>

Show Code

# Select and rename relevant columns
soil_data <- Soil_Data %>%
  dplyr::select(-c(1:4, 30, 29, 28)) %>%
  rename(
    EC = `EC  (uS/cm)`, P = `m3.P (mg/kg)`,      Al = `m3.Al (mg/kg)`, 
    B = `m3.B (mg/kg)`, Ca = `m3.Ca (mg/kg)`,    Fe = `m3.Fe (mg/kg)`, 
    K = `m3.K (mg/kg)`, Mg = `m3.Mg (mg/kg)`,    Na = `m3.Na (mg/kg)`,  
    CEC = `CEC (cmolc/kg)`, ExAc = `ExAc (cmolc/kg)`, TN = `TN (%)`,  
    SOC = `SOC (%)`,  Clay = `Clay (%)`,  Sand = `Sand (%)`, Silt = `Silt (%)`) 

soil_data$cluster <- as.factor(soil_data$cluster)  
soil_data <- soil_data %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0)))  


# Compute the mean for each plot within each cluster
soil_data <- soil_data %>%
  group_by(cluster, plot) %>%
  summarise(across(where(is.numeric), mean, na.rm = TRUE))  

 head (soil_data)

# A tibble: 6 × 22
# Groups:   cluster [1]
  cluster  plot depth_top depth_bottom    pH   SOC     TN    EC     P    Al
  <fct>   <dbl>     <dbl>        <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
1 1           1         0        10     6.89  1.28 0.12    123.  20.1  852.
2 1           2         0        12.7   6.9   1.21 0.117   120.  24.1  860.
3 1           3         0         0     6.81  1.18 0.125   123.  41.2  850.
4 1           4         0        16.5   6.86  1.22 0.12    120.  24.6  856.
5 1           5         0         8.33  7.10  0.96 0.0967  125.  27.0  846.
6 1           6         0        11     6.6   1.07 0.12    105.  28.3  873.
# ℹ 12 more variables: B <dbl>, Ca <dbl>, Fe <dbl>, K <dbl>, Mg <dbl>,
#   Na <dbl>, CEC <dbl>, ExAc <dbl>, PSI <dbl>, Clay <dbl>, Silt <dbl>,
#   Sand <dbl>

Soil pca

To identify the main factors influencing soil properties, I used Principal Component Analysis (PCA) to transform the original 18, correlated variables into a smaller set of uncorrelated Principal Components (PCs), which capture the maximum variance in the data (Souza, 2025). I carried out the PCA using the PCA() function from the FactoMineR package in R (Husson et al., 2024). I determined the significance of each principal component by examining its eigenvalue; components with eigenvalues greater than one were considered meaningful. I examined variable loadings and correlation structures to interpret the ecological significance of each axis.

Show Code

soil.data <- soil_data[,-c(1:4)]

# Perform PCA
pc.results.soil <- PCA(soil.data, scale.unit = TRUE, graph = FALSE)

# Extract eigenvalues
eigenvalues <- pc.results.soil$eig[,1]  
head(eigenvalues)

   comp 1    comp 2    comp 3    comp 4    comp 5    comp 6 
7.5175646 4.4735839 1.9603027 1.2654116 1.0108506 0.4796236

Show Code

# Correlation circle - variable contribution to PCs
fviz_pca_var(pc.results.soil,  col.var = "contrib",gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),  repel = TRUE, title = "", 
             legend.title = list(color = "Contribution Level")) +
              labs(color = "Contribution")

Show Code

# Extract correlation of variables with PCs
correlations <- pc.results.soil$var$coord
head(correlations)

          Dim.1      Dim.2       Dim.3       Dim.4       Dim.5
pH   0.89288765 -0.3198878  0.17780786 -0.10981891 -0.11333255
SOC  0.25929878  0.9409967  0.01981635 -0.01406242  0.08483381
TN   0.05406606  0.9729491 -0.02929538 -0.01689972  0.12645926
EC   0.59203055  0.7034661  0.10441728  0.22294499 -0.17805452
P    0.44298990  0.5502939  0.47522877  0.25119190 -0.07212982
Al  -0.65468650  0.2344754 -0.52590976  0.14447660  0.40048924

Show Code

# Extract PCA scores and cluster info
pca_scores <- data.frame(
  PC1 = pc.results.soil$ind$coord[, 1], PC2 = pc.results.soil$ind$coord[, 2],
  Cluster = as.factor(soil_data$cluster))

# Calculate mean PC1 and PC2 per cluster
cluster_means <- pca_scores %>%
  group_by(Cluster) %>%
  summarise(
    Mean_PC1 = mean(PC1),  Mean_PC2 = mean(PC2))

soil.pc<- cluster_means %>% rename(soil.pc1= Mean_PC1, soil.pc2= Mean_PC2)
head (soil.pc)

# A tibble: 6 × 3
  Cluster soil.pc1 soil.pc2
  <fct>      <dbl>    <dbl>
1 1          0.872   -1.67 
2 2          1.02    -1.04 
3 3          1.66     1.92 
4 4         -2.26     0.904
5 5         -3.06    -0.350
6 6          2.72    -2.01

✅ Interpretation: T#The eigenvalues represent the amount of variance captured
by each principal component (PC).The first five components have eigenvalues greater than 1, suggesting they retain meaningful information.

✅ Interpretation: Cumulative variance: PC1 explains 41.8% of the total variance. PC2 adds 24.9%, bringing the cumulative variance to 66.7% #Variable Contributions to Principal Components (Correlation with PCs)

✅ Interpretation: PC1:These properties are strongly affected by fertilisers especially NPK Strong positive correlations: CEC (0.98), Ca (0.98), Mg (0.95), B (0.84), pH (0.89). High values of these components show high use of the fertilisers and lime. Further, the soils showed strong negative correlation: Al (-0.65), Sand (-0.54), ExAc (-0.51), Fe (-0.49). Suggests PC1 represents cation exchange capacity (CEC) and soil fertility.

✅ Interpretation: PC2:High PC2: Probably in conservation areas and unfarmed hilly areas (due to organic matter accumulation).Strongest positive correlations: TN (0.97), SOC (0.94),EC (0.70), Sand (0.62). Negative correlations Clay (-0.69), Na (-0.60) Indicates PC2 captures organic matter and soil texture gradients.

Soil spatial statistics

To test for multivariate differences in overall soil composition among the 16 clusters, I conducted a permutational multivariate analysis of variance (PERMANOVA) with Euclidean distances, based on standardised soil variables (adonis2 function, vegan package, Oksanen et al., 2025). I identified which specific soil variables exhibited significant spatial variation by applying Kruskal-Wallis tests to each variable individually, using a non-parametric approach due to violations of normality. I subsequently tested for spatial differences in the extracted scores of the first two principal components (PC1 and PC2) among clusters using one-way ANOVA.

Show Code

# Shapiro-Wilk test for all raw soil variables
soil_normality <- soil_data %>%
  pivot_longer(cols = c(pH, SOC, TN, EC, P, Al, B, Ca, Fe, K, Mg, Na, CEC, ExAc, PSI, Clay, Silt, Sand), 
               names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  shapiro_test(Value)

# PERMANOVA Does the overall soil change between clusters?
soil_matrix <- soil_data %>% ungroup() %>% dplyr:: select(-cluster, -plot)
adonis2(soil_matrix ~ cluster, data = soil_data)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = soil_matrix ~ cluster, data = soil_data)
          Df SumOfSqs      R2      F Pr(>F)    
Model     15   1.9487 0.59707 14.028  0.001 ***
Residual 142   1.3151 0.40293                  
Total    157   3.2638 1.00000                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Kruskal-Wallis Which specific nutrients or textures are different?
soil_kruskal <- soil_data %>%
  pivot_longer(
    cols = c(pH, SOC, TN, EC, P, Al, B, Ca, Fe, K, Mg, Na, CEC, ExAc, PSI, Clay, Silt, Sand),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  group_by(Variable) %>%
  kruskal_test(Value ~ cluster) %>%
  add_significance() %>%
  filter(p < 0.05) 


soil_kruskal %>%
  kable(caption = "Significant Soil Property Differences Across Clusters") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%", height = "400px")

Significant Soil Property Differences Across Clusters
Variable	.y.	n	statistic	df	p	method	p.signif
Al	Value	158	97.01431	15	0.00e+00	Kruskal-Wallis	****
B	Value	158	71.90914	15	0.00e+00	Kruskal-Wallis	****
CEC	Value	158	94.06721	15	0.00e+00	Kruskal-Wallis	****
Ca	Value	158	94.12593	15	0.00e+00	Kruskal-Wallis	****
Clay	Value	158	76.87263	15	0.00e+00	Kruskal-Wallis	****
EC	Value	158	47.05928	15	3.60e-05	Kruskal-Wallis	****
ExAc	Value	158	44.76172	15	8.35e-05	Kruskal-Wallis	****
Fe	Value	158	39.87837	15	4.73e-04	Kruskal-Wallis	***
K	Value	158	86.76481	15	0.00e+00	Kruskal-Wallis	****
Mg	Value	158	93.31948	15	0.00e+00	Kruskal-Wallis	****
Na	Value	158	85.33213	15	0.00e+00	Kruskal-Wallis	****
P	Value	158	44.24040	15	1.01e-04	Kruskal-Wallis	***
PSI	Value	158	52.21129	15	5.20e-06	Kruskal-Wallis	****
SOC	Value	158	47.75408	15	2.79e-05	Kruskal-Wallis	****
Sand	Value	158	77.46504	15	0.00e+00	Kruskal-Wallis	****
Silt	Value	158	35.38149	15	2.17e-03	Kruskal-Wallis	**
TN	Value	158	53.98273	15	2.60e-06	Kruskal-Wallis	****
pH	Value	158	105.81762	15	0.00e+00	Kruskal-Wallis	****

Show Code

#  PCA Test (PC1 & PC2 acrosee the landscape) ---
summary(aov(PC1 ~ Cluster, data = pca_scores))

             Df Sum Sq Mean Sq F value Pr(>F)    
Cluster      15  733.2   48.88   15.27 <2e-16 ***
Residuals   142  454.6    3.20                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov(PC2 ~ Cluster, data = pca_scores))

             Df Sum Sq Mean Sq F value   Pr(>F)    
Cluster      15  237.9  15.859   4.802 1.83e-07 ***
Residuals   142  468.9   3.302                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

✅ Interpretation:

The PERMANOVA revealed that overall soil composition differed significantly among the 16 clusters (F = 14.03, p = 0.001). The cluster grouping explained 59.7% of the multivariate variance in soil properties. Kruskal-Wallis tests on individual soil variables confirmed that all 18 measured variables exhibited statistically significant spatial variation (all p < 0.01).

Plants Data

Rangelands Plants metacommunity

Following, Marcon et al. (2025) I used the entropart R package to create a MetaCommunity object.These objects integrated abundance data for rangeland plant species—including perennial and annual grasses, shrubs, and trees recorded along 28-m transects—as well as avian community data derived from both traditional point counts and BirdNET automated detections. In this study, each metacommunity was defined as the total collection of species recorded at the landscape level across 16 clusters, with each cluster treated as a local community. These metacommunities served as the computational basis for partitioning diversity, as it allows for the simultaneous calculation of local α (alpha), between-site β (beta) and total landscape γ (gamma) diversity while weighting each community according to its sample size (Marcon et al., 2025).

Show Code

# Read CSV file
rangelands.data <- read_csv("data/rl.data.csv")

head(rangelands.data)

# A tibble: 6 × 163
  year.month lifeform    species    p1    p2    p3    p4    p5    p6    p7    p8
  <chr>      <chr>       <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 01-Apr-24  Perennial … Aristi…     0     0     0     0     0     0     0     0
2 01-Apr-24  Perennial … Bothri…     0     0     0     0     0     1     0     0
3 01-Apr-24  Perennial … Cynodo…    28    25    30    29    30     5     1    26
4 01-Apr-24  Perennial … Eragro…     0     5     0     0     0    21     1     0
5 01-Apr-24  Perennial … N           0     0     0     1     0     3    10     0
6 01-Apr-24  Perennial … Panicu…     0     0     0     0     0     0    11     0
# ℹ 152 more variables: p9 <dbl>, p10 <dbl>, p11 <dbl>, p12 <dbl>, p13 <dbl>,
#   p14 <dbl>, p15 <dbl>, p16 <dbl>, p17 <dbl>, p18 <dbl>, p19 <dbl>,
#   p20 <dbl>, p21 <dbl>, p22 <dbl>, p23 <dbl>, p24 <dbl>, p25 <dbl>,
#   p26 <dbl>, p27 <dbl>, p28 <dbl>, p29 <dbl>, p30 <dbl>, p31 <dbl>,
#   p32 <dbl>, p33 <dbl>, p34 <dbl>, p35 <dbl>, p36 <dbl>, p37 <dbl>,
#   p38 <dbl>, p39 <dbl>, p40 <dbl>, p41 <dbl>, p42 <dbl>, p43 <dbl>,
#   p44 <dbl>, p45 <dbl>, p46 <dbl>, p47 <dbl>, p48 <dbl>, p49 <dbl>, …

Show Code

rangelands.data <- rangelands.data %>%
  #filter(lifeform != "Shrub") %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>%
filter(!species %in% c("N", "NA"))

rangelands.data %>% 
   dplyr:: select( -year.month, -lifeform)%>%
  #select(!species &!lifeform ) 


  pivot_longer(cols= !species, names_to = "plot", values_to = "freq") %>%
  # filter(rowSums(across(where(is.numeric)))!=0) %>%
  pivot_wider(names_from = species, values_from = freq, values_fn = sum) %>%
  replace(is.na(.), 0) %>% 
  mutate(cluster=paste0(rep(LETTERS[3], 10), rep(1:16, each = 10))) %>% 
  relocate(cluster) %>% 
  pivot_longer(!cluster &!plot, names_to = "species", values_to = "freq") %>% 
  group_by(cluster, species) %>% 
  summarise_if(is.numeric, ~ sum(.)) %>% 
  pivot_wider(names_from = cluster, values_from = freq)%>% 
  relocate(C10:C16, .after = C9)->rangelands.data.MC

mc.rangelands<-MetaCommunity(rangelands.data.MC)
summary(mc.rangelands)

Meta-community (class 'MetaCommunity') made of 35797 individuals in 16 
communities and 132 species. 

Its sample coverage is 0.999804470055454 

Community weights are: 
    C1     C2     C3     C4     C5     C6     C7     C8     C9    C10    C11 
0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 
   C12    C13    C14    C15    C16 
0.0625 0.0625 0.0625 0.0625 0.0625 
Community sample numbers of individuals are: 
  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14  C15  C16 
2286 1869 2352 2321 2046 2100 2648 2105 2294 2557 2452 2009 2127 2614 1509 2508 
Community sample coverages are: 
       C1        C2        C3        C4        C5        C6        C7        C8 
0.9978155 0.9989322 0.9970245 0.9974171 0.9975586 0.9980961 0.9996241 0.9943024 
       C9       C10       C11       C12       C13       C14       C15       C16 
0.9965153 0.9949181 0.9979635 0.9995052 0.9967125 0.9965582 0.9980225 0.9984073

✅ Interpretation: Analysis of the plant community across the 16 clusters revealed significant spatial structuring. The analysis identified a total of 132 species distributed across 16 local communities, with an exceptionally high estimated sample coverage (0.9998), indicating that the survey nearly captured the full extent of the species richness present in the study area.

Rangeland Plants Community Alpha Diversity

I then evaluated the local community structure by calculating theα-diversity for each of the 16 clusters using Hill numbers (q = 0, 1, and 2) via the DivPart function of entropart with the “Best” estimator (Marcon et al., 2025). This provided measures of species richness (q = 0), Shannon-effective diversity (q = 1), and Simpson-effective diversity (q = 2). Diversity profiles were generated using the DivProfile function (entropart package) to visualise the transition from richness to dominance-weighted diversity (Marcon et al., 2025). Additionally, I calculated community evenness as the ratio of Simpson diversity to richness (q2 / q0).

Note: This data frame contains grasses, shrubs and trees collected along the 28 metres transect in each plot

Show Code

#****************************************************************all species in rangeland form

# Alpha diversity for all(q=0,1,2)
alpha.rangelands.0 <- DivPart(q=0, mc.rangelands, Correction = "Best")
summary(alpha.rangelands.0)

HCDT diversity partitioning of order 0 of metaCommunity mc.rangelands

Alpha diversity of communities: 
 C1  C2  C3  C4  C5  C6  C7  C8  C9 C10 C11 C12 C13 C14 C15 C16 
 55  45  50  61  57  50  56  65  47  63  52  50  44  55  48  55 
Total alpha diversity of the communities: 
[1] 53.3125
Beta diversity of the communities: 
    None 
2.475967 
Gamma diversity of the metacommunity: 
None 
 132

Show Code

alpha.rangelands.0$CommunityAlphaDiversities

 C1  C2  C3  C4  C5  C6  C7  C8  C9 C10 C11 C12 C13 C14 C15 C16 
 55  45  50  61  57  50  56  65  47  63  52  50  44  55  48  55

Show Code

alpha.rangelands.1 <- DivPart(q=1, mc.rangelands, Correction = "Best")
summary(alpha.rangelands.1)

HCDT diversity partitioning of order 1 of metaCommunity mc.rangelands

Alpha diversity of communities: 
      C1       C2       C3       C4       C5       C6       C7       C8 
17.63735 12.90104 14.27879 21.82798 20.95318 12.45692 22.71278 18.77973 
      C9      C10      C11      C12      C13      C14      C15      C16 
10.71013 18.63753 17.69965 19.80972 11.77773 15.18085 17.12073 18.86585 
Total alpha diversity of the communities: 
[1] 16.55762
Beta diversity of the communities: 
    None 
1.398844 
Gamma diversity of the metacommunity: 
    None 
23.16152

Show Code

alpha.rangelands.1$CommunityAlphaDiversities

      C1       C2       C3       C4       C5       C6       C7       C8 
17.63735 12.90104 14.27879 21.82798 20.95318 12.45692 22.71278 18.77973 
      C9      C10      C11      C12      C13      C14      C15      C16 
10.71013 18.63753 17.69965 19.80972 11.77773 15.18085 17.12073 18.86585

Show Code

alpha.rangelands.2 <- DivPart(q=2, mc.rangelands, Correction = "Best")
summary(alpha.rangelands.2)

HCDT diversity partitioning of order 2 of metaCommunity mc.rangelands

Alpha diversity of communities: 
       C1        C2        C3        C4        C5        C6        C7        C8 
 8.586473  6.057753  7.667439 11.938755 10.382129  5.226332 14.109708 10.163111 
       C9       C10       C11       C12       C13       C14       C15       C16 
 6.149157 10.222547 11.008562 13.225724  6.249617  7.351418  9.184408 10.778188 
Total alpha diversity of the communities: 
[1] 8.52314
Beta diversity of the communities: 
    None 
1.181552 
Gamma diversity of the metacommunity: 
    None 
10.07053

Show Code

alpha.rangelands.2$CommunityAlphaDiversities

       C1        C2        C3        C4        C5        C6        C7        C8 
 8.586473  6.057753  7.667439 11.938755 10.382129  5.226332 14.109708 10.163111 
       C9       C10       C11       C12       C13       C14       C15       C16 
 6.149157 10.222547 11.008562 13.225724  6.249617  7.351418  9.184408 10.778188

Show Code

data.frame(alpha.rangelands.0$CommunityAlphaDiversities,alpha.rangelands.1$CommunityAlphaDiversitie,alpha.rangelands.2$CommunityAlphaDiversities)->alphas.rangelands

colnames(alphas.rangelands) <- c("q0", "q1", "q2")

# View the result
head(alphas.rangelands)

   q0       q1        q2
C1 55 17.63735  8.586473
C2 45 12.90104  6.057753
C3 50 14.27879  7.667439
C4 61 21.82798 11.938755
C5 57 20.95318 10.382129
C6 50 12.45692  5.226332

Show Code

alphas.rangelands %>%
  rownames_to_column() %>%
  rename(cluster = rowname) %>%
  mutate(cluster = factor(cluster)) %>%
  #bind_rows(tibble(cluster = "C15", q0s = 0, q1s = 0, q2s = 0, D.eve = 0)) %>%
  mutate(cluster = fct_relevel(cluster, paste0("C", 1:16))) %>%
  arrange(cluster) -> alphas.rangelands

head(alphas.rangelands)

  cluster q0       q1        q2
1      C1 55 17.63735  8.586473
2      C2 45 12.90104  6.057753
3      C3 50 14.27879  7.667439
4      C4 61 21.82798 11.938755
5      C5 57 20.95318 10.382129
6      C6 50 12.45692  5.226332

Rangeland Plants Spatial Statistics

To test for significant spatial differences in community composition, a PERMANOVA was performed on the plot-level plant species abundance matrix (n = 160) and at the point station level for birds (n = 96) using Bray–Curtis dissimilarity. Spatial variation in alpha diversity across the 16 clusters was evaluated using one-way ANOVA (stats package).

Show Code

rangelands_plots <- rangelands.data %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>%
  filter(!species %in% c("N", "NA")) %>%
  dplyr::select(-year.month, -lifeform) %>%
  pivot_longer(cols = !species, names_to = "plot", values_to = "freq") %>%
  group_by(plot, species) %>%
  summarise(freq = sum(freq, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = freq, values_fill = 0) %>%
  # Apply the cluster grouping (10 plots per cluster)
  mutate(cluster = paste0("C", rep(1:16, each = 10))) %>%
  relocate(cluster, plot)

#  META-COMMUNITY & ALPHA DIVERSITY at plot level

mc_data <- rangelands_plots %>%
  ungroup() %>%
  dplyr:: select(-cluster) %>%
  column_to_rownames("plot") %>%
  t() %>%
  as.data.frame()

mc.plots <- MetaCommunity(mc_data)

#Hill Numbers (q=0, 1, 2) per plot
q0_vals <- DivPart(q=0, mc.plots, Correction = "Best")$CommunityAlphaDiversities
q1_vals <- DivPart(q=1, mc.plots, Correction = "Best")$CommunityAlphaDiversities
q2_vals <- DivPart(q=2, mc.plots, Correction = "Best")$CommunityAlphaDiversities

plot_diversity <- rangelands_plots %>%
  dplyr:: select(cluster, plot) %>%
  mutate(q0 = q0_vals, q1 = q1_vals, q2 = q2_vals)

# PERMANOVA (Community Composition) ---
species_matrix <- rangelands_plots %>% ungroup() %>% dplyr:: select(-cluster, -plot)

permanova_result <- adonis2(species_matrix ~ cluster, 
                             data = rangelands_plots, 
                             method = "bray", 
                             permutations = 999)
print(permanova_result)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = species_matrix ~ cluster, data = rangelands_plots, permutations = 999, method = "bray")
          Df SumOfSqs      R2      F Pr(>F)    
Model     15    6.630 0.21198 2.5824  0.001 ***
Residual 144   24.647 0.78802                  
Total    159   31.277 1.00000                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Normality of Residuals (Shapiro)
plot_diversity_long <- plot_diversity %>%
  pivot_longer(cols = c(q0, q1, q2), names_to = "q_order", values_to = "value")

shapiro_results <- plot_diversity_long %>%
  group_by(q_order) %>%
  shapiro_test(value)

#  Homogeneity of Variance (Levene's Test for q1)
levene_q1 <-leveneTest(q1 ~ cluster, data = plot_diversity)

print(shapiro_results)

# A tibble: 3 × 4
  q_order variable statistic       p
  <chr>   <chr>        <dbl>   <dbl>
1 q0      value        0.990 0.325  
2 q1      value        0.978 0.0109 
3 q2      value        0.969 0.00122

Show Code

print(levene_q1)

Levene's Test for Homogeneity of Variance (center = median)
       Df F value Pr(>F)
group  15  0.8086 0.6667
      144

Show Code

# ANOVA MODELS ---
aov_q0 <- aov(q0 ~ cluster, data = plot_diversity)
aov_q1 <- aov(q1 ~ cluster, data = plot_diversity)
aov_q2 <- aov(q2 ~ cluster, data = plot_diversity)


summary(aov_q0)

             Df Sum Sq Mean Sq F value  Pr(>F)   
cluster      15    976   65.05   2.184 0.00929 **
Residuals   144   4288   29.78                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov_q1)

             Df Sum Sq Mean Sq F value   Pr(>F)    
cluster      15    511   34.07   4.157 2.58e-06 ***
Residuals   144   1180    8.20                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov_q2)

             Df Sum Sq Mean Sq F value   Pr(>F)    
cluster      15  351.5  23.431    4.86 1.37e-07 ***
Residuals   144  694.3   4.821                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Run Kruskal-Wallis for all diversity orders to confirm the ANOVA test since q1 and q2 didnt meet the assumption of normality
kruskal_q0 <- kruskal.test(q0 ~ cluster, data = plot_diversity)
kruskal_q1 <- kruskal.test(q1 ~ cluster, data = plot_diversity)
kruskal_q2 <- kruskal.test(q2 ~ cluster, data = plot_diversity)

list(q0 = kruskal_q0, q1 = kruskal_q1, q2 = kruskal_q2)

$q0

    Kruskal-Wallis rank sum test

data:  q0 by cluster
Kruskal-Wallis chi-squared = 30.776, df = 15, p-value = 0.009411


$q1

    Kruskal-Wallis rank sum test

data:  q1 by cluster
Kruskal-Wallis chi-squared = 49.428, df = 15, p-value = 1.493e-05


$q2

    Kruskal-Wallis rank sum test

data:  q2 by cluster
Kruskal-Wallis chi-squared = 54.44, df = 15, p-value = 2.217e-06

Show Code

#ANOVA AND KRUSKAL RESULTS ARE CONSISTENT

✅ Interpretation: One-way ANOVA revealed significant spatial variation in alpha diversity among the 16 clusters across all Hill-number orders (Table 4). Differences were significant for q = 0 (p = 0.009), q = 1 (p < 0.001), and q = 2 (p < 0.001). The strength of the cluster effect increased with q, indicating that dominant‑species diversity varied more strongly across the landscape than the species richness.

Rangeland Plants diversity profiles

Diversity profiles were generated using the DivProfile function (entropart package) to visualise the transition from richness to dominance-weighted diversity (Marcon et al., 2025).

Show Code

dp.rangelands<-DivProfile(seq(0,2,1), mc.rangelands, Correction = "Best", NumberOfSimulations = 10)

Show Code

plot(dp.rangelands)

Rangeland Plants beta diversity

I quantified species turnover (β-diversity) using abundance-based Bray–Curtis dissimilarity via the betapart package(Baselga et al., 2025). The beta diversity analysis, based on the abundance-weighted Bray-Curtis dissimilarity index, provides quantitative evidence of the spatial variation in species composition and abundance structure. The total dissimilarity (β BRAY) is divided into two components: β BRAY.BAL, representing balanced species turnover or replacement, and βBRAY.GRA, representing differences due to species richness and nestedness.

$beta.BRAY.BAL
[1] 0.8182445

$beta.BRAY.GRA
[1] 0.1694376

$beta.BRAY
[1] 0.9876821

✅ Interpretation: The extremely high total dissimilarity (β.BRAY = 0.988) indicates near-complete differentiation in community structure across clusters. This was overwhelmingly driven by species turnover (β.BRAY.BAL = 0.818), with a minor contribution from nestedness resulting from abundance gradients (β.BRAY.GRA = 0.169). The statistical significance of this compositional differentiation was confirmed by PERMANOVA (F₁₅,₁₄₄ = 2.58, p < 0.001; R² = 0.212), which demonstrated that spatial grouping explained a substantial portion of community variation. These results demonstrate that strong species replacement and pronounced abundance gradients jointly shape landscape-level variation, resulting in a highly heterogeneous rangeland environment despite the shared presence of common dominant taxa.

Rangeland Plants Species rank curves

The species rank abundance curves is important to characterise structural shifts in plant communities across the landscape’s 16 clusters.

Show Code

rangelands.data.MC %>% 
  arrange(desc(rowSums(across(where(is.numeric))))) %>% 
  print(n=96)

Show Code

rangelands.data.MC %>% 
  arrange(desc(rowSums(across(where(is.numeric))))) %>% 
  pivot_longer(!species, names_to = "cluster", values_to = "freq") %>% 
  filter(freq!=0) %>% 
  group_by(cluster) %>% 
  mutate(rank = rank(desc(freq))) %>% 
  ggplot(aes(x=rank, y=log(freq+1)))+
  geom_line() + 
  geom_point()+
  theme_bw() +
  facet_wrap(.~ cluster)

Shrub volume,cover,biomass (PLOT LEVEL)


Metric	Description	Formula	Units

Total Cover	The sum of the crown area of all shrubs	A=∑π×(length/2)×(width/2)	m²

Total Volume	The sum of the estimated volume of all shrubs	V=∑1/3×A×height	m³

Show Code

#==================== DATA ====================#
shrubsv <- read_csv("data/raw.data.shrub.csv")

shrubsv <- shrubsv %>% 
  mutate(
    Cluster = as.character(Cluster),
    Plot = as.character(Plot)
  )

# Create full grid of all 160 plots (using characters)
all_plots_grid <- expand_grid(
  Cluster = as.character(1:16), 
  Plot = as.character(1:10)
)

# Calculate individual shrub metrics
shrub_df <- shrubsv %>%
  mutate(
    crown_area_m2 = pi * (shlength / 2) * (shwidth / 2),
    volume_m3 = (1/3) * crown_area_m2 * shheight
  )

#  Plot-Level Summary (n=160)
shrub_plot_summary <- shrub_df %>%
  group_by(Cluster, Plot) %>%
  summarise(
    n_shrubs = n(),
    total_cover_m2 = sum(crown_area_m2, na.rm = TRUE),
    total_vol_m3 = sum(volume_m3, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
 
  right_join(all_plots_grid, by = c("Cluster", "Plot")) %>%
  mutate(across(where(is.numeric), ~replace_na(., 0))) %>%
  mutate(
    cover_percent.sh = (total_cover_m2 / 1000) * 100,
    volume_ha.sh = total_vol_m3 * 10,
    density_ha.sh = n_shrubs * 10
  )

# Cluster-Level Summary (n=16)
shrub.cluster_summary <- shrub_plot_summary %>%
  group_by(Cluster) %>%
  summarise(
    SH.C = mean(cover_percent.sh),
    SH.V = mean(volume_ha.sh),
    .groups = 'drop'
  ) %>%
  arrange(as.integer(Cluster))

shrub.volume.cover<- shrub.cluster_summary %>% 
 dplyr::select(Cluster, SH.C, SH.V)

head(shrub.volume.cover)

# A tibble: 6 × 3
  Cluster   SH.C   SH.V
  <chr>    <dbl>  <dbl>
1 1        8.97   591. 
2 2        4.87   339. 
3 3       51.5   3298. 
4 4        9.89   655. 
5 5        8.85   460. 
6 6        0.242   10.3

Tree volume, cover, biomass (PLOT LEVEL)


Metric	Description	Formula	Units

Total Basal Area	The sum of the cross-sectional area of all tree trunks	A=∑[π×(dbh_cm/100)²/4 ]	m²

Total Volume	The sum of the estimated volume of all trees	V=∑A×height×0.5	m³

Total Cover	The sum of the estimated canopy area of all trees	A=∑π×[(dbh_cm/100)]/2)	m²

Show Code

treesv <- read_csv("data/raw.data.trees.csv")
treesv <- treesv %>% mutate(Cluster = as.character(Cluster))


all_clusters <- tibble(Cluster = as.character(1:16))
tree_df_prepared <- treesv %>%
  mutate(
    dbh_cm = trcircumf / pi,
    plot_num = as.integer(Plot),
    cluster_num = as.integer(Cluster),
    Cluster = as.character(Cluster),
    Plot = as.character(Plot)
  )

# Calculate metrics per tree
tree_metrics <- tree_df_prepared %>%
  mutate(
    basal_area_m2 = (pi * (dbh_cm / 100)^2) / 4,
    volume_m3 = basal_area_m2 * trheight * 0.5,
    crown_cover_m2 = pi * ((dbh_cm / 100) / 2)^2
  )

# Summarise metrics per cluster 
tree_cluster_summary <- tree_metrics %>%
  group_by(Cluster) %>%
  summarise(
    total_basal_area.tr = sum(basal_area_m2, na.rm = TRUE),
    total_volume.tr = sum(volume_m3, na.rm = TRUE),
    total_cover.tr = sum(crown_cover_m2, na.rm = TRUE),
    n_trees = n(),
    .groups = 'drop'
  ) %>%
  mutate(
    basal_area_ha.tr = total_basal_area.tr / 100,
    volume_ha.tr = total_volume.tr / 100,
    density_ha.tr = n_trees / 100,
    cover_percent.tr = (total_cover.tr / 1e6) * 100
  )

# Join with full cluster list and replace NA with 0
tree.cluster_summary <- all_clusters %>%
  left_join(tree_cluster_summary, by = "Cluster") %>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))


tree_reps <- tree_metrics %>%
  group_by(Cluster, Plot) %>%
  summarise(
    total_ba = sum(basal_area_m2, na.rm = TRUE),
    total_vol = sum(volume_m3, na.rm = TRUE),
    total_cov = sum(crown_cover_m2, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(
    basal_area_ha.tr = total_ba * 10,  # Assuming 1000m2 plot to ha conversion
    volume_ha.tr = total_vol * 10,
    cover_percent.tr = (total_cov / 1000) * 100
  ) %>%
  right_join(expand_grid(Cluster = as.character(1:16), Plot = as.character(1:10)), 
             by = c("Cluster", "Plot")) %>%
  mutate(across(where(is.numeric), ~replace_na(., 0)))


# Calculate Mean and Standard Error for each cluster
tree.cluster_summary <- tree_reps %>%
  group_by(Cluster) %>%
  summarise(
    # Mean values per hectare
    Mean_BA_ha = mean(basal_area_ha.tr, na.rm = TRUE),
    Mean_Vol_ha = mean(volume_ha.tr, na.rm = TRUE),
    Mean_Cov_pct = mean(cover_percent.tr, na.rm = TRUE),
    
    # Standard Error 
    SE_BA_ha = sd(basal_area_ha.tr, na.rm = TRUE) / sqrt(n()),
    SE_Vol_ha = sd(volume_ha.tr, na.rm = TRUE) / sqrt(n()),
    SE_Cov_pct = sd(cover_percent.tr, na.rm = TRUE) / sqrt(n()),
    
    .groups = 'drop'
  ) %>%
  arrange(as.numeric(Cluster))

head(tree.cluster_summary)

# A tibble: 6 × 7
  Cluster Mean_BA_ha Mean_Vol_ha Mean_Cov_pct SE_BA_ha SE_Vol_ha SE_Cov_pct
  <chr>        <dbl>       <dbl>        <dbl>    <dbl>     <dbl>      <dbl>
1 1          6.50        24.4       0.0650     3.43     14.6      0.0343   
2 2          1.08         2.16      0.0108     0.523     1.02     0.00523  
3 3          2.29         6.83      0.0229     0.691     2.56     0.00691  
4 4          1.71         5.29      0.0171     1.01      2.96     0.0101   
5 5          5.87        12.1       0.0587     2.66      5.57     0.0266   
6 6          0.00796      0.0139    0.0000796  0.00538   0.00925  0.0000538

Show Code

colnames(tree.cluster_summary)

[1] "Cluster"      "Mean_BA_ha"   "Mean_Vol_ha"  "Mean_Cov_pct" "SE_BA_ha"    
[6] "SE_Vol_ha"    "SE_Cov_pct"

Show Code

tree.cluster_summary<-  tree.cluster_summary %>% dplyr:: select(Cluster,  Mean_BA_ha, Mean_Vol_ha, Mean_Cov_pct)
tree.vol.cov.area <-  tree.cluster_summary %>% rename(TR.A= Mean_BA_ha, TR.V=Mean_Vol_ha, TR.C = Mean_Cov_pct)
head (tree.vol.cov.area)

# A tibble: 6 × 4
  Cluster    TR.A    TR.V      TR.C
  <chr>     <dbl>   <dbl>     <dbl>
1 1       6.50    24.4    0.0650   
2 2       1.08     2.16   0.0108   
3 3       2.29     6.83   0.0229   
4 4       1.71     5.29   0.0171   
5 5       5.87    12.1    0.0587   
6 6       0.00796  0.0139 0.0000796

Shrub and tree statististics at plot level

I used non-parametric Kruskal-Wallis tests to assess spatial differences in shrub and tree cover and volume across 16 clusters, following a significant deviation from normality in the data (Shapiro-Wilk, p < 0.001). The distributions were strongly right-skewed and contained zero values corresponding to plots without woody vegetation. I defined statistical significance for all models at α = 0.05.

Show Code

#TEST ASSUMPTIONS
shrub_norm <- shapiro_test(residuals(lm(volume_ha.sh ~ Cluster, data = shrub_plot_summary)))

tree_norm <- shapiro_test(residuals(lm(volume_ha.tr ~ Cluster, data = tree_reps)))

shrub_var <- leveneTest(volume_ha.sh ~ Cluster, data = shrub_plot_summary)
tree_var <- leveneTest(volume_ha.tr ~ Cluster, data = tree_reps)

print(shrub_norm)

# A tibble: 1 × 3
  variable                                                    statistic  p.value
  <chr>                                                           <dbl>    <dbl>
1 residuals(lm(volume_ha.sh ~ Cluster, data = shrub_plot_sum…     0.273 7.88e-25

Show Code

print(tree_norm)

# A tibble: 1 × 3
  variable                                                statistic  p.value
  <chr>                                                       <dbl>    <dbl>
1 residuals(lm(volume_ha.tr ~ Cluster, data = tree_reps))     0.563 6.31e-20

Show Code

#  KRUSKAL-WALLIS

# --- SHRUBS: Volume and Cover ---
shrub_stats <- shrub_plot_summary %>%
  dplyr:: select(Cluster, cover_percent.sh, volume_ha.sh) %>%
  pivot_longer(cols = c(cover_percent.sh, volume_ha.sh), names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  kruskal_test(Value ~ Cluster) %>%
  add_significance()

# --- TREES: Basal Area, Volume, and Cover ---
tree_stats <- tree_reps %>%
  dplyr:: select(Cluster, basal_area_ha.tr, volume_ha.tr, cover_percent.tr) %>%
  pivot_longer(cols = c(basal_area_ha.tr, volume_ha.tr, cover_percent.tr), 
               names_to = "Variable", values_to = "Value") %>%
  group_by(Variable) %>%
  kruskal_test(Value ~ Cluster) %>%
  add_significance()

# Combine results
woody_spatial_stats <- bind_rows(shrub_stats, tree_stats)
print(woody_spatial_stats)

# A tibble: 5 × 8
  Variable         .y.       n statistic    df          p method        p.signif
  <chr>            <chr> <int>     <dbl> <int>      <dbl> <chr>         <chr>   
1 cover_percent.sh Value   160      53.8    15 0.00000287 Kruskal-Wall… ****    
2 volume_ha.sh     Value   160      54.1    15 0.00000254 Kruskal-Wall… ****    
3 basal_area_ha.tr Value   160      45.6    15 0.0000616  Kruskal-Wall… ****    
4 cover_percent.tr Value   160      45.6    15 0.0000616  Kruskal-Wall… ****    
5 volume_ha.tr     Value   160      45.2    15 0.0000708  Kruskal-Wall… ****

✅ Interpretation: The Kruskal-Wallis tests revealed statistically significant spatial variation across all five measured woody vegetation metrics: shrub metrics (percentage cover and stem volume per hectare) and tree metrics (basal area per hectare, stem volume per hectare, and percentage cover) (p < 0.001) (Table 5).

Point count

Birds metacommunity and alpha diversity

Same diversity measures as above. Metacommunity, alpha diversity, beta and gamma diverisity.

Show Code

birds.data <- read_csv("data/point.counts.csv") %>%
  mutate(across(where(is.numeric), ~ replace_na(., 0))) %>%
  filter(!species %in% c("N", "NA"))%>%
  dplyr:: select(-season)
head(birds.data)

# A tibble: 6 × 97
  species         p1    p2    p3    p4    p5    p6    p7    p8    p9   p10   p11
  <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Abyssinian …     0     0     0     0     0     0     0     0     0     0     0
2 African Gra…     0     0     0     0     0     0     0     0     0     0     0
3 African Gre…     0     1     0     0     0     0     0     0     0     0     0
4 African Pie…     1     1     0     0     1     0     0     0     0     0     0
5 African Pip…     0     0     0     0     0     0     0     0     0     0     0
6 African Swi…     0     0     0     0     0     0     0     0     0     0     0
# ℹ 85 more variables: p12 <dbl>, p13 <dbl>, p14 <dbl>, p15 <dbl>, p16 <dbl>,
#   p17 <dbl>, p18 <dbl>, p19 <dbl>, p20 <dbl>, p21 <dbl>, p22 <dbl>,
#   p23 <dbl>, p24 <dbl>, p25 <dbl>, p26 <dbl>, p27 <dbl>, p28 <dbl>,
#   p29 <dbl>, p30 <dbl>, p31 <dbl>, p32 <dbl>, p33 <dbl>, p34 <dbl>,
#   p35 <dbl>, p36 <dbl>, p37 <dbl>, p38 <dbl>, p39 <dbl>, p40 <dbl>,
#   p41 <dbl>, p42 <dbl>, p43 <dbl>, p44 <dbl>, p45 <dbl>, p46 <dbl>,
#   p47 <dbl>, p48 <dbl>, p49 <dbl>, p50 <dbl>, p51 <dbl>, p52 <dbl>, …

Show Code

birds.data.MC <- birds.data %>% 
pivot_longer(cols= !species, names_to = "cluster", values_to = "freq") %>%
  # filter(rowSums(across(where(is.numeric)))!=0) %>%
  pivot_wider(names_from = species, values_from = freq, values_fn = sum) %>%
  replace(is.na(.), 0) %>% 
  mutate(cluster=paste0(rep(LETTERS[3], 6), rep(1:16, each = 6))) %>% #for 6 point count in each cluster
  relocate(cluster) %>% 
  pivot_longer(!cluster, names_to = "species", values_to = "freq") %>% 
  group_by(cluster, species) %>% 
  summarise_if(is.numeric, ~ sum(.)) %>% 
  pivot_wider(names_from = cluster, values_from = freq)%>% 
  relocate(C10:C16, .after = C9)


mc.birds <- MetaCommunity(birds.data.MC)
summary(mc.birds)

Meta-community (class 'MetaCommunity') made of 14335 individuals in 16 
communities and 265 species. 

Its sample coverage is 0.997419167541811 

Community weights are: 
    C1     C2     C3     C4     C5     C6     C7     C8     C9    C10    C11 
0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 
   C12    C13    C14    C15    C16 
0.0625 0.0625 0.0625 0.0625 0.0625 
Community sample numbers of individuals are: 
  C1   C2   C3   C4   C5   C6   C7   C8   C9  C10  C11  C12  C13  C14  C15  C16 
 924  942  870 1042  789  806  877 1010  947 1073  540  702  795  791  999 1228 
Community sample coverages are: 
       C1        C2        C3        C4        C5        C6        C7        C8 
0.9459155 0.9469642 0.9644207 0.9530211 0.9531630 0.9529121 0.9635587 0.9653700 
       C9       C10       C11       C12       C13       C14       C15       C16 
0.9599044 0.9590369 0.9279492 0.9473745 0.9497487 0.9646624 0.9650010 0.9698843

Show Code

alpha.birds.0 <- DivPart(q=0, mc.birds, Correction = "Best")
alpha.birds.1 <- DivPart(q=1, mc.birds, Correction = "Best")
alpha.birds.2 <- DivPart(q=2, mc.birds, Correction = "Best")


alphas.birds <- data.frame(
  q0b = alpha.birds.0$CommunityAlphaDiversities,
  q1b = alpha.birds.1$CommunityAlphaDiversities,
  q2b = alpha.birds.2$CommunityAlphaDiversities
) %>%
  rownames_to_column("cluster") %>%
  mutate(D.eveb = q2b / q0b,
         cluster = factor(cluster, levels = paste0("C", 1:16))) %>%
  arrange(cluster)

 head(alphas.birds)

  cluster q0b      q1b      q2b    D.eveb
1      C1 138 71.06855 45.20682 0.3275857
2      C2 141 68.15027 42.95914 0.3046748
3      C3 117 61.92999 39.16891 0.3347770
4      C4 137 61.93041 40.05031 0.2923381
5      C5 118 63.86513 44.72455 0.3790216
6      C6 117 59.57104 39.46756 0.3373296

Birds spatial statistics

Show Code

# 1. Prepare data at the Station level (Point Counts)
birds_stations <- birds.data %>%
  # pivot_longer assumes your raw columns are the stations/points
  pivot_longer(cols = !species, names_to = "station", values_to = "freq") %>%
  group_by(station, species) %>%
  summarise(freq = sum(freq, na.rm = TRUE), .groups = "drop") %>%
  pivot_wider(names_from = species, values_from = freq, values_fill = 0) %>%
  # Assign 16 clusters with 6 stations each (ensure your data rows match 16 * 6 = 96)
  mutate(cluster = factor(paste0("C", rep(1:16, each = 6)), 
                          levels = paste0("C", 1:16))) %>%
  relocate(cluster, station)

# 2. Meta-Community & Alpha Diversity at Station Level
# Transpose for entropart: species as rows, stations as columns
mc_birds_stations <- birds_stations %>%
  ungroup() %>%
  dplyr:: select(-cluster) %>%
  column_to_rownames("station") %>%
  t() %>%
  as.data.frame() %>%
  MetaCommunity()

# Hill Numbers (q=0, 1, 2) per station
q0_birds <- DivPart(q=0, mc_birds_stations, Correction = "Best")$CommunityAlphaDiversities
q1_birds <- DivPart(q=1, mc_birds_stations, Correction = "Best")$CommunityAlphaDiversities
q2_birds <- DivPart(q=2, mc_birds_stations, Correction = "Best")$CommunityAlphaDiversities

# Create diversity dataframe for testing
birds_div_stats <- birds_stations %>%
  dplyr:: select(cluster, station) %>%
  mutate(q0.b = q0_birds, q1.b = q1_birds, q2.b = q2_birds)

# 3. PERMANOVA (Community Composition)
# Does the species mix differ significantly between clusters?
bird_species_matrix <- birds_stations %>% ungroup() %>% dplyr:: select(-cluster, -station)

birds_permanova <- adonis2(bird_species_matrix ~ cluster, 
                           data = birds_stations, 
                           method = "bray", 
                           permutations = 999)
print(birds_permanova)

Permutation test for adonis under reduced model
Permutation: free
Number of permutations: 999

adonis2(formula = bird_species_matrix ~ cluster, data = birds_stations, permutations = 999, method = "bray")
         Df SumOfSqs      R2      F Pr(>F)    
Model    15   4.5142 0.36384 3.0503  0.001 ***
Residual 80   7.8929 0.63616                  
Total    95  12.4071 1.00000                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# 4. Normality and Variance Tests
birds_shapiro <- birds_div_stats %>%
  pivot_longer(cols = c(q0.b, q1.b, q2.b), names_to = "q_order", values_to = "value") %>%
  group_by(q_order) %>%
  shapiro_test(value)

levene_birds <- leveneTest(q1.b ~ cluster, data = birds_div_stats)

print(birds_shapiro)

# A tibble: 3 × 4
  q_order variable statistic      p
  <chr>   <chr>        <dbl>  <dbl>
1 q0.b    value        0.984 0.306 
2 q1.b    value        0.981 0.164 
3 q2.b    value        0.976 0.0771

Show Code

print(levene_birds)

Levene's Test for Homogeneity of Variance (center = median)
      Df F value  Pr(>F)  
group 15  2.0879 0.01873 *
      80                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# 5. ANOVA and Kruskal-Wallis
# Running both to ensure consistency, especially if normality is violated
kruskal_birds <- list(
  q0 = kruskal.test(q0.b ~ cluster, data = birds_div_stats),
  q1 = kruskal.test(q1.b ~ cluster, data = birds_div_stats),
  q2 = kruskal.test(q2.b ~ cluster, data = birds_div_stats)
)

print(kruskal_birds)

$q0

    Kruskal-Wallis rank sum test

data:  q0.b by cluster
Kruskal-Wallis chi-squared = 28.721, df = 15, p-value = 0.01747


$q1

    Kruskal-Wallis rank sum test

data:  q1.b by cluster
Kruskal-Wallis chi-squared = 17.637, df = 15, p-value = 0.2823


$q2

    Kruskal-Wallis rank sum test

data:  q2.b by cluster
Kruskal-Wallis chi-squared = 20.733, df = 15, p-value = 0.1456

Show Code

# ANOVA MODELS ---
aov_q0.b <- aov(q0.b ~ cluster, data =birds_div_stats)
aov_q1.b <- aov(q1.b ~ cluster, data = birds_div_stats)
aov_q2.b <- aov(q2.b ~ cluster, data = birds_div_stats)


summary(aov_q0.b)

            Df Sum Sq Mean Sq F value  Pr(>F)   
cluster     15   1922  128.10    2.64 0.00276 **
Residuals   80   3882   48.53                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

summary(aov_q1.b)

            Df Sum Sq Mean Sq F value Pr(>F)
cluster     15  544.8   36.32   1.469  0.137
Residuals   80 1977.7   24.72

Show Code

summary(aov_q2.b)

            Df Sum Sq Mean Sq F value Pr(>F)  
cluster     15  385.7   25.71   1.594 0.0942 .
Residuals   80 1290.8   16.14                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

✅ Interpretation: One-way ANOVA showed that species richness (q = 0) differed significantly among the 16 clusters, whereas Shannon diversity (q = 1) and Simpson diversity (q = 2) did not show significant spatial differences (Table 6).

Birds beta diversity

Show Code

birds.data.MC %>% 
  pivot_longer(cols = !species, names_to = "cluster", values_to = "freq") %>% 
  mutate(freq = as.integer(freq)) %>% 
  pivot_wider(names_from = cluster, values_from = freq, values_fn = sum) -> MC.beta.birds

MC.beta_freq_birds <- MC.beta.birds %>%
  mutate(across(where(is.numeric), ~ ifelse(. > 0, 1, 0)))

# Jaccard Pairwise Dissimilarity
birds_beta_jac <- beta.pair(t(MC.beta_freq_birds[,-1]), index.family = "jac")

# Abundance-based Beta Diversity Partitioning (Bray-Curtis)
beta.part.birds <- beta.multi.abund(t(MC.beta.birds[,-1]), index.family = "bray")
print(beta.part.birds)

$beta.BRAY.BAL
[1] 0.7058388

$beta.BRAY.GRA
[1] 0.0634806

$beta.BRAY
[1] 0.7693194

Show Code

#  SPECIES RANK ABUNDANCE CURVES ---
#  View the most dominant bird species overall
birds.data.MC %>% 
  arrange(desc(rowSums(across(where(is.numeric))))) %>% 
  print(n = 5)

# A tibble: 265 × 17
  species         C1    C2    C3    C4    C5    C6    C7    C8    C9   C10   C11
  <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Common Bulb…    40    55    74    72    40    18    56    57    59    60    45
2 Ring-necked…    54    46    35    42    39    36    39    41    46    49    19
3 Emerald-spo…    21    40    48    39    21    16    41    40    67    36    10
4 Slate-color…    34    41    31    32    27    38    34    43    57    21    16
5 Northern Fi…    60    37    22    48    22    55    41    39    28    57    20
# ℹ 260 more rows
# ℹ 5 more variables: C12 <dbl>, C13 <dbl>, C14 <dbl>, C15 <dbl>, C16 <dbl>

Show Code

# Generate the Rank-Abundance (Whittaker) Plot
birds.data.MC %>% 
  pivot_longer(!species, names_to = "cluster", values_to = "freq") %>% 
  filter(freq > 0) %>% 
  group_by(cluster) %>% 
  # Create ranks within each cluster
  mutate(rank = rank(desc(freq), ties.method = "first")) %>% 
  ggplot(aes(x = rank, y = log(freq + 1), color = cluster)) +
  geom_line(size = 1) + 
  geom_point(size = 2) +
  labs(title = "Bird Species Rank-Abundance Curves per Cluster",
       subtitle = "Log-transformed frequency vs. Species Rank",
       x = "Rank (Most to least abundant)",
       y = "Log(Frequency + 1)") +
  theme_bw() +
  facet_wrap(~ cluster) +
  theme(legend.position = "none")

Remote sensing

Summary of the remote sensing section in a flow chart image

Show Code

# ================================
# Load and prepare data
# ================================
rs.data.monthly <- read_csv("data/S2_CHIRPS_Per_Tile_Monthly_2019_2025.csv") %>%
  dplyr:: select(BSI, NDVI, SAVI, year, month, precip, fid) %>%
  mutate(across(c(BSI, NDVI, SAVI), ~ na_if(., -9999)))

indices <- c("NDVI", "SAVI", "BSI")
indices_resid <- c("resid_NDVI", "resid_SAVI", "resid_BSI")

# ================================
# Mann-Kendall & Sen's slope (Overall)
# ================================
add_stars <- function(p) {
  if (p <= 0.001) return("***")
  else if (p <= 0.01) return("**")
  else if (p <= 0.05) return("*")
  else return("")
}

mk_results_monthly <- lapply(indices, function(idx) {
  df <- na.omit(data.frame(values = rs.data.monthly[[idx]],
                           years = rs.data.monthly$year + (rs.data.monthly$month - 1)/12))
  mk  <- MannKendall(df$values)
  sen <- sens.slope(df$values)
  
  data.frame(
    Index    = idx,
    Tau      = mk$tau,
    Pval     = mk$sl,
    SenSlope = sen$estimates
  )
}) %>% bind_rows()

mk_results_monthly$Signif <- sapply(mk_results_monthly$Pval, add_stars)
kable(mk_results_monthly, caption = "Mann-Kendall Test with Sen's slope estimates (Monthly, 2019–2025)")

Mann-Kendall Test with Sen’s slope estimates (Monthly, 2019–2025)
	Index	Tau	Pval	SenSlope	Signif
Sen’s slope…1	NDVI	-0.4337198	0.0000000	-0.0002269	***
Sen’s slope…2	SAVI	-0.4337024	0.0000000	-0.0003402	***
Sen’s slope…3	BSI	-0.0290817	0.1211612	-0.0000103

Show Code

# ================================
# Correlation Analysis (Monthly)
# ================================
cor_monthly <- cor(rs.data.monthly[, indices], use = "pairwise.complete.obs")
print(cor_monthly)

          NDVI       SAVI        BSI
NDVI  1.000000  1.0000000 -0.6311220
SAVI  1.000000  1.0000000 -0.6311825
BSI  -0.631122 -0.6311825  1.0000000

Show Code

# Heatmap
melted_cor <- melt(cor_monthly)
ggplot(melted_cor, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, limit = c(-1, 1)) +
  theme_minimal(base_size = 13) +
  theme(panel.grid = element_blank()) +
  labs(title = "Correlation between NDVI, SAVI, BSI (Monthly)")

Show Code

# ================================
# RESTREND Analysis (Monthly)
# ================================
compute_residuals <- function(df, index_col, precip_col = "precip") {
  df_clean <- na.omit(df[, c(index_col, precip_col)])
  if (nrow(df_clean) < 3) return(rep(NA, nrow(df)))
  lm_fit <- lm(df_clean[[index_col]] ~ df_clean[[precip_col]])
  residuals_full <- rep(NA, nrow(df))
  residuals_full[!is.na(df[[index_col]]) & !is.na(df[[precip_col]])] <- resid(lm_fit)
  return(residuals_full)
}

# Compute residuals
rs.data.monthly$resid_NDVI <- compute_residuals(rs.data.monthly, "NDVI")
rs.data.monthly$resid_SAVI <- compute_residuals(rs.data.monthly, "SAVI")
rs.data.monthly$resid_BSI  <- compute_residuals(rs.data.monthly, "BSI")

# MK + Sen's slope on residuals
mk_resid_results <- lapply(indices_resid, function(idx) {
  df <- na.omit(data.frame(values = rs.data.monthly[[idx]],
                           time = rs.data.monthly$year + (rs.data.monthly$month-1)/12))
  mk  <- MannKendall(df$values)
  sen <- sens.slope(df$values)
  
  data.frame(
    Index = gsub("resid_", "", idx),
    Tau   = mk$tau,
    Pval  = mk$sl,
    SenSlope = sen$estimates,
    Significance = add_stars(mk$sl)
  )
}) %>% bind_rows()

kable(mk_resid_results, caption = "Rainfall-Adjusted Residual Trend (RESTREND, Monthly, 2019–2025)")

Rainfall-Adjusted Residual Trend (RESTREND, Monthly, 2019–2025)
	Index	Tau	Pval	SenSlope	Significance
Sen’s slope…1	NDVI	-0.4114406	0.000000	-0.0002173	***
Sen’s slope…2	SAVI	-0.4114131	0.000000	-0.0003258	***
Sen’s slope…3	BSI	-0.0408482	0.029477	-0.0000144	*

Show Code

# ================================
# Combined Monthly Trend Plot
# ================================
monthly_long <- rs.data.monthly %>%
  mutate(time = year + (month - 1)/12) %>%
  dplyr:: select(time, NDVI, SAVI, BSI) %>%
  pivot_longer(cols = c(NDVI, SAVI, BSI), names_to = "Index", values_to = "Value") %>%
  na.omit()

monthly_long$Index <- factor(monthly_long$Index, levels = c("NDVI", "SAVI", "BSI"))

sen_slopes <- monthly_long %>%
  group_by(Index) %>%
  summarise(SenSlope = sens.slope(Value)$estimates, .groups = "drop") %>%
  mutate(Label = paste0(Index, " (Sen = ", signif(SenSlope, 3), ")"))

monthly_long_labeled <- monthly_long %>%
  left_join(sen_slopes %>% dplyr:: select(Index, Label), by = "Index")

combined_plot <- ggplot(monthly_long_labeled, aes(x = time, y = Value, color = Label)) +
  geom_line(alpha = 0.8, size = 1) +
  geom_smooth(aes(color = Label), method = "lm", se = FALSE, linetype = "dashed") +
  scale_color_manual(values = c("darkgreen", "darkblue", "darkred")) +
  labs(
    title = "Monthly Trends of NDVI, SAVI, and BSI (2019–2025)",
    x = "Time (Year)", y = "Index Value",
    color = ""
  ) +
  theme_minimal(base_size = 11) +
  theme(
    panel.grid = element_blank(),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.box = "horizontal"
  ) +
  scale_x_continuous(breaks = 2019:2025, labels = 2019:2025)

print(combined_plot)

Show Code

# ================================
# Residual Trend Plot (RESTREND)
# ================================
resid_long <- rs.data.monthly %>%
  mutate(time = year + (month - 1)/12) %>%
  dplyr:: select(time, resid_NDVI, resid_SAVI, resid_BSI) %>%
  pivot_longer(cols = c(resid_NDVI, resid_SAVI, resid_BSI), 
               names_to = "Index", values_to = "Residual") %>%
  na.omit()

resid_long$Index <- factor(
  resid_long$Index, 
  levels = c("resid_NDVI", "resid_SAVI", "resid_BSI"),
  labels = c("NDVI (Residual)", "SAVI (Residual)", "BSI (Residual)")
)

sen_resid_slopes <- resid_long %>%
  group_by(Index) %>%
  summarise(SenSlope = sens.slope(Residual)$estimates, .groups = "drop") %>%
  mutate(Label = paste0(Index, " (Sen = ", signif(SenSlope, 3), ")"))

resid_long_labeled <- resid_long %>%
  left_join(sen_resid_slopes %>%dplyr:: select(Index, Label), by = "Index")

resid_plot <- ggplot(resid_long_labeled, aes(x = time, y = Residual, color = Label)) +
  geom_line(size = 1, alpha = 0.8) +
  geom_smooth(aes(color = Label), method = "lm", se = FALSE, linetype = "dashed", size = 0.9) +
  scale_color_manual(values = c("darkgreen", "darkblue", "darkred")) +
  labs(
    title = "Rainfall-Adjusted Residual Trends (RESTREND)",
    x = "Time (Year)", y = "Residual Index Value",
    color = ""
  ) +
  theme_minimal(base_size = 11) +
  theme(
    panel.grid = element_blank(),
    legend.position = "top",
    legend.direction = "horizontal"
  ) +
  scale_x_continuous(breaks = 2019:2025, labels = 2019:2025)

print(resid_plot)

Show Code

# ================================
# Spatial Analysis by Tile (Summary only - no individual plots)
# ================================
tiles <- unique(rs.data.monthly$fid)


# Mann-Kendall & Sen's Slope per Tile
tile_mk_results <- lapply(tiles, function(tile_id) {
  tile_data <- rs.data.monthly %>% filter(fid == tile_id)
  
  tile_results <- lapply(indices, function(idx) {
    df <- na.omit(data.frame(values = tile_data[[idx]],
                             time = tile_data$year + (tile_data$month - 1)/12))
    if (nrow(df) < 3) {
      return(data.frame(
        Tile = tile_id,
        Index = idx,
        Tau = NA,
        Pval = NA,
        SenSlope = NA,
        N_Obs = nrow(df),
        Trend = "Insufficient data"
      ))
    }
    
    mk <- MannKendall(df$values)
    sen <- sens.slope(df$values)
    
    trend_category <- ifelse(mk$sl > 0.05, "No trend", 
                            ifelse(sen$estimates > 0, "Increasing", "Decreasing"))
    
    data.frame(
      Tile = tile_id,
      Index = idx,
      Tau = mk$tau,
      Pval = mk$sl,
      SenSlope = sen$estimates,
      N_Obs = nrow(df),
      Trend_Category = trend_category
    )
  }) %>% bind_rows()
  
  return(tile_results)
}) %>% bind_rows()

tile_mk_results$Signif <- sapply(tile_mk_results$Pval, function(p) {
  if (is.na(p)) return("")
  if (p <= 0.001) return("***")
  else if (p <= 0.01) return("**")
  else if (p <= 0.05) return("*")
  else return("")
})

tile_mk_results %>%
  kable(caption = "Mann-Kendall Test by Tile (Scrollable)") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%", height = "400px")

Mann-Kendall Test by Tile (Scrollable)
	Tile	Index	Tau	Pval	SenSlope	N_Obs	Trend_Category	Signif
Sen's slope...1	15	NDVI	-0.4819864	0.0000000	-0.0036635	79	Decreasing	***
Sen's slope...2	15	SAVI	-0.4819864	0.0000000	-0.0054941	79	Decreasing	***
Sen's slope...3	15	BSI	0.0107108	0.8922431	0.0000427	79	No trend
Sen's slope...4	10	NDVI	-0.4625122	0.0000000	-0.0038153	79	Decreasing	***
Sen's slope...5	10	SAVI	-0.4625122	0.0000000	-0.0057185	79	Decreasing	***
Sen's slope...6	10	BSI	0.0133074	0.8655348	0.0000766	79	No trend
Sen's slope...7	9	NDVI	-0.4553716	0.0000000	-0.0039241	79	Decreasing	***
Sen's slope...8	9	SAVI	-0.4553716	0.0000000	-0.0058848	79	Decreasing	***
Sen's slope...9	9	BSI	0.0418695	0.5879111	0.0002130	79	No trend
Sen's slope...10	7	NDVI	-0.5040571	0.0000000	-0.0038183	79	Decreasing	***
Sen's slope...11	7	SAVI	-0.5040571	0.0000000	-0.0057269	79	Decreasing	***
Sen's slope...12	7	BSI	-0.0035703	0.9662330	-0.0000158	79	No trend
Sen's slope...13	6	NDVI	-0.3378773	0.0000107	-0.0027521	79	Decreasing	***
Sen's slope...14	6	SAVI	-0.3378773	0.0000107	-0.0041267	79	Decreasing	***
Sen's slope...15	6	BSI	-0.1963648	0.0105601	-0.0011173	79	Decreasing	*
Sen's slope...16	1	NDVI	-0.3995456	0.0000002	-0.0032879	79	Decreasing	***
Sen's slope...17	1	SAVI	-0.3995456	0.0000002	-0.0049304	79	Decreasing	***
Sen's slope...18	1	BSI	-0.0710808	0.3560776	-0.0003594	79	No trend
Sen's slope...19	3	NDVI	-0.4281078	0.0000000	-0.0037655	79	Decreasing	***
Sen's slope...20	3	SAVI	-0.4281078	0.0000000	-0.0056467	79	Decreasing	***
Sen's slope...21	3	BSI	0.0139565	0.8588803	0.0001261	79	No trend
Sen's slope...22	8	NDVI	-0.4969166	0.0000000	-0.0037730	79	Decreasing	***
Sen's slope...23	8	SAVI	-0.4969166	0.0000000	-0.0056592	79	Decreasing	***
Sen's slope...24	8	BSI	0.0009737	0.9932446	0.0000009	79	No trend
Sen's slope...25	16	NDVI	-0.3993671	0.0000002	-0.0031801	80	Decreasing	***
Sen's slope...26	16	SAVI	-0.3993671	0.0000002	-0.0047687	80	Decreasing	***
Sen's slope...27	16	BSI	-0.1050633	0.1690715	-0.0006343	80	No trend
Sen's slope...28	14	NDVI	-0.3755274	0.0000010	-0.0031063	79	Decreasing	***
Sen's slope...29	14	SAVI	-0.3755274	0.0000010	-0.0046577	79	Decreasing	***
Sen's slope...30	14	BSI	-0.0905550	0.2392496	-0.0006675	79	No trend
Sen's slope...31	2	NDVI	-0.4307043	0.0000000	-0.0037510	79	Decreasing	***
Sen's slope...32	2	SAVI	-0.4307043	0.0000000	-0.0056246	79	Decreasing	***
Sen's slope...33	2	BSI	0.0042194	0.9594851	0.0000241	79	No trend
Sen's slope...34	4	NDVI	-0.4930218	0.0000000	-0.0039117	79	Decreasing	***
Sen's slope...35	4	SAVI	-0.4930218	0.0000000	-0.0058661	79	Decreasing	***
Sen's slope...36	4	BSI	0.0483609	0.5309659	0.0002672	79	No trend
Sen's slope...37	11	NDVI	-0.4373417	0.0000000	-0.0036986	80	Decreasing	***
Sen's slope...38	11	SAVI	-0.4373417	0.0000000	-0.0055463	80	Decreasing	***
Sen's slope...39	11	BSI	-0.0056962	0.9436928	-0.0000578	80	No trend
Sen's slope...40	12	NDVI	-0.4066861	0.0000001	-0.0032824	79	Decreasing	***
Sen's slope...41	12	SAVI	-0.4066861	0.0000001	-0.0049218	79	Decreasing	***
Sen's slope...42	12	BSI	-0.1002921	0.1922799	-0.0005700	79	No trend
Sen's slope...43	13	NDVI	-0.4203181	0.0000000	-0.0035241	79	Decreasing	***
Sen's slope...44	13	SAVI	-0.4203181	0.0000000	-0.0052848	79	Decreasing	***
Sen's slope...45	13	BSI	-0.0529049	0.4928401	-0.0003826	79	No trend
Sen's slope...46	5	NDVI	-0.4690036	0.0000000	-0.0038403	79	Decreasing	***
Sen's slope...47	5	SAVI	-0.4690036	0.0000000	-0.0057586	79	Decreasing	***
Sen's slope...48	5	BSI	-0.0269393	0.7284917	-0.0000964	79	No trend

Show Code

# RESTREND Analysis per Tile
tile_restrand_results <- lapply(tiles, function(tile_id) {
  tile_data <- rs.data.monthly %>% filter(fid == tile_id)
  
  tile_results <- lapply(indices_resid, function(idx) {
    df <- na.omit(data.frame(values = tile_data[[idx]],
                             time = tile_data$year + (tile_data$month - 1)/12))
    if (nrow(df) < 3) {
      return(data.frame(
        Tile = tile_id,
        Index = gsub("resid_", "", idx),
        Tau = NA,
        Pval = NA,
        SenSlope = NA,
        N_Obs = nrow(df),
        Trend = "Insufficient data"
      ))
    }
    
    mk <- MannKendall(df$values)
    sen <- sens.slope(df$values)
    
    trend_category <- ifelse(mk$sl > 0.05, "No trend", 
                            ifelse(sen$estimates > 0, "Increasing", "Decreasing"))
    
    data.frame(
      Tile = tile_id,
      Index = gsub("resid_", "", idx),
      Tau = mk$tau,
      Pval = mk$sl,
      SenSlope = sen$estimates,
      N_Obs = nrow(df),
      Trend_Category = trend_category
    )
  }) %>% bind_rows()
  
  return(tile_results)
}) %>% bind_rows()

tile_restrand_results$Signif <- sapply(tile_restrand_results$Pval, function(p) {
  if (is.na(p)) return("")
  if (p <= 0.001) return("***")
  else if (p <= 0.01) return("**")
  else if (p <= 0.05) return("*")
  else return("")
})



tile_restrand_results %>%
  kable(caption = "RESTREND Analysis by Tile (Monthly, 2019–2025)") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  scroll_box(width = "100%", height = "400px")

RESTREND Analysis by Tile (Monthly, 2019–2025)
	Tile	Index	Tau	Pval	SenSlope	N_Obs	Trend_Category	Signif
Sen's slope...1	15	NDVI	-0.4605648	0.0000000	-0.0034746	79	Decreasing	***
Sen's slope...2	15	SAVI	-0.4605648	0.0000000	-0.0052099	79	Decreasing	***
Sen's slope...3	15	BSI	-0.0035703	0.9662330	-0.0000131	79	No trend
Sen's slope...4	10	NDVI	-0.4417397	0.0000000	-0.0036502	79	Decreasing	***
Sen's slope...5	10	SAVI	-0.4417397	0.0000000	-0.0054746	79	Decreasing	***
Sen's slope...6	10	BSI	0.0029211	0.9729836	0.0000124	79	No trend
Sen's slope...7	9	NDVI	-0.4287569	0.0000000	-0.0037443	79	Decreasing	***
Sen's slope...8	9	SAVI	-0.4281078	0.0000000	-0.0056153	79	Decreasing	***
Sen's slope...9	9	BSI	0.0301850	0.6969316	0.0001400	79	No trend
Sen's slope...10	7	NDVI	-0.4761441	0.0000000	-0.0036577	79	Decreasing	***
Sen's slope...11	7	SAVI	-0.4761441	0.0000000	-0.0054849	79	Decreasing	***
Sen's slope...12	7	BSI	-0.0223953	0.7734492	-0.0001138	79	No trend
Sen's slope...13	6	NDVI	-0.3138591	0.0000433	-0.0026105	79	Decreasing	***
Sen's slope...14	6	SAVI	-0.3132100	0.0000449	-0.0039137	79	Decreasing	***
Sen's slope...15	6	BSI	-0.2074002	0.0069160	-0.0011839	79	Decreasing	**
Sen's slope...16	1	NDVI	-0.3722817	0.0000012	-0.0031145	79	Decreasing	***
Sen's slope...17	1	SAVI	-0.3722817	0.0000012	-0.0046706	79	Decreasing	***
Sen's slope...18	1	BSI	-0.0847128	0.2710427	-0.0004762	79	No trend
Sen's slope...19	3	NDVI	-0.4047387	0.0000001	-0.0036202	79	Decreasing	***
Sen's slope...20	3	SAVI	-0.4047387	0.0000001	-0.0054289	79	Decreasing	***
Sen's slope...21	3	BSI	0.0022720	0.9797360	0.0000334	79	No trend
Sen's slope...22	8	NDVI	-0.4741967	0.0000000	-0.0036464	79	Decreasing	***
Sen's slope...23	8	SAVI	-0.4741967	0.0000000	-0.0054679	79	Decreasing	***
Sen's slope...24	8	BSI	-0.0139565	0.8588803	-0.0000597	79	No trend
Sen's slope...25	16	NDVI	-0.3759493	0.0000008	-0.0030128	80	Decreasing	***
Sen's slope...26	16	SAVI	-0.3759493	0.0000008	-0.0045177	80	Decreasing	***
Sen's slope...27	16	BSI	-0.1139240	0.1358240	-0.0007208	80	No trend
Sen's slope...28	14	NDVI	-0.3508601	0.0000048	-0.0029185	79	Decreasing	***
Sen's slope...29	14	SAVI	-0.3508601	0.0000048	-0.0043761	79	Decreasing	***
Sen's slope...30	14	BSI	-0.0983447	0.2010857	-0.0006980	79	No trend
Sen's slope...31	2	NDVI	-0.4099318	0.0000001	-0.0035660	79	Decreasing	***
Sen's slope...32	2	SAVI	-0.4092827	0.0000001	-0.0053475	79	Decreasing	***
Sen's slope...33	2	BSI	-0.0048685	0.9527398	-0.0000288	79	No trend
Sen's slope...34	4	NDVI	-0.4703018	0.0000000	-0.0037747	79	Decreasing	***
Sen's slope...35	4	SAVI	-0.4703018	0.0000000	-0.0056605	79	Decreasing	***
Sen's slope...36	4	BSI	0.0353781	0.6475279	0.0001700	79	No trend
Sen's slope...37	11	NDVI	-0.4183544	0.0000000	-0.0035528	80	Decreasing	***
Sen's slope...38	11	SAVI	-0.4183544	0.0000000	-0.0053281	80	Decreasing	***
Sen's slope...39	11	BSI	-0.0202532	0.7935190	-0.0001263	80	No trend
Sen's slope...40	12	NDVI	-0.3859137	0.0000005	-0.0031461	79	Decreasing	***
Sen's slope...41	12	SAVI	-0.3846154	0.0000005	-0.0047176	79	Decreasing	***
Sen's slope...42	12	BSI	-0.1080818	0.1598834	-0.0006042	79	No trend
Sen's slope...43	13	NDVI	-0.3988965	0.0000002	-0.0033579	79	Decreasing	***
Sen's slope...44	13	SAVI	-0.3988965	0.0000002	-0.0050351	79	Decreasing	***
Sen's slope...45	13	BSI	-0.0632911	0.4114953	-0.0004217	79	No trend
Sen's slope...46	5	NDVI	-0.4456345	0.0000000	-0.0037186	79	Decreasing	***
Sen's slope...47	5	SAVI	-0.4456345	0.0000000	-0.0055756	79	Decreasing	***
Sen's slope...48	5	BSI	-0.0373255	0.6293805	-0.0001685	79	No trend

Show Code

# ================================
# Spatial Heatmaps
# ================================
# Original trends heatmap
ggplot(tile_mk_results, aes(x = factor(Tile), y = Index, fill = SenSlope)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "green", mid = "white", 
                       midpoint = 0, name = "Sen's Slope") +
  geom_text(aes(label = Signif), size = 3, vjust = 1) +
  labs(title = "Spatial Distribution of Sen's Slope by Tile",
       x = "Tile ID", y = "Index") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Show Code

# RESTREND heatmap
ggplot(tile_restrand_results, aes(x = factor(Tile), y = Index, fill = SenSlope)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "green", mid = "white", 
                       midpoint = 0, name = "Sen's Slope (residual)") +
  geom_text(aes(label = Signif), size = 3, vjust = 1) +
  labs(title = "Spatial Distribution of RESTREND Sen's Slope by Tile",
       x = "Tile ID", y = "Index") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Show Code

# ================================
# Summary Statistics
# ================================
trend_summary <- tile_mk_results %>%
  filter(!is.na(Trend_Category)) %>%
  group_by(Index, Trend_Category) %>%
  summarise(
    Count = n(),
    Percentage = round(n() / length(tiles) * 100, 1),
    .groups = 'drop'
  ) %>%
  arrange(Index, Trend_Category)

restrand_summary <- tile_restrand_results %>%
  filter(!is.na(Trend_Category)) %>%
  group_by(Index, Trend_Category) %>%
  summarise(
    Count = n(),
    Percentage = round(n() / length(tiles) * 100, 1),
    .groups = 'drop'
  ) %>%
  arrange(Index, Trend_Category)

kable(trend_summary, caption = "Trend Summary by Index Across Tiles")

Trend Summary by Index Across Tiles
Index	Trend_Category	Count	Percentage
BSI	Decreasing	1	6.2
BSI	No trend	15	93.8
NDVI	Decreasing	16	100.0
SAVI	Decreasing	16	100.0

Show Code

kable(restrand_summary, caption = "RESTREND Trend Summary by Index Across Tiles")

RESTREND Trend Summary by Index Across Tiles
Index	Trend_Category	Count	Percentage
BSI	Decreasing	1	6.2
BSI	No trend	15	93.8
NDVI	Decreasing	16	100.0
SAVI	Decreasing	16	100.0

Show Code

head (rs.data.monthly)

# A tibble: 6 × 10
      BSI  NDVI  SAVI  year month precip   fid resid_NDVI resid_SAVI resid_BSI
    <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>      <dbl>      <dbl>     <dbl>
1  0.0840 0.456 0.684  2019     1   35.1    15   -0.0183     -0.0275   0.111  
2  0.0114 0.587 0.880  2019     1   35.1    10    0.113       0.169    0.0386 
3 -0.0691 0.660 0.990  2019     1   34.6     9    0.185       0.278   -0.0420 
4 -0.0248 0.603 0.905  2019     1   34.5     7    0.129       0.193    0.00227
5  0.173  0.370 0.554  2019     1   35.1     6   -0.105      -0.157    0.200  
6  0.107  0.467 0.700  2019     1   35.2     1   -0.00775    -0.0117   0.134

Show Code

head(tile_restrand_results)

                Tile Index          Tau         Pval      SenSlope N_Obs
Sen's slope...1   15  NDVI -0.460564762 1.938778e-09 -3.474611e-03    79
Sen's slope...2   15  SAVI -0.460564762 1.938778e-09 -5.209917e-03    79
Sen's slope...3   15   BSI -0.003570269 9.662330e-01 -1.311369e-05    79
Sen's slope...4   10  NDVI -0.441739708 8.545870e-09 -3.650179e-03    79
Sen's slope...5   10  SAVI -0.441739708 8.545870e-09 -5.474568e-03    79
Sen's slope...6   10   BSI  0.002921130 9.729836e-01  1.241231e-05    79
                Trend_Category Signif
Sen's slope...1     Decreasing    ***
Sen's slope...2     Decreasing    ***
Sen's slope...3       No trend       
Sen's slope...4     Decreasing    ***
Sen's slope...5     Decreasing    ***
Sen's slope...6       No trend

Show Code

# Create RESTREND cluster summary
restrand_cluster_df <- tile_restrand_results %>%
  rename(Cluster = Tile) %>%
  dplyr:: select(Cluster, Index, Tau, Pval, SenSlope, Trend_Category) %>%
  pivot_wider(
    names_from = Index,
    values_from = c(Tau, Pval, SenSlope, Trend_Category),
    names_glue = "{Index}_{.value}"
  ) %>%
  arrange(Cluster)
head(restrand_cluster_df)

# A tibble: 6 × 13
  Cluster NDVI_Tau SAVI_Tau  BSI_Tau NDVI_Pval SAVI_Pval BSI_Pval NDVI_SenSlope
    <dbl>    <dbl>    <dbl>    <dbl>     <dbl>     <dbl>    <dbl>         <dbl>
1       1   -0.372   -0.372 -0.0847   1.23e- 6  1.23e- 6  0.271        -0.00311
2       2   -0.410   -0.409 -0.00487  9.17e- 8  9.61e- 8  0.953        -0.00357
3       3   -0.405   -0.405  0.00227  1.33e- 7  1.33e- 7  0.980        -0.00362
4       4   -0.470   -0.470  0.0354   8.80e-10  8.80e-10  0.648        -0.00377
5       5   -0.446   -0.446 -0.0373   6.32e- 9  6.32e- 9  0.629        -0.00372
6       6   -0.314   -0.313 -0.207    4.33e- 5  4.49e- 5  0.00692      -0.00261
# ℹ 5 more variables: SAVI_SenSlope <dbl>, BSI_SenSlope <dbl>,
#   NDVI_Trend_Category <chr>, SAVI_Trend_Category <chr>,
#   BSI_Trend_Category <chr>

Show Code

head(rs.data.monthly)

# A tibble: 6 × 10
      BSI  NDVI  SAVI  year month precip   fid resid_NDVI resid_SAVI resid_BSI
    <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>      <dbl>      <dbl>     <dbl>
1  0.0840 0.456 0.684  2019     1   35.1    15   -0.0183     -0.0275   0.111  
2  0.0114 0.587 0.880  2019     1   35.1    10    0.113       0.169    0.0386 
3 -0.0691 0.660 0.990  2019     1   34.6     9    0.185       0.278   -0.0420 
4 -0.0248 0.603 0.905  2019     1   34.5     7    0.129       0.193    0.00227
5  0.173  0.370 0.554  2019     1   35.1     6   -0.105      -0.157    0.200  
6  0.107  0.467 0.700  2019     1   35.2     1   -0.00775    -0.0117   0.134

Show Code

# Calculate mean residuals per cluster
residuals_cluster <- rs.data.monthly %>%
  group_by(fid) %>%
  summarise(
    NDVI.R = mean(resid_NDVI, na.rm = TRUE),
    SAVI.R = mean(resid_SAVI, na.rm = TRUE),
    BSI.R  = mean(resid_BSI,  na.rm = TRUE)
  ) %>%
  rename(Cluster = fid)
head(residuals_cluster)

# A tibble: 6 × 4
  Cluster   NDVI.R   SAVI.R    BSI.R
    <dbl>    <dbl>    <dbl>    <dbl>
1       1 -0.00347 -0.00521  0.00771
2       2  0.0146   0.0219  -0.0155 
3       3  0.0287   0.0430  -0.0233 
4       4  0.0102   0.0153  -0.0134 
5       5 -0.0339  -0.0509   0.0289 
6       6 -0.0255  -0.0382   0.0239

Show Code

# This tells you if the site is improving or degrading regardless of rain.

residuals_cluster_slope <- tile_restrand_results %>%
  dplyr:: select(Tile, Index, SenSlope) %>%
  pivot_wider(names_from = Index, 
              values_from = SenSlope, 
              names_prefix = "R.S.") %>%
  rename(Cluster = Tile)
head(residuals_cluster_slope)

# A tibble: 6 × 4
  Cluster R.S.NDVI R.S.SAVI    R.S.BSI
    <dbl>    <dbl>    <dbl>      <dbl>
1      15 -0.00347 -0.00521 -0.0000131
2      10 -0.00365 -0.00547  0.0000124
3       9 -0.00374 -0.00562  0.000140 
4       7 -0.00366 -0.00548 -0.000114 
5       6 -0.00261 -0.00391 -0.00118  
6       1 -0.00311 -0.00467 -0.000476

Remote Sensing Time Series Analysis for environmental monitoring, to evaluate vegetation health and soil characteristics across 16 (tiles) from 2019 to 2025, using monthly satellite-derived data. I focus on; NDVI (Normalized Difference Vegetation Index): Measures vegetation “greenness” or health. SAVI (Soil Adjusted Vegetation Index): Similar to NDVI but corrects for soil brightness in areas with sparse vegetation. BSI (Bare Soil Index): Used to identify areas with little to no vegetation (bare soil). Statistical Trend Analysis Mann-Kendall Test: Determines if there is a statistically significant monotonic upward or downward trend. Sen’s Slope: Estimates the magnitude of that trend. RESTREND Analysis (RESTREND stands for Residual Trend Analysis). It performs a linear regression between the indices and rainfall (CHIRPS data). It calculates the Residuals (the difference between what the vegetation should look like based on rain vs. what it actually looks like). If the residuals show a downward trend, it suggests land degradation is occurring due to human activity or other factors independent of rainfall.

Acoustics

Indices

I calculated a suite of standard acoustic indices using Kaleidoscope Pro software (Version 5.8.1; Wildlife Acoustics, Inc.), which provides specialised tools for processing and analysing environmental sound recordings (Bradfer-Lawrence et al., 2019; Greenhalgh et al., 2021; Hyland et al., 2023). I conducted the acoustic indices analysis using the Non-Bat Analysis Mode with default parameter settings to maintain methodological consistency and ensure replicability of results. I batch-processed the audio recordings, segmenting larger files to improve the granularity of the analysis. The software simultaneously calculated the following indices: the Acoustic Complexity Index (ACI), Acoustic Diversity Index (ADI), Acoustic Evenness Index (AEI), Bioacoustics Index (BI), Normalised Difference Soundscape Index (NDSI), and Entropy Index (H).

Show Code

#acoustic indices
acousticindex <- read_csv("data/acousticindex.csv")
head (acousticindex)

# A tibble: 6 × 16
  FOLDER   `IN FILE` CHANNEL OFFSET DURATION DATE  TIME   HOUR  MEAN    SD    SH
  <chr>    <chr>       <dbl>  <dbl>    <dbl> <chr> <tim> <dbl> <dbl> <dbl> <dbl>
1 "Cluste… SMU10315…       0      0       58 10/0… 13:02    21 4741. 4801. 0.771
2 "Cluste… SMU10315…       0      0       58 10/0… 33:02    19 6034. 5882. 0.860
3 "Cluste… SMU10315…       0      0       58 10/0… 03:02    20 6203. 5681. 0.862
4 "Cluste… SMU10315…       0      0       58 10/0… 43:02    18 4211. 3722. 0.847
5 "Cluste… SMU10315…       0      0       58 10/0… 53:02    18 4338. 3406. 0.799
6 "Cluste… SMU10315…       0      0       58 10/0… 23:02    21 5223. 5060. 0.795
# ℹ 5 more variables: NDSI <dbl>, ACI <dbl>, ADI <dbl>, AEI <dbl>, BI <dbl>

Show Code

acoustic.index <- acousticindex %>%
  mutate(
    cluster = paste0("c", parse_number(FOLDER)),
    date_parsed = dmy(DATE), 
    season = format(date_parsed, "%B %Y")
  ) %>%
  dplyr:: select(season, cluster, SH, NDSI, ACI, ADI, AEI, BI)

head(acoustic.index)

# A tibble: 6 × 8
  season     cluster    SH    NDSI   ACI   ADI    AEI    BI
  <chr>      <chr>   <dbl>   <dbl> <dbl> <dbl>  <dbl> <dbl>
1 April 2024 c10     0.771  0.812   151.  2.02 0.357  101. 
2 April 2024 c10     0.860 -0.0681  153.  2.28 0.111   77.2
3 April 2024 c10     0.862  0.547   153.  2.29 0.0980  71.9
4 April 2024 c10     0.847 -0.310   159.  2.30 0       15.6
5 April 2024 c10     0.799  0.0690  156.  2.29 0.0741  52.8
6 April 2024 c10     0.795  0.852   151.  2.10 0.308   90.2

Show Code

# Calculate mean values by cluster
acoustic_cluster_means <-  acoustic.index %>%
  group_by(cluster) %>%
  summarise(
    SH. = mean(SH, na.rm = TRUE),
    NDSI. = mean(NDSI, na.rm = TRUE),
    ACI. = mean(ACI, na.rm = TRUE),
    ADI. = mean(ADI, na.rm = TRUE),
    AEI. = mean(as.numeric(AEI), na.rm = TRUE),  
    BI. = mean(BI, na.rm = TRUE),
    .groups = 'drop'
  )
head(acoustic_cluster_means)

# A tibble: 6 × 7
  cluster   SH. NDSI.  ACI.  ADI.  AEI.   BI.
  <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 c1      0.674 0.267  168.  1.56 0.525  55.4
2 c10     0.806 0.617  155.  2.17 0.164  53.4
3 c11     0.786 0.586  166.  1.99 0.274  42.0
4 c12     0.823 0.716  159.  2.19 0.154  47.1
5 c13     0.835 0.663  159.  2.17 0.172  46.6
6 c14     0.820 0.792  166.  2.15 0.198  59.4

Birdnet

To identify avian vocalisations within the acoustic recordings, I employed an automated analysis workflow using BirdNET (v2.4.0; Kahl et al., 2023). BirdNET is a deep neural network developed for the automated detection and classification of bird vocalisations, drawing on extensive global reference libraries (Kahl et al., 2021). The model can identify more than 6,000 bird species worldwide based on diagnostic spectral features in the audio signal (Kahl et al., 2023). To ensure comparability across data types, the BirdNET acoustic detections were grouped and summarised by season, cluster, and species to determine the total detection frequency per species. Subsequently, community diversity metrics were calculated following the same procedures used for the vegetation and manual point-count datasets. This consistent application of Hill numbers (q0, q1, q2) and beta diversity partitioning enabled a direct comparison of community structure and spatial heterogeneity across all three survey methods.

Show Code

birdnet_data <- read_csv("data/BirdNET_CombinedTable.csv")
head(birdnet_data)

# A tibble: 6 × 14
  `Start (s)` `End (s)` `Scientific name`   `Common name` Confidence File    lat
        <dbl>     <dbl> <chr>               <chr>              <dbl> <chr> <dbl>
1          11        14 Apus melba          Alpine Swift       0.692 "E:\…    -1
2          44        47 Apus melba          Alpine Swift       0.596 "E:\…    -1
3           3         6 Apus melba          Alpine Swift       0.513 "E:\…    -1
4          18        21 Apus melba          Alpine Swift       0.532 "E:\…    -1
5          25        28 Apus melba          Alpine Swift       0.512 "E:\…    -1
6           6         9 Chalcomitra senega… Scarlet-ches…      0.642 "E:\…    -1
# ℹ 7 more variables: lon <dbl>, week <dbl>, overlap <dbl>, sensitivity <dbl>,
#   min_conf <dbl>, species_list <lgl>, model <chr>

Show Code

birdnet_processed <- birdnet_data %>%
  filter(Confidence > 0.75) %>%#Higher confidence thresholds (0.7-0.8) will reduce the need for manual verification, while potentially missing some rare or poorly-recorded species.
 dplyr::  select(species = `Common name`, File) %>%
  separate(File, 
           into = c("Drive", "Project", "Category", "SeasonFolder",
                    "ClusterFolder", "SubFolder", "FileName"),
           sep = "\\\\", remove = FALSE) %>%
  mutate(
    cluster = paste0("c", str_extract(ClusterFolder, "\\d+")), 
    season = paste(
      str_extract(SeasonFolder, "^[A-Za-z]{3}"),
      str_extract(SeasonFolder, "\\d{2}(?=\\.)|\\d{2}$")
    )
  ) %>%
 dplyr::  select(species, cluster, season)
#group_by(species) %>%   filter(n() <= 10) %>%   ungroup() #Requiring a species to be detected a minimum number of times (e.g., ten detections) before including it in results can help reduce misidentifications. Confidence thresholds around 0.5 provide a balance between precision and recall at the assemblage level, while requiring a minimum number of detections per species reduces false positives from sporadic misidentifications. ##With a threshold of 0.5, the total number of species is 427. Adding a filter to remove the species detected less than 10 times reduces the species to 230.
head(birdnet_processed)

# A tibble: 6 × 3
  species                 cluster season
  <chr>                   <chr>   <chr> 
1 Scarlet-chested Sunbird c1      Oct 25
2 Scarlet-chested Sunbird c1      Oct 25
3 Scarlet-chested Sunbird c1      Oct 25
4 Scarlet-chested Sunbird c1      Oct 25
5 Scarlet-chested Sunbird c1      Oct 25
6 Scarlet-chested Sunbird c1      Oct 25

Show Code

#n_distinct(birdnet_processed$species)
#View(distinct(birdnet_processed, species))



# Convert the long detection list into a wide Community Matrix for meta analysis
birdnet_matrix <- birdnet_processed %>%
  group_by(season, species, cluster) %>%
  summarise(detections = n(), .groups = "drop") %>%
pivot_wider(names_from = cluster, values_from = detections, values_fill = 0) %>%
arrange(season, species)
head (birdnet_matrix)

# A tibble: 6 × 18
  season species       c11   c12   c16    c3    c4    c8    c2   c10   c14    c6
  <chr>  <chr>       <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 Apr 24 Abyssinian…    86   118   216     9     4   294     0     0     0     0
2 Apr 24 African Cr…     0     1     0     0     0     0     0     0     0     0
3 Apr 24 African Fi…     0     0     0     0     0     0     3     0     0     0
4 Apr 24 African Pa…     0     0     0     0     0     1     0     0     0     0
5 Apr 24 African Sc…     2     4   106     2     4   292    24     4    33     2
6 Apr 24 African Wo…     1     1    17     2     0     0     0    10     1    25
# ℹ 6 more variables: c9 <int>, c5 <int>, c1 <int>, c13 <int>, c7 <int>,
#   c15 <int>

Show Code

birdnet_MC_input <- birdnet_processed %>%
  group_by(cluster, species) %>%
  summarise(freq = n(), .groups = "drop") %>%
  pivot_wider(names_from = cluster, values_from = freq, values_fill = 0) %>%
  dplyr:: select(-species)%>%
  relocate(paste0("c", 1:16))
head(birdnet_MC_input)

# A tibble: 6 × 16
     c1    c2    c3    c4    c5    c6    c7    c8    c9   c10   c11   c12   c13
  <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1     9  2218   590   847   179   190  1635  4007     0  1257   957   365   310
2     3    70     2    13     0     0    44    20    96    82     0     0     0
3    43     8     0    91     0     8     5    44    21     0     0     1     0
4     9     0     0     0     0     5     0     0     0     0     0     0     0
5     2  1572  1753  2618   843   105  4317  4684     0   774   695   745   518
6    31   136    95   437    52    28   143   419    51   129    59   264   101
# ℹ 3 more variables: c14 <int>, c15 <int>, c16 <int>

Show Code

mc.birdnet <- MetaCommunity(birdnet_MC_input)
summary(mc.birdnet)

Meta-community (class 'MetaCommunity') made of 94927 individuals in 16 
communities and 284 species. 

Its sample coverage is 0.999357404945198 

Community weights are: 
    c1     c2     c3     c4     c5     c6     c7     c8     c9    c10    c11 
0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 0.0625 
   c12    c13    c14    c15    c16 
0.0625 0.0625 0.0625 0.0625 0.0625 
Community sample numbers of individuals are: 
   c1    c2    c3    c4    c5    c6    c7    c8    c9   c10   c11   c12   c13 
 3771  8325  5973  6195  2936  3481  9412 11128  3134  4515  5722  5403  4730 
  c14   c15   c16 
 7587  2802  9813 
Community sample coverages are: 
       c1        c2        c3        c4        c5        c6        c7        c8 
0.9949632 0.9978381 0.9973216 0.9959649 0.9935307 0.9962663 0.9974504 0.9980232 
       c9       c10       c11       c12       c13       c14       c15       c16 
0.9933005 0.9966789 0.9961558 0.9964841 0.9955610 0.9963099 0.9925084 0.9989811

Show Code

alpha.birdnet.0 <- DivPart(q=0, mc.birdnet, Correction = "Best")
alpha.birdnet.1 <- DivPart(q=1, mc.birdnet, Correction = "Best")
alpha.birdnet.2 <- DivPart(q=2, mc.birdnet, Correction = "Best")


alphas.birdnet <- data.frame(
  q0.birdnett = alpha.birdnet.0$CommunityAlphaDiversities,
  q1.birdnett= alpha.birdnet.1$CommunityAlphaDiversities,
  q2.birdnett= alpha.birdnet.2$CommunityAlphaDiversities
) %>%
  rownames_to_column("cluster") %>%
  mutate(
    D..birdnett = q2.birdnett / q0.birdnett,
    cluster = factor(cluster, levels = paste0("c", 1:16))
  ) %>%
  arrange(cluster)

head(alphas.birdnet)

  cluster q0.birdnett q1.birdnett q2.birdnett D..birdnett
1      c1          78    12.24472    6.555346  0.08404290
2      c2          85    13.90944    7.508378  0.08833386
3      c3          66    10.88466    6.781391  0.10274835
4      c4          93    10.19266    4.591689  0.04937301
5      c5          66    14.50354    7.694577  0.11658450
6      c6          75    11.10612    4.855278  0.06473704

Show Code

MC.beta.birdnet <- birdnet_processed %>%
  group_by(species, cluster) %>%
  summarise(freq = n(), .groups = "drop") %>%
  pivot_wider(names_from = cluster, values_from = freq, values_fill = 0) %>%
  relocate(paste0("c", 1:16), .after = species)


MC.beta_freq_birdnet <- MC.beta.birdnet %>%
  mutate(across(where(is.numeric), ~ ifelse(. > 0, 1, 0)))

birdnet_beta_jac <- beta.pair(t(MC.beta_freq_birdnet[,-1]), index.family = "jac")

beta.part.birdnet <- beta.multi.abund(t(MC.beta.birdnet[,-1]), index.family = "bray")

head(beta.part.birdnet)

$beta.BRAY.BAL
[1] 0.7900691

$beta.BRAY.GRA
[1] 0.07577612

$beta.BRAY
[1] 0.8658452

✅ Interpretation: The analysis of acoustic data across the study sites revealed substantial spatial heterogeneity in both BirdNET’s classification performance and the resulting avian community structures. The BirdNET analysis comprised 94,927 detections across 16 communities, each with equal sample coverage weighting (Wi = 0.0625). A total of 284 species were detected. Overall sample coverage was exceptionally high (0.999), indicating that the sampling effort captured nearly all species present in the regional pool. Individual community coverage values ranged from 0.992 to 0.998, consistently demonstrating near-complete inventory at each site. Total Bray–Curtis dissimilarity was 0.866, partitioned into a balanced variation component (turnover = 0.790) and an abundance gradient component (nestedness = 0.076). This pronounced dominance of turnover over nestedness reveals that compositional differences between communities are driven almost entirely by species replacement rather than differences in species richness or abundance of shared species. The results confirm a highly diverse, well-sampled assemblage in which BirdNET performed reliably, with distinct species assemblages characterising each of the 16 recording locations.

Combined dataset

These data set comprises 11 acoustic metrics: Bioacoustic Index (BI), Normalised Difference Soundscape Index (NDSI), Acoustic Diversity Index (ADI), Acoustic Evenness Index (AEI), Acoustic Complexity Index (ACI), Acoustic Entropy (SH), ultrasonic metrics, and BirdNET-derived avian diversity metrics (q0, q1, q2, D). I initially compiled 17 environmental predictors spanning vegetation structural variables (shrub volume per hectare, tree volume per hectare, percentage cover per hectare, tree basal area), rangeland plant species diversity metrics (calculated using Hill numbers: q0, q1, q2, and evenness D), bird community diversity metrics (Hill’s numbers derived from manual point counts), soil health components (PC1 and PC2), and residual slope values of remotely sensed vegetation indices (Normalized Difference Vegetation Index [NDVI], Soil Adjusted Vegetation Index [SAVI], and Bare Soil Index [BSI]).

Show Code

# source https://www.sthda.com/english/wiki/ggally-r-package-extension-to-ggplot2-for-correlation-matrix-and-survival-plots-r-software-and-data-visualization
# Create the dataset
# https://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/
# #https://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/
#https://www.sthda.com/english/articles/38-regression-model-validation/157-cross-validation-essentials-in-r/
#https://www.sthda.com/english/articles/38-regression-model-validation/158-regression-model-accuracy-metrics-r-square-aic-bic-cp-and-more/
# https://www.sthda.com/english/articles/40-regression-analysis/168-multiple-linear-regression-in-r/
#https://www.sthda.com/english/articles/39-regression-model-diagnostics/160-multicollinearity-essentials-and-vif-in-r/


# Combine all data sources
big.dataset <- data.frame(soil.pc, alphas.rangelands, shrub.volume.cover, 
                          tree.vol.cov.area, residuals_cluster_slope, 
                          alphas.birds, alphas.birdnet, acoustic_cluster_means) %>%
  dplyr::select(-matches("^cluster", ignore.case = TRUE), cluster) %>%
  relocate(cluster)

print(colnames(big.dataset))

 [1] "cluster"     "soil.pc1"    "soil.pc2"    "q0"          "q1"         
 [6] "q2"          "SH.C"        "SH.V"        "TR.A"        "TR.V"       
[11] "TR.C"        "R.S.NDVI"    "R.S.SAVI"    "R.S.BSI"     "q0b"        
[16] "q1b"         "q2b"         "D.eveb"      "q0.birdnett" "q1.birdnett"
[21] "q2.birdnett" "D..birdnett" "SH."         "NDSI."       "ACI."       
[26] "ADI."        "AEI."        "BI."

Correlation matrix

I visualised exploratory relationships among variables using scatterplot matrices implemented with the ggpairs function (GGally package, Schloerke et al., 2025) facilitating assessment of distributional properties, non-linear patterns, outliers, and preliminary multicollinearity.

Show Code

# Correlation matrix
big.dataset %>% 
  dplyr::select(where(is.numeric)) %>%
  ggpairs(
  big.dataset %>% 
    dplyr::select(where(is.numeric)), upper = list(continuous = GGally::wrap("cor", size = 2)), 
    lower = list(continuous = GGally::wrap("points", size = 0.3)),  
    diag = list(continuous = GGally::wrap("densityDiag", size = 0.3))) +
    theme(text = element_text(size = 6))

PCA and Scaling

To address high multicollinearity among the three remote sensing indices (Pearson’s |r| > 0.96), I performed a Principal Component Analysis (PCA) using the FactoMineR package (Husson et al., 2026). I retained the first principal component (remote_pc1) as a composite proxy for remotely sensed vegetation and soil health. This primary axis captured 98.4% of the total variance, reducing the environmental predictor set from 18 to 16 variables.

I scaled all environmental and acoustic variables to zero mean and unit variance using the scale() function to ensure comparability of coefficients.

Show Code

# Variable Groups
rangelands.div  <- c("q0", "q1", "q2")
veg.structure   <- c("SH.C", "SH.V", "TR.A", "TR.V", "TR.C")
remote.sensing  <- c("R.S.NDVI", "R.S.SAVI", "R.S.BSI")
birds.div       <- c("q0b", "q1b", "q2b", "D.eveb")
soil.v          <- c("soil.pc1", "soil.pc2")

acoustic_indices <- c("NDSI.", "ADI.", "ACI.", "SH.", "BI.")
birdnet_vars <- c("q0.birdnett", "q1.birdnett", "q2.birdnett", "D..birdnett")

# Remote sensing PCA and Scaling
# -----------------------------------------------------------------
# Prepare remote sensing data for pca
remote_data_clean <- big.dataset[, remote.sensing]
remote_data_clean <- remote_data_clean[complete.cases(remote_data_clean), ]
res.pca.remote <- PCA(remote_data_clean, scale.unit = TRUE, graph = FALSE)
big.dataset$remote_pc1 <- res.pca.remote$ind$coord[, 1]
big.dataset <- big.dataset[, !(colnames(big.dataset) %in% remote.sensing)]

fviz_pca_var(res.pca.remote, col.var = "contrib", 
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             title = "Remote Sensing Variable Contribution")

Show Code

# scaling list 
env_to_scale <- c(rangelands.div, veg.structure, birds.div, soil.v, "remote_pc1")

# Scaling environmental variables
scaled_env_part <- scale(big.dataset[, env_to_scale])
scaled_env <- as.data.frame(scaled_env_part)

# Scaling acoustic metrics
scaled_acoustic <- as.data.frame(scale(big.dataset[, acoustic_indices]))
scaled_birdnet  <- as.data.frame(scale(big.dataset[, birdnet_vars]))

VIF

I evaluated multicollinearity among the 16 remaining predictors using the Variance Inflation Factor (VIF) with the vifstep function in the usdmpackage (Naimi, 2023). I applied a threshold of VIF < 3 for variable retention. I first examined VIF within each predictor group separately, then conducted a global VIF analysis on all scaled predictors simultaneously. In addition to VIF-based variable selection, I evaluated the necessity of variable removal by comparing the Global Permutation Test and adjusted R² across successive model iterations. The final model demonstrated an improved adjusted R² (0.299) and a more significant global p-value (0.006) relative to the saturated model. This iterative process yielded a final set of four robust predictors: rangeland plant richness (q0), tree volume per hectare, soil properties principal component 2, and the remote sensing composite (remote_pc1).

Show Code

# Variance Inflation Factor (VIF)
#--------------------------------
print("===== VIF ANALYSIS =====")

[1] "===== VIF ANALYSIS ====="

Show Code

vif_rangelands <- vifstep(big.dataset[, rangelands.div], th = 3)
print(vif_rangelands)

1 variables from the 3 input variables have collinearity problem: 
 
q1 

After excluding the collinear variables, the linear correlation coefficients ranges between: 
min correlation ( q2 ~ q0 ):  0.5388536 
max correlation ( q2 ~ q0 ):  0.5388536 

---------- VIFs of the remained variables -------- 
  Variables      VIF
1        q0 1.409172
2        q2 1.409172

Show Code

vif_veg <- vifstep(big.dataset[, veg.structure], th = 3)
print(vif_veg)

3 variables from the 5 input variables have collinearity problem: 
 
TR.A SH.V TR.C 

After excluding the collinear variables, the linear correlation coefficients ranges between: 
min correlation ( TR.V ~ SH.C ):  -0.06663559 
max correlation ( TR.V ~ SH.C ):  -0.06663559 

---------- VIFs of the remained variables -------- 
  Variables     VIF
1      SH.C 1.00446
2      TR.V 1.00446

Show Code

vif_birds <- vifstep(big.dataset[, birds.div], th = 3)
print(vif_birds)

2 variables from the 4 input variables have collinearity problem: 
 
q2b q0b 

After excluding the collinear variables, the linear correlation coefficients ranges between: 
min correlation ( D.eveb ~ q1b ):  0.3966637 
max correlation ( D.eveb ~ q1b ):  0.3966637 

---------- VIFs of the remained variables -------- 
  Variables      VIF
1       q1b 1.186721
2    D.eveb 1.186721

Show Code

vif_soil <- vifstep(big.dataset[, soil.v], th = 3)
print(vif_soil)

No variable from the 2 input variables has collinearity problem. 

The linear correlation coefficients ranges between: 
min correlation ( soil.pc2 ~ soil.pc1 ):  -0.3461851 
max correlation ( soil.pc2 ~ soil.pc1 ):  -0.3461851 

---------- VIFs of the remained variables -------- 
  Variables      VIF
1  soil.pc1 1.136162
2  soil.pc2 1.136162

Show Code

# Run VIF on ALL scaled predictors together (global)
vif_scaled <- vifstep(scaled_env, th = 3)
print(vif_scaled)

7 variables from the 15 input variables have collinearity problem: 
 
TR.A q2b SH.V q1b q1 TR.C q2 

After excluding the collinear variables, the linear correlation coefficients ranges between: 
min correlation ( D.eveb ~ TR.V ):  0.02491861 
max correlation ( soil.pc2 ~ SH.C ):  0.5142444 

---------- VIFs of the remained variables -------- 
   Variables      VIF
1         q0 1.786894
2       SH.C 2.603972
3       TR.V 1.712278
4        q0b 2.446289
5     D.eveb 1.782379
6   soil.pc1 1.986627
7   soil.pc2 2.375860
8 remote_pc1 1.338640

Show Code

# Extract variables that survived VIF
clean_env_vars <- vif_scaled@results$Variables

# Variables to force-remove
vars_to_remove <- c("D.eveb", "q0b", "SH.C", "soil.pc1")
clean_env_vars <- setdiff(clean_env_vars, vars_to_remove)

#Environmental predictor variables
final_env <- scaled_env[, clean_env_vars, drop = FALSE]

head(final_env)

          q0        TR.V   soil.pc2 remote_pc1
1  0.2719800  3.04090602 -1.3264940 -0.3471401
2 -1.3397532 -0.51411776 -0.8335848 -0.7118992
3 -0.5338866  0.23227723  1.5026787 -1.0068042
4  1.2390199 -0.01439318  0.7031248 -0.6150020
5  0.5943266  1.07990837 -0.2874805  2.3627116
6 -0.5338866 -0.85707142 -1.6012977  0.7598561

Show Code

#In addition to VIF-based variable selection, the necessity of variable removal was evaluated by comparing the Global Permutation Test and Adjusted R^2 across successive model iterations. The final model demonstrated an improved Adjusted R^2(0.299) and a more significant global p-value (0.006) relative to the saturated model. These improvements indicate that the excluded variables (e.g., soil.pc1) did not provide independent explanatory power beyond the retained primary predictors such as TR.V, and their removal reduced redundancy while enhancing model parsimony.
# Put indices and birdnet results in 1 data frame


#Acoustic response variables
clean_ac_vars <- colnames(scaled_acoustic)
clean_bird_vars <- colnames(scaled_birdnet)

final_acoustic <- cbind(scaled_acoustic[, clean_ac_vars, drop = FALSE],
  scaled_birdnet[, clean_bird_vars, drop = FALSE])
head(final_acoustic)

        NDSI.       ADI.         ACI.        SH.        BI. q0.birdnett
1 -2.37098306 -2.9672595  2.293767742 -3.1912234  0.2584496  -0.3611739
2  0.02677755  0.7088839 -0.906979026  0.3346851  0.0543062   0.2337007
3 -0.18638363 -0.4052535  1.731067946 -0.2199623 -1.1249175  -1.3809589
4  0.70777989  0.8271282  0.004255906  0.7711956 -0.6026690   0.9135574
5  0.34211520  0.7033138  0.016368253  1.0935979 -0.6555527  -1.3809589
6  1.23385742  0.5891985  1.742629777  0.7069498  0.6626209  -0.6161201
  q1.birdnett q2.birdnett D..birdnett
1 -0.41475752 -0.37847874  -0.3086470
2 -0.09068099 -0.11750194  -0.2090861
3 -0.67952459 -0.31657900   0.1253663
4 -0.81423854 -0.91620379  -1.1130757
5  0.02497373 -0.06651349   0.4463997
6 -0.63641248 -0.84402309  -0.7565915

Regression

I fitted individual multiple linear regression models using the lm() function to assess the influence of the four retained environmental predictors on each of the nine acoustic response variables (five acoustic indices and four BirdNET-derived metrics).

Show Code

#  REGRESSION MODELS WITH VIF-SELECTED VARIABLES
#================================================================
#("Model: Y ~ q0 + TR.V + soil.pc2 + remote_pc1")

model_data <- cbind(final_env, final_acoustic)

# ----- Acoustic indices models -----
model_NDSI <- lm(NDSI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_ADI <- lm(ADI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_ACI <- lm(ACI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_SH <- lm(SH. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_BI <- lm(BI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

# ----- BirdNET metrics models -----
model_q0_birdnet <- lm(q0.birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_q1_birdnet <- lm(q1.birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_q2_birdnet <- lm(q2.birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)
model_D_birdnet <- lm(D..birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

# Print summaries with titles
cat("\n--- SUMMARY: NDSI MODEL ---\n"); print(summary(model_NDSI))


--- SUMMARY: NDSI MODEL ---


Call:
lm(formula = NDSI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.73060 -0.02539  0.28625  0.37543  0.55863 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -2.562e-16  1.904e-01   0.000  1.00000   
q0           3.721e-01  2.297e-01   1.620  0.13350   
TR.V        -8.230e-01  2.207e-01  -3.730  0.00333 **
soil.pc2    -1.289e-02  2.127e-01  -0.061  0.95275   
remote_pc1   1.946e-01  2.028e-01   0.960  0.35777   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7615 on 11 degrees of freedom
Multiple R-squared:  0.5748,    Adjusted R-squared:  0.4201 
F-statistic: 3.717 on 4 and 11 DF,  p-value: 0.03773

Show Code

cat("\n--- SUMMARY: ADI MODEL ---\n"); print(summary(model_ADI))


--- SUMMARY: ADI MODEL ---


Call:
lm(formula = ADI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.81116 -0.27556 -0.02375  0.22698  1.29602 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.693e-17  1.445e-01   0.000 1.000000    
q0           3.192e-01  1.743e-01   1.832 0.094212 .  
TR.V        -7.629e-01  1.674e-01  -4.556 0.000822 ***
soil.pc2     2.443e-01  1.614e-01   1.514 0.158263    
remote_pc1   4.889e-01  1.539e-01   3.177 0.008807 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5778 on 11 degrees of freedom
Multiple R-squared:  0.7552,    Adjusted R-squared:  0.6661 
F-statistic: 8.481 on 4 and 11 DF,  p-value: 0.002244

Show Code

cat("\n--- SUMMARY: ACI MODEL ---\n"); print(summary(model_ACI))


--- SUMMARY: ACI MODEL ---


Call:
lm(formula = ACI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.1813 -0.3676 -0.1190  0.1897  1.9624 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept) -1.512e-15  2.375e-01   0.000    1.000  
q0          -2.432e-01  2.865e-01  -0.849    0.414  
TR.V         5.718e-01  2.752e-01   2.077    0.062 .
soil.pc2    -1.603e-01  2.653e-01  -0.604    0.558  
remote_pc1  -1.530e-01  2.529e-01  -0.605    0.558  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9499 on 11 degrees of freedom
Multiple R-squared:  0.3384,    Adjusted R-squared:  0.09776 
F-statistic: 1.406 on 4 and 11 DF,  p-value: 0.2951

Show Code

cat("\n--- SUMMARY: SH MODEL ---\n"); print(summary(model_SH))


--- SUMMARY: SH MODEL ---


Call:
lm(formula = SH. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.82021 -0.35331  0.03974  0.24534  0.95021 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept) -3.297e-16  1.457e-01   0.000  1.00000   
q0           2.552e-01  1.758e-01   1.451  0.17464   
TR.V        -7.073e-01  1.689e-01  -4.187  0.00152 **
soil.pc2     3.411e-01  1.628e-01   2.095  0.06008 . 
remote_pc1   4.958e-01  1.552e-01   3.194  0.00855 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5829 on 11 degrees of freedom
Multiple R-squared:  0.7508,    Adjusted R-squared:  0.6602 
F-statistic: 8.285 on 4 and 11 DF,  p-value: 0.002461

Show Code

cat("\n--- SUMMARY: BI MODEL ---\n"); print(summary(model_BI))


--- SUMMARY: BI MODEL ---


Call:
lm(formula = BI. ~ q0 + TR.V + soil.pc2 + remote_pc1, data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9004 -0.4003  0.1368  0.4109  1.2044 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept) -2.887e-16  2.069e-01   0.000   1.0000  
q0           3.732e-01  2.496e-01   1.495   0.1630  
TR.V        -4.395e-01  2.398e-01  -1.833   0.0940 .
soil.pc2    -7.016e-01  2.311e-01  -3.036   0.0113 *
remote_pc1  -1.708e-01  2.203e-01  -0.775   0.4545  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8275 on 11 degrees of freedom
Multiple R-squared:  0.4979,    Adjusted R-squared:  0.3153 
F-statistic: 2.727 on 4 and 11 DF,  p-value: 0.08454

Show Code

cat("\n--- SUMMARY: BIRDNET q0 (Richness) ---\n"); print(summary(model_q0_birdnet))


--- SUMMARY: BIRDNET q0 (Richness) ---


Call:
lm(formula = q0.birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, 
    data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.6438 -0.4540  0.0720  0.5325  1.5422 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.560e-16  2.553e-01   0.000    1.000
q0           1.674e-01  3.080e-01   0.543    0.598
TR.V        -3.138e-01  2.959e-01  -1.061    0.312
soil.pc2    -8.718e-02  2.852e-01  -0.306    0.766
remote_pc1  -3.786e-01  2.719e-01  -1.392    0.191

Residual standard error: 1.021 on 11 degrees of freedom
Multiple R-squared:  0.2354,    Adjusted R-squared:  -0.04264 
F-statistic: 0.8466 on 4 and 11 DF,  p-value: 0.5243

Show Code

cat("\n--- SUMMARY: BIRDNET q1 (Shannon) ---\n"); print(summary(model_q1_birdnet))


--- SUMMARY: BIRDNET q1 (Shannon) ---


Call:
lm(formula = q1.birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, 
    data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.0720 -0.4678 -0.1296  0.4344  2.1233 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  6.476e-17  2.255e-01   0.000   1.0000  
q0          -6.893e-01  2.721e-01  -2.533   0.0278 *
TR.V         7.235e-02  2.614e-01   0.277   0.7871  
soil.pc2     1.318e-01  2.520e-01   0.523   0.6113  
remote_pc1   1.891e-01  2.402e-01   0.787   0.4479  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9022 on 11 degrees of freedom
Multiple R-squared:  0.4031,    Adjusted R-squared:  0.1861 
F-statistic: 1.857 on 4 and 11 DF,  p-value: 0.1883

Show Code

cat("\n--- SUMMARY: BIRDNET q2 (Simpson) ---\n"); print(summary(model_q2_birdnet))


--- SUMMARY: BIRDNET q2 (Simpson) ---


Call:
lm(formula = q2.birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, 
    data = model_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.9251 -0.6626 -0.1216  0.4746  1.9605 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  9.057e-18  2.322e-01   0.000    1.000  
q0          -6.829e-01  2.802e-01  -2.437    0.033 *
TR.V         1.176e-01  2.692e-01   0.437    0.671  
soil.pc2     2.306e-01  2.594e-01   0.889    0.393  
remote_pc1   1.290e-01  2.474e-01   0.522    0.612  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9289 on 11 degrees of freedom
Multiple R-squared:  0.3672,    Adjusted R-squared:  0.1371 
F-statistic: 1.596 on 4 and 11 DF,  p-value: 0.2437

Show Code

cat("\n--- SUMMARY: BIRDNET D (Berger-Parker) ---\n"); print(summary(model_D_birdnet))


--- SUMMARY: BIRDNET D (Berger-Parker) ---


Call:
lm(formula = D..birdnett ~ q0 + TR.V + soil.pc2 + remote_pc1, 
    data = model_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.08805 -0.54984 -0.00289  0.20929  1.53248 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  4.152e-17  2.153e-01   0.000   1.0000  
q0          -7.502e-01  2.598e-01  -2.887   0.0148 *
TR.V         2.266e-01  2.496e-01   0.908   0.3833  
soil.pc2     3.249e-01  2.406e-01   1.351   0.2040  
remote_pc1   2.882e-01  2.294e-01   1.256   0.2350  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8614 on 11 degrees of freedom
Multiple R-squared:  0.4559,    Adjusted R-squared:  0.258 
F-statistic: 2.304 on 4 and 11 DF,  p-value: 0.1234

Show Code

model_list <- list(NDSI = model_NDSI, ADI = model_ADI,ACI = model_ACI,  SH = model_SH,  BI = model_BI,q0_birdnet = model_q0_birdnet,q1_birdnet = model_q1_birdnet,q2_birdnet = model_q2_birdnet,D_birdnet = model_D_birdnet)

# Extract coefficients
coef_table <- do.call(rbind, lapply(names(model_list), function(name) {
  tidy(model_list[[name]]) %>%
    mutate(Response = name) %>%
    dplyr::select(Response, term, estimate, std.error, statistic, p.value)
}))


cat("\n--- SUMMARY: coef_table ---\n"); print(summary(coef_table))


--- SUMMARY: coef_table ---

   Response             term              estimate          std.error     
 Length:45          Length:45          Min.   :-0.82300   Min.   :0.1445  
 Class :character   Class :character   1st Qu.:-0.17083   1st Qu.:0.2028  
 Mode  :character   Mode  :character   Median : 0.00000   Median :0.2375  
                                       Mean   :-0.03429   Mean   :0.2285  
                                       3rd Qu.: 0.23055   3rd Qu.:0.2598  
                                       Max.   : 0.57179   Max.   :0.3080  
   statistic           p.value         
 Min.   :-4.55617   Min.   :0.0008218  
 1st Qu.:-0.77526   1st Qu.:0.0939884  
 Median : 0.00000   Median :0.3833199  
 Mean   :-0.08767   Mean   :0.4345601  
 3rd Qu.: 0.95981   3rd Qu.:0.7655387  
 Max.   : 3.19355   Max.   :1.0000000

Show Code

# Extract model fit statistics
fit_table <- do.call(rbind, lapply(names(model_list), function(name) {
  glance(model_list[[name]]) %>%
    mutate(Response = name) %>%
   dplyr:: select(Response, r.squared, adj.r.squared, sigma, statistic, p.value, AIC, BIC)
}))

print(fit_table)

# A tibble: 9 × 8
  Response   r.squared adj.r.squared sigma statistic p.value   AIC   BIC
  <chr>          <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl> <dbl>
1 NDSI           0.575        0.420  0.761     3.72  0.0377   42.7  47.3
2 ADI            0.755        0.666  0.578     8.48  0.00224  33.9  38.5
3 ACI            0.338        0.0978 0.950     1.41  0.295    49.8  54.4
4 SH             0.751        0.660  0.583     8.29  0.00246  34.1  38.8
5 BI             0.498        0.315  0.827     2.73  0.0845   45.4  50.0
6 q0_birdnet     0.235       -0.0426 1.02      0.847 0.524    52.1  56.7
7 q1_birdnet     0.403        0.186  0.902     1.86  0.188    48.1  52.8
8 q2_birdnet     0.367        0.137  0.929     1.60  0.244    49.1  53.7
9 D_birdnet      0.456        0.258  0.861     2.30  0.123    46.6  51.3

Model diagnostic

Model diagnostics included residual normality (Shapiro–Wilk test, Q–Q plots), homoscedasticity (Breusch–Pagan test), and identification of influential observations (Cook’s distance threshold 4/n). I verified that all models met the assumptions of linear regression.

Show Code

# MODEL DIAGNOSTICS
# ================================================================
diag_results <- list()
par(mfrow = c(2, 3))

for(i in 1:ncol(final_acoustic)) {
  var_name <- colnames(final_acoustic)[i]
  #model  
  diag_model <- lm(final_acoustic[,i] ~ ., data = final_env)
  
  plot(diag_model, which = 1, main = paste("Resid vs Fit:", var_name))
  plot(diag_model, which = 2, main = paste("Q-Q Plot:", var_name))
  plot(diag_model, which = 3, main = paste("Scale-Location:", var_name))
  
  cooksd <- cooks.distance(diag_model)
  plot(cooksd, type = "h", main = paste("Cook's D:", var_name))
  abline(h = 4/length(cooksd), col = "red", lty = 2)
  
  shapiro_p <- shapiro.test(residuals(diag_model))$p.value
  bp_p <- lmtest::bptest(diag_model)$p.value
  max_cook <- max(cooksd)
  
  mtext(paste("Shapiro p =", round(shapiro_p, 4)), side = 3, line = 0, cex = 0.7)
  mtext(paste("BP test p =", round(bp_p, 4)), side = 3, line = -1, cex = 0.7)
  
  diag_results[[var_name]] <- data.frame(
    Acoustic_Variable = var_name,    
    Shapiro_Wilk_p = shapiro_p,
    BP_Test_p = bp_p,   
    Max_CooksD = max_cook
  )
}

Show Code

# Combine all stored results into one final data frame
diagnostic_summary <- do.call(rbind, diag_results)
print(diagnostic_summary)

            Acoustic_Variable Shapiro_Wilk_p BP_Test_p Max_CooksD
NDSI.                   NDSI.   0.0009169035 0.9713572  0.2198775
ADI.                     ADI.   0.3211183476 0.5007853  1.1465698
ACI.                     ACI.   0.0546668827 0.8026158  0.9269415
SH.                       SH.   0.9263333695 0.4901983  4.5860448
BI.                       BI.   0.2340409723 0.6318211  2.4507828
q0.birdnett       q0.birdnett   0.9301599703 0.6825122  0.5737676
q1.birdnett       q1.birdnett   0.0943523835 0.6547287  0.3480016
q2.birdnett       q2.birdnett   0.1790918429 0.5880221  0.3060009
D..birdnett       D..birdnett   0.1191691981 0.5282265  0.6162897

RDA

To test the overarching hypothesis that acoustic composition varies along land degradation gradients, I performed Redundancy Analysis (RDA) using the vegan package (Oksanen et al., 2025). The RDA model constrained the acoustic response matrix by the four retained environmental predictors. I assessed the global significance of the RDA model using permutation tests (999 permutations) implemented with the permute package (Simpson et al., 2026) to assess: Global model significance, Significance of constrained axes, Significance of individual environmental predictors. Adjusted R² was calculated to estimate the true explanatory power while accounting for model complexity. Effect sizes were quantified as the proportion of constrained variance explained by each predictor and by each RDA axis.

Show Code

# REDUNDANCY ANALYSIS (RDA)
# ================================================================
# This tests if the landscape variation explains the acoustic variation
rda_model <- rda(final_acoustic ~ ., data = as.data.frame(final_env))
print(summary(rda_model))


Call:
rda(formula = final_acoustic ~ q0 + TR.V + soil.pc2 + remote_pc1,      data = as.data.frame(final_env)) 

Partitioning of variance:
              Inertia Proportion
Total           9.000     1.0000
Constrained     4.379     0.4865
Unconstrained   4.621     0.5135

Eigenvalues, and their contribution to the variance 

Importance of components:
                        RDA1   RDA2    RDA3    RDA4    PC1     PC2     PC3
Eigenvalue            2.3540 1.3294 0.51308 0.18212 2.5073 0.78874 0.56840
Proportion Explained  0.2616 0.1477 0.05701 0.02024 0.2786 0.08764 0.06316
Cumulative Proportion 0.2616 0.4093 0.46627 0.48651 0.7651 0.85273 0.91589
                          PC4     PC5    PC6      PC7       PC8       PC9
Eigenvalue            0.50895 0.15361 0.0720 0.013235 0.0074381 0.0017538
Proportion Explained  0.05655 0.01707 0.0080 0.001471 0.0008265 0.0001949
Cumulative Proportion 0.97244 0.98951 0.9975 0.998979 0.9998051 1.0000000

Accumulated constrained eigenvalues
Importance of components:
                        RDA1   RDA2   RDA3    RDA4
Eigenvalue            2.3540 1.3294 0.5131 0.18212
Proportion Explained  0.5376 0.3036 0.1172 0.04159
Cumulative Proportion 0.5376 0.8412 0.9584 1.00000

Show Code

# Global Significance Test
anova_global <- anova(rda_model, permutations = 999)
print(anova_global)

Permutation test for rda under reduced model
Permutation: free
Number of permutations: 999

Model: rda(formula = final_acoustic ~ q0 + TR.V + soil.pc2 + remote_pc1, data = as.data.frame(final_env))
         Df Variance      F Pr(>F)   
Model     4   4.3786 2.6055  0.007 **
Residual 11   4.6214                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Significance by Axis
anova_axis <- anova(rda_model, by = "axis", permutations = 999)
print(anova_axis)

Permutation test for rda under reduced model
Forward tests for axes
Permutation: free
Number of permutations: 999

Model: rda(formula = final_acoustic ~ q0 + TR.V + soil.pc2 + remote_pc1, data = as.data.frame(final_env))
         Df Variance      F Pr(>F)  
RDA1      1   2.3540 5.6031  0.015 *
RDA2      1   1.3294 3.1642  0.175  
RDA3      1   0.5131 1.2212  0.577  
RDA4      1   0.1821 0.4335  0.790  
Residual 11   4.6214                
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Show Code

# Adjusted R-squared (The true explanatory power)
adj_r2_rda <- RsquareAdj(rda_model)
print(adj_r2_rda)

$r.squared
[1] 0.4865095

$adj.r.squared
[1] 0.2997857

Show Code

# Axis variance
axis_importance <- summary(rda_model)$concont$importance

# Predictor significance
anova_terms <- anova(rda_model, by = "terms", permutations = 999)




# ================================================================
# ================================================================
# RDA BIPLOT 
# ================================================================

# Extract RDA scores
site_scores <- as.data.frame(vegan::scores(rda_model, display = "sites", scaling = 2))
env_scores  <- as.data.frame(vegan::scores(rda_model, display = "bp", scaling = 2))
env_scores$Variable <- rownames(env_scores)

# Extract variance explained by axes
var_exp <- summary(rda_model)$cont$importance[2, 1:2] * 100

# Create plot
rda_plot <- ggplot2::ggplot() +
  
  # Plot sites
  ggplot2::geom_point(  data = site_scores,  ggplot2::aes(x = RDA1, y = RDA2),
    colour = "grey40",alpha = 0.7,  size = 3) +
  
  # Environmental arrows
  ggplot2::geom_segment(data = env_scores,  ggplot2::aes(x = 0, y = 0, xend = RDA1, yend = RDA2),   arrow = ggplot2::arrow(length = grid::unit(0.25, "cm")),
    colour = "darkblue",  linewidth = 1 ) +
  
  # Variable labels
  ggplot2::geom_text(  data = env_scores,  ggplot2::aes(x = RDA1, y = RDA2, label = Variable),colour = "darkblue",fontface = "bold",vjust = -0.6,  size = 4  ) +
  
  # Axis scaling
  ggplot2::coord_fixed() +
  
  # Labels with explained variance
  ggplot2::labs(
    title = "Redundancy Analysis of Acoustic Diversity and Environmental Drivers",
    x = paste0("RDA1 (", round(var_exp[1], 1), "%)"),
    y = paste0("RDA2 (", round(var_exp[2], 1), "%)")
  ) +
  
  ggplot2::theme_bw(base_size = 14) +
  ggplot2::theme(
    panel.grid = ggplot2::element_blank(),
    # Changed size to 12 here
    plot.title = ggplot2::element_text(hjust = 0.5, face = "bold", size = 12) 
  )

# Print plot
print(rda_plot)

Final Correlation visualisation

Show Code

#("Model: Y ~ q0 + TR.V + soil.pc2 + remote_pc1")

#model_data <- cbind(final_env, final_acoustic)

cor_data <- model_data %>%
  ungroup() %>%
  dplyr::select(BI.,  NDSI., ADI., ACI.,  q1.birdnett, q0.birdnett, q2.birdnett,D..birdnett, TR.V, soil.pc2, q0, remote_pc1) %>%
  drop_na()

manual_order <- c("BI.",  "NDSI.", "ADI.", "ACI.", "q0.birdnett", "q1.birdnett",
 "q2.birdnett", "D..birdnett",  "TR.V",  "soil.pc2", "soil.pc2", "q0", "remote_pc1")


# 1. Prepare data in your exact manual order
cor_data <- big.dataset %>%
  ungroup() %>%
  dplyr::select(all_of(manual_order)) %>%
  drop_na()

# 2. Define custom function for circular significant correlations
my_sig_circles <- function(data, mapping, ...) {
  x <- eval_data_col(data, mapping$x)
  y <- eval_data_col(data, mapping$y)
  
  # Calculate correlation and p-value
  ct <- cor.test(x, y, use = "complete.obs")
  corr_val <- ct$estimate
  p_val <- ct$p.value
  
  # Logic: Only plot if p < 0.05
  if (p_val < 0.10) {
    # Assign labels based on p-value thresholds
    stars <- ifelse(p_val < 0.001, "***", 
             ifelse(p_val < 0.01, "**", 
             ifelse(p_val < 0.05, "*", "."))) # Dot for p < 0.10
    
    ggplot(data = data.frame(x = 1, y = 1), aes(x = x, y = y)) +
      # Change from tile to circle (point)
      # Size is absolute value of correlation to show strength
      geom_point(aes(color = corr_val), size = 9) + 
      scale_color_gradient2(low = "#B2182B", mid = "white", high = "#2166AC", 
                            midpoint = 0, limits = c(-1, 1)) +
      geom_text(label = paste0(round(corr_val, 2), "\n", stars),
                size = 2.5, fontface = "bold", color = "black") +
      theme_void() +
      theme(legend.position = "none")
  } else {
    ggplot() + theme_void()
  }
}

# 3. Generate the plot with 45-degree axis rotation
ggpairs(cor_data,
        upper = list(continuous = my_sig_circles),
        diag = "blank", 
        lower = "blank") +
 ggplot2:: theme_minimal() +
  theme(
    # Rotates the top axis labels (variable names) to 45 degrees
    strip.text.x = element_text(size = 7, face = "bold", angle = 45, hjust = 0),
    strip.text.y = element_text(size = 7, face = "bold",angle = 0),
    panel.border = element_rect(colour = "grey95", fill = NA),
    panel.grid.major = element_blank()
  )