library(dplyr)library(tidyverse)library(FactoMineR)library(factoextra)library(ggplot2)library(purrr) library(tidyr)library(ggtern)library(cluster)library(caret) # For RMSElibrary(gridExtra) # For arranging plots side by sidelibrary(readr)Soil_Data <-read_csv("soil.data.csv")colnames(Soil_Data)
# Select and rename relevant columnssoil_data <- Soil_Data %>% dplyr::select(-c(1:4, 30, 29, 28)) %>%rename(EC =`EC (uS/cm)`, P =`m3.P (mg/kg)`, Al =`m3.Al (mg/kg)`, B =`m3.B (mg/kg)`, Ca =`m3.Ca (mg/kg)`, Fe =`m3.Fe (mg/kg)`, K =`m3.K (mg/kg)`, Mg =`m3.Mg (mg/kg)`, Na =`m3.Na (mg/kg)`, CEC =`CEC (cmolc/kg)`, ExAc =`ExAc (cmolc/kg)`, TN =`TN (%)`, SOC =`SOC (%)`, Clay =`Clay (%)`, Sand =`Sand (%)`, Silt =`Silt (%)` ) # Convert cluster to factorsoil_data$cluster <-as.factor(soil_data$cluster) # Replace only specific NA values with 0soil_data <- soil_data %>%mutate(across(where(is.numeric), ~replace_na(., 0))) # Replace NAs with 0 for numeric columns# Compute the mean for each plot within each clustersoil_data <- soil_data %>%group_by(cluster, plot) %>%summarise(across(where(is.numeric), mean, na.rm =TRUE)) # View the resultcolnames(soil_data)
soil.data <- soil_data[,-c(1:4)]# Scale the datasoil_data_scaled <-scale(soil.data)##This step will return the proportion of variance explained by each principal component and additional information, such as eigenvalues and the percentage of variance each PC explains.# Perform PCApc.results.soil <-PCA(soil_data_scaled, scale.unit =TRUE, graph =FALSE)# Correlation circle - variable contribution to PCsfviz_pca_var(pc.results.soil, col.var ="contrib", gradient.cols =c("#00AFBB", "#E7B800", "#FC4E07"), repel =TRUE)
# Extract eigenvalueseigenvalues <- pc.results.soil$eig[,1] # First column contains eigenvalueshead(eigenvalues)
#✅ Interpetation: ##The eigenvalues represent the amount of variance captured by each principal component (PC).#The first five components have eigenvalues greater than 1, suggesting they retain meaningful information.#Cumulative variance:#PC1 explains 39.23% of the total variance.#PC2 adds 25.07%, bringing the cumulative variance to 64.30%#Variable Contributions to Principal Components (Correlation with PCs):##PC1:These properties are strongly affected by fertilisers especially NPK#Strong positive correlations: CEC (0.97), Ca (0.96), Mg (0.93), B (0.89), EC (0.77), pH (0.76).#Strong negative correlation: Al (-0.49), Fe (-0.54), ExAc (-0.39).#Suggests PC1 represents cation exchange capacity (CEC) and soil fertility.#PC2:Indirect Effects of Fertilisers#Strongest positive correlations: TN (0.88), SOC (0.81), Clay (-0.86), Sand (0.76).#Indicates PC2 captures organic matter and soil texture gradients.#K mean clustering # Determine the optimal number of clusters #Elbow method; Measures the total within-cluster sum of squares (WSS) as the number of clusters increases. It identifies the "elbow point," where adding more clusters no longer significantly reduces WSS.# Determine the optimal number of clusters using the elbow methodfviz_nbclust(pc.results.soil$ind$coord, kmeans, method ="wss") +labs(title ="Optimal Clusters (Elbow Method)")
# Gap statistic for determining clusters# Gap statistic; Compares the total within-cluster variation for different numbers of clusters with the expected variation under a null reference distribution.set.seed(123) # Ensures reproducibilitygap_stat <-clusGap(pc.results.soil$ind$coord, FUN = kmeans, nstart =25, K.max =10, B =50)fviz_gap_stat(gap_stat) +labs(title ="Optimal Clusters (Gap Statistic)")
# Perform K-means clustering on PCA scoresset.seed(123)k.groups <-kmeans(pc.results.soil$ind$coord, centers =4, nstart =25)#nstart = 25. This means that R will try 25 different random starting assignments and then select the best results corresponding to the one with the lowest within cluster variation.# Visualise PCA results with k-means clusters}fviz_pca_ind(pc.results.soil, col.ind =as.factor(k.groups$cluster), label ="var", # Label the variables in the biplotaddEllipses =TRUE, # Add ellipses to group pointsgeom ="text") +# Use text only, no scatter pointsgeom_text(aes(label =as.factor(soil_data$cluster), color =as.factor(k.groups$cluster)), size =3, vjust =-0.5) # Add cluster numbers with matching colors
SNGI
# Extract variance explained by PC1 and PC2variance_explained <- pc.results.soil$eig[,2] /sum(pc.results.soil$eig[,2]) # Ensure it's properly normalised# Compute SNGI using the first two principal componentssoil_data$SNGI <- pc.results.soil$ind$coord[,1] * variance_explained[1] + pc.results.soil$ind$coord[,2] * variance_explained[2]# Classify clusters into four categories based on SNGI quantilessoil_data$Health_Status <-cut(soil_data$SNGI, breaks =quantile(soil_data$SNGI, probs =c(0, 0.25, 0.5, 0.75, 1), na.rm =TRUE), labels =c("Very Low Nutrients", "Low Nutrients", "Moderate Nutrients", "Excessive Nutrients"), include.lowest =TRUE)# PCA Biplot with Health Classificationpca_plot <-fviz_pca_ind(pc.results.soil, col.ind =as.factor(soil_data$Health_Status), addEllipses =TRUE, repel =TRUE) +scale_color_manual(values =c("Very Low Nutrients"="#D73027", "Low Nutrients"="#FC8D59", "Moderate Nutrients"="#91BFDB", "Excessive Nutrients"="#1A9850")) +labs(title ="PCA Biplot with Soil Nutrient Gradient Index Classification", color ="Soil Nutrient Gradient Status")# Convert Cluster to a factor if not alreadysoil_data$Cluster <-as.factor(soil_data$cluster) # Create heatmap of Soil Nutrient Gradient Index (SNGI) Across Clustersheatmap_plot <-ggplot(soil_data, aes(x = Cluster, y = SNGI, fill = Health_Status)) +geom_col() +# Use geom_col() instead of geom_bar() to avoid incorrect stat usagescale_fill_manual(values =c("Very Low Nutrients"="#D73027", "Low Nutrients"="#FC8D59", "Moderate Nutrients"="#91BFDB", "Excessive Nutrients"="#1A9850")) +labs(title ="Soil Nutrient Gradient Index by Cluster", x ="Cluster", y ="Soil Nutrient Gradient Index") +theme_minimal()# Print plotsprint(heatmap_plot)
# Compute Mean SNGI per Cluster and Classify Based on Quartilesmean_sngi_cluster <- soil_data %>%group_by(Cluster) %>%summarise(Mean_SNGI =mean(SNGI, na.rm =TRUE)) %>%# Handle missing valuesmutate(Health_Status =case_when( Mean_SNGI <=quantile(Mean_SNGI, 0.25, na.rm =TRUE) ~"Very Low Nutrients", Mean_SNGI >quantile(Mean_SNGI, 0.25, na.rm =TRUE) & Mean_SNGI <=quantile(Mean_SNGI, 0.50, na.rm =TRUE) ~"Low Nutrients", Mean_SNGI >quantile(Mean_SNGI, 0.50, na.rm =TRUE) & Mean_SNGI <=quantile(Mean_SNGI, 0.75, na.rm =TRUE) ~"Moderate Nutrients", Mean_SNGI >quantile(Mean_SNGI, 0.75, na.rm =TRUE) ~"Excessive Nutrients" )) %>%mutate(Health_Status =factor(Health_Status, levels =c("Very Low Nutrients", "Low Nutrients", "Moderate Nutrients", "Excessive Nutrients"))) %>%arrange(Health_Status, Mean_SNGI) # Arrange by category & within category by SNGI# Arrange Clusters in Ascending Order of Mean SNGI (Most Degraded to Most Healthy)mean_sngi_cluster <- mean_sngi_cluster %>%arrange(Mean_SNGI) # Orders from lowest (most degraded) to highest (healthiest)# Plot Mean SNGI per Cluster (Ordered)ggplot(mean_sngi_cluster, aes(x =reorder(as.factor(Cluster), Mean_SNGI), y = Mean_SNGI, fill = Health_Status)) +geom_col() +scale_fill_manual(values =c("Very Low Nutrients"="#D73027", "Low Nutrients"="#FC8D59", "Moderate Nutrients"="#91BFDB", "Excessive Nutrients"="#1A9850")) +labs(title ="Mean Soil Nutrient Gradient Index by Cluster", x ="Cluster", y ="Mean SNGI") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Rotate x-axis labels for clarity
PCA TEST 2
# Remove first 5 columns#soil.data <- soil_data[,-c(1:4)]soil.data1 <- soil_data[, c("EC", "SOC", "TN", "Sand", "P", "Clay")]# Scale the datasoil_data_scaled1 <-scale(soil.data1)##This step will return the proportion of variance explained by each principal component and additional information, such as eigenvalues and the percentage of variance each PC explains.# Perform PCApc.results.soil1 <-PCA(soil_data_scaled1, scale.unit =TRUE, graph =FALSE)# Correlation circle - variable contribution to PCsfviz_pca_var(pc.results.soil1, col.var ="contrib", gradient.cols =c("#00AFBB", "#E7B800", "#FC4E07"), repel =TRUE)
# Extract eigenvalueseigenvalues1 <- pc.results.soil1$eig # First column contains eigenvaluesprint((eigenvalues1))
#✅ Interpetation: #PC1 (Dim.1) explains 64.91% of total variance, indicating it is the dominant factor influencing soil properties.#PC2 (Dim.2) explains an additional 23.48%, bringing the cumulative variance to 88.39% → #PC1 (Dim.1) #Strong positive correlations: SOC (0.94), TN (0.94), EC (0.82), P (0.76), Sand (0.63)#Strong negative correlation: Clay (-0.69)#Suggests PC1 represents soil fertility, organic matter, and nutrient availability.#PC2 (Dim.2) #Positive correlations: Clay (0.69), EC (0.45), P (0.39)#Negative correlation: Sand (-0.73)#Indicates a contrast between sandy soils and clay-rich soils, linked to organic matter retention.#K mean clustering # Determine the optimal number of clusters #Elbow method; Measures the total within-cluster sum of squares (WSS) as the number of clusters increases. It identifies the "elbow point," where adding more clusters no longer significantly reduces WSS.# Determine the optimal number of clusters using the elbow methodfviz_nbclust(pc.results.soil1$ind$coord, kmeans, method ="wss") +labs(title ="Optimal Clusters (Elbow Method)")
# Gap statistic for determining clusters# Gap statistic; Compares the total within-cluster variation for different numbers of clusters with the expected variation under a null reference distribution.set.seed(123) # Ensures reproducibilitygap_stat <-clusGap(pc.results.soil1$ind$coord, FUN = kmeans, nstart =25, K.max =10, B =50)fviz_gap_stat(gap_stat) +labs(title ="Optimal Clusters (Gap Statistic)")
# Perform K-means clustering on PCA scoresset.seed(123)k.groups1 <-kmeans(pc.results.soil1$ind$coord, centers =3, nstart =25)#nstart = 25. This means that R will try 25 different random starting assignments and then select the best results corresponding to the one with the lowest within cluster variation.# Visualise PCA results with k-means clusters}fviz_pca_ind(pc.results.soil1, col.ind =as.factor(k.groups1$cluster), label ="var", # Label the variables in the biplotaddEllipses =TRUE, # Add ellipses to group pointsgeom ="text") +# Use text only, no scatter pointsgeom_text(aes(label =as.factor(soil_data$cluster), color =as.factor(k.groups1$cluster)), size =3, vjust =-0.5)
SHI
# Compute SHI using PC1 and PC2 coordinatessoil_data$SHI <- (pc.results.soil$ind$coord[, 2] )# Print first few rows of SHIhead(soil_data$SHI)
# Classify clusters into four categories based on SHI quantilessoil_data$Health_Status <-cut(soil_data$SHI, breaks =quantile(soil_data$SHI, probs =c(0, 0.25, 0.5, 0.75, 1), na.rm =TRUE, type =7), labels =c("Degraded", "Near Degraded", "Near Healthy", "Healthy"), include.lowest =TRUE)# PCA Biplot with Health Classification (Using PC2)pca_plot <-fviz_pca_ind(pc.results.soil1, col.ind =as.factor(soil_data$Health_Status), addEllipses =TRUE, repel =TRUE) +scale_color_manual(values =c("Degraded"="#D73027", "Near Degraded"="#FC8D59", "Near Healthy"="#91BFDB", "Healthy"="#1A9850")) +labs(title ="PCA Biplot with Soil Health Classification (PC2)", color ="Soil Health Status")# Convert Cluster to a factorsoil_data$Cluster <-factor(soil_data$Cluster, levels =unique(soil_data$Cluster))# Heatmap of Soil Health Index (SHI) Across Clustersheatmap_plot <-ggplot(soil_data, aes(x =as.factor(Cluster), y = SHI, fill = Health_Status)) +geom_col() +scale_fill_manual(values =c("Degraded"="#D73027", "Near Degraded"="#FC8D59", "Near Healthy"="#91BFDB", "Healthy"="#1A9850")) +labs(title ="Soil Health Index by Cluster (PC2)", x ="Cluster", y ="Soil Health Index") +theme_minimal()# Print plotsprint(heatmap_plot)
# Compute Mean SHI per Cluster and Classify Based on Quartilesmean_shi_cluster <- soil_data %>%group_by(Cluster) %>%summarise(Mean_SHI =mean(SHI, na.rm =TRUE))# Compute quartiles once and use them in case_whenq <-quantile(mean_shi_cluster$Mean_SHI, probs =c(0.25, 0.5, 0.75), na.rm =TRUE)mean_shi_cluster <- mean_shi_cluster %>%mutate(Health_Status =case_when( Mean_SHI <= q[1] ~"Degraded", Mean_SHI > q[1] & Mean_SHI <= q[2] ~"Near Degraded", Mean_SHI > q[2] & Mean_SHI <= q[3] ~"Near Healthy", Mean_SHI > q[3] ~"Healthy" )) %>%mutate(Health_Status =factor(Health_Status, levels =c("Degraded", "Near Degraded", "Near Healthy", "Healthy"))) %>%arrange(Health_Status, Mean_SHI)# Arrange Clusters in Ascending Order of Mean SHImean_shi_cluster <- mean_shi_cluster %>%arrange(Mean_SHI)# Plot Mean SHI per Cluster (Ordered)ggplot(mean_shi_cluster, aes(x =reorder(as.factor(Cluster), Mean_SHI), y = Mean_SHI, fill = Health_Status)) +geom_col() +scale_fill_manual(values =c("Degraded"="#D73027", "Near Degraded"="#FC8D59", "Near Healthy"="#91BFDB", "Healthy"="#1A9850")) +labs(title ="", x ="Cluster", y ="Mean SHI") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
SOC and TN
# Step 1: Compute mean and standard deviation (SD) for SOC and TN per clustersummary_data <- soil_data %>%group_by(cluster) %>%summarise(mean_SOC =mean(SOC, na.rm =TRUE),sd_SOC =sd(SOC, na.rm =TRUE),mean_TN =mean(TN, na.rm =TRUE),sd_TN =sd(TN, na.rm =TRUE) )# Step 2: Create SOC plotplot_SOC <-ggplot(summary_data, aes(x =reorder(cluster, -mean_SOC), y = mean_SOC)) +geom_point(size =3, color ="blue") +# Points for meansgeom_errorbar(aes(ymin = mean_SOC - sd_SOC, ymax = mean_SOC + sd_SOC), width =0.2, color ="blue") +# Error bars (SD)labs(x ="Degradation gradient", y ="Mean SOC ± SD") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Rotate cluster labels# Step 3: Create TN plotplot_TN <-ggplot(summary_data, aes(x =reorder(cluster, -mean_TN), y = mean_TN)) +geom_point(size =3, color ="navy") +# Points for meansgeom_errorbar(aes(ymin = mean_TN - sd_TN, ymax = mean_TN + sd_TN), width =0.2, color ="navy") +# Error bars (SD)labs(x ="Degradation gradient", y ="Mean TN ± SD") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Rotate cluster labels# Step 4: Arrange the plots side by sidegrid_plot <-grid.arrange(plot_SOC, plot_TN, ncol =2)
# Step 5: Display the gridprint(grid_plot)
TableGrob (1 x 2) "arrange": 2 grobs
z cells name grob
1 1 (1-1,1-1) arrange gtable[layout]
2 2 (1-1,2-2) arrange gtable[layout]
Grasses
# Load required librarieslibrary(tidyverse)library(vegan)library(readr)library(dplyr)library(ggplot2)library(ggpubr)library(tidyr)library(rstatix)library(gridExtra)library(dunn.test)library(viridis)library(grid)library(lme4) # For linear mixed modelslibrary(lmerTest) # For p-valueslibrary(broom.mixed) # For extracting tidy summarieslibrary(dplyr)library(Metrics)library(patchwork)library(mgcv)library(broom)library(knitr)library(kableExtra)library(formattable)library(purrr) library(entropart)# Read CSV filegrass.data <-read_csv("Data/query_ldsf_rangeland_Enarau.csv")# Rename columnsgrass.data <- grass.data %>% dplyr::rename(herbs = rl_nearestherb,pg = rl_nearest_grass_perennial_spp,dist.pg = rl_dist_grass_perennial,ag = rl_nearest_grass_annual_spp,dist.ag = rl_dist_grass_annual,wp = rl_nearest_woody_spp,f = rl_nearest_forb_spp,dist.wp = rl_dist_woody_plant,dist.f = rl_dist_forb,bare.ground = rl_bareground,leaf.litter = rl_leaflitter,dung.presence = rl_dung,rock.cover = rl_rock,point.canopy = rl_canopy,grass.tuft = rl_grasstuft,cluster = Cluster,plot = Plot )# Replace NA values in numeric columns with 0grass.data <- grass.data %>%mutate(across(where(is.numeric), ~replace_na(., 0)))# View the updated data framecolnames(grass.data)
To assess plant species diversity across different clusters and plots, we computed Hill numbers ( q₀, q₁, and q₂) using species occurrence data in R Package entropart.
q₀ (Species richness): Counts the number of species present.
q₁ (Shannon diversity): Accounts for both species richness and evenness.
q₂ (Simpson diversity): Gives more weight to dominant species, reducing the influence of rare species.
Species data were obtained from multiple functional groups, including:
Annual grasses, Perennial grasses, Woody plants (native), Forbs (native), Invasive species (forbs and woody plants)
To assess the relationship between plant diversity calculated using Hill numbers (q0, q1, q2 and soil properties, we extracted principal component analysis (PCA) scores for PC1 and PC2 and merged the scores with plant diversity data for different plant functional groups. We applied Generalized Additive Mixed Models (GAMMs) to evaluate the effects of soil properties (PC1 and PC2) on plant diversity metrics. GAMMs were chosen due to their ability to model nonlinear relationships while accounting for random effects. The diversity indices (q0, q1, q2) for different species types were response variables, PC1 and PC2 were independent, and cluster was included as a random effect to account for spatial autocorrelation. The models were estimated using the restricted maximum likelihood (REML) method.
To assess model fit and assumptions, we examined the linearity of relationships between predictor variables and diversity indices using scatter plots with LOESS smoothers.
# Step 1: Prepare Data# Extract PCA scores for PC1 and PC2soil_pca_scores <-as.data.frame(pc.results.soil1$ind$coord)[, 1:2]colnames(soil_pca_scores) <-c("PC1", "PC2")# Convert Cluster and Plot to factorssoil_pca_scores <- soil_pca_scores %>%mutate(cluster =as.factor(soil_data$cluster), plot =as.factor(soil_data$plot))soil_data <- soil_data %>%mutate(cluster =as.factor(cluster), plot =as.factor(plot))# Reshape diversity datadiversity_wide <- diversity_long %>%pivot_wider(names_from =c(Species_Type, Hill_Number),values_from = Diversity,names_glue ="{Hill_Number}_{Species_Type}",values_fill =list(Diversity =NA) ) %>%mutate(cluster =as.factor(cluster), plot =as.factor(plot))# Merge diversity metrics with soil PCA scores and soil datadiversity_pca <- diversity_wide %>%left_join(soil_pca_scores, by =c("cluster", "plot")) %>%left_join(soil_data, by =c("cluster", "plot"))# Rename diversity columnsdiversity_pca <- diversity_pca %>%rename(q0.ag =`q0_Annual Grasses`, q1.ag =`q1_Annual Grasses`, q2.ag =`q2_Annual Grasses`,q0.pg =`q0_Perennial Grasses`, q1.pg =`q1_Perennial Grasses`, q2.pg =`q2_Perennial Grasses`,q0.wp =`q0_Woody Plants`, q1.wp =`q1_Woody Plants`, q2.wp =`q2_Woody Plants`,q0.f =`q0_Forbs`, q1.f =`q1_Forbs`, q2.f =`q2_Forbs`,q0.inv =`q0_Invasives`, q1.inv =`q1_Invasives`, q2.inv =`q2_Invasives` ) %>%mutate(across(where(is.list), ~map_dbl(.x, ~ifelse(length(.x) ==0, NA, as.numeric(.x)))), # Convert lists & handle empty elementsacross(where(is.numeric), ~replace_na(.x, 0)) # Replace NA with 0 only for numeric columns )# Define response variablesvariables <-c("q0.ag", "q1.ag", "q2.ag", "q0.pg", "q1.pg", "q2.pg", "q0.wp", "q1.wp", "q2.wp", "q0.f", "q1.f", "q2.f", "q0.inv", "q1.inv", "q2.inv")# Test whether the data is linearggplot(diversity_pca, aes(x = PC1, y = q2.pg)) +geom_point() +geom_smooth(method ="loess", col ="red") +labs(title ="Checking Linearity")
# Step 1: Run GAMM for each variablemodels_gamm <-lapply(variables, function(var) {gam(as.formula(paste(var, "~ s(PC1) + s(PC2) + s(cluster, bs='re')")), data = diversity_pca, method ="REML")})# Step 2: Summarise GAMM resultssummaries_gamm <-lapply(models_gamm, summary)# Step 3: Extract significant GAMM resultsextract_significant_results <-function(summary_gamm, var_name) {if (!is.null(summary_gamm$s.table)) { summary_gamm$s.table %>%as.data.frame() %>%rename(EDF = edf, F_Value = F, P_Value =`p-value`) %>%filter(P_Value <0.05) %>%mutate(Term =rownames(.), Variable = var_name) } else {data.frame() # Return empty dataframe if no s.table }}significant_results_gamm <-bind_rows(lapply(seq_along(summaries_gamm), function(i) {extract_significant_results(summaries_gamm[[i]], variables[i])}))# Step 4: Display GAMM results in a simple kable tableif (nrow(significant_results_gamm) >0) { gamm_table <-kable(significant_results_gamm, digits =3, caption ="Significant Smooth Terms in GAMMs")print(gamm_table)} else {message("No significant smooth terms found in GAMMs.")}
####✅ Interpetation: #Annual grasses are strongly influenced by PC1 and cluster, suggesting both soil properties and spatial heterogeneity shape their distribution.#Perennial grasses are primarily affected by cluster, with less pronounced soil effects.#Forbs show strong spatial variability across clusters.#Invasive species are significantly influenced by PC2, indicating that soil properties drive their establishment, but they also exhibit spatial variability.#Woody plants are most influenced by PC1, reflecting the role of soil health in determining their presence and diversity.# Step 6: Plot GAMM Resultsplot_list <-lapply(seq_along(variables), function(i) { model <- models_gamm[[i]]# Use median PC2 for PC1 predictions and vice versa median_PC2 <-median(diversity_pca$PC2, na.rm =TRUE) median_PC1 <-median(diversity_pca$PC1, na.rm =TRUE) most_common_cluster <- diversity_pca$cluster[which.max(table(diversity_pca$cluster))]# Generate newdata with fixed cluster df1 <-data.frame(PC1 =seq(min(diversity_pca$PC1, na.rm =TRUE), max(diversity_pca$PC1, na.rm =TRUE), length.out =100),PC2 = median_PC2, cluster = most_common_cluster) df2 <-data.frame(PC2 =seq(min(diversity_pca$PC2, na.rm =TRUE), max(diversity_pca$PC2, na.rm =TRUE), length.out =100),PC1 = median_PC1, cluster = most_common_cluster)# Predict effects and standard errors pred1 <-predict(model, newdata = df1, type ="response", se.fit =TRUE) df1$Effect <- pred1$fit df1$SE <- pred1$se.fit pred2 <-predict(model, newdata = df2, type ="response", se.fit =TRUE) df2$Effect <- pred2$fit df2$SE <- pred2$se.fit# Plot smooth effect of PC1 (Blue) plot1 <-ggplot(df1, aes(x = PC1, y = Effect)) +geom_line(color ="#1f77b4") +# Blue for PC1geom_ribbon(aes(ymin = Effect - SE, ymax = Effect + SE), fill ="#1f77b4", alpha =0.3) +ggtitle(paste("", variables[i], "")) +theme_minimal()# Plot smooth effect of PC2 (Green) plot2 <-ggplot(df2, aes(x = PC2, y = Effect)) +geom_line(color ="#2ca02c") +# Green for PC2geom_ribbon(aes(ymin = Effect - SE, ymax = Effect + SE), fill ="#2ca02c", alpha =0.3) +ggtitle(paste("", variables[i], "")) +theme_minimal()return(plot1 + plot2)})# Display all plotswrap_plots(plot_list)
#######✅ Interpetation: #PC1 (indicative of healthier soils) favours woody plants and perennial grasses while negatively affecting annual grasses, forbs, and invasive species.#PC2 strongly influences invasive species, with lower PC2 values promoting their abundance.#Invasive species and annual grasses thrive in more degraded conditions, while woody plants and perennial grasses benefit from healthier soils.
q vs pc weighted pc
We computed weighted principal component (PC) scores using the explained variance of the first two principal components (PC1 and PC2). These weighted scores provide a composite representation of soil health gradients. Generalized Additive Mixed Models (GAMMs) were applied where (PC) represents a smooth function of soil properties, and (Cluster, bs=‘re’) accounts for random variations among clusters.
## ##✅ Interpetation: #GAMM Results Interpretation (Smooth Terms Table)#The table presents results from a Generalized Additive Mixed Model (GAMM) assessing how soil properties (PC) and spatial clusters influence plant diversity indices (q0, q1, q2) across functional groups.#Annual Grasses (q0.ag, q1.ag, q2.ag)# PC significantly affects q0.ag, q1.ag, and q2.ag (P < 0.01), showing that annual grass diversity declines with increasing soil health (PC values as also seen below).# Spatial clusters significantly influence all annual grass indices, meaning diversity varies across different locations.#Perennial Grasses (q0.pg, q1.pg, q2.pg)# PC has no significant effect on q0.pg, q1.pg, and q2.pg (P > 0.05), indicating that perennial grass diversity does not strongly correlate with soil PC values.#Clusters significantly affect q0.pg (P = 0.002), suggesting location-dependent variation. #Interpretation: Perennial grasses might be more stable across soil conditions and not solely dependent on soil restoration levels.#Woody Plants (q0.wp, q1.wp)# PC significantly affects q0.wp and q1.wp (P < 0.01), meaning woody plant diversity increases with soil health as also seen below.# Clusters significantly affect q0.wp and q1.wp, implying spatial variation.#Interpretation: Woody plants are strong indicators of restored ecosystems and increase as soil conditions improve.#Forbs #(PC)) do not significantly affect forb diversity (all p > 0.86).#Spatial variation (s(cluster)) is significant (p < 0.001), meaning forb diversity is mostly driven by differences between clusters rather than soil health.#Invasive #Soil properties (s(PC)) do not significantly affect invasive species diversity (all p > 0.8).#Spatial variation (s(cluster)) is significant (p < 0.001), suggesting that invasive species are more influenced by landscape heterogeneity than soil conditions.# Generate GAMM effect plotsplot_list <-lapply(seq_along(variables), function(i) { model <- models_gamm[[i]] median_PC <-median(diversity_pca$PC, na.rm =TRUE) most_common_cluster <- diversity_pca$cluster[which.max(table(diversity_pca$cluster))] df <-data.frame(PC =seq(min(diversity_pca$PC, na.rm =TRUE), max(diversity_pca$PC, na.rm =TRUE), length.out =100),cluster = most_common_cluster ) pred <-predict(model, newdata = df, type ="response", se.fit =TRUE) df$Effect <- pred$fit df$SE <- pred$se.fitggplot(df, aes(x = PC, y = Effect)) +geom_line(color ="blue") +geom_ribbon(aes(ymin = Effect - SE, ymax = Effect + SE), alpha =0.2) +ggtitle(paste("", variables[i])) +theme_minimal()})# Display all plots in a gridwrap_plots(plot_list, ncol =3)
#######✅ Interpetation: ##The provided plot displays the effect of soil properties (PC scores) on different plant diversity indices (q0, q1, q2) across plant functional groups using Generalized Additive Mixed Models (GAMM).#The effect of soil properties (PC) on annual grasses is mostly negative, with a decline in diversity indices (q0, q1, q2) as PC increases.#Perennial Grasses (q0.pg, q1.pg, q2.pg); trend is positive, showing that all three diversity indices increase with higher PC values#Woody Plants (q0.wp, q1.wp, q2.wp) increase with increase in PC values#Forbs (q0.f, q1.f, q2.f) showed a negative effect where higher PC values correspond to lower diversity indices.#Invasive Species (q0.inv, q1.inv, q2.inv) showed a decline in diversity indices with increasing soil health (PC values).
a and b computed from q1
We calculated Alpha diversity for each species type within clusters using the shannon diversity index (q1) values. We calculated Gamma diversity as the mean of the Shannon diversity index (q1) across all clusters for each species type (functional group). We computed Beta diversity as the ratio of gamma diversity to alpha diversity for each species type within clusters.
# Calculate Alpha Diversity (Mean and SD within Clusters)alpha_diversity_q1 <- diversity.all %>%group_by(cluster, Species_Type) %>%summarise(Mean_Alpha =mean(q1, na.rm =TRUE),SD_Alpha =ifelse(n() >1, sd(q1, na.rm =TRUE), NA), # Compute SD only if more than one value.groups ="drop" )# Calculate Alpha Diversity (Mean and SD within Clusters)alpha_diversity_q1 <- diversity.all %>%group_by(cluster, Species_Type) %>%summarise(Mean_Alpha =mean(q1, na.rm =TRUE),SD_Alpha =ifelse(n() >1, sd(q1, na.rm =TRUE), NA), # Compute SD only if more than one value.groups ="drop" )# Get unique species typesspecies_types <-unique(alpha_diversity_q1$Species_Type)# Function to plot alpha diversity for a single species typeplot_alpha_diversity <-function(data, species_type) {# Filter data for the species type species_data <- data %>%filter(Species_Type == species_type) %>%mutate(cluster =factor(cluster, levels = cluster[order(-Mean_Alpha)])) # Reorder clusters by Mean_Alpha# Plotggplot(species_data, aes(x = cluster, y = Mean_Alpha)) +geom_point(color ="blue", size =2) +geom_errorbar(aes(ymin = Mean_Alpha - SD_Alpha, ymax = Mean_Alpha + SD_Alpha), width =0.2, color ="red") +labs(title =paste("α", species_type),x ="Cluster", y ="α") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))}# Generate plots for each species typealpha_plots <-map(species_types, ~plot_alpha_diversity(alpha_diversity_q1, .x))# Combine plots into a gridalpha_grid <-wrap_plots(alpha_plots, ncol =2)# Display the gridprint(alpha_grid)
# Calculate Gamma Diversity (Mean and SD for the Entire Study Area)gamma_diversity_q1 <- diversity.all %>%group_by(Species_Type) %>%summarise(Mean_Gamma =mean(q1, na.rm =TRUE), # Mean diversity across all clustersSD_Gamma =ifelse(n() >1, sd(q1, na.rm =TRUE), NA), # Compute SD only if multiple observations.groups ="drop" )# Gamma Diversity Bar Plotgamma_plot <- gamma_diversity_q1 %>%filter(!is.na(Mean_Gamma)) %>%# Ensure no NA valuesggplot(aes(x =reorder(Species_Type, -Mean_Gamma), y = Mean_Gamma, fill = Species_Type)) +geom_bar(stat ="identity") +geom_errorbar(aes(ymin = Mean_Gamma - SD_Gamma, ymax = Mean_Gamma + SD_Gamma), width =0.2, color ="black") +labs(title =" γ Across Species Types",x ="Species Type", y =" (γ)") +theme_minimal() +scale_fill_brewer(palette ="Dark2") +theme(axis.text.x =element_text(angle =45, hjust =1))print(gamma_plot)
# Compute Beta Diversity (Gamma/Alpha for q1) with SDbeta_diversity_q1 <- alpha_diversity_q1 %>%left_join(gamma_diversity_q1, by ="Species_Type") %>%mutate(Beta_q1 = Mean_Gamma / Mean_Alpha, # Turnover among clustersSD_Beta = Beta_q1 *sqrt((SD_Gamma / Mean_Gamma)^2+ (SD_Alpha / Mean_Alpha)^2) # Propagation of SD ) %>%select(Species_Type, cluster, Beta_q1, SD_Beta)# Get unique species typesspecies_types <-unique(beta_diversity_q1$Species_Type)# Function to plot beta diversity for a single species typeplot_beta_diversity <-function(data, species_type) {# Filter data for the species type species_data <- data %>%filter(Species_Type == species_type) %>%mutate(cluster =factor(cluster, levels = cluster[order(-Beta_q1)])) # Reorder clusters by Beta_q1# Plotggplot(species_data, aes(x = cluster, y = Beta_q1)) +geom_point(color ="blue", size =3) +geom_errorbar(aes(ymin = Beta_q1 - SD_Beta, ymax = Beta_q1 + SD_Beta), width =0.2, color ="red") +labs(title =paste(" β ", species_type),x ="Cluster", y ="β") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))}# Generate plots for each species typebeta_plots <-map(species_types, ~plot_beta_diversity(beta_diversity_q1, .x))# Combine plots into a gridbeta_grid <-wrap_plots(beta_plots, ncol =2)# Display the gridprint(beta_grid)
a and b vs pc
We used the Generalized Additive Models (GAM) to analyse the relationship between diversity metrics (α and β diversity) and soil properties (Mean_PC1 and Mean_PC2).
#######✅ Interpetation: #Significant results only: The table presents the results of a Generalized Additive Model (GAM), showing the effects of Mean_PC1 and Mean_PC2 (principal components representing soil properties) on different response variables (α and β diversity of woody plants (wp), invasive species (inv), and forbs (f)).#Soil properties (PC2) strongly impact woody plants and invasive species, alpha and beta diversity. The strongest effects are seen in woody plant diversity (both α and β), suggesting that tree species distribution is closely linked to soil conditions.#PC2 influences the alpha and beta diversities of forbs# Both PC1 and PC2 (soil properties) contribute to shaping invasive species diversity, with stronger effects on β diversity, indicating that soil properties drive spatial variation in invasive species.# Function to create scatterplots with GAM fitsplot_gam_effects <-function(response_var, data) {ggplot(data, aes_string(x ="Mean_PC1", y = response_var)) +geom_point(color ="blue", alpha =0.6) +geom_smooth(method ="gam", formula = y ~s(x), color ="red", se =TRUE) +labs(title =paste(response_var, ""), x ="Mean_PC1", y = response_var) +theme_minimal()}# Generate plots for all response variablesgam_plots <-lapply(response_vars, function(var) plot_gam_effects(var, diversity_pc_data))# Combine plots in a gridwrap_plots(gam_plots, ncol =5)
# Define response variablesresponse_vars <-c("\u03b1.ag", "\u03b1.f", "\u03b1.inv", "\u03b1.pg", "\u03b1.wp", "\u03b2.ag", "\u03b2.f", "\u03b2.inv", "\u03b2.pg", "\u03b2.wp")# Function to create scatterplots with GAM fits using PC2plot_gam_effects_PC2 <-function(response_var, data) {ggplot(data, aes_string(x ="Mean_PC2", y = response_var)) +geom_point(color ="blue", alpha =0.6) +geom_smooth(method ="gam", formula = y ~s(x), color ="red", se =TRUE) +labs(title =paste(response_var, ""), x ="Mean_PC2", y = response_var) +theme_minimal()}# Generate plots for all response variablesgam_plots_PC2 <-lapply(response_vars, function(var) plot_gam_effects_PC2(var, diversity_pc_data))# Combine plots in a gridwrap_plots(gam_plots_PC2, ncol =5)
#######✅ Interpetation: #α.wp (Woody Plants): Strong negative correlation, meaning woody plant diversity decreases as soil properties improve. β.wp (Woody Plants): Decreasing trend, indicating that sites become more similar in woody plant composition as PC2 increases.#α.f (Forbs): A U-shaped curve, indicating that forb diversity is lowest at intermediate PC2 values and highest at both low and high PC2 values.β.f (Forbs): A hump-shaped pattern, suggesting higher beta diversity at intermediate PC2 values.#α.ag vs PC1: Weak negative relationship (α diversity of annual grasses decreases slightly as PC1 increases) β.ag vs PC1: Slight positive trend, suggesting more variation in annual grass communities as PC1 increases.#α.ag (Annual Grasses): Slight increasing trend, suggesting that as soil properties improve (higher PC2), annual grass diversity may increase.β.ag (Annual Grasses): Slight decreasing trend, indicating reduced dissimilarity among sites as PC2 increases.#α.f vs PC1: Negative trend, suggesting forbs decrease with increasing PC1. β.f vs PC1: Weak positive trend, indicating that forb community composition shifts slightly as PC1 increases.#α.inv vs PC1: Non-linear trend, showing a slight dip at negative PC1 and stabilising around 0. β.inv vs PC1: Very slight increasing trend, meaning invasive species turnover might increase with PC1.#α.inv (Invasive Species): Slightly decreasing trend, suggesting invasive species diversity decreases as PC2 increases.β.inv (Invasive Species): Increasing trend, meaning that differences in invasive species composition increase as PC2 rises.#α.pg vs PC1: Weak U-shaped trend, meaning perennial grass diversity might be lowest at intermediate PC1. β.pg vs PC1: Non-linear U-shaped pattern, suggesting turnover in perennial grasses is highest at extreme PC1 values.#α.pg (Perennial Grasses): No strong trend, suggesting little relationship with PC2. β.pg (Perennial Grasses): No strong pattern.#α.wp vs PC1: Slight increasing trend, indicating that woody plant richness may increase with higher PC1. β.wp vs PC1: Strong U-shaped trend, showing woody plant communities are more variable at extreme PC1 values.