Plot-Level Analysis: Invasive Species Effects

Evaluating Impacts on Native Flora Biodiversity and Composition

Author

Felipe Melo and Nduah Nderi

Published

June 16, 2026

Introduction

This document performs a plot-level analysis examining the effects of invasive species cover on native flora. The analysis aggregates data collected across 12 quadrats per plot by averaging cover values, resulting in a dataset representing 100 distinct plots.


1. Environment Setup & Libraries

We begin by loading all necessary packages required for data manipulation, diversity metrics, regression modeling, and ecological community analysis.

Code
library(dplyr)
library(tidyr)
library(readxl)
library(vegan)
library(hillR)
library(segmented)
library(indicspecies)
library(ggplot2)
library(corrplot)
library(betapart)
library(lme4)
library(car)
library(emmeans)
library(scatterplot3d)
library(cowplot)
library(knitr)

## 2. Data Loading & Preprocessing

### Load Dataset First, we load the raw quadrat-level dataset.

Note

Ensure your invasive_cover.xlsx file is placed directly inside your active R project directory to support reproducible parsing paths.

Code
invasive_cover <- read_excel("invasive_cover.xlsx")

Format Transformation & Mid-Point Conversion

We convert the ordinal cover rankings to numeric mid-point percentages using a lookup vector. Then, we transform the community data into a long format for easier aggregation.

Code
ordinal_to_midpoint <- c(
  "1" = 0.5,    # <1%
  "2" = 2.5,    # 1‑5%
  "3" = 7.5,    # 5‑10%
  "4" = 17.5,   # 10‑25%
  "5" = 32.5,   # 25‑50%
  "6" = 62.5,   # 50‑75%
  "7" = 87.5    # 75‑100%
)

quadrat_cols <- colnames(invasive_cover)[5:ncol(invasive_cover)]

df_long <- invasive_cover %>%
  pivot_longer(cols = all_of(quadrat_cols),
               names_to = "quadrat_id",
               values_to = "cover_ord") %>%
  filter(cover_ord > 0) %>%
  mutate(
    cover_pct = ordinal_to_midpoint[as.character(cover_ord)],
    plot_id = sub("Q.*$", "", quadrat_id)
  )

head(df_long)
# A tibble: 6 × 8
  species_name         type  form  family quadrat_id cover_ord cover_pct plot_id
  <chr>                <chr> <chr> <chr>  <chr>          <dbl>     <dbl> <chr>  
1 Abutilon mauritianum Nati… Shrub Malva… C2Q06              1       0.5 C2     
2 Abutilon mauritianum Nati… Shrub Malva… D1Q12              2       2.5 D1     
3 Achyranthes aspera   Inva… Forb… Amara… A2Q02              2       2.5 A2     
4 Achyranthes aspera   Inva… Forb… Amara… A2Q03              3       7.5 A2     
5 Achyranthes aspera   Inva… Forb… Amara… A2Q04              4      17.5 A2     
6 Achyranthes aspera   Inva… Forb… Amara… A2Q05              3       7.5 A2     

Data Aggregation

We average quadrat cover levels up to the Plot \(\times\) Species level.

Code
plot_species <- df_long %>%
  group_by(plot_id, species_name, type, form, family) %>%
  summarise(cover_pct = mean(cover_pct, na.rm = TRUE), .groups = "drop")

head(plot_species)
# A tibble: 6 × 6
  plot_id species_name        type     form      family      cover_pct
  <chr>   <chr>               <chr>    <chr>     <chr>           <dbl>
1 A1      Ageratum conyzoides Invasive Forb/herb Asteraceae       2.5 
2 A1      Aristida kenyensis  Native   Grass     Poaceae          7.5 
3 A1      Asclepias fruticosa Native   Shrub     Apocynaceae      0.5 
4 A1      Bidens pilosa       Invasive Forb/herb Asteraceae       6.25
5 A1      Chloris pycnothrix  Native   Grass     Poaceae          7.5 
6 A1      Cirsium vulgare     Invasive Forb/herb Asteraceae       2   

## 3. Metric Calculations

### Native and Exotic Diversity (Hill Numbers) We restructure native and exotic species records into parallel wide format community matrices to calculate Hill numbers (\(q = 0, 1, 2\)).

Code
native_wide <- plot_species %>%
  filter(type == "Native") %>%
  dplyr::select(plot_id, species_name, cover_pct) %>%
  pivot_wider(names_from = species_name, values_from = cover_pct, values_fill = 0) %>%
  as.data.frame()
rownames(native_wide) <- native_wide$plot_id
native_wide_matrix <- native_wide %>% dplyr::select(-plot_id)

exotic_wide <- plot_species %>%
  filter(type != "Native") %>%
  dplyr::select(plot_id, species_name, cover_pct) %>%
  pivot_wider(names_from = species_name, values_from = cover_pct, values_fill = 0) %>%
  as.data.frame()
rownames(exotic_wide) <- exotic_wide$plot_id
exotic_wide_matrix <- exotic_wide %>% dplyr::select(-plot_id)

native_div <- data.frame(
  plot_id = rownames(native_wide_matrix),
  q0 = hill_taxa(native_wide_matrix, q = 0),
  q1 = hill_taxa(native_wide_matrix, q = 1),
  q2 = hill_taxa(native_wide_matrix, q = 2)
)

exotic_div <- data.frame(
  plot_id = rownames(exotic_wide_matrix),
  q0 = hill_taxa(exotic_wide_matrix, q = 0),
  q1 = hill_taxa(exotic_wide_matrix, q = 1),
  q2 = hill_taxa(exotic_wide_matrix, q = 2)
)

## 4. Visualizations & Distributions

### Histograms of True Diversity We evaluate the structural frequencies of our metrics across both native and exotic communities.

Code
combined_wide <- bind_rows(
  native_div %>% mutate(origin = "Native"),
  exotic_div %>% mutate(origin = "Exotic")
)

div_long <- combined_wide %>%
  pivot_longer(
    cols = c(q0, q1, q2),
    names_to = "q_level",
    values_to = "diversity_value"
  ) %>%
  mutate(
    origin = factor(origin, levels = c("Native", "Exotic")),
    q_level = case_when(
      q_level == "q0" ~ "q = 0 (Richness)",
      q_level == "q1" ~ "q = 1 (Shannon)",
      q_level == "q2" ~ "q = 2 (Simpson)"
    )
  )

ggplot(div_long, aes(x = diversity_value, fill = origin)) +
  geom_histogram(bins = 10, color = "white", alpha = 0.85) +
  facet_grid(origin ~ q_level, scales = "free") +
  scale_fill_manual(values = c("Native" = "#2e7d32", "Exotic" = "#c62828")) +
  labs(
    title = "True Diversity Value Distributions",
    x = "Diversity Metric Value",
    y = "Number of Plots (Frequency)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold", size = 11),
    strip.background = element_rect(fill = "gray95", color = "gray80"),
    panel.spacing = unit(1.2, "lines"),
    plot.title = element_text(face = "bold", size = 13)
  ) -> hist.all

hist.all

Diversity Profiles

By summarizing mean tendencies alongside 95% Confidence Intervals, we construct standard ecosystem profiling charts.

Code
profile_stats <- div_long %>%
  group_by(origin, q_level) %>%
  summarise(
    n = n(),
    mean_val = mean(diversity_value, na.rm = TRUE),
    sd_val = sd(diversity_value, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    se_val = sd_val / sqrt(n),
    ci_lower = mean_val - (1.96 * se_val),
    ci_upper = mean_val + (1.96 * se_val)
  )

pd <- position_dodge(width = 0.25)
ggplot(profile_stats, aes(x = q_level, y = mean_val, color = origin, group = origin)) +
  geom_line(size = 1.2, position = pd) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.15, size = 0.8, position = pd) +
  geom_point(size = 4, stroke = 1.5, fill = "white", shape = 21, position = pd) +
  scale_color_manual(values = c("Native" = "#2e7d32", "Exotic" = "#c62828")) +
  labs(
    title = "Taxonomic Diversity Scaling Profiles",
    x = "Order of Diversity (Hill Numbers)",
    y = "Effective Number of Species (Mean)",
    color = "Origin of Species"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", size = 13)
  ) -> profile.all

profile.all

Ecosystem Note: The profiles demonstrate that while rare-native species are sustained well regionally, highly dominant roles within plots shift structurally toward non-native flora elements.


5. Regression Modeling & Intercept Strengths

We map native versus exotic variables horizontally using facet_grid() to observe shifting interaction trends directly.

Code
regression_data <- combined_wide %>%
  pivot_longer(
    cols = c(q0, q1, q2),
    names_to = "q_level",
    values_to = "diversity_value"
  ) %>%
  pivot_wider(
    names_from = origin,
    values_from = diversity_value
  ) %>%
  mutate(
    q_level = factor(q_level, levels = c("q0", "q1", "q2"),
                     labels = c("q0 (Richness)", "q1 (Shannon)", "q2 (Simpson)"))
  )

ggplot(regression_data, aes(x = Exotic, y = Native)) +
  geom_jitter(alpha = 0.6, color = "#2b5c8f", size = 2.5) +
  geom_smooth(method = "lm", formula = y ~ x, color = "#c62828", fill = "gray85", size = 1.2) +
  facet_grid(. ~ q_level, scales = "free") +
  labs(
    title = "Linear Regressions Across Diversity Orders",
    x = "Exotic Diversity",
    y = "Native Diversity"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    strip.text = element_text(face = "bold", size = 11),
    strip.background = element_rect(fill = "gray95", color = "gray80"),
    panel.spacing = unit(1.5, "lines"),
    plot.title = element_text(face = "bold", size = 13)
  )

Mixed-Effects Slope Comparison

To determine if regression slopes differ significantly across Hill values while controlling for repeated measures within plots, we implement a linear mixed-effects framework.

Code
regression_data_model <- combined_wide %>%
  pivot_longer(cols = c(q0, q1, q2), names_to = "q_level", values_to = "diversity_value") %>%
  pivot_wider(names_from = origin, values_from = diversity_value) %>%
  mutate(q_level = factor(q_level, levels = c("q0", "q1", "q2")))

mixed_model <- lmer(Native ~ Exotic * q_level + (1 | plot_id), data = regression_data_model)

cat("--- Type III ANOVA Results ---\n")
--- Type III ANOVA Results ---
Code
Anova(mixed_model, type = "III")
Analysis of Deviance Table (Type III Wald chisquare tests)

Response: Native
                 Chisq Df Pr(>Chisq)    
(Intercept)    63.7393  1  1.420e-15 ***
Exotic         44.8717  1  2.104e-11 ***
q_level        30.8595  2  1.990e-07 ***
Exotic:q_level  6.5332  2    0.03814 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
cat("\n--- Estimated Slopes (Beta Coefficients) per q-level ---\n")

--- Estimated Slopes (Beta Coefficients) per q-level ---
Code
slopes_output <- emtrends(mixed_model, ~ q_level, var = "Exotic")
print(slopes_output)
 q_level Exotic.trend     SE  df lower.CL upper.CL
 q0             0.608 0.0912 281    0.428    0.787
 q1             0.524 0.1080 286    0.311    0.736
 q2             0.373 0.1060 283    0.164    0.582

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 
Code
cat("\n--- Pairwise Comparisons of Slopes (Beta Strength) ---\n")

--- Pairwise Comparisons of Slopes (Beta Strength) ---
Code
pairs(slopes_output)
 contrast estimate     SE  df t.ratio p.value
 q0 - q1    0.0842 0.0899 202   0.936  0.6179
 q0 - q2    0.2350 0.0928 201   2.533  0.0322
 q1 - q2    0.1509 0.0928 196   1.627  0.2368

Degrees-of-freedom method: kenward-roger 
P value adjustment: tukey method for comparing a family of 3 estimates 
Contrast Estimate (\(\beta\) Difference) Standard Error (SE) Degrees of Freedom (df) t-ratio p-value Signif.
q0 - q1 0.0842 0.0899 202 0.936 0.6179 ns
q0 - q2 0.2350 0.0928 201 2.533 0.0322 *
q1 - q2 0.1509 0.0928 196 1.627 0.2368 ns

Conclusion: Across rare and common species metrics (\(q_0\) and \(q_1\)), exotic richness follows native baseline capacities without clear signifiers of exclusion. However, the strength of this relationship changes significantly when comparing species richness (\(q_0\)) against dominant profiles (\(q_2\)), where exotic dominance tracks distinctly higher.


6. Multivariate Cross-Boundary Species Correlations

Community Wide Matrix Preparation

We build a flat wide table tracking species across all unique plots for ecological validation.

Code
matrix_data <- plot_species %>%
  mutate(origin_status = ifelse(type == "Native", "Native", "Exotic")) %>%
  dplyr::select(species_name, origin_status, plot_id, cover_pct)

master_wide_matrix <- matrix_data %>%
  pivot_wider(names_from = plot_id, values_from = cover_pct, values_fill = 0) %>%
  arrange(origin_status, species_name)

print(master_wide_matrix[1:5, 1:5])
# A tibble: 5 × 5
  species_name            origin_status    A1   A10    A2
  <chr>                   <chr>         <dbl> <dbl> <dbl>
1 Acanthospermum hispidum Exotic         0      0    4.17
2 Achyranthes aspera      Exotic         0      0    8.5 
3 Ageratum conyzoides     Exotic         2.5    0    2.5 
4 Bidens pilosa           Exotic         6.25  32.5  5   
5 Cirsium vulgare         Exotic         2     10    5.75

Multiple Cross-Boundary Significance Mapping

We test every individual Native-to-Exotic species coverage intersection using Spearman Rank calculations due to zero-inflated tracking dimensions.

Code
community_wide <- plot_species %>%
  mutate(origin_status = ifelse(type == "Native", "Native", "Exotic")) %>%
  dplyr::select(plot_id, species_name, cover_pct) %>%
  pivot_wider(names_from = species_name, values_from = cover_pct, values_fill = 0) %>%
  as.data.frame()

rownames(community_wide) <- community_wide$plot_id
community_wide$plot_id <- NULL

native_spp <- intersect(unique(plot_species$species_name[plot_species$type == "Native"]), colnames(community_wide))
exotic_spp <- intersect(unique(plot_species$species_name[plot_species$type != "Native"]), colnames(community_wide))

test_res <- cor.mtest(community_wide, method = "spearman", conf.level = 0.95)
full_r <- cor(community_wide, method = "spearman")
full_p <- test_res$p

rownames(full_p) <- colnames(community_wide)
colnames(full_p) <- colnames(community_wide)

sub_r <- full_r[native_spp, exotic_spp, drop = FALSE]
sub_p <- full_p[native_spp, exotic_spp, drop = FALSE]

valid_rows <- apply(sub_r, 1, function(x) !all(is.na(x)))
valid_cols <- apply(sub_r, 2, function(x) !all(is.na(x)))
sub_r <- sub_r[valid_rows, valid_cols, drop = FALSE]
sub_p <- sub_p[valid_rows, valid_cols, drop = FALSE]

corrplot(
  sub_r,
  method = "color",
  p.mat = sub_p,
  sig.level = 0.05,
  insig = "label_sig",
  pch.col = "black",
  pch.cex = 0.8,
  tl.col = "black",
  tl.srt = 45,
  tl.cex = 0.7,
  mar = c(0, 0, 2, 0),
  title = "Significant Cross-Boundary Interactions (Alpha = 0.05)"
)

Quantifying Significant Relationships

We summarize the total volume of positive, negative, and neutral species-to-species interactions.

Code
summary_table <- data.frame(
  r_value = as.vector(sub_r),
  p_value = as.vector(sub_p)
) %>%
  filter(!is.na(r_value) & !is.na(p_value)) %>%
  mutate(
    interaction_type = case_when(
      p_value <= 0.05 & r_value < 0 ~ "Significant Negative (Competitive)",
      p_value <= 0.05 & r_value > 0 ~ "Significant Positive (Facilitative)",
      TRUE ~ "Non-Significant"
    )
  )

interaction_summary <- summary_table %>%
  group_by(interaction_type) %>%
  summarise(Count = n(), .groups = "drop") %>%
  mutate(Percentage = round((Count / sum(Count)) * 100, 2))

kable(interaction_summary, col.names = c("Interaction Category", "Count", "Percentage (%)"), 
      caption = "Summary Metrics of Evaluated Inter-Species Relationships")
Summary Metrics of Evaluated Inter-Species Relationships
Interaction Category Count Percentage (%)
Non-Significant 1112 90.26
Significant Negative (Competitive) 26 2.11
Significant Positive (Facilitative) 94 7.63

7. Individual Exotic Species Interaction Profiles

We evaluate which particular exotic entities hold the widest tracking footprint of significant correlations across native plants.

Code
interaction_df <- as.data.frame(as.table(sub_r)) %>% rename(Native_Species = Var1, Exotic_Species = Var2, r_value = Freq)
p_values_df    <- as.data.frame(as.table(sub_p)) %>% rename(Native_Species = Var1, Exotic_Species = Var2, p_value = Freq)

sig_interactions <- interaction_df %>%
  left_join(p_values_df, by = c("Native_Species", "Exotic_Species")) %>%
  filter(p_value <= 0.05) %>%
  mutate(correlation_direction = ifelse(r_value < 0, "Negative (Competitive)", "Positive (Facilitative)"))

exotic_order <- sig_interactions %>%
  group_by(Exotic_Species) %>%
  summarise(total_sig = n(), .groups = "drop")

sig_interactions <- sig_interactions %>%
  left_join(exotic_order, by = "Exotic_Species") %>%
  mutate(Exotic_Species = reorder(Exotic_Species, total_sig))

ggplot(sig_interactions, aes(x = r_value, y = Exotic_Species, color = correlation_direction)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50", size = 0.8) +
  geom_jitter(width = 0, height = 0.2, size = 3, alpha = 0.75) +
  scale_color_manual(values = c("Negative (Competitive)" = "#d32f2f", "Positive (Facilitative)" = "#1976d2")) +
  xlim(-0.5, 0.5) +
  labs(
    title = "Exotic Species Influence and Jitter Profiles",
    x = "Spearman Correlation Coefficient (r)",
    y = "Exotic Species",
    color = "Correlation Direction"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(face = "italic"),
    plot.title = element_text(face = "bold", size = 13),
    legend.position = "bottom"
  )

Ecological Insight: The positive interaction frequencies distinctly outweigh competitive exclusions. This indicates that during early natural regeneration, shared environmental niches and facilitation play a larger role in shaping community structure than direct competition.


8. Multiplicative Regional Beta Diversity Partitioning

To determine if exotic species are driving community homogenization (reducing variation among plots) or differentiation (increasing variation among plots), we partition diversity metrics into Alpha, Beta, and Gamma components.

Partitioning Matrix Executions

Code
native_matrix <- plot_species %>%
  filter(type == "Native") %>%
  dplyr::select(plot_id, species_name, cover_pct) %>%
  pivot_wider(names_from = species_name, values_from = cover_pct, values_fill = 0) %>%
  as.data.frame()
rownames(native_matrix) <- native_matrix$plot_id
native_matrix$plot_id <- NULL

combined_matrix <- plot_species %>%
  dplyr::select(plot_id, species_name, cover_pct) %>%
  pivot_wider(names_from = species_name, values_from = cover_pct, values_fill = 0) %>%
  as.data.frame()
rownames(combined_matrix) <- combined_matrix$plot_id
combined_matrix$plot_id <- NULL

get_hill_partition <- function(matrix_input) {
  q0_part <- hill_taxa_parti(matrix_input, q = 0)
  q1_part <- hill_taxa_parti(matrix_input, q = 1)
  q2_part <- hill_taxa_parti(matrix_input, q = 2)
  
  data.frame(
    Order  = c("q = 0 (Richness)", "q = 1 (Shannon)", "q = 2 (Simpson)"),
    Alpha  = c(q0_part$TD_alpha, q1_part$TD_alpha, q2_part$TD_alpha),
    Gamma  = c(q0_part$TD_gamma, q1_part$TD_gamma, q2_part$TD_gamma),
    Beta   = c(q0_part$TD_beta,  q1_part$TD_beta,  q2_part$TD_beta)
  )
}

native_partition   <- get_hill_partition(native_matrix) %>% mutate(Community = "Native Only")
combined_partition <- get_hill_partition(combined_matrix) %>% mutate(Community = "Combined (Nat + Exo)")

comparison_table <- bind_rows(native_partition, combined_partition) %>%
  dplyr::select(Community, Order, Alpha, Gamma, Beta) %>%
  arrange(Order, Community)

comparison_table %>%
  rename(
    `Community Profile` = Community,
    `Diversity Order` = Order,
    `Mean Alpha (α)` = Alpha,
    `Regional Gamma (γ)` = Gamma,
    `Beta Diversity (β)` = Beta
  ) %>%
  kable(format = "markdown", digits = 3, caption = "Multiplicative Hill Diversity Partitioning Summary")
Multiplicative Hill Diversity Partitioning Summary
Community Profile Diversity Order Mean Alpha (α) Regional Gamma (γ) Beta Diversity (β)
Combined (Nat + Exo) q = 0 (Richness) 25.620 78.000 3.044
Native Only q = 0 (Richness) 14.780 56.000 3.789
Combined (Nat + Exo) q = 1 (Shannon) 16.114 25.712 1.596
Native Only q = 1 (Shannon) 8.660 13.979 1.614
Combined (Nat + Exo) q = 2 (Simpson) 9.685 12.545 1.295
Native Only q = 2 (Simpson) 5.307 6.545 1.233