R Spatial Final Lab: Biodiversity in Allacher and Davies Sites

Step 1:Set up project file, load in packages and import data

wd <- getwd()
data_dir <- file.path(wd,"Data")
list.files("Data")
## [1] "ALLACHER.xls" "DAVIES.xls"
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(stringr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.6
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
## Warning: package 'vegan' was built under R version 4.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.5.3
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
site1_allacher_raw <- read_excel(file.path(data_dir, "ALLACHER.xls"))
## New names:
## • `Stem dbh` -> `Stem dbh...6`
## • `Stem dbh` -> `Stem dbh...7`
## • `Stem dbh` -> `Stem dbh...8`
## • `Stem dbh` -> `Stem dbh...9`
## • `Stem dbh` -> `Stem dbh...10`
## • `Stem dbh` -> `Stem dbh...11`
## • `Stem dbh` -> `Stem dbh...12`
## • `Stem dbh` -> `Stem dbh...13`
## • `Stem dbh` -> `Stem dbh...14`
## • `Stem dbh` -> `Stem dbh...15`
## • `Stem dbh` -> `Stem dbh...16`
## • `Stem dbh` -> `Stem dbh...17`
## • `Stem dbh` -> `Stem dbh...18`
## • `Stem dbh` -> `Stem dbh...19`
## • `Stem dbh` -> `Stem dbh...20`
## • `Stem dbh` -> `Stem dbh...21`
## • `Stem dbh` -> `Stem dbh...22`
## • `Stem dbh` -> `Stem dbh...23`
## • `Stem dbh` -> `Stem dbh...24`
## • `Stem dbh` -> `Stem dbh...25`
site2_davies_raw <- read_excel(file.path(data_dir, "DAVIES.xls"))
## New names:
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`

Step 2:Clean data from null columns and rows then combine data sets

colnames(site1_allacher_raw) #Check column names
##  [1] "Line"          "Family"        "Genus"         "Species"      
##  [5] "N(Ind)"        "Stem dbh...6"  "Stem dbh...7"  "Stem dbh...8" 
##  [9] "Stem dbh...9"  "Stem dbh...10" "Stem dbh...11" "Stem dbh...12"
## [13] "Stem dbh...13" "Stem dbh...14" "Stem dbh...15" "Stem dbh...16"
## [17] "Stem dbh...17" "Stem dbh...18" "Stem dbh...19" "Stem dbh...20"
## [21] "Stem dbh...21" "Stem dbh...22" "Stem dbh...23" "Stem dbh...24"
## [25] "Stem dbh...25"
colnames(site2_davies_raw)
##  [1] "Line"         "Family"       "Genus"        "species"      "voucher1"    
##  [6] "voucher2"     "voucher3"     "voucher4"     "voucher5"     "voucher...10"
## [11] "voucher...11" "voucher...12" "LIANA"        "N(IND)"       "STEMDBH...15"
## [16] "STEMDBH...16" "STEMDBH...17" "STEMDBH...18" "STEMDBH...19" "STEMDBH...20"
## [21] "STEMDBH...21" "STEMDBH...22"
site1_allacher_clean <- site1_allacher_raw %>%
  select(Line, Family, Genus, Species, `N(Ind)`, starts_with("Stem dbh")) %>%
  mutate(
    across(starts_with("Stem dbh"), as.numeric)
  ) %>%
  pivot_longer(
    cols = starts_with("Stem dbh"),
    names_to = "stem_id",
    values_to = "dbh"
  ) %>%
  filter(!is.na(dbh)) %>%
  mutate(
    site = "Allacher",
    transect = as.numeric(Line),
    species = str_squish(Species)
  ) %>%
  select(site, transect, species, dbh)

site2_davies_clean <- site2_davies_raw %>%
  select(Line, Family, Genus, species, `N(IND)`, starts_with("STEMDBH")) %>%
  mutate(
    across(starts_with("STEMDBH"), as.numeric)) %>%
  pivot_longer(
    cols = starts_with("STEMDBH"),
    names_to = "stem_id",
    values_to = "dbh"
  ) %>%
  filter(!is.na(dbh)) %>%
  mutate(
    site = "Davies",
    transect = as.numeric(Line),
    species = str_squish(species)
  ) %>%
  select(site, transect, species, dbh)

all_data <- bind_rows(site1_allacher_clean, site2_davies_clean) %>%  
  mutate(
    transect = as.numeric(transect)
  ) %>%
  arrange(site, transect)

Step 3:Count Individuals by Species and Transect and Calculate Biodiversity Indices by Transect

species_count <- all_data %>%
  group_by(site, transect, species) %>%
  summarise(n = n(), groups = "drop") %>%
  arrange(site, transect, species)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site, transect, and species.
## ℹ Output is grouped by site and transect.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect, species))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
biodiversity_transect <- species_count %>%
  group_by(site, transect) %>%
  mutate(
    N = sum(n),
    S = n_distinct(species),
    pi = n / N
  ) %>%
  summarise(
    N = first(N),
    S = first(S),
    D = sum(pi^2),
    simpson_1_minus_D = 1 - D,
    simpson_reciprocal = 1 / D,
    simpson_evenness = simpson_reciprocal / S,
    shannon_H = -sum(pi * log(pi)),
    shannon_evenness = shannon_H / log(S),
    .groups = "drop"
  ) %>%
  mutate(
    transect = as.numeric(transect)
  ) %>%
  arrange(site, transect)

biodiversity_transect <- biodiversity_transect %>%
  arrange(site, as.numeric(transect))

Step 4:Make tables for each Indices

## Simpson Table
simpson_table <- biodiversity_transect %>%
  select(
    Site = site, 
    Transect = transect, 
    N,
    S,
    D, 
    'Simpson 1-D' = simpson_1_minus_D, 
    'Simpson 1/D' = simpson_reciprocal, 
    'Simpson Evenness' = simpson_evenness
    )

kable(
  simpson_table, 
  digits = 4,
  caption = "Table 1: Simpson Biodiversity Indices by Transect for Allacher and Davies") %>%
  kable_styling(full_width = FALSE)
Table 1: Simpson Biodiversity Indices by Transect for Allacher and Davies
Site Transect N S D Simpson 1-D Simpson 1/D Simpson Evenness
Allacher 1 28 10 0.1224 0.8776 8.1667 0.8167
Allacher 2 26 8 0.2130 0.7870 4.6944 0.5868
Allacher 3 13 7 0.1716 0.8284 5.8276 0.8325
Allacher 4 26 9 0.2337 0.7663 4.2785 0.4754
Allacher 5 34 9 0.1972 0.8028 5.0702 0.5634
Allacher 6 33 9 0.2525 0.7475 3.9600 0.4400
Allacher 7 40 8 0.3275 0.6725 3.0534 0.3817
Allacher 8 35 10 0.1984 0.8016 5.0412 0.5041
Allacher 9 25 6 0.2160 0.7840 4.6296 0.7716
Allacher 10 16 8 0.2422 0.7578 4.1290 0.5161
Davies 1 30 20 0.0689 0.9311 14.5161 0.7258
Davies 2 30 21 0.0667 0.9333 15.0000 0.7143
Davies 3 51 31 0.0527 0.9473 18.9854 0.6124
Davies 4 30 24 0.0511 0.9489 19.5652 0.8152
Davies 5 38 23 0.0886 0.9114 11.2813 0.4905
Davies 6 44 30 0.0403 0.9597 24.8205 0.8274
Davies 7 44 28 0.0692 0.9308 14.4478 0.5160
Davies 8 20 15 0.0800 0.9200 12.5000 0.8333
Davies 9 50 24 0.0768 0.9232 13.0208 0.5425
Davies 10 38 27 0.0526 0.9474 19.0000 0.7037
## Shannon Table
shannon_table <- biodiversity_transect %>%
  select(
    Site = site,
    Transect = transect,
    N,
    S,
    'Shannon H' = shannon_H,
    'Shannon Evenness' = shannon_evenness
  )

kable(
  shannon_table,
  digits = 4,
  caption = "Table 2: Shannon Biodiversity Indices by Transect for Allacher and Davies"
) %>%
  kable_styling(full_width = FALSE)
Table 2: Shannon Biodiversity Indices by Transect for Allacher and Davies
Site Transect N S Shannon H Shannon Evenness
Allacher 1 28 10 2.1844 0.9487
Allacher 2 26 8 1.7641 0.8484
Allacher 3 13 7 1.8446 0.9479
Allacher 4 26 9 1.7969 0.8178
Allacher 5 34 9 1.8692 0.8507
Allacher 6 33 9 1.6562 0.7538
Allacher 7 40 8 1.4421 0.6935
Allacher 8 35 10 1.9298 0.8381
Allacher 9 25 6 1.6651 0.9293
Allacher 10 16 8 1.7480 0.8406
Davies 1 30 20 2.8557 0.9533
Davies 2 30 21 2.9019 0.9532
Davies 3 51 31 3.2049 0.9333
Davies 4 30 24 3.0891 0.9720
Davies 5 38 23 2.8112 0.8966
Davies 6 44 30 3.3116 0.9737
Davies 7 44 28 3.0708 0.9216
Davies 8 20 15 2.6230 0.9686
Davies 9 50 24 2.8616 0.9004
Davies 10 38 27 3.1429 0.9536

Step 5:Site-Level Averages for Biodiversity

site_summary <- biodiversity_transect %>%
  group_by(site) %>%
  summarise(
    avg_N = mean(N),
    avg_S = mean(S),
    avg_simpson_D = mean(D),
    avg_simpson_1_minus_D = mean(simpson_1_minus_D),
    avg_simpson_reciprocal = mean(simpson_reciprocal),
    avg_simpson_evenness = mean(simpson_evenness),
    avg_shannon_H = mean(shannon_H),
    avg_shannon_evenness = mean(shannon_evenness),
    .groups = "drop"
  )

site_summary_table <- site_summary %>%
  select(
    Site = site,
    'Average N' = avg_N,
    'Average Species Richness' = avg_S,
    'Average Simpson D' = avg_simpson_D,
    'Average Simpson 1-D' = avg_simpson_1_minus_D,
    'Average Simpson 1/D' = avg_simpson_reciprocal,
    'Average Simpson Evenness' = avg_simpson_evenness,
    'Average Shannon H' = avg_shannon_H,
    'Average Shannon Evenness' = avg_shannon_evenness
  )

kable(
  site_summary_table, 
  digits = 4, 
  caption = "Table 3: Average Biodiversity Indices by Site") %>%
  kable_styling(full_width = FALSE)
Table 3: Average Biodiversity Indices by Site
Site Average N Average Species Richness Average Simpson D Average Simpson 1-D Average Simpson 1/D Average Simpson Evenness Average Shannon H Average Shannon Evenness
Allacher 27.6 8.4 0.2175 0.7825 4.8851 0.5888 1.7901 0.8469
Davies 37.5 24.3 0.0647 0.9353 16.3137 0.6781 2.9873 0.9426

Step 6:Make Graphs

## DBH Graph by Species and Transect
ggplot(all_data,
       aes(x = dbh, fill = species)) +
  geom_histogram(
    binwidth = 5,
    color = "black",
    linewidth = 0.15
  ) +
  facet_grid(site ~ transect) +
  scale_x_continuous(
    limits = c(0, 80),
    breaks = c(0, 40, 80)
  ) +
  labs(
    title = "Diameter at Breast Height Distribution by Species and Transect",
    x = "DBH (cm)",
    y = "Number of Individuals",
    fill = "Species"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 8),
    strip.text = element_text(face = "bold", size = 10),
    legend.position = "none"
  )
## Warning: Removed 654 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## DBH by Site and Transect (Cleaned Version)
ggplot(all_data, aes(x= dbh, fill = site)) +
  geom_histogram(
    binwidth = 5,
    color = "black",
    linewidth = 0.25
  ) +
  facet_grid(site ~ transect) +
  scale_x_continuous(
    breaks = seq(0, 80, by = 40)
  ) +
  coord_cartesian(xlim = c(0, 80)) +
  scale_fill_manual(
    values = c(
      "Allacher" = "#4C78A8",
      "Davies" = "#59A14F"
    )
  ) +
  labs(
    title = "Diameter at Breast Height Distribution by Site and Transect",
    x = "DBH (cm)",
    y = "Number of Individuals",
    fill = "Site"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 8),
    strip.text = element_text(size = 10, face = "bold"),
    legend.position = "bottom"
  )

## Extra Graph: Relationship Between Average DBH and Shannon Diversity
avg_dbh <- all_data %>%
  group_by(site, transect) %>%
  summarise(
    avg_dbh = mean(dbh, na.rm = TRUE),
    .group = "drop"
  ) #Average DHB by transect
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site and transect.
## ℹ Output is grouped by site.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.
biodiversity_size <- biodiversity_transect %>%
  left_join(avg_dbh, by = c("site", "transect"))

ggplot(biodiversity_size,
       aes(x = avg_dbh,
           y = shannon_H,
           color = site,
           label = transect)) +
  geom_point(size = 4) +
  geom_text(vjust = -0.8, size = 3) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_manual(
    values = c(
      "Allacher" = "#F87660",
      "Davies" = "#00BFC4"
    )
  ) +
  labs(
    title = "Relationship Between Average Diameter at Breast Height and Shannon Diversity",
    x = "Average DBH (cm)",
    y = "Shannon Diversity (H)",
    color = "Site"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, face = "bold", size = 12),
    axis.title = element_text(face = "bold", size = 12),
    legend.title = element_text(face = "bold")
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Step 7:Save Files

write.csv(
  biodiversity_transect,
  "biodiversity_transect_table.csv",
  row.names = FALSE
)

write.csv(
  site_summary_table,
  "site_summary_table.csv",
  row.names = FALSE
)

ggsave(
  filename = "dbh_species_transect_graph.png",
  width = 12,
  height = 6,
  dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
ggsave(
  filename = "dbh_distribution_by_site_transect.png",
  width = 12,
  height = 6,
  dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
ggsave(
  filename = "dbh_shannon_relationship.png",
  width = 9,
  height = 6,
  dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?