Final Biodiversity Project

R Spatial Final Lab: Biodiversity in Allacher and Davies Sites

Step 1:Set up project file, load in packages and import data

wd <- getwd()
data_dir <- file.path(wd,"Data")
list.files("Data")

## [1] "ALLACHER.xls" "DAVIES.xls"

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(stringr)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.6
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(vegan)

## Warning: package 'vegan' was built under R version 4.5.3

## Loading required package: permute

## Warning: package 'permute' was built under R version 4.5.3

library(knitr)
library(kableExtra)

## Warning: package 'kableExtra' was built under R version 4.5.3

## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows

site1_allacher_raw <- read_excel(file.path(data_dir, "ALLACHER.xls"))

## New names:
## • `Stem dbh` -> `Stem dbh...6`
## • `Stem dbh` -> `Stem dbh...7`
## • `Stem dbh` -> `Stem dbh...8`
## • `Stem dbh` -> `Stem dbh...9`
## • `Stem dbh` -> `Stem dbh...10`
## • `Stem dbh` -> `Stem dbh...11`
## • `Stem dbh` -> `Stem dbh...12`
## • `Stem dbh` -> `Stem dbh...13`
## • `Stem dbh` -> `Stem dbh...14`
## • `Stem dbh` -> `Stem dbh...15`
## • `Stem dbh` -> `Stem dbh...16`
## • `Stem dbh` -> `Stem dbh...17`
## • `Stem dbh` -> `Stem dbh...18`
## • `Stem dbh` -> `Stem dbh...19`
## • `Stem dbh` -> `Stem dbh...20`
## • `Stem dbh` -> `Stem dbh...21`
## • `Stem dbh` -> `Stem dbh...22`
## • `Stem dbh` -> `Stem dbh...23`
## • `Stem dbh` -> `Stem dbh...24`
## • `Stem dbh` -> `Stem dbh...25`

site2_davies_raw <- read_excel(file.path(data_dir, "DAVIES.xls"))

## New names:
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`

Step 2:Clean data from null columns and rows then combine data sets

colnames(site1_allacher_raw) #Check column names

##  [1] "Line"          "Family"        "Genus"         "Species"      
##  [5] "N(Ind)"        "Stem dbh...6"  "Stem dbh...7"  "Stem dbh...8" 
##  [9] "Stem dbh...9"  "Stem dbh...10" "Stem dbh...11" "Stem dbh...12"
## [13] "Stem dbh...13" "Stem dbh...14" "Stem dbh...15" "Stem dbh...16"
## [17] "Stem dbh...17" "Stem dbh...18" "Stem dbh...19" "Stem dbh...20"
## [21] "Stem dbh...21" "Stem dbh...22" "Stem dbh...23" "Stem dbh...24"
## [25] "Stem dbh...25"

colnames(site2_davies_raw)

##  [1] "Line"         "Family"       "Genus"        "species"      "voucher1"    
##  [6] "voucher2"     "voucher3"     "voucher4"     "voucher5"     "voucher...10"
## [11] "voucher...11" "voucher...12" "LIANA"        "N(IND)"       "STEMDBH...15"
## [16] "STEMDBH...16" "STEMDBH...17" "STEMDBH...18" "STEMDBH...19" "STEMDBH...20"
## [21] "STEMDBH...21" "STEMDBH...22"

site1_allacher_clean <- site1_allacher_raw %>%
  select(Line, Family, Genus, Species, `N(Ind)`, starts_with("Stem dbh")) %>%
  mutate(
    across(starts_with("Stem dbh"), as.numeric)
  ) %>%
  pivot_longer(
    cols = starts_with("Stem dbh"),
    names_to = "stem_id",
    values_to = "dbh"
  ) %>%
  filter(!is.na(dbh)) %>%
  mutate(
    site = "Allacher",
    transect = as.numeric(Line),
    species = str_squish(Species)
  ) %>%
  select(site, transect, species, dbh)

site2_davies_clean <- site2_davies_raw %>%
  select(Line, Family, Genus, species, `N(IND)`, starts_with("STEMDBH")) %>%
  mutate(
    across(starts_with("STEMDBH"), as.numeric)) %>%
  pivot_longer(
    cols = starts_with("STEMDBH"),
    names_to = "stem_id",
    values_to = "dbh"
  ) %>%
  filter(!is.na(dbh)) %>%
  mutate(
    site = "Davies",
    transect = as.numeric(Line),
    species = str_squish(species)
  ) %>%
  select(site, transect, species, dbh)

all_data <- bind_rows(site1_allacher_clean, site2_davies_clean) %>%  
  mutate(
    transect = as.numeric(transect)
  ) %>%
  arrange(site, transect)

Step 3:Count Individuals by Species and Transect and Calculate Biodiversity Indices by Transect

species_count <- all_data %>%
  group_by(site, transect, species) %>%
  summarise(n = n(), groups = "drop") %>%
  arrange(site, transect, species)

## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site, transect, and species.
## ℹ Output is grouped by site and transect.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect, species))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.

biodiversity_transect <- species_count %>%
  group_by(site, transect) %>%
  mutate(
    N = sum(n),
    S = n_distinct(species),
    pi = n / N
  ) %>%
  summarise(
    N = first(N),
    S = first(S),
    D = sum(pi^2),
    simpson_1_minus_D = 1 - D,
    simpson_reciprocal = 1 / D,
    simpson_evenness = simpson_reciprocal / S,
    shannon_H = -sum(pi * log(pi)),
    shannon_evenness = shannon_H / log(S),
    .groups = "drop"
  ) %>%
  mutate(
    transect = as.numeric(transect)
  ) %>%
  arrange(site, transect)

biodiversity_transect <- biodiversity_transect %>%
  arrange(site, as.numeric(transect))

Step 4:Make tables for each Indices

## Simpson Table
simpson_table <- biodiversity_transect %>%
  select(
    Site = site, 
    Transect = transect, 
    N,
    S,
    D, 
    'Simpson 1-D' = simpson_1_minus_D, 
    'Simpson 1/D' = simpson_reciprocal, 
    'Simpson Evenness' = simpson_evenness
    )

kable(
  simpson_table, 
  digits = 4,
  caption = "Table 1: Simpson Biodiversity Indices by Transect for Allacher and Davies") %>%
  kable_styling(full_width = FALSE)

Table 1: Simpson Biodiversity Indices by Transect for Allacher and Davies
Site	Transect	N	S	D	Simpson 1-D	Simpson 1/D	Simpson Evenness
Allacher	1	28	10	0.1224	0.8776	8.1667	0.8167
Allacher	2	26	8	0.2130	0.7870	4.6944	0.5868
Allacher	3	13	7	0.1716	0.8284	5.8276	0.8325
Allacher	4	26	9	0.2337	0.7663	4.2785	0.4754
Allacher	5	34	9	0.1972	0.8028	5.0702	0.5634
Allacher	6	33	9	0.2525	0.7475	3.9600	0.4400
Allacher	7	40	8	0.3275	0.6725	3.0534	0.3817
Allacher	8	35	10	0.1984	0.8016	5.0412	0.5041
Allacher	9	25	6	0.2160	0.7840	4.6296	0.7716
Allacher	10	16	8	0.2422	0.7578	4.1290	0.5161
Davies	1	30	20	0.0689	0.9311	14.5161	0.7258
Davies	2	30	21	0.0667	0.9333	15.0000	0.7143
Davies	3	51	31	0.0527	0.9473	18.9854	0.6124
Davies	4	30	24	0.0511	0.9489	19.5652	0.8152
Davies	5	38	23	0.0886	0.9114	11.2813	0.4905
Davies	6	44	30	0.0403	0.9597	24.8205	0.8274
Davies	7	44	28	0.0692	0.9308	14.4478	0.5160
Davies	8	20	15	0.0800	0.9200	12.5000	0.8333
Davies	9	50	24	0.0768	0.9232	13.0208	0.5425
Davies	10	38	27	0.0526	0.9474	19.0000	0.7037

## Shannon Table
shannon_table <- biodiversity_transect %>%
  select(
    Site = site,
    Transect = transect,
    N,
    S,
    'Shannon H' = shannon_H,
    'Shannon Evenness' = shannon_evenness
  )

kable(
  shannon_table,
  digits = 4,
  caption = "Table 2: Shannon Biodiversity Indices by Transect for Allacher and Davies"
) %>%
  kable_styling(full_width = FALSE)

Table 2: Shannon Biodiversity Indices by Transect for Allacher and Davies
Site	Transect	N	S	Shannon H	Shannon Evenness
Allacher	1	28	10	2.1844	0.9487
Allacher	2	26	8	1.7641	0.8484
Allacher	3	13	7	1.8446	0.9479
Allacher	4	26	9	1.7969	0.8178
Allacher	5	34	9	1.8692	0.8507
Allacher	6	33	9	1.6562	0.7538
Allacher	7	40	8	1.4421	0.6935
Allacher	8	35	10	1.9298	0.8381
Allacher	9	25	6	1.6651	0.9293
Allacher	10	16	8	1.7480	0.8406
Davies	1	30	20	2.8557	0.9533
Davies	2	30	21	2.9019	0.9532
Davies	3	51	31	3.2049	0.9333
Davies	4	30	24	3.0891	0.9720
Davies	5	38	23	2.8112	0.8966
Davies	6	44	30	3.3116	0.9737
Davies	7	44	28	3.0708	0.9216
Davies	8	20	15	2.6230	0.9686
Davies	9	50	24	2.8616	0.9004
Davies	10	38	27	3.1429	0.9536

Step 5:Site-Level Averages for Biodiversity

site_summary <- biodiversity_transect %>%
  group_by(site) %>%
  summarise(
    avg_N = mean(N),
    avg_S = mean(S),
    avg_simpson_D = mean(D),
    avg_simpson_1_minus_D = mean(simpson_1_minus_D),
    avg_simpson_reciprocal = mean(simpson_reciprocal),
    avg_simpson_evenness = mean(simpson_evenness),
    avg_shannon_H = mean(shannon_H),
    avg_shannon_evenness = mean(shannon_evenness),
    .groups = "drop"
  )

site_summary_table <- site_summary %>%
  select(
    Site = site,
    'Average N' = avg_N,
    'Average Species Richness' = avg_S,
    'Average Simpson D' = avg_simpson_D,
    'Average Simpson 1-D' = avg_simpson_1_minus_D,
    'Average Simpson 1/D' = avg_simpson_reciprocal,
    'Average Simpson Evenness' = avg_simpson_evenness,
    'Average Shannon H' = avg_shannon_H,
    'Average Shannon Evenness' = avg_shannon_evenness
  )

kable(
  site_summary_table, 
  digits = 4, 
  caption = "Table 3: Average Biodiversity Indices by Site") %>%
  kable_styling(full_width = FALSE)

Table 3: Average Biodiversity Indices by Site
Site	Average N	Average Species Richness	Average Simpson D	Average Simpson 1-D	Average Simpson 1/D	Average Simpson Evenness	Average Shannon H	Average Shannon Evenness
Allacher	27.6	8.4	0.2175	0.7825	4.8851	0.5888	1.7901	0.8469
Davies	37.5	24.3	0.0647	0.9353	16.3137	0.6781	2.9873	0.9426

Step 6:Make Graphs

## DBH Graph by Species and Transect
ggplot(all_data,
       aes(x = dbh, fill = species)) +
  geom_histogram(
    binwidth = 5,
    color = "black",
    linewidth = 0.15
  ) +
  facet_grid(site ~ transect) +
  scale_x_continuous(
    limits = c(0, 80),
    breaks = c(0, 40, 80)
  ) +
  labs(
    title = "Diameter at Breast Height Distribution by Species and Transect",
    x = "DBH (cm)",
    y = "Number of Individuals",
    fill = "Species"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.title = element_text(face = "bold", size = 12),
    axis.text = element_text(size = 8),
    strip.text = element_text(face = "bold", size = 10),
    legend.position = "none"
  )

## Warning: Removed 654 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## DBH by Site and Transect (Cleaned Version)
ggplot(all_data, aes(x= dbh, fill = site)) +
  geom_histogram(
    binwidth = 5,
    color = "black",
    linewidth = 0.25
  ) +
  facet_grid(site ~ transect) +
  scale_x_continuous(
    breaks = seq(0, 80, by = 40)
  ) +
  coord_cartesian(xlim = c(0, 80)) +
  scale_fill_manual(
    values = c(
      "Allacher" = "#4C78A8",
      "Davies" = "#59A14F"
    )
  ) +
  labs(
    title = "Diameter at Breast Height Distribution by Site and Transect",
    x = "DBH (cm)",
    y = "Number of Individuals",
    fill = "Site"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 8),
    strip.text = element_text(size = 10, face = "bold"),
    legend.position = "bottom"
  )

## Extra Graph: Relationship Between Average DBH and Shannon Diversity
avg_dbh <- all_data %>%
  group_by(site, transect) %>%
  summarise(
    avg_dbh = mean(dbh, na.rm = TRUE),
    .group = "drop"
  ) #Average DHB by transect

## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site and transect.
## ℹ Output is grouped by site.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.

biodiversity_size <- biodiversity_transect %>%
  left_join(avg_dbh, by = c("site", "transect"))

ggplot(biodiversity_size,
       aes(x = avg_dbh,
           y = shannon_H,
           color = site,
           label = transect)) +
  geom_point(size = 4) +
  geom_text(vjust = -0.8, size = 3) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_manual(
    values = c(
      "Allacher" = "#F87660",
      "Davies" = "#00BFC4"
    )
  ) +
  labs(
    title = "Relationship Between Average Diameter at Breast Height and Shannon Diversity",
    x = "Average DBH (cm)",
    y = "Shannon Diversity (H)",
    color = "Site"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0, face = "bold", size = 12),
    axis.title = element_text(face = "bold", size = 12),
    legend.title = element_text(face = "bold")
  )

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Step 7:Save Files

write.csv(
  biodiversity_transect,
  "biodiversity_transect_table.csv",
  row.names = FALSE
)

write.csv(
  site_summary_table,
  "site_summary_table.csv",
  row.names = FALSE
)

ggsave(
  filename = "dbh_species_transect_graph.png",
  width = 12,
  height = 6,
  dpi = 300
)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggsave(
  filename = "dbh_distribution_by_site_transect.png",
  width = 12,
  height = 6,
  dpi = 300
)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

ggsave(
  filename = "dbh_shannon_relationship.png",
  width = 9,
  height = 6,
  dpi = 300
)

## `geom_smooth()` using formula = 'y ~ x'

## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Final Biodiversity Project

Angelica Chicas

2026-05-14

R Spatial Final Lab: Biodiversity in Allacher and Davies Sites

Step 1:Set up project file, load in packages and import data

Step 2:Clean data from null columns and rows then combine data sets

Step 3:Count Individuals by Species and Transect and Calculate Biodiversity Indices by Transect

Step 4:Make tables for each Indices

Step 5:Site-Level Averages for Biodiversity

Step 6:Make Graphs

Step 7:Save Files