RStudio Final Project: Biodiversity Progress Report

Step 1: Set up project file, load packages and data from the two sites

# Resolve the data folder relative to the current working directory and list
# its contents as a quick check that both site workbooks are present.
wd <- getwd()
data_dir <- file.path(wd, "Data")
# List the same directory that the read_excel() calls below read from, rather
# than a separately spelled relative path.
list.files(data_dir)
## [1] "ALLACHER.xls" "DAVIES.xls"
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(stringr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.6
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
## Warning: package 'vegan' was built under R version 4.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.5.3
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Install vegan and kableExtra only when they are not already available.
# Calling install.packages() unconditionally re-runs on every knit and, with
# the packages already attached, only produces "package is in use" warnings.
# NOTE(review): library(vegan) and library(kableExtra) are called earlier in
# this script and would already have failed if either package were missing;
# consider moving this check above the library() calls or removing it.
for (pkg in c("vegan", "kableExtra")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# Read the Allacher workbook from the data folder. The repeated "Stem dbh"
# headers are made unique by read_excel(), which appends the column position.
site1_allacher_raw <- read_excel(path = file.path(data_dir, "ALLACHER.xls"))
## New names:
## • `Stem dbh` -> `Stem dbh...6`
## • `Stem dbh` -> `Stem dbh...7`
## • `Stem dbh` -> `Stem dbh...8`
## • `Stem dbh` -> `Stem dbh...9`
## • `Stem dbh` -> `Stem dbh...10`
## • `Stem dbh` -> `Stem dbh...11`
## • `Stem dbh` -> `Stem dbh...12`
## • `Stem dbh` -> `Stem dbh...13`
## • `Stem dbh` -> `Stem dbh...14`
## • `Stem dbh` -> `Stem dbh...15`
## • `Stem dbh` -> `Stem dbh...16`
## • `Stem dbh` -> `Stem dbh...17`
## • `Stem dbh` -> `Stem dbh...18`
## • `Stem dbh` -> `Stem dbh...19`
## • `Stem dbh` -> `Stem dbh...20`
## • `Stem dbh` -> `Stem dbh...21`
## • `Stem dbh` -> `Stem dbh...22`
## • `Stem dbh` -> `Stem dbh...23`
## • `Stem dbh` -> `Stem dbh...24`
## • `Stem dbh` -> `Stem dbh...25`
# Read the Davies workbook from the data folder. The repeated "voucher" and
# "STEMDBH" headers are made unique by read_excel(), which appends the column
# position.
site2_davies_raw <- read_excel(path = file.path(data_dir, "DAVIES.xls"))
## New names:
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`

Step 2: Reformat data and clean rows and columns

# Inspect the Allacher column names before selecting and reshaping.
names(site1_allacher_raw)
##  [1] "Line"          "Family"        "Genus"         "Species"      
##  [5] "N(Ind)"        "Stem dbh...6"  "Stem dbh...7"  "Stem dbh...8" 
##  [9] "Stem dbh...9"  "Stem dbh...10" "Stem dbh...11" "Stem dbh...12"
## [13] "Stem dbh...13" "Stem dbh...14" "Stem dbh...15" "Stem dbh...16"
## [17] "Stem dbh...17" "Stem dbh...18" "Stem dbh...19" "Stem dbh...20"
## [21] "Stem dbh...21" "Stem dbh...22" "Stem dbh...23" "Stem dbh...24"
## [25] "Stem dbh...25"
# Inspect the Davies column names; note that they are spelled differently from
# the Allacher sheet (e.g. "species", "N(IND)", "STEMDBH").
names(site2_davies_raw)
##  [1] "Line"         "Family"       "Genus"        "species"      "voucher1"    
##  [6] "voucher2"     "voucher3"     "voucher4"     "voucher5"     "voucher...10"
## [11] "voucher...11" "voucher...12" "LIANA"        "N(IND)"       "STEMDBH...15"
## [16] "STEMDBH...16" "STEMDBH...17" "STEMDBH...18" "STEMDBH...19" "STEMDBH...20"
## [21] "STEMDBH...21" "STEMDBH...22"
# Reshape the Allacher sheet from one row per record (with one "Stem dbh"
# column per stem) into one row per measured stem, keeping only site,
# transect, species and dbh.
#
# NOTE(review): every non-missing DBH value becomes its own row, so later row
# counts are stem counts. If `N(Ind)` can be smaller than the number of stems
# recorded on a row, the "individual" counts used for the diversity indices
# will be inflated -- confirm against the data.
# NOTE(review): `species` is taken from the `Species` column alone. If that
# column holds only the epithet, taxa from different genera sharing an epithet
# would be merged -- confirm, and consider combining Genus and Species.
site1_allacher_clean <- site1_allacher_raw %>%
  # `N(Ind)` is not used by the final table, so it is no longer selected.
  select(Line, Species, starts_with("Stem dbh")) %>%
  # Coerce every DBH column to numeric before pivoting, as is done for the
  # Davies sheet, so a column read as text cannot break pivot_longer().
  mutate(across(starts_with("Stem dbh"), as.numeric)) %>%
  pivot_longer(
    cols = starts_with("Stem dbh"),
    names_to = "stem_id",
    values_to = "dbh"
  ) %>%
  # Empty stem slots are padding in the wide layout, not measurements.
  filter(!is.na(dbh)) %>%
  mutate(
    site = "Allacher",
    transect = as.character(Line),
    species = Species
  ) %>%
  select(site, transect, species, dbh)

# Build the long-format Davies stem table: one row per non-missing DBH value,
# labelled with site, transect and species.
site2_davies_clean <- site2_davies_raw %>%
  select(Line, species, starts_with("STEMDBH")) %>%
  # Force all DBH columns to numeric so they can be stacked into one column.
  mutate(across(starts_with("STEMDBH"), as.numeric)) %>%
  pivot_longer(
    cols = starts_with("STEMDBH"),
    names_to = "stem_id",
    values_to = "dbh",
    values_drop_na = TRUE
  ) %>%
  mutate(
    site = "Davies",
    transect = as.character(Line)
  ) %>%
  select(site, transect, species, dbh)

# Defensive re-cast: `transect` was already produced with as.character() when
# each clean table was built, so these calls leave the values unchanged and
# only guarantee that both tables store the transect id as text.
site1_allacher_clean <- mutate(
  site1_allacher_clean,
  transect = as.character(transect)
)

site2_davies_clean <- mutate(
  site2_davies_clean,
  transect = as.character(transect)
)

Step 3: Combine rows and count individuals (what biodiversity is based on)

# Stack the two cleaned site tables into a single long table, then tabulate
# the number of rows contributed by each site as a sanity check.
all_data <- list(site1_allacher_clean, site2_davies_clean) %>%
  bind_rows()

table(all_data[["site"]])
## 
## Allacher   Davies 
##      276      375
# Rows per transect label, pooled over both sites. `transect` is character at
# this point, so the labels are ordered as text ("1", "10", "2", ...).
table(all_data[["transect"]])
## 
##  1 10  2  3  4  5  6  7  8  9 
## 58 54 56 64 56 72 77 84 55 75
# Count the rows recorded for each species within each site/transect.
# `.groups = "drop"` (with the leading dot) returns an ungrouped tibble. The
# earlier spelling `groups = "drop"` was treated as a new summary column named
# `groups` and left the result grouped by site and transect.
species_count <- all_data %>%
  group_by(site, transect, species) %>%
  summarise(n = n(), .groups = "drop")
# Compute diversity indices for every site/transect from the per-species
# counts in `species_count` (column `n`):
#   N                  total count in the transect
#   S                  number of distinct species
#   D                  Simpson's index, sum of squared proportions (n / N)
#   simpson_1_minus_D  1 - D
#   simpson_reciprocal 1 / D
#   ESimpson           (1 / D) / S
#   H                  Shannon index, -sum(p * ln p)
#   EShannon           H / ln(S)
# Proportions are computed inline as n / N instead of through a helper column
# named `pi`, which shadowed the base constant `pi`.
biodiversity_transect <- species_count %>%
  group_by(site, transect) %>%
  summarise(
    N = sum(n),
    S = n_distinct(species),
    D = sum((n / N)^2),
    simpson_1_minus_D = 1 - D,
    simpson_reciprocal = 1 / D,
    ESimpson = simpson_reciprocal / S,
    H = -sum((n / N) * log(n / N)),
    # With a single species log(S) is 0 and H / log(S) evaluates to NaN (0/0);
    # report the evenness as an explicit missing value in that case.
    EShannon = if (S > 1) H / log(S) else NA_real_,
    .groups = "drop"
  )

# Order rows by site, then by transect number. `transect` is stored as text,
# so it is converted to numeric for sorting to place 10 after 9.
biodiversity_transect <- arrange(
  biodiversity_transect,
  site,
  as.numeric(transect)
)

Step 4: Make tables for indices and site-level averages

# Simpson Table
# Keep the Simpson-family columns for each transect and render them as a
# styled table.
simpson_table <- select(
  biodiversity_transect,
  site, transect, D, simpson_1_minus_D, simpson_reciprocal, ESimpson
)

kable_styling(
  kable(simpson_table, caption = "Simpson Biodiversity Indices by Transect")
)
Simpson Biodiversity Indices by Transect
site transect D simpson_1_minus_D simpson_reciprocal ESimpson
Allacher 1 0.1224490 0.8775510 8.166667 0.8166667
Allacher 2 0.2130178 0.7869822 4.694444 0.5868056
Allacher 3 0.1715976 0.8284024 5.827586 0.8325123
Allacher 4 0.2337278 0.7662722 4.278481 0.4753868
Allacher 5 0.1972318 0.8027682 5.070175 0.5633528
Allacher 6 0.2525253 0.7474747 3.960000 0.4400000
Allacher 7 0.3275000 0.6725000 3.053435 0.3816794
Allacher 8 0.1983673 0.8016327 5.041152 0.5041152
Allacher 9 0.2160000 0.7840000 4.629630 0.7716049
Allacher 10 0.2421875 0.7578125 4.129032 0.5161290
Davies 1 0.0688889 0.9311111 14.516129 0.7258065
Davies 2 0.0666667 0.9333333 15.000000 0.7142857
Davies 3 0.0526720 0.9473280 18.985401 0.6124323
Davies 4 0.0511111 0.9488889 19.565217 0.8152174
Davies 5 0.0886427 0.9113573 11.281250 0.4904891
Davies 6 0.0402893 0.9597107 24.820513 0.8273504
Davies 7 0.0692149 0.9307851 14.447761 0.5159915
Davies 8 0.0800000 0.9200000 12.500000 0.8333333
Davies 9 0.0768000 0.9232000 13.020833 0.5425347
Davies 10 0.0526316 0.9473684 19.000000 0.7037037
# Shannon Table
# Keep the Shannon index and its evenness for each transect and render them
# as a styled table.
shannon_table <- select(biodiversity_transect, site, transect, H, EShannon)

kable_styling(
  kable(shannon_table, caption = "Shannon Biodiversity Indices by Transect")
)
Shannon Biodiversity Indices by Transect
site transect H EShannon
Allacher 1 2.184439 0.9486897
Allacher 2 1.764122 0.8483632
Allacher 3 1.844621 0.9479479
Allacher 4 1.796925 0.8178160
Allacher 5 1.869245 0.8507301
Allacher 6 1.656192 0.7537652
Allacher 7 1.442080 0.6934940
Allacher 8 1.929821 0.8381107
Allacher 9 1.665142 0.9293335
Allacher 10 1.747966 0.8405941
Davies 1 2.855699 0.9532557
Davies 2 2.901909 0.9531573
Davies 3 3.204919 0.9332938
Davies 4 3.089055 0.9719959
Davies 5 2.811228 0.8965820
Davies 6 3.311589 0.9736540
Davies 7 3.070812 0.9215556
Davies 8 2.622996 0.9685922
Davies 9 2.861566 0.9004147
Davies 10 3.142909 0.9535996
# Average every transect-level index within each site (one output row per
# site), then render the result as a styled table rounded to 4 digits.
site_summary <- summarise(
  group_by(biodiversity_transect, site),
  avg_N = mean(N),
  avg_S = mean(S),
  avg_D = mean(D),
  avg_1_minus_D = mean(simpson_1_minus_D),
  avg_reciprocal = mean(simpson_reciprocal),
  avg_ESimpson = mean(ESimpson),
  avg_H = mean(H),
  avg_EShannon = mean(EShannon),
  .groups = "drop"
)

site_summary %>%
  kable(digits = 4, caption = "Average Biodiversity Indices by Site") %>%
  kable_styling()
Average Biodiversity Indices by Site
site avg_N avg_S avg_D avg_1_minus_D avg_reciprocal avg_ESimpson avg_H avg_EShannon
Allacher 27.6 8.4 0.2175 0.7825 4.8851 0.5888 1.7901 0.8469
Davies 37.5 24.3 0.0647 0.9353 16.3137 0.6781 2.9873 0.9426

Step 5: Make DBH graphs for at least one site

# Store transect as a number so the facet panels are ordered numerically
# rather than as text.
all_data$transect <- as.numeric(all_data$transect)

# Histogram of stem DBH for the Allacher site, one panel per transect,
# using 5-unit-wide bins and five panels per row.
all_data %>%
  filter(site == "Allacher") %>%
  ggplot(aes(x = dbh)) +
  geom_histogram(binwidth = 5, fill = "steelblue", color = "black") +
  facet_wrap(~ transect, ncol = 5) +
  labs(
    title = "Allacher: DBH Distribution by Transect",
    x = "DBH (cm)",
    y = "Number of individuals"
  ) +
  theme_minimal()