RStudio Final Project: Biodiversity Progress Report
Step 1: Set up project file, load packages and data from the two
sites
# Locate the project's Data folder relative to the current working directory
# and list its contents to confirm the input workbooks are present.
wd <- getwd()
data_dir <- file.path(wd, "Data")
# List via data_dir so the check looks at the same folder the reads use.
list.files(data_dir)
## [1] "ALLACHER.xls" "DAVIES.xls"
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(stringr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.6
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
## Warning: package 'vegan' was built under R version 4.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.5.3
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Install vegan and kableExtra only when they are missing. Calling
# install.packages() unconditionally on a package that is already loaded does
# nothing except emit a "package is in use and will not be installed" warning.
# NOTE(review): library(vegan) / library(kableExtra) run earlier in this
# script, so on a fresh machine this guard should be moved above them.
if (!requireNamespace("vegan", quietly = TRUE)) {
  install.packages("vegan")
}
if (!requireNamespace("kableExtra", quietly = TRUE)) {
  install.packages("kableExtra")
}
# Read the Allacher workbook from the Data folder. The sheet repeats the
# "Stem dbh" header, so readxl de-duplicates those names by appending the
# column position (see the "New names" message that follows).
site1_allacher_raw <- file.path(data_dir, "ALLACHER.xls") %>%
  read_excel()
## New names:
## • `Stem dbh` -> `Stem dbh...6`
## • `Stem dbh` -> `Stem dbh...7`
## • `Stem dbh` -> `Stem dbh...8`
## • `Stem dbh` -> `Stem dbh...9`
## • `Stem dbh` -> `Stem dbh...10`
## • `Stem dbh` -> `Stem dbh...11`
## • `Stem dbh` -> `Stem dbh...12`
## • `Stem dbh` -> `Stem dbh...13`
## • `Stem dbh` -> `Stem dbh...14`
## • `Stem dbh` -> `Stem dbh...15`
## • `Stem dbh` -> `Stem dbh...16`
## • `Stem dbh` -> `Stem dbh...17`
## • `Stem dbh` -> `Stem dbh...18`
## • `Stem dbh` -> `Stem dbh...19`
## • `Stem dbh` -> `Stem dbh...20`
## • `Stem dbh` -> `Stem dbh...21`
## • `Stem dbh` -> `Stem dbh...22`
## • `Stem dbh` -> `Stem dbh...23`
## • `Stem dbh` -> `Stem dbh...24`
## • `Stem dbh` -> `Stem dbh...25`
# Read the Davies workbook from the Data folder. The sheet repeats the
# "voucher" and "STEMDBH" headers, so readxl de-duplicates those names by
# appending the column position (see the "New names" message that follows).
site2_davies_raw <- file.path(data_dir, "DAVIES.xls") %>%
  read_excel()
## New names:
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
Step 3: Combine rows and count individuals (what biodiversity is
based on)
# Stack the two cleaned site tables into one data set, then tabulate the
# number of rows (individuals) contributed by each site.
all_data <- list(site1_allacher_clean, site2_davies_clean) %>%
  bind_rows()
table(all_data[["site"]])
##
## Allacher Davies
## 276 375
# Tabulate rows per transect label, pooled across both sites. The labels sort
# as text here ("10" lands between "1" and "2").
table(all_data[["transect"]])
##
## 1 10 2 3 4 5 6 7 8 9
## 58 54 56 64 56 72 77 84 55 75
# Count the individuals of each species within every site/transect
# combination (one row per site x transect x species, count in `n`).
# The argument must be spelled `.groups` (leading dot): a plain
# `groups = "drop"` is treated as a new summary column named `groups` and
# leaves the result grouped by site and transect.
species_count <- all_data %>%
  group_by(site, transect, species) %>%
  summarise(n = n(), .groups = "drop")
# Diversity metrics for each site/transect (one output row per transect):
#   N                  total individuals counted
#   S                  number of distinct species
#   D                  Simpson's index, the sum of squared species proportions
#   simpson_1_minus_D  1 - D
#   simpson_reciprocal 1 / D
#   ESimpson           (1 / D) / S
#   H                  Shannon index, -sum(p * log(p)) with natural log
#   EShannon           H / log(S)
biodiversity_transect <- species_count %>%
  group_by(site, transect) %>%
  # p = each species' share of the individuals in its transect
  mutate(p = n / sum(n)) %>%
  summarise(
    N = sum(n),
    S = n_distinct(species),
    D = sum(p^2),
    simpson_1_minus_D = 1 - D,
    simpson_reciprocal = 1 / D,
    ESimpson = simpson_reciprocal / S,
    H = -sum(p * log(p)),
    EShannon = H / log(S),
    .groups = "drop"
  )
# Order rows by site, then by transect number. Converting with as.numeric()
# sorts the transects numerically (2 before 10) rather than as text.
biodiversity_transect <- arrange(
  biodiversity_transect,
  site,
  as.numeric(transect)
)
Step 4: Make tables for indices and site-level averages
# Simpson table: keep the Simpson-family columns for each transect and render
# them as a styled table.
simpson_table <- select(
  biodiversity_transect,
  site, transect, D, simpson_1_minus_D, simpson_reciprocal, ESimpson
)
simpson_table %>%
  kable(caption = "Simpson Biodiversity Indices by Transect") %>%
  kable_styling()
Simpson Biodiversity Indices by Transect

| site     | transect |         D | simpson_1_minus_D | simpson_reciprocal |  ESimpson |
|:---------|:---------|----------:|------------------:|-------------------:|----------:|
| Allacher | 1        | 0.1224490 |         0.8775510 |           8.166667 | 0.8166667 |
| Allacher | 2        | 0.2130178 |         0.7869822 |           4.694444 | 0.5868056 |
| Allacher | 3        | 0.1715976 |         0.8284024 |           5.827586 | 0.8325123 |
| Allacher | 4        | 0.2337278 |         0.7662722 |           4.278481 | 0.4753868 |
| Allacher | 5        | 0.1972318 |         0.8027682 |           5.070175 | 0.5633528 |
| Allacher | 6        | 0.2525253 |         0.7474747 |           3.960000 | 0.4400000 |
| Allacher | 7        | 0.3275000 |         0.6725000 |           3.053435 | 0.3816794 |
| Allacher | 8        | 0.1983673 |         0.8016327 |           5.041152 | 0.5041152 |
| Allacher | 9        | 0.2160000 |         0.7840000 |           4.629630 | 0.7716049 |
| Allacher | 10       | 0.2421875 |         0.7578125 |           4.129032 | 0.5161290 |
| Davies   | 1        | 0.0688889 |         0.9311111 |          14.516129 | 0.7258065 |
| Davies   | 2        | 0.0666667 |         0.9333333 |          15.000000 | 0.7142857 |
| Davies   | 3        | 0.0526720 |         0.9473280 |          18.985401 | 0.6124323 |
| Davies   | 4        | 0.0511111 |         0.9488889 |          19.565217 | 0.8152174 |
| Davies   | 5        | 0.0886427 |         0.9113573 |          11.281250 | 0.4904891 |
| Davies   | 6        | 0.0402893 |         0.9597107 |          24.820513 | 0.8273504 |
| Davies   | 7        | 0.0692149 |         0.9307851 |          14.447761 | 0.5159915 |
| Davies   | 8        | 0.0800000 |         0.9200000 |          12.500000 | 0.8333333 |
| Davies   | 9        | 0.0768000 |         0.9232000 |          13.020833 | 0.5425347 |
| Davies   | 10       | 0.0526316 |         0.9473684 |          19.000000 | 0.7037037 |
# Shannon table: keep the Shannon index and its evenness for each transect and
# render them as a styled table.
shannon_table <- select(biodiversity_transect, site, transect, H, EShannon)
shannon_table %>%
  kable(caption = "Shannon Biodiversity Indices by Transect") %>%
  kable_styling()
Shannon Biodiversity Indices by Transect

| site     | transect |        H |  EShannon |
|:---------|:---------|---------:|----------:|
| Allacher | 1        | 2.184439 | 0.9486897 |
| Allacher | 2        | 1.764122 | 0.8483632 |
| Allacher | 3        | 1.844621 | 0.9479479 |
| Allacher | 4        | 1.796925 | 0.8178160 |
| Allacher | 5        | 1.869245 | 0.8507301 |
| Allacher | 6        | 1.656192 | 0.7537652 |
| Allacher | 7        | 1.442080 | 0.6934940 |
| Allacher | 8        | 1.929821 | 0.8381107 |
| Allacher | 9        | 1.665142 | 0.9293335 |
| Allacher | 10       | 1.747966 | 0.8405941 |
| Davies   | 1        | 2.855699 | 0.9532557 |
| Davies   | 2        | 2.901909 | 0.9531573 |
| Davies   | 3        | 3.204919 | 0.9332938 |
| Davies   | 4        | 3.089055 | 0.9719959 |
| Davies   | 5        | 2.811228 | 0.8965820 |
| Davies   | 6        | 3.311589 | 0.9736540 |
| Davies   | 7        | 3.070812 | 0.9215556 |
| Davies   | 8        | 2.622996 | 0.9685922 |
| Davies   | 9        | 2.861566 | 0.9004147 |
| Davies   | 10       | 3.142909 | 0.9535996 |
# Site-level summary: average each per-transect metric across the transects of
# a site (one output row per site), then render it as a styled table rounded
# to four decimal places.
site_summary <- summarise(
  group_by(biodiversity_transect, site),
  avg_N = mean(N),
  avg_S = mean(S),
  avg_D = mean(D),
  avg_1_minus_D = mean(simpson_1_minus_D),
  avg_reciprocal = mean(simpson_reciprocal),
  avg_ESimpson = mean(ESimpson),
  avg_H = mean(H),
  avg_EShannon = mean(EShannon),
  .groups = "drop"
)
site_summary %>%
  kable(digits = 4, caption = "Average Biodiversity Indices by Site") %>%
  kable_styling()
Average Biodiversity Indices by Site

| site     | avg_N | avg_S |  avg_D | avg_1_minus_D | avg_reciprocal | avg_ESimpson |  avg_H | avg_EShannon |
|:---------|------:|------:|-------:|--------------:|---------------:|-------------:|-------:|-------------:|
| Allacher |  27.6 |   8.4 | 0.2175 |        0.7825 |         4.8851 |       0.5888 | 1.7901 |       0.8469 |
| Davies   |  37.5 |  24.3 | 0.0647 |        0.9353 |        16.3137 |       0.6781 | 2.9873 |       0.9426 |
Step 5: Make DBH graphs for at least one site
# Convert the transect label to a number so the facets in the plot below are
# ordered 1, 2, ..., 10 instead of alphabetically.
all_data <- mutate(all_data, transect = as.numeric(transect))
# Histogram of DBH for the Allacher site: one panel per transect, laid out in
# five columns, using bins that are 5 units wide.
all_data %>%
  filter(site == "Allacher") %>%
  ggplot(aes(x = dbh)) +
  geom_histogram(binwidth = 5, fill = "steelblue", color = "black") +
  facet_wrap(vars(transect), ncol = 5) +
  labs(
    title = "Allacher: DBH Distribution by Transect",
    x = "DBH (cm)",
    y = "Number of individuals"
  ) +
  theme_minimal()
