R Spatial Final Lab: Biodiversity in Allacher and Davies Sites
Step 1:Set up project file, load in packages and import data
wd <- getwd()
data_dir <- file.path(wd,"Data")
list.files("Data")
## [1] "ALLACHER.xls" "DAVIES.xls"
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(stringr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.6
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
## Warning: package 'vegan' was built under R version 4.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.5.3
library(knitr)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.3
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
site1_allacher_raw <- read_excel(file.path(data_dir, "ALLACHER.xls"))
## New names:
## • `Stem dbh` -> `Stem dbh...6`
## • `Stem dbh` -> `Stem dbh...7`
## • `Stem dbh` -> `Stem dbh...8`
## • `Stem dbh` -> `Stem dbh...9`
## • `Stem dbh` -> `Stem dbh...10`
## • `Stem dbh` -> `Stem dbh...11`
## • `Stem dbh` -> `Stem dbh...12`
## • `Stem dbh` -> `Stem dbh...13`
## • `Stem dbh` -> `Stem dbh...14`
## • `Stem dbh` -> `Stem dbh...15`
## • `Stem dbh` -> `Stem dbh...16`
## • `Stem dbh` -> `Stem dbh...17`
## • `Stem dbh` -> `Stem dbh...18`
## • `Stem dbh` -> `Stem dbh...19`
## • `Stem dbh` -> `Stem dbh...20`
## • `Stem dbh` -> `Stem dbh...21`
## • `Stem dbh` -> `Stem dbh...22`
## • `Stem dbh` -> `Stem dbh...23`
## • `Stem dbh` -> `Stem dbh...24`
## • `Stem dbh` -> `Stem dbh...25`
site2_davies_raw <- read_excel(file.path(data_dir, "DAVIES.xls"))
## New names:
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
## • `STEMDBH` -> `STEMDBH...22`
Step 2:Clean data from null columns and rows then combine data
sets
colnames(site1_allacher_raw) #Check column names
## [1] "Line" "Family" "Genus" "Species"
## [5] "N(Ind)" "Stem dbh...6" "Stem dbh...7" "Stem dbh...8"
## [9] "Stem dbh...9" "Stem dbh...10" "Stem dbh...11" "Stem dbh...12"
## [13] "Stem dbh...13" "Stem dbh...14" "Stem dbh...15" "Stem dbh...16"
## [17] "Stem dbh...17" "Stem dbh...18" "Stem dbh...19" "Stem dbh...20"
## [21] "Stem dbh...21" "Stem dbh...22" "Stem dbh...23" "Stem dbh...24"
## [25] "Stem dbh...25"
colnames(site2_davies_raw)
## [1] "Line" "Family" "Genus" "species" "voucher1"
## [6] "voucher2" "voucher3" "voucher4" "voucher5" "voucher...10"
## [11] "voucher...11" "voucher...12" "LIANA" "N(IND)" "STEMDBH...15"
## [16] "STEMDBH...16" "STEMDBH...17" "STEMDBH...18" "STEMDBH...19" "STEMDBH...20"
## [21] "STEMDBH...21" "STEMDBH...22"
site1_allacher_clean <- site1_allacher_raw %>%
select(Line, Family, Genus, Species, `N(Ind)`, starts_with("Stem dbh")) %>%
mutate(
across(starts_with("Stem dbh"), as.numeric)
) %>%
pivot_longer(
cols = starts_with("Stem dbh"),
names_to = "stem_id",
values_to = "dbh"
) %>%
filter(!is.na(dbh)) %>%
mutate(
site = "Allacher",
transect = as.numeric(Line),
species = str_squish(Species)
) %>%
select(site, transect, species, dbh)
site2_davies_clean <- site2_davies_raw %>%
select(Line, Family, Genus, species, `N(IND)`, starts_with("STEMDBH")) %>%
mutate(
across(starts_with("STEMDBH"), as.numeric)) %>%
pivot_longer(
cols = starts_with("STEMDBH"),
names_to = "stem_id",
values_to = "dbh"
) %>%
filter(!is.na(dbh)) %>%
mutate(
site = "Davies",
transect = as.numeric(Line),
species = str_squish(species)
) %>%
select(site, transect, species, dbh)
all_data <- bind_rows(site1_allacher_clean, site2_davies_clean) %>%
mutate(
transect = as.numeric(transect)
) %>%
arrange(site, transect)
Step 3:Count Individuals by Species and Transect and Calculate
Biodiversity Indices by Transect
species_count <- all_data %>%
group_by(site, transect, species) %>%
summarise(n = n(), groups = "drop") %>%
arrange(site, transect, species)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site, transect, and species.
## ℹ Output is grouped by site and transect.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect, species))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
biodiversity_transect <- species_count %>%
group_by(site, transect) %>%
mutate(
N = sum(n),
S = n_distinct(species),
pi = n / N
) %>%
summarise(
N = first(N),
S = first(S),
D = sum(pi^2),
simpson_1_minus_D = 1 - D,
simpson_reciprocal = 1 / D,
simpson_evenness = simpson_reciprocal / S,
shannon_H = -sum(pi * log(pi)),
shannon_evenness = shannon_H / log(S),
.groups = "drop"
) %>%
mutate(
transect = as.numeric(transect)
) %>%
arrange(site, transect)
biodiversity_transect <- biodiversity_transect %>%
arrange(site, as.numeric(transect))
Step 4:Make tables for each Indices
## Simpson Table
simpson_table <- biodiversity_transect %>%
select(
Site = site,
Transect = transect,
N,
S,
D,
'Simpson 1-D' = simpson_1_minus_D,
'Simpson 1/D' = simpson_reciprocal,
'Simpson Evenness' = simpson_evenness
)
kable(
simpson_table,
digits = 4,
caption = "Table 1: Simpson Biodiversity Indices by Transect for Allacher and Davies") %>%
kable_styling(full_width = FALSE)
Table 1: Simpson Biodiversity Indices by Transect for Allacher and
Davies
|
Site
|
Transect
|
N
|
S
|
D
|
Simpson 1-D
|
Simpson 1/D
|
Simpson Evenness
|
|
Allacher
|
1
|
28
|
10
|
0.1224
|
0.8776
|
8.1667
|
0.8167
|
|
Allacher
|
2
|
26
|
8
|
0.2130
|
0.7870
|
4.6944
|
0.5868
|
|
Allacher
|
3
|
13
|
7
|
0.1716
|
0.8284
|
5.8276
|
0.8325
|
|
Allacher
|
4
|
26
|
9
|
0.2337
|
0.7663
|
4.2785
|
0.4754
|
|
Allacher
|
5
|
34
|
9
|
0.1972
|
0.8028
|
5.0702
|
0.5634
|
|
Allacher
|
6
|
33
|
9
|
0.2525
|
0.7475
|
3.9600
|
0.4400
|
|
Allacher
|
7
|
40
|
8
|
0.3275
|
0.6725
|
3.0534
|
0.3817
|
|
Allacher
|
8
|
35
|
10
|
0.1984
|
0.8016
|
5.0412
|
0.5041
|
|
Allacher
|
9
|
25
|
6
|
0.2160
|
0.7840
|
4.6296
|
0.7716
|
|
Allacher
|
10
|
16
|
8
|
0.2422
|
0.7578
|
4.1290
|
0.5161
|
|
Davies
|
1
|
30
|
20
|
0.0689
|
0.9311
|
14.5161
|
0.7258
|
|
Davies
|
2
|
30
|
21
|
0.0667
|
0.9333
|
15.0000
|
0.7143
|
|
Davies
|
3
|
51
|
31
|
0.0527
|
0.9473
|
18.9854
|
0.6124
|
|
Davies
|
4
|
30
|
24
|
0.0511
|
0.9489
|
19.5652
|
0.8152
|
|
Davies
|
5
|
38
|
23
|
0.0886
|
0.9114
|
11.2813
|
0.4905
|
|
Davies
|
6
|
44
|
30
|
0.0403
|
0.9597
|
24.8205
|
0.8274
|
|
Davies
|
7
|
44
|
28
|
0.0692
|
0.9308
|
14.4478
|
0.5160
|
|
Davies
|
8
|
20
|
15
|
0.0800
|
0.9200
|
12.5000
|
0.8333
|
|
Davies
|
9
|
50
|
24
|
0.0768
|
0.9232
|
13.0208
|
0.5425
|
|
Davies
|
10
|
38
|
27
|
0.0526
|
0.9474
|
19.0000
|
0.7037
|
## Shannon Table
shannon_table <- biodiversity_transect %>%
select(
Site = site,
Transect = transect,
N,
S,
'Shannon H' = shannon_H,
'Shannon Evenness' = shannon_evenness
)
kable(
shannon_table,
digits = 4,
caption = "Table 2: Shannon Biodiversity Indices by Transect for Allacher and Davies"
) %>%
kable_styling(full_width = FALSE)
Table 2: Shannon Biodiversity Indices by Transect for Allacher and
Davies
|
Site
|
Transect
|
N
|
S
|
Shannon H
|
Shannon Evenness
|
|
Allacher
|
1
|
28
|
10
|
2.1844
|
0.9487
|
|
Allacher
|
2
|
26
|
8
|
1.7641
|
0.8484
|
|
Allacher
|
3
|
13
|
7
|
1.8446
|
0.9479
|
|
Allacher
|
4
|
26
|
9
|
1.7969
|
0.8178
|
|
Allacher
|
5
|
34
|
9
|
1.8692
|
0.8507
|
|
Allacher
|
6
|
33
|
9
|
1.6562
|
0.7538
|
|
Allacher
|
7
|
40
|
8
|
1.4421
|
0.6935
|
|
Allacher
|
8
|
35
|
10
|
1.9298
|
0.8381
|
|
Allacher
|
9
|
25
|
6
|
1.6651
|
0.9293
|
|
Allacher
|
10
|
16
|
8
|
1.7480
|
0.8406
|
|
Davies
|
1
|
30
|
20
|
2.8557
|
0.9533
|
|
Davies
|
2
|
30
|
21
|
2.9019
|
0.9532
|
|
Davies
|
3
|
51
|
31
|
3.2049
|
0.9333
|
|
Davies
|
4
|
30
|
24
|
3.0891
|
0.9720
|
|
Davies
|
5
|
38
|
23
|
2.8112
|
0.8966
|
|
Davies
|
6
|
44
|
30
|
3.3116
|
0.9737
|
|
Davies
|
7
|
44
|
28
|
3.0708
|
0.9216
|
|
Davies
|
8
|
20
|
15
|
2.6230
|
0.9686
|
|
Davies
|
9
|
50
|
24
|
2.8616
|
0.9004
|
|
Davies
|
10
|
38
|
27
|
3.1429
|
0.9536
|
Step 5:Site-Level Averages for Biodiversity
site_summary <- biodiversity_transect %>%
group_by(site) %>%
summarise(
avg_N = mean(N),
avg_S = mean(S),
avg_simpson_D = mean(D),
avg_simpson_1_minus_D = mean(simpson_1_minus_D),
avg_simpson_reciprocal = mean(simpson_reciprocal),
avg_simpson_evenness = mean(simpson_evenness),
avg_shannon_H = mean(shannon_H),
avg_shannon_evenness = mean(shannon_evenness),
.groups = "drop"
)
site_summary_table <- site_summary %>%
select(
Site = site,
'Average N' = avg_N,
'Average Species Richness' = avg_S,
'Average Simpson D' = avg_simpson_D,
'Average Simpson 1-D' = avg_simpson_1_minus_D,
'Average Simpson 1/D' = avg_simpson_reciprocal,
'Average Simpson Evenness' = avg_simpson_evenness,
'Average Shannon H' = avg_shannon_H,
'Average Shannon Evenness' = avg_shannon_evenness
)
kable(
site_summary_table,
digits = 4,
caption = "Table 3: Average Biodiversity Indices by Site") %>%
kable_styling(full_width = FALSE)
Table 3: Average Biodiversity Indices by Site
|
Site
|
Average N
|
Average Species Richness
|
Average Simpson D
|
Average Simpson 1-D
|
Average Simpson 1/D
|
Average Simpson Evenness
|
Average Shannon H
|
Average Shannon Evenness
|
|
Allacher
|
27.6
|
8.4
|
0.2175
|
0.7825
|
4.8851
|
0.5888
|
1.7901
|
0.8469
|
|
Davies
|
37.5
|
24.3
|
0.0647
|
0.9353
|
16.3137
|
0.6781
|
2.9873
|
0.9426
|
Step 6:Make Graphs
## DBH Graph by Species and Transect
ggplot(all_data,
aes(x = dbh, fill = species)) +
geom_histogram(
binwidth = 5,
color = "black",
linewidth = 0.15
) +
facet_grid(site ~ transect) +
scale_x_continuous(
limits = c(0, 80),
breaks = c(0, 40, 80)
) +
labs(
title = "Diameter at Breast Height Distribution by Species and Transect",
x = "DBH (cm)",
y = "Number of Individuals",
fill = "Species"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
axis.title = element_text(face = "bold", size = 12),
axis.text = element_text(size = 8),
strip.text = element_text(face = "bold", size = 10),
legend.position = "none"
)
## Warning: Removed 654 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## DBH by Site and Transect (Cleaned Version)
ggplot(all_data, aes(x= dbh, fill = site)) +
geom_histogram(
binwidth = 5,
color = "black",
linewidth = 0.25
) +
facet_grid(site ~ transect) +
scale_x_continuous(
breaks = seq(0, 80, by = 40)
) +
coord_cartesian(xlim = c(0, 80)) +
scale_fill_manual(
values = c(
"Allacher" = "#4C78A8",
"Davies" = "#59A14F"
)
) +
labs(
title = "Diameter at Breast Height Distribution by Site and Transect",
x = "DBH (cm)",
y = "Number of Individuals",
fill = "Site"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 8),
strip.text = element_text(size = 10, face = "bold"),
legend.position = "bottom"
)

## Extra Graph: Relationship Between Average DBH and Shannon Diversity
avg_dbh <- all_data %>%
group_by(site, transect) %>%
summarise(
avg_dbh = mean(dbh, na.rm = TRUE),
.group = "drop"
) #Average DHB by transect
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site and transect.
## ℹ Output is grouped by site.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
biodiversity_size <- biodiversity_transect %>%
left_join(avg_dbh, by = c("site", "transect"))
ggplot(biodiversity_size,
aes(x = avg_dbh,
y = shannon_H,
color = site,
label = transect)) +
geom_point(size = 4) +
geom_text(vjust = -0.8, size = 3) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_manual(
values = c(
"Allacher" = "#F87660",
"Davies" = "#00BFC4"
)
) +
labs(
title = "Relationship Between Average Diameter at Breast Height and Shannon Diversity",
x = "Average DBH (cm)",
y = "Shannon Diversity (H)",
color = "Site"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0, face = "bold", size = 12),
axis.title = element_text(face = "bold", size = 12),
legend.title = element_text(face = "bold")
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

Step 7:Save Files
write.csv(
biodiversity_transect,
"biodiversity_transect_table.csv",
row.names = FALSE
)
write.csv(
site_summary_table,
"site_summary_table.csv",
row.names = FALSE
)
ggsave(
filename = "dbh_species_transect_graph.png",
width = 12,
height = 6,
dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
ggsave(
filename = "dbh_distribution_by_site_transect.png",
width = 12,
height = 6,
dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
ggsave(
filename = "dbh_shannon_relationship.png",
width = 9,
height = 6,
dpi = 300
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: The following aesthetics were dropped during statistical transformation: label.
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?