# Install and load required R packages
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Import and inspect the sites.
cedar_bluffs <- read_excel("/Users/giannaselleck/Documents/RStudio/FinalProject/Transect_Data_Set-2/temp.namerica/CEDARBLU.xls")
## New names:
## • `voucher` -> `voucher...5`
## • `voucher` -> `voucher...6`
## • `voucher` -> `voucher...7`
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `Stemdbh` -> `Stemdbh...15`
## • `Stemdbh` -> `Stemdbh...16`
## • `Stemdbh` -> `Stemdbh...17`
## • `Stemdbh` -> `Stemdbh...18`
## • `Stemdbh` -> `Stemdbh...19`
## • `Stemdbh` -> `Stemdbh...20`
## • `Stemdbh` -> `Stemdbh...21`
## • `Stemdbh` -> `Stemdbh...22`
## • `Stemdbh` -> `Stemdbh...23`
## • `Stemdbh` -> `Stemdbh...24`
## • `Stemdbh` -> `Stemdbh...25`
## • `Stemdbh` -> `Stemdbh...26`
## • `Stemdbh` -> `Stemdbh...27`
## • `Stemdbh` -> `Stemdbh...28`
## • `Stemdbh` -> `Stemdbh...29`
la_selva <- read_excel("/Users/giannaselleck/Documents/RStudio/FinalProject/Transect_Data_Set-2/mesoamerica/LASELVA.xls")
## New names:
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
head(cedar_bluffs)
## # A tibble: 6 × 29
## Line Family Genus Species voucher...5 voucher...6 voucher...7 voucher...8
## <chr> <chr> <chr> <chr> <lgl> <lgl> <lgl> <lgl>
## 1 1 ACERACEAE ACER NEGUNDO NA NA NA NA
## 2 1 ACERACEAE ACER SACCHAR… NA NA NA NA
## 3 10 ACERACEAE ACER SACCHAR… NA NA NA NA
## 4 2 ACERACEAE ACER SACCHAR… NA NA NA NA
## 5 3 ACERACEAE ACER SACCHAR… NA NA NA NA
## 6 4 ACERACEAE ACER SACCHAR… NA NA NA NA
## # ℹ 21 more variables: voucher...9 <lgl>, voucher...10 <lgl>,
## # voucher...11 <lgl>, voucher...12 <lgl>, Liana <chr>, `N(Ind.)` <dbl>,
## # Stemdbh...15 <dbl>, Stemdbh...16 <dbl>, Stemdbh...17 <dbl>,
## # Stemdbh...18 <dbl>, Stemdbh...19 <dbl>, Stemdbh...20 <dbl>,
## # Stemdbh...21 <dbl>, Stemdbh...22 <dbl>, Stemdbh...23 <dbl>,
## # Stemdbh...24 <dbl>, Stemdbh...25 <dbl>, Stemdbh...26 <dbl>,
## # Stemdbh...27 <dbl>, Stemdbh...28 <dbl>, Stemdbh...29 <dbl>
str(cedar_bluffs)
## tibble [65 × 29] (S3: tbl_df/tbl/data.frame)
## $ Line : chr [1:65] "1" "1" "10" "2" ...
## $ Family : chr [1:65] "ACERACEAE" "ACERACEAE" "ACERACEAE" "ACERACEAE" ...
## $ Genus : chr [1:65] "ACER" "ACER" "ACER" "ACER" ...
## $ Species : chr [1:65] "NEGUNDO" "SACCHARUM" "SACCHARUM" "SACCHARUM" ...
## $ voucher...5 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...6 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...7 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...8 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...9 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...10: logi [1:65] NA NA NA NA NA NA ...
## $ voucher...11: logi [1:65] NA NA NA NA NA NA ...
## $ voucher...12: logi [1:65] NA NA NA NA NA NA ...
## $ Liana : chr [1:65] NA NA NA NA ...
## $ N(Ind.) : num [1:65] 3 8 15 7 9 7 12 10 11 10 ...
## $ Stemdbh...15: num [1:65] 8 2.5 4 3 6 8 5 6 9 2.5 ...
## $ Stemdbh...16: num [1:65] 4 2.5 10 13 4 13 3 9 3 2.5 ...
## $ Stemdbh...17: num [1:65] 7 8 3 3 12 2.5 7 12 11 6 ...
## $ Stemdbh...18: num [1:65] NA 38 4 5 15 4 10 6 2.5 5 ...
## $ Stemdbh...19: num [1:65] NA 4 6 7 7 7 18 4 2.5 7 ...
## $ Stemdbh...20: num [1:65] NA 27 4 35 9 5 2.5 4 7 13 ...
## $ Stemdbh...21: num [1:65] NA 12 4 2.5 5 12 20 5 19 9 ...
## $ Stemdbh...22: num [1:65] NA NA 8 NA 6 NA 6 11 8 8 ...
## $ Stemdbh...23: num [1:65] NA NA 2.5 NA 8 NA 4 11 9 26 ...
## $ Stemdbh...24: num [1:65] NA NA 8 NA NA NA 3 9 4 12 ...
## $ Stemdbh...25: num [1:65] NA NA 7 NA NA NA 2.5 NA 3 NA ...
## $ Stemdbh...26: num [1:65] NA NA 6 NA NA NA 9 NA NA NA ...
## $ Stemdbh...27: num [1:65] NA NA 5 NA NA NA NA NA NA NA ...
## $ Stemdbh...28: num [1:65] NA NA 8 NA NA NA NA NA NA NA ...
## $ Stemdbh...29: num [1:65] NA NA 9 NA NA NA NA NA NA NA ...
head(la_selva)
## # A tibble: 6 × 21
## Line FAMILY GENUS SPECIES voucher1 voucher2 voucher3 voucher4 voucher...9
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 1 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 2 2 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 3 3 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 4 4 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 5 5 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 6 6 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## # ℹ 12 more variables: voucher...10 <lgl>, voucher...11 <lgl>,
## # voucher...12 <lgl>, LIANA <chr>, `N(IND.)` <dbl>, STEMDBH...15 <dbl>,
## # STEMDBH...16 <dbl>, STEMDBH...17 <dbl>, STEMDBH...18 <dbl>,
## # STEMDBH...19 <dbl>, STEMDBH...20 <dbl>, STEMDBH...21 <dbl>
str(la_selva)
## tibble [250 × 21] (S3: tbl_df/tbl/data.frame)
## $ Line : num [1:250] 1 2 3 4 5 6 7 8 9 10 ...
## $ FAMILY : chr [1:250] "ANNONACEAE" "ANNONACEAE" "ANNONACEAE" "ANNONACEAE" ...
## $ GENUS : chr [1:250] "ANAXAGOREA" "ANAXAGOREA" "ANAXAGOREA" "ANAXAGOREA" ...
## $ SPECIES : chr [1:250] "PHAEOCARPA" "PHAEOCARPA" "PHAEOCARPA" "PHAEOCARPA" ...
## $ voucher1 : num [1:250] 78484 78484 78484 78484 78484 ...
## $ voucher2 : num [1:250] NA NA NA NA NA NA NA NA NA NA ...
## $ voucher3 : num [1:250] NA NA NA NA NA NA NA NA NA NA ...
## $ voucher4 : num [1:250] NA NA NA NA NA NA NA NA NA NA ...
## $ voucher...9 : logi [1:250] NA NA NA NA NA NA ...
## $ voucher...10: logi [1:250] NA NA NA NA NA NA ...
## $ voucher...11: logi [1:250] NA NA NA NA NA NA ...
## $ voucher...12: logi [1:250] NA NA NA NA NA NA ...
## $ LIANA : chr [1:250] NA NA NA NA ...
## $ N(IND.) : num [1:250] 2 6 2 3 4 7 6 4 6 2 ...
## $ STEMDBH...15: num [1:250] 5 3 3 4 5 7 3 2.5 3 4 ...
## $ STEMDBH...16: num [1:250] 5 3 5 9 7 7 7 7 7 2.5 ...
## $ STEMDBH...17: num [1:250] NA 2.5 NA 4 8 4 2.5 3 5 NA ...
## $ STEMDBH...18: num [1:250] NA 5 NA 7 5 2.5 7 4 3 NA ...
## $ STEMDBH...19: num [1:250] NA 4 NA NA NA 6 2.5 NA 3 NA ...
## $ STEMDBH...20: num [1:250] NA 13 NA NA NA 9 4 NA 7 NA ...
## $ STEMDBH...21: num [1:250] NA NA NA NA NA 7 6 NA NA NA ...
# Clean column names and rename column names for La Selva.
la_selva <- la_selva %>% janitor::clean_names()
la_selva <- la_selva %>%
mutate(species_name = paste(genus, species))
la_selva_long <- la_selva %>%
pivot_longer(
cols = starts_with("stemdbh"),
names_to = "stem",
values_to = "dbh"
)
la_selva_long <- la_selva_long %>%
filter(!is.na(dbh))
la_selva_clean <- la_selva_long %>%
select(
transect = line,
species = species_name,
dbh,
n_ind
) %>%
mutate(site = "La Selva")
glimpse(la_selva_clean)
## Rows: 347
## Columns: 5
## $ transect <dbl> 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6…
## $ species <chr> "ANAXAGOREA PHAEOCARPA", "ANAXAGOREA PHAEOCARPA", "ANAXAGOREA…
## $ dbh <dbl> 5.0, 5.0, 3.0, 3.0, 2.5, 5.0, 4.0, 13.0, 3.0, 5.0, 4.0, 9.0, …
## $ n_ind <dbl> 2, 2, 6, 6, 6, 6, 6, 6, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 7, 7, 7…
## $ site <chr> "La Selva", "La Selva", "La Selva", "La Selva", "La Selva", "…
# Clean column names and rename column names for Cedar Bluffs.
cedar_bluffs <- cedar_bluffs %>% janitor::clean_names()
cedar_bluffs <- cedar_bluffs %>%
mutate(species_name = paste(genus, species))
cedar_long <- cedar_bluffs %>%
pivot_longer(
cols = starts_with("stemdbh"),
names_to = "stem",
values_to = "dbh"
)
cedar_long <- cedar_long %>%
filter(!is.na(dbh))
cedar_clean <- cedar_long %>%
select(
transect = line,
species = species_name,
dbh,
n_ind
) %>%
mutate(site = "Cedar Bluffs")
glimpse(cedar_clean)
## Rows: 175
## Columns: 5
## $ transect <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "10", "10",…
## $ species <chr> "ACER NEGUNDO", "ACER NEGUNDO", "ACER NEGUNDO", "ACER SACCHAR…
## $ dbh <dbl> 8.0, 4.0, 7.0, 2.5, 2.5, 8.0, 38.0, 4.0, 27.0, 12.0, 4.0, 10.…
## $ n_ind <dbl> 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ site <chr> "Cedar Bluffs", "Cedar Bluffs", "Cedar Bluffs", "Cedar Bluffs…
# Combine data from both sites and group by site, transect, and species.
la_selva_clean <- la_selva_clean %>%
mutate(transect = as.character(transect))
cedar_clean <- cedar_clean %>%
mutate(transect = as.character(transect))
combined_data <- bind_rows(la_selva_clean, cedar_clean)
combined_data$transect <- factor(
combined_data$transect,
levels = sort(unique(as.numeric(combined_data$transect)))
)
combined_data %>%
group_by(site, transect, species) %>%
summarise(count = n())
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site, transect, and species.
## ℹ Output is grouped by site and transect.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect, species))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
## # A tibble: 314 × 4
## # Groups: site, transect [20]
## site transect species count
## <chr> <fct> <chr> <int>
## 1 Cedar Bluffs 1 ACER NEGUNDO 3
## 2 Cedar Bluffs 1 ACER SACCHARUM 7
## 3 Cedar Bluffs 1 QUERCUS RUBRA 1
## 4 Cedar Bluffs 1 ULMUS RUBRA 1
## 5 Cedar Bluffs 2 ACER SACCHARUM 7
## 6 Cedar Bluffs 2 CARYA OVATA 1
## 7 Cedar Bluffs 2 CERCIS CANADENSIS 1
## 8 Cedar Bluffs 2 JUGLANS NIGRA 1
## 9 Cedar Bluffs 2 OSTRYA VIRGINIANA 3
## 10 Cedar Bluffs 2 PRUNUS SEROTINA 2
## # ℹ 304 more rows
# Calculate biodiversity metrics (Simpson’s D, 1-D, 1/D, ESimpson, Shannon H, EShannon) for each transect at both sites.
species_counts <- combined_data %>%
group_by(site, transect, species) %>%
summarise(n = n(), .groups = "drop")
transect_summary <- species_counts %>%
group_by(site, transect) %>%
mutate(
N = sum(n),
S = n_distinct(species),
pi = n / N
)
biodiversity_indices <- transect_summary %>%
group_by(site, transect) %>%
summarise(
S = first(S),
N = first(N),
D = sum(pi^2), # Simpson's D
simpson_1_D = 1 - D, # Simpson's index of diversity
simpson_1_over_D = 1 / D, # Simpson's reciprocal index
E_simpson = (1 / D) / S, # Simpson's index of diversity evenness (ESimpson)
shannon_H = -sum(pi * log(pi)), # Shannon diversity index (H)
E_shannon = shannon_H / log(S), # Shannon evenness (EShannon)
.groups = "drop"
)
knitr::kable(
biodiversity_indices,
digits = 3,
caption = "Biodiversity Metrics by Transect for La Selva and Cedar Bluffs",
booktabs = TRUE
) %>%
kable_styling(
full_width = FALSE,
position = "center"
) %>%
row_spec(0, bold = TRUE) %>%
column_spec(1:ncol(biodiversity_indices), border_right = TRUE)
Biodiversity Metrics by Transect for La Selva and Cedar Bluffs
|
site
|
transect
|
S
|
N
|
D
|
simpson_1_D
|
simpson_1_over_D
|
E_simpson
|
shannon_H
|
E_shannon
|
|
Cedar Bluffs
|
1
|
4
|
12
|
0.417
|
0.583
|
2.400
|
0.600
|
1.075
|
0.776
|
|
Cedar Bluffs
|
2
|
8
|
17
|
0.232
|
0.768
|
4.313
|
0.539
|
1.757
|
0.845
|
|
Cedar Bluffs
|
3
|
6
|
16
|
0.359
|
0.641
|
2.783
|
0.464
|
1.363
|
0.761
|
|
Cedar Bluffs
|
4
|
7
|
17
|
0.253
|
0.747
|
3.959
|
0.566
|
1.624
|
0.835
|
|
Cedar Bluffs
|
5
|
6
|
19
|
0.429
|
0.571
|
2.329
|
0.388
|
1.229
|
0.686
|
|
Cedar Bluffs
|
6
|
5
|
16
|
0.430
|
0.570
|
2.327
|
0.465
|
1.160
|
0.721
|
|
Cedar Bluffs
|
7
|
6
|
20
|
0.355
|
0.645
|
2.817
|
0.469
|
1.347
|
0.752
|
|
Cedar Bluffs
|
8
|
6
|
15
|
0.467
|
0.533
|
2.143
|
0.357
|
1.173
|
0.655
|
|
Cedar Bluffs
|
9
|
7
|
17
|
0.377
|
0.623
|
2.651
|
0.379
|
1.397
|
0.718
|
|
Cedar Bluffs
|
10
|
10
|
26
|
0.355
|
0.645
|
2.817
|
0.282
|
1.589
|
0.690
|
|
La Selva
|
1
|
29
|
37
|
0.040
|
0.960
|
24.891
|
0.858
|
3.297
|
0.979
|
|
La Selva
|
2
|
28
|
39
|
0.057
|
0.943
|
17.483
|
0.624
|
3.139
|
0.942
|
|
La Selva
|
3
|
29
|
36
|
0.039
|
0.961
|
25.920
|
0.894
|
3.314
|
0.984
|
|
La Selva
|
4
|
23
|
27
|
0.056
|
0.944
|
17.780
|
0.773
|
3.039
|
0.969
|
|
La Selva
|
5
|
25
|
35
|
0.055
|
0.945
|
18.284
|
0.731
|
3.080
|
0.957
|
|
La Selva
|
6
|
21
|
38
|
0.082
|
0.918
|
12.237
|
0.583
|
2.784
|
0.915
|
|
La Selva
|
7
|
25
|
44
|
0.074
|
0.926
|
13.444
|
0.538
|
2.933
|
0.911
|
|
La Selva
|
8
|
19
|
24
|
0.069
|
0.931
|
14.400
|
0.758
|
2.831
|
0.962
|
|
La Selva
|
9
|
23
|
37
|
0.065
|
0.935
|
15.382
|
0.669
|
2.955
|
0.942
|
|
La Selva
|
10
|
27
|
30
|
0.040
|
0.960
|
25.000
|
0.926
|
3.263
|
0.990
|
# Set a global theme for plots
theme_set(
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(size = 18, face = "bold"),
axis.title = element_text(size = 15),
axis.text = element_text(size = 14),
legend.title = element_text(size = 15),
legend.text = element_text(size = 14)
)
)
# DBH distribution of all trees by site
ggplot(combined_data, aes(x = site, y = dbh, fill = site)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(alpha = 0.25, width = 0.15, size = 1) +
scale_y_log10() +
labs(
title = "Distribution of Tree DBH by Site",
x = "Site",
y = "DBH (cm, log scale)"
)

# DBH distributions of dominant species by site
top_species <- combined_data %>%
count(site, species, sort = TRUE) %>%
group_by(site) %>%
slice_head(n = 10) %>%
ungroup()
filtered_data <- combined_data %>%
semi_join(top_species, by = c("site", "species"))
ggplot(filtered_data, aes(x = species, y = dbh, fill = site)) +
geom_boxplot(alpha = 0.7) +
facet_wrap(~site, scales = "free_x") +
theme(axis.text.x = element_text(angle = 90)) +
labs(
title = "DBH Distributions of Dominant Tree Species by Site",
x = "Species",
y = "DBH (cm)"
)

# Cumulative distribution of DBH by site
ggplot(combined_data, aes(x = dbh, color = site)) +
stat_ecdf(size = 1.2) +
scale_x_log10() +
labs(
title = "Cumulative Distribution of Tree DBH by Site",
x = "DBH (cm, log scale)",
y = "Cumulative Proportion of Individual Trees"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Mean DBH per transect by site
transect_means <- combined_data %>%
group_by(site, transect) %>%
summarise(mean_dbh = mean(dbh), .groups = "drop")
ggplot(transect_means, aes(x = site, y = mean_dbh, fill = site)) +
geom_boxplot() +
geom_jitter(width = 0.1) +
scale_y_log10() +
labs(
title = "Mean Transect DBH by Site",
x = "Site",
y = "Mean DBH per Transect (cm, log scale)"
)

# Calculate average biodiversity metrics across transects for each site
site_summary <- biodiversity_indices %>%
group_by(site) %>%
summarise(
mean_D = mean(D),
mean_1_D = mean(simpson_1_D),
mean_1_over_D = mean(simpson_1_over_D),
mean_Esimpson = mean(E_simpson),
mean_shannon_H = mean(shannon_H),
mean_Eshannon = mean(E_shannon),
.groups = "drop"
)
knitr::kable(
site_summary,
digits = 3,
caption = "Average Biodiversity Metrics for Each Site",
booktabs = TRUE
) %>%
kable_styling(
full_width = FALSE,
position = "center"
) %>%
row_spec(0, bold = TRUE) %>%
column_spec(1:ncol(site_summary), border_right = TRUE)
Average Biodiversity Metrics for Each Site
|
site
|
mean_D
|
mean_1_D
|
mean_1_over_D
|
mean_Esimpson
|
mean_shannon_H
|
mean_Eshannon
|
|
Cedar Bluffs
|
0.367
|
0.633
|
2.854
|
0.451
|
1.372
|
0.744
|
|
La Selva
|
0.058
|
0.942
|
18.482
|
0.735
|
3.064
|
0.955
|
# Shannon diversity comparison between sites
ggplot(biodiversity_indices,
aes(x = site, y = shannon_H, fill = site)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.1, size = 2, alpha = 0.7) +
labs(
title = "Shannon Diversity Index by Site",
x = "Site",
y = "Shannon Diversity Index (H)"
)
