# Install and load required R packages
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Import and inspect the sites.
cedar_bluffs <- read_excel("/Users/giannaselleck/Documents/RStudio/FinalProject/Transect_Data_Set-2/temp.namerica/CEDARBLU.xls")
## New names:
## • `voucher` -> `voucher...5`
## • `voucher` -> `voucher...6`
## • `voucher` -> `voucher...7`
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `Stemdbh` -> `Stemdbh...15`
## • `Stemdbh` -> `Stemdbh...16`
## • `Stemdbh` -> `Stemdbh...17`
## • `Stemdbh` -> `Stemdbh...18`
## • `Stemdbh` -> `Stemdbh...19`
## • `Stemdbh` -> `Stemdbh...20`
## • `Stemdbh` -> `Stemdbh...21`
## • `Stemdbh` -> `Stemdbh...22`
## • `Stemdbh` -> `Stemdbh...23`
## • `Stemdbh` -> `Stemdbh...24`
## • `Stemdbh` -> `Stemdbh...25`
## • `Stemdbh` -> `Stemdbh...26`
## • `Stemdbh` -> `Stemdbh...27`
## • `Stemdbh` -> `Stemdbh...28`
## • `Stemdbh` -> `Stemdbh...29`
la_selva <- read_excel("/Users/giannaselleck/Documents/RStudio/FinalProject/Transect_Data_Set-2/mesoamerica/LASELVA.xls")
## New names:
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMDBH` -> `STEMDBH...15`
## • `STEMDBH` -> `STEMDBH...16`
## • `STEMDBH` -> `STEMDBH...17`
## • `STEMDBH` -> `STEMDBH...18`
## • `STEMDBH` -> `STEMDBH...19`
## • `STEMDBH` -> `STEMDBH...20`
## • `STEMDBH` -> `STEMDBH...21`
head(cedar_bluffs)
## # A tibble: 6 × 29
## Line Family Genus Species voucher...5 voucher...6 voucher...7 voucher...8
## <chr> <chr> <chr> <chr> <lgl> <lgl> <lgl> <lgl>
## 1 1 ACERACEAE ACER NEGUNDO NA NA NA NA
## 2 1 ACERACEAE ACER SACCHAR… NA NA NA NA
## 3 10 ACERACEAE ACER SACCHAR… NA NA NA NA
## 4 2 ACERACEAE ACER SACCHAR… NA NA NA NA
## 5 3 ACERACEAE ACER SACCHAR… NA NA NA NA
## 6 4 ACERACEAE ACER SACCHAR… NA NA NA NA
## # ℹ 21 more variables: voucher...9 <lgl>, voucher...10 <lgl>,
## # voucher...11 <lgl>, voucher...12 <lgl>, Liana <chr>, `N(Ind.)` <dbl>,
## # Stemdbh...15 <dbl>, Stemdbh...16 <dbl>, Stemdbh...17 <dbl>,
## # Stemdbh...18 <dbl>, Stemdbh...19 <dbl>, Stemdbh...20 <dbl>,
## # Stemdbh...21 <dbl>, Stemdbh...22 <dbl>, Stemdbh...23 <dbl>,
## # Stemdbh...24 <dbl>, Stemdbh...25 <dbl>, Stemdbh...26 <dbl>,
## # Stemdbh...27 <dbl>, Stemdbh...28 <dbl>, Stemdbh...29 <dbl>
str(cedar_bluffs)
## tibble [65 × 29] (S3: tbl_df/tbl/data.frame)
## $ Line : chr [1:65] "1" "1" "10" "2" ...
## $ Family : chr [1:65] "ACERACEAE" "ACERACEAE" "ACERACEAE" "ACERACEAE" ...
## $ Genus : chr [1:65] "ACER" "ACER" "ACER" "ACER" ...
## $ Species : chr [1:65] "NEGUNDO" "SACCHARUM" "SACCHARUM" "SACCHARUM" ...
## $ voucher...5 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...6 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...7 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...8 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...9 : logi [1:65] NA NA NA NA NA NA ...
## $ voucher...10: logi [1:65] NA NA NA NA NA NA ...
## $ voucher...11: logi [1:65] NA NA NA NA NA NA ...
## $ voucher...12: logi [1:65] NA NA NA NA NA NA ...
## $ Liana : chr [1:65] NA NA NA NA ...
## $ N(Ind.) : num [1:65] 3 8 15 7 9 7 12 10 11 10 ...
## $ Stemdbh...15: num [1:65] 8 2.5 4 3 6 8 5 6 9 2.5 ...
## $ Stemdbh...16: num [1:65] 4 2.5 10 13 4 13 3 9 3 2.5 ...
## $ Stemdbh...17: num [1:65] 7 8 3 3 12 2.5 7 12 11 6 ...
## $ Stemdbh...18: num [1:65] NA 38 4 5 15 4 10 6 2.5 5 ...
## $ Stemdbh...19: num [1:65] NA 4 6 7 7 7 18 4 2.5 7 ...
## $ Stemdbh...20: num [1:65] NA 27 4 35 9 5 2.5 4 7 13 ...
## $ Stemdbh...21: num [1:65] NA 12 4 2.5 5 12 20 5 19 9 ...
## $ Stemdbh...22: num [1:65] NA NA 8 NA 6 NA 6 11 8 8 ...
## $ Stemdbh...23: num [1:65] NA NA 2.5 NA 8 NA 4 11 9 26 ...
## $ Stemdbh...24: num [1:65] NA NA 8 NA NA NA 3 9 4 12 ...
## $ Stemdbh...25: num [1:65] NA NA 7 NA NA NA 2.5 NA 3 NA ...
## $ Stemdbh...26: num [1:65] NA NA 6 NA NA NA 9 NA NA NA ...
## $ Stemdbh...27: num [1:65] NA NA 5 NA NA NA NA NA NA NA ...
## $ Stemdbh...28: num [1:65] NA NA 8 NA NA NA NA NA NA NA ...
## $ Stemdbh...29: num [1:65] NA NA 9 NA NA NA NA NA NA NA ...
head(la_selva)
## # A tibble: 6 × 21
## Line FAMILY GENUS SPECIES voucher1 voucher2 voucher3 voucher4 voucher...9
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 1 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 2 2 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 3 3 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 4 4 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 5 5 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## 6 6 ANNONACEAE ANAX… PHAEOC… 78484 NA NA NA NA
## # ℹ 12 more variables: voucher...10 <lgl>, voucher...11 <lgl>,
## # voucher...12 <lgl>, LIANA <chr>, `N(IND.)` <dbl>, STEMDBH...15 <dbl>,
## # STEMDBH...16 <dbl>, STEMDBH...17 <dbl>, STEMDBH...18 <dbl>,
## # STEMDBH...19 <dbl>, STEMDBH...20 <dbl>, STEMDBH...21 <dbl>
str(la_selva)
## tibble [250 × 21] (S3: tbl_df/tbl/data.frame)
## $ Line : num [1:250] 1 2 3 4 5 6 7 8 9 10 ...
## $ FAMILY : chr [1:250] "ANNONACEAE" "ANNONACEAE" "ANNONACEAE" "ANNONACEAE" ...
## $ GENUS : chr [1:250] "ANAXAGOREA" "ANAXAGOREA" "ANAXAGOREA" "ANAXAGOREA" ...
## $ SPECIES : chr [1:250] "PHAEOCARPA" "PHAEOCARPA" "PHAEOCARPA" "PHAEOCARPA" ...
## $ voucher1 : num [1:250] 78484 78484 78484 78484 78484 ...
## $ voucher2 : num [1:250] NA NA NA NA NA NA NA NA NA NA ...
## $ voucher3 : num [1:250] NA NA NA NA NA NA NA NA NA NA ...
## $ voucher4 : num [1:250] NA NA NA NA NA NA NA NA NA NA ...
## $ voucher...9 : logi [1:250] NA NA NA NA NA NA ...
## $ voucher...10: logi [1:250] NA NA NA NA NA NA ...
## $ voucher...11: logi [1:250] NA NA NA NA NA NA ...
## $ voucher...12: logi [1:250] NA NA NA NA NA NA ...
## $ LIANA : chr [1:250] NA NA NA NA ...
## $ N(IND.) : num [1:250] 2 6 2 3 4 7 6 4 6 2 ...
## $ STEMDBH...15: num [1:250] 5 3 3 4 5 7 3 2.5 3 4 ...
## $ STEMDBH...16: num [1:250] 5 3 5 9 7 7 7 7 7 2.5 ...
## $ STEMDBH...17: num [1:250] NA 2.5 NA 4 8 4 2.5 3 5 NA ...
## $ STEMDBH...18: num [1:250] NA 5 NA 7 5 2.5 7 4 3 NA ...
## $ STEMDBH...19: num [1:250] NA 4 NA NA NA 6 2.5 NA 3 NA ...
## $ STEMDBH...20: num [1:250] NA 13 NA NA NA 9 4 NA 7 NA ...
## $ STEMDBH...21: num [1:250] NA NA NA NA NA 7 6 NA NA NA ...
# Clean column names and rename column names for La Selva.
la_selva <- la_selva %>% janitor::clean_names()
la_selva <- la_selva %>%
mutate(species_name = paste(genus, species))
la_selva_long <- la_selva %>%
pivot_longer(
cols = starts_with("stemdbh"),
names_to = "stem",
values_to = "dbh"
)
la_selva_long <- la_selva_long %>%
filter(!is.na(dbh))
la_selva_clean <- la_selva_long %>%
select(
transect = line,
species = species_name,
dbh,
n_ind
) %>%
mutate(site = "La Selva")
glimpse(la_selva_clean)
## Rows: 347
## Columns: 5
## $ transect <dbl> 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6…
## $ species <chr> "ANAXAGOREA PHAEOCARPA", "ANAXAGOREA PHAEOCARPA", "ANAXAGOREA…
## $ dbh <dbl> 5.0, 5.0, 3.0, 3.0, 2.5, 5.0, 4.0, 13.0, 3.0, 5.0, 4.0, 9.0, …
## $ n_ind <dbl> 2, 2, 6, 6, 6, 6, 6, 6, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 7, 7, 7…
## $ site <chr> "La Selva", "La Selva", "La Selva", "La Selva", "La Selva", "…
# Clean column names and rename column names for Cedar Bluffs.
cedar_bluffs <- cedar_bluffs %>% janitor::clean_names()
cedar_bluffs <- cedar_bluffs %>%
mutate(species_name = paste(genus, species))
cedar_long <- cedar_bluffs %>%
pivot_longer(
cols = starts_with("stemdbh"),
names_to = "stem",
values_to = "dbh"
)
cedar_long <- cedar_long %>%
filter(!is.na(dbh))
cedar_clean <- cedar_long %>%
select(
transect = line,
species = species_name,
dbh,
n_ind
) %>%
mutate(site = "Cedar Bluffs")
glimpse(cedar_clean)
## Rows: 175
## Columns: 5
## $ transect <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "10", "10",…
## $ species <chr> "ACER NEGUNDO", "ACER NEGUNDO", "ACER NEGUNDO", "ACER SACCHAR…
## $ dbh <dbl> 8.0, 4.0, 7.0, 2.5, 2.5, 8.0, 38.0, 4.0, 27.0, 12.0, 4.0, 10.…
## $ n_ind <dbl> 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ site <chr> "Cedar Bluffs", "Cedar Bluffs", "Cedar Bluffs", "Cedar Bluffs…
# Combine data from both sites and group by site, transect, and species.
la_selva_clean <- la_selva_clean %>%
mutate(transect = as.character(transect))
cedar_clean <- cedar_clean %>%
mutate(transect = as.character(transect))
combined_data <- bind_rows(la_selva_clean, cedar_clean)
combined_data$transect <- factor(
combined_data$transect,
levels = sort(unique(as.numeric(combined_data$transect)))
)
combined_data %>%
group_by(site, transect, species) %>%
summarise(count = n())
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by site, transect, and species.
## ℹ Output is grouped by site and transect.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(site, transect, species))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
## # A tibble: 314 × 4
## # Groups: site, transect [20]
## site transect species count
## <chr> <fct> <chr> <int>
## 1 Cedar Bluffs 1 ACER NEGUNDO 3
## 2 Cedar Bluffs 1 ACER SACCHARUM 7
## 3 Cedar Bluffs 1 QUERCUS RUBRA 1
## 4 Cedar Bluffs 1 ULMUS RUBRA 1
## 5 Cedar Bluffs 2 ACER SACCHARUM 7
## 6 Cedar Bluffs 2 CARYA OVATA 1
## 7 Cedar Bluffs 2 CERCIS CANADENSIS 1
## 8 Cedar Bluffs 2 JUGLANS NIGRA 1
## 9 Cedar Bluffs 2 OSTRYA VIRGINIANA 3
## 10 Cedar Bluffs 2 PRUNUS SEROTINA 2
## # ℹ 304 more rows
# Calculate biodiversity metrics (Simpson’s D, 1-D, 1/D, ESimpson, Shannon H, EShannon)
# for each transect at both sites.
species_counts <- combined_data %>%
group_by(site, transect, species) %>%
summarise(n = n(), .groups = "drop")
transect_summary <- species_counts %>%
group_by(site, transect) %>%
mutate(
N = sum(n),
S = n_distinct(species),
pi = n / N
)
biodiversity_indices <- transect_summary %>%
group_by(site, transect) %>%
summarise(
S = first(S),
N = first(N),
D = sum(pi^2), # Simpson's D
simpson_diversity_index = 1 - D, # Simpson's index of diversity
simpson_reciprocal_index = 1 / D, # Simpson's reciprocal index
E_simpson = (1 / D) / S, # Simpson's index of diversity evenness (ESimpson)
shannon_H = -sum(pi * log(pi)), # Shannon diversity index (H)
E_shannon = shannon_H / log(S), # Shannon evenness (EShannon)
.groups = "drop"
)
print(biodiversity_indices)
## # A tibble: 20 × 10
## site transect S N D simpson_diversity_index
## <chr> <fct> <int> <int> <dbl> <dbl>
## 1 Cedar Bluffs 1 4 12 0.417 0.583
## 2 Cedar Bluffs 2 8 17 0.232 0.768
## 3 Cedar Bluffs 3 6 16 0.359 0.641
## 4 Cedar Bluffs 4 7 17 0.253 0.747
## 5 Cedar Bluffs 5 6 19 0.429 0.571
## 6 Cedar Bluffs 6 5 16 0.430 0.570
## 7 Cedar Bluffs 7 6 20 0.355 0.645
## 8 Cedar Bluffs 8 6 15 0.467 0.533
## 9 Cedar Bluffs 9 7 17 0.377 0.623
## 10 Cedar Bluffs 10 10 26 0.355 0.645
## 11 La Selva 1 29 37 0.0402 0.960
## 12 La Selva 2 28 39 0.0572 0.943
## 13 La Selva 3 29 36 0.0386 0.961
## 14 La Selva 4 23 27 0.0562 0.944
## 15 La Selva 5 25 35 0.0547 0.945
## 16 La Selva 6 21 38 0.0817 0.918
## 17 La Selva 7 25 44 0.0744 0.926
## 18 La Selva 8 19 24 0.0694 0.931
## 19 La Selva 9 23 37 0.0650 0.935
## 20 La Selva 10 27 30 0.04 0.96
## # ℹ 4 more variables: simpson_reciprocal_index <dbl>, E_simpson <dbl>,
## # shannon_H <dbl>, E_shannon <dbl>
# DBH distribution by transect
ggplot(combined_data, aes(x = dbh, fill = site)) +
geom_histogram(
binwidth = 0.1,
color = "black",
alpha = 0.7
) +
scale_x_log10() + facet_grid(site ~ transect, scales = "free_y") +
labs(
title = "Distribution of DBH by Transect",
x = "DBH (cm, log scale)",
y = "Number of Individuals"
)

# DBH distribution by species
top_species <- combined_data %>%
count(site, species, sort = TRUE) %>%
group_by(site) %>%
slice_head(n = 10) %>%
ungroup()
filtered_data <- combined_data %>%
semi_join(top_species, by = c("site", "species"))
ggplot(filtered_data, aes(x = species, y = dbh)) +
geom_boxplot() +
facet_wrap(~site) +
theme(axis.text.x = element_text(angle = 90)) +
labs(
title = "DBH Distributions of Dominant Tree Species By Site",
x = "Species",
y = "DBH (cm)"
)

# Comparison of DBH Across Transects Between Sites
ggplot(combined_data, aes(x = transect, y = dbh, color = site)) +
geom_boxplot() +
geom_jitter(alpha = 0.3, width = 0.2) +
scale_y_log10() +
labs(
title = "Comparison of DBH Across Transects Between Sites",
x = "Transect",
y = "DBH (cm, log scale)"
)
