# Data Cleaning
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.3
# Site 1: Camorin-Jacarepaguá, Brazil
camorin <- read_excel("CAMORIN.xls")
## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMSDBH` -> `STEMSDBH...15`
## • `STEMSDBH` -> `STEMSDBH...16`
## • `STEMSDBH` -> `STEMSDBH...17`
## • `STEMSDBH` -> `STEMSDBH...18`
## • `STEMSDBH` -> `STEMSDBH...19`
camorin$Line <- as.numeric(camorin$Line)
camorin <- camorin[order(camorin$Line),]
# Remove rows with missing values (number of individuals)
camorin <- camorin[!is.na(camorin[,"N(IND.)"]),]
# Total individuals per transect (N. Ind)
camorin_line <- aggregate(camorin[,"N(IND.)"], list(Line=camorin$Line), sum)
colnames(camorin_line)[2] <- "N(IND.)"
# Per species calculations (proportions)
camorin[,"Prop"] <- camorin[,"N(IND.)"]/camorin_line[match(camorin$Line, camorin_line$Line), "N(IND.)"]
camorin[,"PropSquared"] <- camorin$Prop^2
# Simpson's index calculations
camorin_line[,"D"] <- aggregate(list(D=camorin$PropSquared), list(Line=camorin$Line), sum)$D
camorin_line[,"1-D"] <- 1 - camorin_line[,"D"]
camorin_line[,"1/D"] <- 1 / camorin_line[,"D"]
camorin_line[,"S"] <- aggregate(list(S=camorin$Line), list(Line=camorin$Line), length)$S
camorin_line[,"Esimpson"] <- camorin_line[,"1/D"] / camorin_line[,"S"]
# Shannon index calculations
# H = -sum(pi * ln(pi)) per transect
camorin[,"PlogP"] <- ifelse(camorin$Prop > 0, camorin$Prop * log(camorin$Prop), 0)
camorin_line[,"H"] <- -aggregate(list(H=camorin$PlogP), list(Line=camorin$Line), sum)$H
camorin_line[,"Eshannon"] <- camorin_line[,"H"] / log(camorin_line[,"S"]) # Evenness = H / ln(S)
# Camorin averages
camorin_line$Mean_D <- mean(camorin_line$D)
camorin_line$Mean_1minusD <- mean(camorin_line$`1-D`)
camorin_line$Mean_1overD <- mean(camorin_line$`1/D`)
camorin_line$Mean_Esimpson <- mean(camorin_line$Esimpson)
camorin_line$Mean_H <- mean(camorin_line$H)
camorin_line$Mean_Eshannon <- mean(camorin_line$Eshannon)
camorin_line$Mean_S <- mean(camorin_line$S)
# DBH calculations
dbh_cols_camorin <- grep("STEMSDBH", colnames(camorin))
camorin[, dbh_cols_camorin] <- lapply(camorin[, dbh_cols_camorin], as.numeric)
camorin[,"DBHsum"] <- rowSums(camorin[, dbh_cols_camorin], na.rm=TRUE)
camorin_line[,"DBHsum"] <- aggregate(camorin$DBHsum, list(camorin$Line), sum)$x
camorin_line[,"DBHmean"] <- camorin_line[,"DBHsum"] / camorin_line[,"N(IND.)"]
# SITE 2: Ruissalo, Finland
ruissalo <- read_excel("RUISSALO.xls")
## New names:
## • `voucher` -> `voucher...5`
## • `voucher` -> `voucher...6`
## • `voucher` -> `voucher...7`
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `Stemdbh` -> `Stemdbh...15`
## • `Stemdbh` -> `Stemdbh...16`
## • `Stemdbh` -> `Stemdbh...17`
## • `Stemdbh` -> `Stemdbh...18`
## • `Stemdbh` -> `Stemdbh...19`
## • `Stemdbh` -> `Stemdbh...20`
## • `Stemdbh` -> `Stemdbh...21`
## • `Stemdbh` -> `Stemdbh...22`
## • `Stemdbh` -> `Stemdbh...23`
## • `Stemdbh` -> `Stemdbh...24`
## • `Stemdbh` -> `Stemdbh...25`
## • `Stemdbh` -> `Stemdbh...26`
## • `Stemdbh` -> `Stemdbh...27`
## • `Stemdbh` -> `Stemdbh...28`
## • `Stemdbh` -> `Stemdbh...29`
## • `Stemdbh` -> `Stemdbh...30`
## • `Stemdbh` -> `Stemdbh...31`
## • `Stemdbh` -> `Stemdbh...32`
## • `Stemdbh` -> `Stemdbh...33`
ruissalo$Line <- as.numeric(ruissalo$Line)
ruissalo <- ruissalo[order(ruissalo$Line),]
# Remove rows with missing values (number of individuals)
ruissalo <- ruissalo[!is.na(ruissalo[,"N(Ind.)"]),]
# Total individuals per transect (N. Ind)
ruissalo_line <- aggregate(ruissalo[,"N(Ind.)"], list(Line=ruissalo$Line), sum)
colnames(ruissalo_line)[2] <- "N(IND.)"
# Per species calculations (proportions)
ruissalo[,"Prop"] <- ruissalo[,"N(Ind.)"]/ruissalo_line[match(ruissalo$Line, ruissalo_line$Line), "N(IND.)"]
ruissalo[,"PropSquared"] <- ruissalo$Prop^2
# Simpson's index calculations
ruissalo_line[,"D"] <- aggregate(list(D=ruissalo$PropSquared), list(Line=ruissalo$Line), sum)$D
ruissalo_line[,"1-D"] <- 1 - ruissalo_line[,"D"]
ruissalo_line[,"1/D"] <- 1 / ruissalo_line[,"D"]
ruissalo_line[,"S"] <- aggregate(list(S=ruissalo$Line), list(Line=ruissalo$Line), length)$S
ruissalo_line[,"Esimpson"] <- ruissalo_line[,"1/D"] / ruissalo_line[,"S"]
# Shannon index
ruissalo[,"PlogP"] <- ifelse(ruissalo$Prop > 0, ruissalo$Prop * log(ruissalo$Prop), 0)
ruissalo_line[,"H"] <- -aggregate(list(H=ruissalo$PlogP), list(Line=ruissalo$Line), sum)$H
ruissalo_line[,"Eshannon"] <- ruissalo_line[,"H"] / log(ruissalo_line[,"S"])
# Site averages
ruissalo_line$Mean_D <- mean(ruissalo_line$D)
ruissalo_line$Mean_1minusD <- mean(ruissalo_line$`1-D`)
ruissalo_line$Mean_1overD <- mean(ruissalo_line$`1/D`)
ruissalo_line$Mean_Esimpson <- mean(ruissalo_line$Esimpson)
ruissalo_line$Mean_H <- mean(ruissalo_line$H)
ruissalo_line$Mean_Eshannon <- mean(ruissalo_line$Eshannon)
ruissalo_line$Mean_S <- mean(ruissalo_line$S)
# DBH calculations
dbh_cols_ruissalo <- grep("Stemdbh", colnames(ruissalo))
ruissalo[, dbh_cols_ruissalo] <- lapply(ruissalo[, dbh_cols_ruissalo], as.numeric)
ruissalo[,"DBHsum"] <- rowSums(ruissalo[, dbh_cols_ruissalo], na.rm=TRUE)
ruissalo_line[,"DBHsum"] <- aggregate(ruissalo$DBHsum, list(ruissalo$Line), sum)$x
ruissalo_line[,"DBHmean"] <- ruissalo_line[,"DBHsum"] / ruissalo_line[,"N(IND.)"]
# Summary Tables
# Simpson's table
simpsons_table <- data.frame(
Site = c("Camorin, Brazil", "Ruissalo, Finland"),
D = c(camorin_line$Mean_D[1], ruissalo_line$Mean_D[1]),
"1-D" = c(camorin_line$Mean_1minusD[1], ruissalo_line$Mean_1minusD[1]),
"1/D" = c(camorin_line$Mean_1overD[1], ruissalo_line$Mean_1overD[1]),
Esimpson = c(camorin_line$Mean_Esimpson[1], ruissalo_line$Mean_Esimpson[1])
)
# Shannon table
shannon_table <- data.frame(
Site = c("Camorin, Brazil", "Ruissalo, Finland"),
H = c(camorin_line$Mean_H[1], ruissalo_line$Mean_H[1]),
Eshannon = c(camorin_line$Mean_Eshannon[1], ruissalo_line$Mean_Eshannon[1])
)
# Print tables
print(simpsons_table)
## Site D X1.D X1.D.1 Esimpson
## 1 Camorin, Brazil 0.04758249 0.9524175 21.696706 0.7903022
## 2 Ruissalo, Finland 0.28832369 0.7116763 3.614451 0.7017526
print(shannon_table)
## Site H Eshannon
## 1 Camorin, Brazil 3.192966 0.9674377
## 2 Ruissalo, Finland 1.405109 0.8622421
# Visualizations
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.5.3
# Rename columns
colnames(simpsons_table) <- c("Site", "D", "1-D", "1/D", "Esimpson")
# Plot 1: Side-by-side average DBH bar plot
dbh_matrix <- rbind(camorin_line$DBHmean, ruissalo_line$DBHmean)
rownames(dbh_matrix) <- c("Camorin, Brazil", "Ruissalo, Finland")
colnames(dbh_matrix) <- camorin_line$Line
barplot(dbh_matrix,
beside = TRUE,
col = c("forestgreen", "lightblue"),
xlab = "Transect Number",
ylab = "Average DBH (cm)",
main = "Average Diameter at Breast Height (DBH) per Transect",
legend.text = TRUE,
args.legend = list(x = "topleft"),
cex.main = 0.9)

# Plot 2: Ridge plot of DBH distributions by transect and site
# Camorin long format
camorin_raw2 <- read_excel("CAMORIN.xls")
## New names:
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `STEMSDBH` -> `STEMSDBH...15`
## • `STEMSDBH` -> `STEMSDBH...16`
## • `STEMSDBH` -> `STEMSDBH...17`
## • `STEMSDBH` -> `STEMSDBH...18`
## • `STEMSDBH` -> `STEMSDBH...19`
ruissalo_raw2 <- read_excel("RUISSALO.xls")
## New names:
## • `voucher` -> `voucher...5`
## • `voucher` -> `voucher...6`
## • `voucher` -> `voucher...7`
## • `voucher` -> `voucher...8`
## • `voucher` -> `voucher...9`
## • `voucher` -> `voucher...10`
## • `voucher` -> `voucher...11`
## • `voucher` -> `voucher...12`
## • `Stemdbh` -> `Stemdbh...15`
## • `Stemdbh` -> `Stemdbh...16`
## • `Stemdbh` -> `Stemdbh...17`
## • `Stemdbh` -> `Stemdbh...18`
## • `Stemdbh` -> `Stemdbh...19`
## • `Stemdbh` -> `Stemdbh...20`
## • `Stemdbh` -> `Stemdbh...21`
## • `Stemdbh` -> `Stemdbh...22`
## • `Stemdbh` -> `Stemdbh...23`
## • `Stemdbh` -> `Stemdbh...24`
## • `Stemdbh` -> `Stemdbh...25`
## • `Stemdbh` -> `Stemdbh...26`
## • `Stemdbh` -> `Stemdbh...27`
## • `Stemdbh` -> `Stemdbh...28`
## • `Stemdbh` -> `Stemdbh...29`
## • `Stemdbh` -> `Stemdbh...30`
## • `Stemdbh` -> `Stemdbh...31`
## • `Stemdbh` -> `Stemdbh...32`
## • `Stemdbh` -> `Stemdbh...33`
# Camorin long format
dbh_cols1 <- grep("STEMSDBH", colnames(camorin_raw2))
camorin_long <- camorin_raw2[, c(1, dbh_cols1)] # column 1 = Line
colnames(camorin_long)[1] <- "Line"
camorin_long[, -1] <- lapply(camorin_long[, -1], as.numeric)
camorin_long <- reshape(as.data.frame(camorin_long),
varying = colnames(camorin_long)[-1],
v.names = "DBH",
timevar = "stem",
direction = "long")
camorin_long <- camorin_long[!is.na(camorin_long$DBH),]
camorin_long$Site <- "Camorin, Brazil"
camorin_long$Transect <- factor(paste("T", camorin_long$Line, sep=""),
levels = c("T1","T2","T3","T4","T5",
"T6","T7","T8","T9","T10"))
# Ruissalo long format
dbh_cols2 <- grep("Stemdbh", colnames(ruissalo_raw2))
ruissalo_long <- ruissalo_raw2[, c(1, dbh_cols2)]
colnames(ruissalo_long)[1] <- "Line"
ruissalo_long[, -1] <- lapply(ruissalo_long[, -1], as.numeric)
ruissalo_long <- reshape(as.data.frame(ruissalo_long),
varying = colnames(ruissalo_long)[-1],
v.names = "DBH",
timevar = "stem",
direction = "long")
ruissalo_long <- ruissalo_long[!is.na(ruissalo_long$DBH),]
ruissalo_long$Site <- "Ruissalo, Finland"
ruissalo_long$Transect <- factor(paste("T", ruissalo_long$Line, sep=""),
levels = c("T1","T2","T3","T4","T5",
"T6","T7","T8","T9","T10"))
# Combine
dbh_long <- rbind(
camorin_long[, c("Site", "Transect", "DBH")],
ruissalo_long[, c("Site", "Transect", "DBH")]
)
# Ridge plot
ggplot(dbh_long, aes(x = DBH, y = Transect, fill = Site)) +
geom_density_ridges(alpha = 0.7, scale = 1.2) +
facet_wrap(~Site) +
scale_fill_manual(values = c("Camorin, Brazil" = "forestgreen",
"Ruissalo, Finland" = "steelblue")) +
labs(title = "DBH Distribution by Transect",
subtitle = "Camorin-Jacarepaguá, Brazil vs Ruissalo, Finland",
x = "Diameter at Breast Height (cm)",
y = "Transect") +
theme_ridges() +
theme(legend.position = "none")
## Picking joint bandwidth of 2.5
## Picking joint bandwidth of 1.4

# Plot 3: Rarefaction Curves
library(vegan)
## Warning: package 'vegan' was built under R version 4.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.5.3
# Camorin
camorin_matrix <- aggregate(camorin[,"N(IND.)"],
list(Line = camorin$Line,
Species = paste(camorin$Genus, camorin$species)),
sum)
camorin_wide <- reshape(camorin_matrix,
idvar = "Line",
timevar = "Species",
direction = "wide")
rownames(camorin_wide) <- paste("Camorin_T", camorin_wide$Line, sep="")
camorin_wide$Line <- NULL
camorin_wide[is.na(camorin_wide)] <- 0
# Ruissalo
ruissalo_matrix <- aggregate(ruissalo[,"N(Ind.)"],
list(Line = ruissalo$Line,
Species = paste(ruissalo$Genus, ruissalo$Species)),
sum)
ruissalo_wide <- reshape(ruissalo_matrix,
idvar = "Line",
timevar = "Species",
direction = "wide")
rownames(ruissalo_wide) <- paste("Ruissalo_T", ruissalo_wide$Line, sep="")
ruissalo_wide$Line <- NULL
ruissalo_wide[is.na(ruissalo_wide)] <- 0
# Plot Rarefaction curves using vegan
par(mfrow = c(1,2))
rarecurve(camorin_wide,
step = 5,
col = "forestgreen",
xlab = "Number of Individuals",
ylab = "Number of Species",
main = "Camorin, Brazil",
label = FALSE)
rarecurve(ruissalo_wide,
step = 5,
col = "steelblue",
xlab = "Number of Individuals",
ylab = "Number of Species",
main = "Ruissalo, Finland",
label = FALSE)

# Tables 1 and 2: Biodiversity indices per transect
# Camorin per-transect table
camorin_table1 <- data.frame(
Transect = camorin_line$Line,
D = round(camorin_line$D, 4),
"1-D" = round(camorin_line$`1-D`, 4),
"1/D" = round(camorin_line$`1/D`, 4),
Esimpson = round(camorin_line$Esimpson, 4),
H = round(camorin_line$H, 4),
Eshannon = round(camorin_line$Eshannon, 4),
S = camorin_line$S)
# Ruissalo per-transect table
ruissalo_table2 <- data.frame(
Transect = ruissalo_line$Line,
D = round(ruissalo_line$D, 4),
"1-D" = round(ruissalo_line$`1-D`, 4),
"1/D" = round(ruissalo_line$`1/D`, 4),
Esimpson = round(ruissalo_line$Esimpson, 4),
H = round(ruissalo_line$H, 4),
Eshannon = round(ruissalo_line$Eshannon, 4),
S = ruissalo_line$S)
# Fix column names
colnames(camorin_table1) <- c("Transect","D","1-D","1/D","Esimpson","H","Eshannon","S")
colnames(ruissalo_table2) <- c("Transect","D","1-D","1/D","Esimpson","H","Eshannon","S")
print(camorin_table1)
## Transect D 1-D 1/D Esimpson H Eshannon S
## 1 1 0.0432 0.9568 23.1429 0.8571 3.2224 0.9777 27
## 2 2 0.0446 0.9554 22.4426 0.8312 3.2080 0.9733 27
## 3 3 0.0443 0.9557 22.5882 0.6644 3.3461 0.9489 34
## 4 4 0.0457 0.9543 21.8788 0.7544 3.2452 0.9638 29
## 5 5 0.0308 0.9692 32.4386 0.8767 3.5556 0.9847 37
## 6 6 0.0542 0.9458 18.4576 0.8390 3.0027 0.9714 22
## 7 7 0.0587 0.9413 17.0435 0.7747 2.9856 0.9659 22
## 8 8 0.0585 0.9415 17.0899 0.6573 3.0662 0.9411 26
## 9 9 0.0510 0.9490 19.6122 0.8527 3.0594 0.9757 23
## 10 10 0.0449 0.9551 22.2727 0.7955 3.2385 0.9719 28
print(ruissalo_table2)
## Transect D 1-D 1/D Esimpson H Eshannon S
## 1 1 0.2416 0.7584 4.1391 0.6898 1.5542 0.8674 6
## 2 2 0.4118 0.5882 2.4286 0.6071 1.0910 0.7870 4
## 3 3 0.2143 0.7857 4.6667 0.6667 1.6908 0.8689 7
## 4 4 0.2350 0.7650 4.2553 0.7092 1.5741 0.8785 6
## 5 5 0.3291 0.6709 3.0388 0.7597 1.2214 0.8811 4
## 6 6 0.2645 0.7355 3.7812 0.6302 1.5029 0.8388 6
## 7 7 0.2992 0.7008 3.3419 0.6684 1.3602 0.8451 5
## 8 8 0.2450 0.7550 4.0816 0.8163 1.4786 0.9187 5
## 9 9 0.2663 0.7337 3.7556 0.9389 1.3517 0.9750 4
## 10 10 0.3765 0.6235 2.6557 0.5311 1.2261 0.7618 5
# Table 3: Site-level averages
table3 <- data.frame(
Site = c("Camorin, Brazil", "Ruissalo, Finland"),
D = round(c(camorin_line$Mean_D[1], ruissalo_line$Mean_D[1]), 4),
"1-D" = round(c(camorin_line$Mean_1minusD[1], ruissalo_line$Mean_1minusD[1]), 4),
"1/D" = round(c(camorin_line$Mean_1overD[1], ruissalo_line$Mean_1overD[1]), 4),
Esimpson = round(c(camorin_line$Mean_Esimpson[1], ruissalo_line$Mean_Esimpson[1]), 4),
H = round(c(camorin_line$Mean_H[1], ruissalo_line$Mean_H[1]), 4),
Eshannon = round(c(camorin_line$Mean_Eshannon[1], ruissalo_line$Mean_Eshannon[1]), 4),
S = round(c(camorin_line$Mean_S[1], ruissalo_line$Mean_S[1]), 1))
colnames(table3) <- c("Site","D","1-D","1/D","Esimpson","H","Eshannon","S")
print(table3)
## Site D 1-D 1/D Esimpson H Eshannon S
## 1 Camorin, Brazil 0.0476 0.9524 21.6967 0.7903 3.1930 0.9674 27.5
## 2 Ruissalo, Finland 0.2883 0.7117 3.6145 0.7018 1.4051 0.8622 5.2