Title: Tree Demography Analysis
This analysis processes tree girth measurements collected on the IITB campus during the field tutorial and uses them to compute stem diameter, basal area, and size-class distributions.
library(tidyverse)
# Read the raw field measurements. Text columns stay as character vectors
# (stringsAsFactors = FALSE) instead of being converted to factors.
# NOTE(review): this is an absolute path on one machine; the script will not
# run elsewhere until it is pointed at a local copy of the CSV.
csv_path <- "/Users/sakshi/Documents/CM_703/demographics_dataset_iitb.csv"
data <- read.csv(file = csv_path, stringsAsFactors = FALSE)

# Print the column names exactly as read.csv produced them.
colnames(data)
## [1] "Obs.ID" "Observer.ID"
## [3] "Observer.Name" "Tree.ID"
## [5] "Branch.ID" "G1..cm."
## [7] "G2..cm." "G3..cm."
## [9] "Status..Dead.Alive." "Canopy.openness....sky.visible."
## [11] "No..of.neighboring.trees" "group_id"
## [13] "...12" "G4..cm."
## [15] "G5..cm."
# Extract the first decimal number from a raw girth entry and return it as a
# numeric value; entries that contain no digits become NA.
#
# The pattern requires at least one digit ("12", "12.5", ".5"), so a stray
# period that precedes the number (for example "approx. 30") is skipped
# instead of being extracted as "." and coerced to NA.
parse_girth <- function(raw) {
  as.numeric(str_extract(raw, "[0-9]*\\.?[0-9]+"))
}

# Add numeric girth columns G1-G3 parsed from the raw "G1..cm.", "G2..cm."
# and "G3..cm." columns (the names read.csv assigned on import).
# NOTE(review): the data also has "G4..cm." and "G5..cm." columns, which are
# not parsed here and so do not enter the later mean - confirm that is
# intended.
df1 <- data |>
  mutate(
    G1 = parse_girth(G1..cm.),
    G2 = parse_girth(G2..cm.),
    G3 = parse_girth(G3..cm.)
  )
# Strip every space character from Tree.ID so that IDs differing only in
# spacing are treated as the same tree when rows are grouped later.
df1 <- mutate(df1, Tree.ID = str_replace_all(Tree.ID, " ", ""))
# Per-row stem geometry derived from the three girth readings (in cm, per the
# source column names):
#   G_mean - mean of the available readings; NA readings are ignored, and a
#            row with no readings at all yields NaN
#   D      - diameter of a circle whose circumference is G_mean
#   area   - cross-sectional area of a circle of diameter D
df1 <- df1 |>
  mutate(G_mean = rowMeans(cbind(G1, G2, G3), na.rm = TRUE)) |>
  mutate(D = G_mean / pi) |>
  mutate(area = pi * D^2 / 4)
# One row per Tree.ID: BA is the sum of the cross-sectional areas of all rows
# recorded for that tree. sum() is called without na.rm, so a single missing
# area makes the whole tree's BA missing.
# NOTE(review): rows are keyed on Tree.ID alone; if tree IDs repeat across
# different group_id values they will be merged here - confirm IDs are unique.
tree_totals <- summarise(
  group_by(df1, Tree.ID),
  BA = sum(area),
  .groups = "drop"
)

# D_eff is the diameter of the single circle whose area equals BA.
df2 <- mutate(tree_totals, D_eff = sqrt(4 * BA / pi))
# Break points for the diameter classes: equal-width bins of 5 from 0 to 500.
dclasses <- seq(from = 0, to = 500, by = 5)
# Alternative (disabled): 10 logarithmically spaced break points from 1 to 1000.
#dclasses = 10^(seq(log10(1), log10(1000), length.out=10))

# Area of a circle of diameter 8 (pi * d^2 / 4).
# NOTE(review): presumably the sampled plot in square metres, given the later
# 1e4 / plot_area scaling - confirm that 8 is a diameter and not a radius.
plot_area <- (pi * 8^2) / 4
# Size-class table: one row per occupied diameter class.
#   class / class1 - the cut() interval and its integer index into dclasses
#   D_mean         - mean D_eff of the trees in the class
#   freq           - tree count scaled by 1e4 / plot_area
#   dD             - width of the class
#   freq_by_dD     - freq divided by the class width
# Trees whose D_eff falls outside the breaks (or is missing) get an NA class
# and are removed by drop_na().
df3 <- df2 |>
  mutate(
    class = cut(D_eff, breaks = dclasses),
    class1 = as.integer(class)
  ) |>
  group_by(class, class1) |>
  summarise(D_mean = mean(D_eff), freq = n(), .groups = "drop") |>
  drop_na() |>
  mutate(
    freq = freq * 1e4 / plot_area,
    dD = diff(dclasses)[class1],
    freq_by_dD = freq / dD
  )
# Scatter of scaled class frequency against mean class diameter on log10
# axes, with a linear-model smooth drawn over the points.
ggplot(data = df3, mapping = aes(x = D_mean, y = freq)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Mean Diameter (D_mean)") +
  ylab("Frequency")
## `geom_smooth()` using formula = 'y ~ x'
# Same log-log scatter, but with the frequency divided by the class width
# (freq_by_dD) on the y axis, again with a linear-model smooth.
ggplot(data = df3, mapping = aes(x = D_mean, y = freq_by_dD)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Mean Diameter (D_mean)") +
  ylab("Frequency / dD")
## `geom_smooth()` using formula = 'y ~ x'
# Straight-line fits on log10-log10 axes of the size distribution:
#   model1 - width-normalised frequency (freq_by_dD) against mean diameter
#   model2 - scaled frequency (freq) against mean diameter
# dclasses uses a constant step of 5, so dD is 5 for every class and
# freq_by_dD is simply freq / 5. The two fits therefore have the same slope
# and residuals and differ only in intercept, by log10(5). They would only
# diverge with unequal bin widths (e.g. the disabled logarithmic breaks).
model1 <- lm(log10(freq_by_dD) ~ log10(D_mean), data = df3)
model2 <- lm(log10(freq) ~ log10(D_mean), data = df3)
# Coefficient table and fit statistics for the width-normalised model.
summary(model1)
##
## Call:
## lm(formula = log10(freq_by_dD) ~ log10(D_mean), data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4732 -0.2575 0.1123 0.2144 0.3715
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.1599 0.2619 12.064 6.13e-06 ***
## log10(D_mean) -0.8025 0.1776 -4.519 0.00274 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3007 on 7 degrees of freedom
## Multiple R-squared: 0.7447, Adjusted R-squared: 0.7083
## F-statistic: 20.42 on 1 and 7 DF, p-value: 0.002735
# Coefficient table and fit statistics for the fit of log10(freq) on
# log10(D_mean).
summary(model2)
##
## Call:
## lm(formula = log10(freq) ~ log10(D_mean), data = df3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4732 -0.2575 0.1123 0.2144 0.3715
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8589 0.2619 14.733 1.59e-06 ***
## log10(D_mean) -0.8025 0.1776 -4.519 0.00274 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3007 on 7 degrees of freedom
## Multiple R-squared: 0.7447, Adjusted R-squared: 0.7083
## F-statistic: 20.42 on 1 and 7 DF, p-value: 0.002735