Title: Tree Demography Analysis


Introduction

This analysis processes tree girth measurements collected on the IITB campus during the field tutorial to compute tree diameters, basal areas, and the size-class distribution.


Setup

library(tidyverse)

Data Import

# Load the raw measurements. Base read.csv() converts the column headers to
# syntactic R names (e.g. "G1..cm."), which the later steps rely on.
data <- read.csv(
  file = "/Users/sakshi/Documents/CM_703/demographics_dataset_iitb.csv",
  stringsAsFactors = FALSE
)

# List the column names as imported
colnames(data)
##  [1] "Obs.ID"                          "Observer.ID"                    
##  [3] "Observer.Name"                   "Tree.ID"                        
##  [5] "Branch.ID"                       "G1..cm."                        
##  [7] "G2..cm."                         "G3..cm."                        
##  [9] "Status..Dead.Alive."             "Canopy.openness....sky.visible."
## [11] "No..of.neighboring.trees"        "group_id"                       
## [13] "...12"                           "G4..cm."                        
## [15] "G5..cm."

Data Processing

Extract numeric girth values

# Pull the first number out of each raw girth entry and store it as numeric
# in G1, G2 and G3.
#
# The pattern requires at least one digit, so stray punctuation (for example
# the "." in "approx. 45") is skipped instead of being captured on its own and
# coerced to NA. Entries that contain no digits become NA. A malformed entry
# such as "12.5.3" is read as 12.5.
#
# NOTE(review): the imported data also has G4..cm. and G5..cm. columns that
# this step does not use; confirm that leaving them out is intended.
girth_pattern <- "[0-9]+\\.?[0-9]*|\\.[0-9]+"

df1 <- data |>
  mutate(
    G1 = as.numeric(str_extract(G1..cm., girth_pattern)),
    G2 = as.numeric(str_extract(G2..cm., girth_pattern)),
    G3 = as.numeric(str_extract(G3..cm., girth_pattern))
  )

Clean Tree IDs (remove spaces)

# Strip every space character from the tree identifiers so that labels that
# differ only in spacing end up in the same group when rows are aggregated
# by Tree.ID.
df1 <- mutate(df1, Tree.ID = str_replace_all(Tree.ID, fixed(" "), ""))

Compute mean girth, diameter, and basal area

# Row-wise mean of the three girth readings. Missing readings are skipped;
# a row with no readings at all yields NaN.
df1 <- df1 |>
  mutate(G_mean = rowMeans(cbind(G1, G2, G3), na.rm = TRUE))

# Treat the mean girth as the circumference of a circle: D is that circle's
# diameter and area is its cross-sectional area.
df1 <- df1 |>
  mutate(
    D = G_mean / pi,
    area = pi * D^2 / 4
  )

Aggregation to Tree Level

# Collapse the per-row areas to one row per tree.
#   BA    = sum of the areas of all rows sharing a Tree.ID
#   D_eff = diameter of a single circle whose area equals BA
#
# Rows without a tree identifier are dropped first; otherwise they would all
# be merged into one artificial "tree". Within a tree, rows with no usable
# area (NA/NaN) are skipped so that one unmeasured row does not turn the whole
# tree's BA into NaN. A tree with no usable rows at all keeps BA = NA.
#
# NOTE(review): grouping is by Tree.ID alone. If tree IDs can repeat across
# group_id values, distinct trees would be merged here -- confirm IDs are
# unique across the whole dataset.
df2 <- df1 |>
  filter(!is.na(Tree.ID), Tree.ID != "") |>
  group_by(Tree.ID) |>
  summarise(
    BA = if (all(is.na(area))) NA_real_ else sum(area, na.rm = TRUE),
    .groups = "drop"
  ) |>
  mutate(D_eff = sqrt(4 * BA / pi))

Size Class Distribution

# Diameter class boundaries: 0 to 500 in steps of 5 (same units as D_eff).
dclasses <- seq(from = 0, to = 500, by = 5)
# Alternative, log-spaced classes (disabled):
# dclasses = 10^(seq(log10(1), log10(1000), length.out=10))

# Area of a circle with diameter 8.
# NOTE(review): presumably the sampling plot, in square metres -- confirm that
# 8 is the plot diameter (not the radius) and that one plot area applies to
# all of the data.
plot_area <- (pi * 8^2) / 4

# Build the size-class table.
#   class / class1 : diameter class of each tree (factor label / integer index)
#   D_mean         : mean D_eff of the trees in the class
#   freq           : tree count in the class, scaled by 1e4 / plot_area
#   dD             : width of the class
#   freq_by_dD     : freq divided by the class width
# Trees whose D_eff is missing or falls outside the class boundaries get
# class NA and are removed by drop_na().
# NOTE(review): the 1e4 / plot_area factor looks like a conversion to a
# per-hectare density for a single plot -- confirm.
df3 <- df2 |>
  mutate(
    class = cut(D_eff, breaks = dclasses),
    class1 = as.integer(class)
  ) |>
  group_by(class, class1) |>
  summarise(D_mean = mean(D_eff), freq = n(), .groups = "drop") |>
  drop_na() |>
  mutate(freq = freq * 1e4 / plot_area) |>
  mutate(
    dD = diff(dclasses)[class1],
    freq_by_dD = freq / dD
  )

Visualization

Frequency vs Diameter

# Class frequency against mean class diameter on log-log axes, with a
# straight-line (lm) fit drawn over the points.
df3 |>
  ggplot(mapping = aes(x = D_mean, y = freq)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "Mean Diameter (D_mean)", y = "Frequency")
## `geom_smooth()` using formula = 'y ~ x'

Normalized Frequency vs Diameter

# Same plot as for freq, but using the frequency divided by class width
# (freq_by_dD) on the y axis.
df3 |>
  ggplot(mapping = aes(x = D_mean, y = freq_by_dD)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "Mean Diameter (D_mean)", y = "Frequency / dD")
## `geom_smooth()` using formula = 'y ~ x'


Log–Log Regression

# Straight-line fits on log10-log10 scale against mean class diameter:
# model2 uses the class frequency, model1 the frequency divided by class width.
model2 <- lm(formula = log10(freq) ~ log10(D_mean), data = df3)
model1 <- lm(formula = log10(freq_by_dD) ~ log10(D_mean), data = df3)

model1 |> summary()
## 
## Call:
## lm(formula = log10(freq_by_dD) ~ log10(D_mean), data = df3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4732 -0.2575  0.1123  0.2144  0.3715 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.1599     0.2619  12.064 6.13e-06 ***
## log10(D_mean)  -0.8025     0.1776  -4.519  0.00274 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3007 on 7 degrees of freedom
## Multiple R-squared:  0.7447, Adjusted R-squared:  0.7083 
## F-statistic: 20.42 on 1 and 7 DF,  p-value: 0.002735
# With equal-width classes dD is constant, so this fit differs from model1
# only in its intercept (shifted by log10(dD)); the slope is the same.
model2 |> summary()
## 
## Call:
## lm(formula = log10(freq) ~ log10(D_mean), data = df3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4732 -0.2575  0.1123  0.2144  0.3715 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.8589     0.2619  14.733 1.59e-06 ***
## log10(D_mean)  -0.8025     0.1776  -4.519  0.00274 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3007 on 7 degrees of freedom
## Multiple R-squared:  0.7447, Adjusted R-squared:  0.7083 
## F-statistic: 20.42 on 1 and 7 DF,  p-value: 0.002735

Notes