Title: Tree Demography Analysis

Introduction

This analysis processes tree girth measurements from the IITB campus, collected during the field tutorial, to compute diameter, basal area, and size-class distributions.

Setup

library(tidyverse)

Data Import

# Load the raw measurement sheet; text columns stay as character vectors
# rather than being converted to factors.
data <- read.csv(
  file = "/Users/sakshi/Documents/CM_703/demographics_dataset_iitb.csv",
  stringsAsFactors = FALSE
)

# Show the column names as they were imported
colnames(data)

##  [1] "Obs.ID"                          "Observer.ID"                    
##  [3] "Observer.Name"                   "Tree.ID"                        
##  [5] "Branch.ID"                       "G1..cm."                        
##  [7] "G2..cm."                         "G3..cm."                        
##  [9] "Status..Dead.Alive."             "Canopy.openness....sky.visible."
## [11] "No..of.neighboring.trees"        "group_id"                       
## [13] "...12"                           "G4..cm."                        
## [15] "G5..cm."

Data Processing

Extract numeric girth values

# For each raw girth column, take the first run of digits/dots found in the
# cell and convert it to a number; cells with no such run become NA.
# NOTE(review): the import also lists G4..cm. and G5..cm., which are not
# used here — confirm whether they should contribute to the mean girth.
df1 <- mutate(
  data,
  G1 = str_extract(G1..cm., "[0-9.]+") |> as.numeric(),
  G2 = str_extract(G2..cm., "[0-9.]+") |> as.numeric(),
  G3 = str_extract(G3..cm., "[0-9.]+") |> as.numeric()
)

Tree ID

# Strip every space character from Tree.ID so that IDs differing only in
# spacing are treated as the same tree when grouping later.
df1 <- mutate(df1, Tree.ID = str_replace_all(Tree.ID, " ", ""))

Compute mean girth, diameter, and basal area

# Per-row geometry, treating the stem cross-section as a circle:
#   G_mean : mean of the available girth readings (NA readings are skipped;
#            a row with no reading at all yields NaN)
#   D      : diameter from circumference, D = G_mean / pi
#   area   : cross-sectional area, pi * D^2 / 4
df1 <- mutate(
  df1,
  G_mean = rowMeans(cbind(G1, G2, G3), na.rm = TRUE),
  D = G_mean / pi,
  area = pi * D^2 / 4
)

Aggregation to Tree Level

# Collapse the measurement rows to one row per tree.
#   BA    : total basal area of the tree = sum of `area` over its rows
#   D_eff : diameter of the single circle that has area BA
#
# G_mean is computed with na.rm = TRUE, so a row with no usable girth reading
# carries a NaN area. Summing with na.rm = TRUE stops one such row from turning
# the whole tree's BA into NaN, which would silently remove the tree from the
# size-class table. A tree with no usable reading at all gets BA = 0 and
# D_eff = 0.
#
# NOTE(review): rows are grouped on Tree.ID alone — confirm that IDs are
# unique across group_id / observers; otherwise rows from different trees or
# repeated measurements of the same branch are added together.
df2 <- df1 |>
  group_by(Tree.ID) |>
  summarise(BA = sum(area, na.rm = TRUE), .groups = "drop") |>
  mutate(D_eff = sqrt(4 * BA / pi))

Size Class Distribution

# Diameter class edges: equal-width classes of 5 from 0 to 500
dclasses <- seq(from = 0, to = 500, by = 5)
# Alternative (log-spaced edges):
# dclasses = 10^(seq(log10(1), log10(1000), length.out=10))

# Sampled area used to scale counts.
# NOTE(review): presumably a circular plot of diameter 8 in m^2 — confirm.
plot_area <- pi * 8^2 / 4

# Size-class table: one row per occupied diameter class.
#   class / class1 : class label and its integer index into `dclasses`
#   D_mean         : mean effective diameter of the trees in the class
#   freq           : tree count scaled by 1e4 / plot_area
#                    (presumably count per hectare — confirm units)
#   dD             : width of the class
#   freq_by_dD     : freq per unit of diameter
# Trees whose D_eff falls outside the class edges get an NA class and are
# removed by drop_na().
df3 <- df2 |>
  mutate(
    class = cut(D_eff, breaks = dclasses),
    class1 = as.integer(class)
  ) |>
  group_by(class, class1) |>
  summarise(D_mean = mean(D_eff), freq = n(), .groups = "drop") |>
  drop_na() |>
  mutate(
    freq = freq * 1e4 / plot_area,
    dD = diff(dclasses)[class1],
    freq_by_dD = freq / dD
  )

Visualization

Frequency vs Diameter

# Scaled frequency against class mean diameter on log-log axes, with a
# straight-line (lm) fit drawn over the points.
ggplot(data = df3, mapping = aes(D_mean, freq)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "Mean Diameter (D_mean)",
    y = "Frequency"
  )

## `geom_smooth()` using formula = 'y ~ x'

Normalized Frequency vs Diameter

# Width-normalized frequency against class mean diameter on log-log axes,
# with a straight-line (lm) fit drawn over the points.
ggplot(data = df3, mapping = aes(D_mean, freq_by_dD)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "Mean Diameter (D_mean)",
    y = "Frequency / dD"
  )

## `geom_smooth()` using formula = 'y ~ x'

Log–Log Regression

# Straight-line fits in log10-log10 space; the slope is the scaling exponent.
#   model1 : width-normalized frequency vs. mean diameter
#   model2 : scaled frequency vs. mean diameter
model1 <- lm(
  formula = log10(freq_by_dD) ~ log10(D_mean),
  data = df3
)
model2 <- lm(
  formula = log10(freq) ~ log10(D_mean),
  data = df3
)

summary(model1)

## 
## Call:
## lm(formula = log10(freq_by_dD) ~ log10(D_mean), data = df3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4732 -0.2575  0.1123  0.2144  0.3715 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.1599     0.2619  12.064 6.13e-06 ***
## log10(D_mean)  -0.8025     0.1776  -4.519  0.00274 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3007 on 7 degrees of freedom
## Multiple R-squared:  0.7447, Adjusted R-squared:  0.7083 
## F-statistic: 20.42 on 1 and 7 DF,  p-value: 0.002735

# Coefficients and fit statistics for model2 (log10(freq) ~ log10(D_mean))
summary(model2)

## 
## Call:
## lm(formula = log10(freq) ~ log10(D_mean), data = df3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4732 -0.2575  0.1123  0.2144  0.3715 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.8589     0.2619  14.733 1.59e-06 ***
## log10(D_mean)  -0.8025     0.1776  -4.519  0.00274 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3007 on 7 degrees of freedom
## Multiple R-squared:  0.7447, Adjusted R-squared:  0.7083 
## F-statistic: 20.42 on 1 and 7 DF,  p-value: 0.002735

Notes

The diameter is computed from girth assuming a circular cross-section: \( D = G_{\text{mean}} / \pi \)
Basal area is calculated as \( \text{area} = \pi D^2 / 4 \)
Frequency normalization depends on plot area (manually defined here)
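With the equal-width classes used here, dD is constant, so the Frequency vs Diameter and Normalized Frequency vs Diameter regressions have the same slope and differ only in intercept. Logarithmically spaced classes, e.g. dclasses = 10^(seq(log10(1), log10(1000), length.out=10)), make dD vary with diameter and can be used to better differentiate between the two relationships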
Log–log regression is used to examine scaling relationships

Forest Diversity and Coexistence

CM 703

Introduction

Setup

Data Import

Data Processing

Extract numeric girth values

Tree ID

Compute mean girth, diameter, and basal area

Aggregation to Tree Level

Size Class Distribution

Visualization

Frequency vs Diameter

Normalized Frequency vs Diameter

Log–Log Regression

Notes