Homework #2 - Volume Ratio Equations and Non-linear Regression
library(tidyverse)
library(readxl)
library(readr)
Vol_data = read_xls("C:/Users/du11505/OneDrive - University of Georgia/Desktop/Forest Stand Dynamics/Vol_Ratio_Data.xls")
summary(Vol_data)
## TreeNo DBH H Section
## Min. : 1.00 Min. : 0.800 Min. : 7.50 Min. : 0.000
## 1st Qu.: 25.00 1st Qu.: 4.500 1st Qu.:29.80 1st Qu.: 2.000
## Median : 47.00 Median : 5.800 Median :39.70 Median : 5.000
## Mean : 48.96 Mean : 6.201 Mean :38.01 Mean : 5.431
## 3rd Qu.: 74.00 3rd Qu.: 7.800 3rd Qu.:49.20 3rd Qu.: 8.000
## Max. :102.00 Max. :12.300 Max. :61.50 Max. :15.000
## dob ht Vol_tot Vol_merch
## Min. : 0.000 Min. : 0.10 Min. : 0.09688 Min. : 0.002182
## 1st Qu.: 2.200 1st Qu.: 6.40 1st Qu.: 1.55127 1st Qu.: 0.749714
## Median : 3.800 Median :15.40 Median : 3.23565 Median : 2.121328
## Mean : 4.174 Mean :17.16 Mean : 5.32389 Mean : 3.598058
## 3rd Qu.: 5.750 3rd Qu.:25.05 3rd Qu.: 7.35719 3rd Qu.: 5.135525
## Max. :14.800 Max. :61.50 Max. :20.81273 Max. :20.812730
## Ratio
## Min. :0.007767
## 1st Qu.:0.453062
## Median :0.771642
## Mean :0.672220
## 3rd Qu.:0.944762
## Max. :1.000000
sum(is.na(Vol_data))
## [1] 0
I reviewed the dataset and found that no data cleaning is necessary, since there are no missing values. The summary statistics of each variable also show reasonable ranges, so there are no obvious outliers.
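As a quick check behind that statement, the range of each variable can be inspected directly; a minimal sketch of such a check (illustrative, not part of the original analysis):
# Minimum and maximum of each measured variable; extreme or impossible
# values (negative volumes, ratios outside [0, 1]) would show up here
Vol_data %>%
  summarise(across(c(DBH, H, dob, ht, Vol_tot, Vol_merch, Ratio),
                   list(min = min, max = max)))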
\[ Vol_{tot} = \beta_{0}+ \beta_{1}D^{2}H + \epsilon\]
The first step is to generate a subset with the necessary data for each tree. I will use total volume, DBH, and total height.
subset_data <- Vol_data[, c("TreeNo", "DBH", "H", "Vol_tot")]
tree_data <- subset_data %>%
group_by(TreeNo) %>%
summarise(
DBH = first(DBH),
H = first(H),
Vol_tot = first(Vol_tot)
)
tree_data$D2H <- (tree_data$DBH)^2 * (tree_data$H)  # combined predictor D^2 * H
Now that we have the tree-level dataset, we can fit the model:
model_combined <- lm(Vol_tot ~ D2H, data = tree_data)
summary(model_combined)
##
## Call:
## lm(formula = Vol_tot ~ D2H, data = tree_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.15948 -0.12378 0.00594 0.09848 1.29188
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.045e-01 4.333e-02 2.411 0.0177 *
## D2H 2.551e-03 1.882e-05 135.544 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3361 on 99 degrees of freedom
## Multiple R-squared: 0.9946, Adjusted R-squared: 0.9946
## F-statistic: 1.837e+04 on 1 and 99 DF, p-value: < 2.2e-16
The resulting equation is:
\[\widehat{Vol}_{tot}= 0.1045 + 0.002551D^{2}H \]
## Add the predictions to the dataset
tree_data$predict_vol = predict(model_combined)
head(tree_data)
## # A tibble: 6 × 6
## TreeNo DBH H Vol_tot D2H predict_vol
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 6 39.9 3.77 1436. 3.77
## 2 2 3.8 33.2 1.54 479. 1.33
## 3 3 7.5 40.5 6.07 2278. 5.92
## 4 4 5 38.3 2.42 957. 2.55
## 5 5 3.1 15.6 0.567 150. 0.487
## 6 6 4 18.6 0.850 298. 0.864
b0 <- coef(model_combined)[1]; b1 <- coef(model_combined)[2]
r2 <- summary(model_combined)$r.squared
ggplot(tree_data, aes(D2H,Vol_tot))+
geom_point()+
geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
annotate(
"text",
x = Inf, y = -Inf, hjust = 1.02, vjust = -0.6,
label = sprintf("ŷ = %.4f + %.7f·D²H \nR² = %.3f", b0, b1, r2),
size = 4
)
## `geom_smooth()` using formula = 'y ~ x'
\[ Ratio = 1+\beta_{1}(dob^{\beta_{2}}/D^{\beta_{3}})+\epsilon \]
## First, subset the data (keep only sections with dob > 0)
#library(minpack.lm)
subset2 <- Vol_data %>% filter(dob>0)
model2 <- nls(Ratio ~ 1+ B1*(dob^B2)/(DBH^B3), data=subset2, start = list(B1=0.05, B2= 0.01, B3=0.02))
summary(model2)
##
## Formula: Ratio ~ 1 + B1 * (dob^B2)/(DBH^B3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## B1 -0.281693 0.008914 -31.60 <2e-16 ***
## B2 1.933546 0.032575 59.36 <2e-16 ***
## B3 1.528426 0.026675 57.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1169 on 915 degrees of freedom
##
## Number of iterations to convergence: 15
## Achieved convergence tolerance: 3.834e-06
The fitted prediction equation is: \[\widehat{Ratio} = 1 - 0.281693\,(dob^{1.933546}/D^{1.528426}) \]
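To see how this fitted curve behaves, the ratio can be evaluated over a range of top diameters for a single hypothetical tree (here DBH = 6 in, an assumed example value): the ratio is close to 1 for a very small top diameter and declines as dob approaches DBH. A minimal sketch:
cf <- coef(model2)                                        # B1, B2, B3 from the ratio fit
dob_seq <- seq(0.5, 6, by = 0.5)                          # hypothetical top diameters (in)
ratio_seq <- 1 + cf["B1"] * dob_seq^cf["B2"] / 6^cf["B3"] # DBH held at 6 in
round(ratio_seq, 3)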
\[ Vol_{merch} = (Vol_{tot})(Ratio) \]
so the combined prediction equation is:
\[ \widehat{Vol}_{merch} = (0.1045 + 0.002551D^{2}H)\,(1 - 0.281693(dob^{1.933546}/D^{1.528426})) \]
| ID | DBH (in) | H (ft) | Top dob (in) |
|---|---|---|---|
| i | 3.1 | 18.7 | 2 |
| ii | 5.3 | 34.7 | 3 |
| iii | 8.2 | 43.8 | 3 |
| iv | 11.4 | 58.7 | 4 |
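Applying the two fitted equations above to these four trees (treating the top dob as the merchantability limit) gives the part-1 predictions of merchantable volume; a minimal sketch, with `new_trees` a hypothetical data frame holding the values from the table (the results match the Model 1 column of the comparison table further below, up to rounding):
new_trees <- data.frame(DBH = c(3.1, 5.3, 8.2, 11.4),
                        H   = c(18.7, 34.7, 43.8, 58.7),
                        dob = c(2, 3, 3, 4))
vt_hat    <- 0.1045 + 0.002551 * new_trees$DBH^2 * new_trees$H              # total volume
ratio_hat <- 1 - 0.281693 * new_trees$dob^1.933546 / new_trees$DBH^1.528426 # merchantable ratio
round(vt_hat * ratio_hat, 3)                                                # merchantable volume (cu-ft)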
\[ Vol_{merch} = (\beta_{0}+ \beta_{1}D^{2}H)(1+\beta_{2}(dob^{\beta_{3}}/D^{\beta_{4}}))+ \epsilon \]
subset2$D2H <- (subset2$DBH)^2 * (subset2$H)  # combined predictor D^2 * H
## As starting values I will use the coefficients obtained in the previous fits
model3 <- nls(Vol_merch ~ (B0+B1*D2H)*(1+ B2*(dob^B3)/(DBH^B4)), data=subset2, start = list(B0=0.1, B1= 0.0025, B2=-0.28,B3=1.93, B4=1.53))
summary(model3)
##
## Formula: Vol_merch ~ (B0 + B1 * D2H) * (1 + B2 * (dob^B3)/(DBH^B4))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## B0 5.046e-02 5.831e-02 0.865 0.387
## B1 2.843e-03 3.586e-05 79.266 <2e-16 ***
## B2 -5.864e-01 4.023e-02 -14.576 <2e-16 ***
## B3 1.826e+00 5.030e-02 36.302 <2e-16 ***
## B4 1.759e+00 5.648e-02 31.139 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6909 on 913 degrees of freedom
##
## Number of iterations to convergence: 8
## Achieved convergence tolerance: 5.252e-06
The second fitted equation for merchantable volume is:
\[ \widehat{Vol}_{merch} = (0.05046+ 0.002843D^{2}H)(1-0.5864(dob^{1.826}/D^{1.759})) \]
The coefficients are not identical, but they are similar. The main difference is in B0, where the estimate from the first fit (0.1045) is roughly double the one obtained here (0.0505).
| ID | DBH (in) | H (ft) | Top dob (in) |
|---|---|---|---|
| i | 3.1 | 18.7 | 2 |
| ii | 5.3 | 34.7 | 3 |
| iii | 8.2 | 43.8 | 3 |
| iv | 11.4 | 58.7 | 4 |
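The same four trees can also be run through the directly fitted model with predict(); a minimal sketch, again using a hypothetical `new_trees` data frame built from the table (the values should be close to the Model 2 column of the comparison table further below):
new_trees <- data.frame(DBH = c(3.1, 5.3, 8.2, 11.4),
                        H   = c(18.7, 34.7, 43.8, 58.7),
                        dob = c(2, 3, 3, 4))
new_trees$D2H <- new_trees$DBH^2 * new_trees$H   # model3 expects a D2H column
round(predict(model3, newdata = new_trees), 3)   # merchantable volume (cu-ft)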
library(nlme)
## Warning: package 'nlme' was built under R version 4.4.2
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
# Nonlinear mixed-effects model: the same combined volume equation,
# now with tree-level random effects on B0 and B1
subset2$grupo <- factor(subset2$TreeNo)  # grouping factor: individual tree
fit_nlme <- nlme(
Vol_merch ~ (B0 + B1 * D2H) * (1 + B2 * (dob^B3) / (DBH^B4)),
data = subset2,
fixed = B0 + B1 + B2 + B3 + B4 ~ 1,
random = B0 + B1 ~ 1 | grupo,
start = c(B0 = 0.1, B1 = 0.0025, B2 = -0.28, B3 = 1.93, B4 = 1.53),
na.action = na.omit,
control = nlmeControl(msMaxIter = 200, pnlsMaxIter = 50, returnObject = TRUE)
)
## Warning in nlme.formula(Vol_merch ~ (B0 + B1 * D2H) * (1 + B2 *
## (dob^B3)/(DBH^B4)), : Singular precision matrix in level -1, block 1
# Summary and random-effects components
summary(fit_nlme)
## Nonlinear mixed-effects model fit by maximum likelihood
## Model: Vol_merch ~ (B0 + B1 * D2H) * (1 + B2 * (dob^B3)/(DBH^B4))
## Data: subset2
## AIC BIC logLik
## 1744.496 1787.895 -863.2478
##
## Random effects:
## Formula: list(B0 ~ 1, B1 ~ 1)
## Level: grupo
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## B0 0.0457231604 B0
## B1 0.0002162246 -0.954
## Residual 0.5950890033
##
## Fixed effects: B0 + B1 + B2 + B3 + B4 ~ 1
## Value Std.Error DF t-value p-value
## B0 0.1092154 0.06383532 813 1.71089 0.0875
## B1 0.0028421 0.00005525 813 51.44262 0.0000
## B2 -0.5772540 0.03597089 813 -16.04781 0.0000
## B3 1.7625429 0.04382000 813 40.22234 0.0000
## B4 1.6850017 0.05046542 813 33.38923 0.0000
## Correlation:
## B0 B1 B2 B3
## B1 -0.596
## B2 -0.200 -0.112
## B3 -0.025 -0.461 0.132
## B4 0.086 -0.367 -0.421 0.842
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -5.63140016 -0.33388202 0.07917065 0.38309166 7.18184062
##
## Number of Observations: 918
## Number of Groups: 101
\[ \widehat{Vol}_{merch,i} = \big[(0.1092154 + u_{0i}) + (0.0028421 + u_{1i})\,D^{2}H\big]\Big(1 - 0.5772540\,\frac{dob^{1.7625429}}{D^{1.6850017}}\Big) \]
b. Compare the estimated coefficients between those derived in section 1 and 2. Comment in your memo on any differences noted.
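The estimates in the comparison table below are typed in by hand; they could also be pulled straight from the fitted objects, for example:
coef(model_combined)   # B0, B1 of the linear total-volume fit
coef(model2)           # B1-B3 of the ratio fit
coef(model3)           # B0-B4 of the combined nls fit
fixef(fit_nlme)        # fixed-effects part of the mixed model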
tabla <- data.frame(
Parameter = c("B0", "B1", "B2", "B3", "B4"),
`Model 1` = c(0.1045, 0.002551, -0.281693, 1.933546, 1.528426),
`Model 2` = c(0.05046, 0.002843, -0.5864, 1.826, 1.759),
`Mixed Model (fixed part)` = c(0.1092154, 0.0028421, -0.577254, 1.7625429, 1.6850017),
check.names = FALSE
)
knitr::kable(tabla, caption = "Coefficient comparison between models", digits = 6)
| Parameter | Model 1 | Model 2 | Mixed Model (fixed part) |
|---|---|---|---|
| B0 | 0.104500 | 0.050460 | 0.109215 |
| B1 | 0.002551 | 0.002843 | 0.002842 |
| B2 | -0.281693 | -0.586400 | -0.577254 |
| B3 | 1.933546 | 1.826000 | 1.762543 |
| B4 | 1.528426 | 1.759000 | 1.685002 |
The coefficients are still fairly similar. However, for this third model the table shows only the fixed effects; the random effects must be added to B0 and B1, which changes those two coefficients for each individual tree.
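Those tree-level coefficients (fixed part plus each tree's random deviation) can be inspected directly; a minimal sketch for the first few trees:
head(ranef(fit_nlme))   # random deviations u0i, u1i on B0 and B1, by tree
head(coef(fit_nlme))    # fixed + random effects, i.e. the per-tree B0 and B1 (B2-B4 are common)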
| ID | DBH (in) | H (ft) | Top dob (in) |
|---|---|---|---|
| i | 3.1 | 18.7 | 2 |
| ii | 5.3 | 34.7 | 3 |
| iii | 8.2 | 43.8 | 3 |
| iv | 11.4 | 58.7 | 4 |
For this part I compare the predicted merchantable volumes from the three models for the four trees in the table above:
library(knitr)
tabla <- data.frame(
Tree = c("Tree 1", "Tree 2", "Tree 3", "Tree 4"),
`Model 1` = c(0.4557, 2.1152, 6.8978, 17.617),
`Model 2` = c(0.395597, 2.09526, 7.489196, 19.4),
`Mixed Model` = c(0.4395, 2.2083, 7.776, 18.923),
check.names = FALSE
)
kable(tabla, caption = "Model predictions by tree", digits = 4)
| Tree | Model 1 | Model 2 | Mixed Model |
|---|---|---|---|
| Tree 1 | 0.4557 | 0.3956 | 0.4395 |
| Tree 2 | 2.1152 | 2.0953 | 2.2083 |
| Tree 3 | 6.8978 | 7.4892 | 7.7760 |
| Tree 4 | 17.6170 | 19.4000 | 18.9230 |
As shown in the table, the differences between the predictions of the three models are relatively small at the tree level. However, once these predictions are aggregated to the stand level, the differences can become substantial. Overall, I prefer the mixed-effects model because it captures the variability among groups, such as plots (or, in this case, individual trees).
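For trees that were not in the fitting data, such as the four trees above, the random effects are unknown and are set to their mean of zero, so the mixed-model predictions rely on the fixed effects only. A minimal sketch of that calculation, again using a hypothetical `new_trees` data frame built from the table (the values will differ slightly from the hand-entered table depending on rounding of the coefficients):
new_trees <- data.frame(DBH = c(3.1, 5.3, 8.2, 11.4),
                        H   = c(18.7, 34.7, 43.8, 58.7),
                        dob = c(2, 3, 3, 4))
new_trees$D2H <- new_trees$DBH^2 * new_trees$H
fe <- fixef(fit_nlme)                                   # population-level (fixed) estimates
vm_mixed <- (fe["B0"] + fe["B1"] * new_trees$D2H) *
  (1 + fe["B2"] * new_trees$dob^fe["B3"] / new_trees$DBH^fe["B4"])
round(vm_mixed, 3)                                      # fixed-effects-only predictions (cu-ft)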
library(dplyr)
library(ggplot2)
# The working data frame is subset2, with columns:
# TreeNo, dob (diameter over bark), DBH, H (total height), and Vol_merch (observed)
# Filter the two trees selected for the stem-profile comparison
trees <- c(8, 78)
df_sub <- subset2 %>% filter(TreeNo %in% trees)
# Extract the tree-level random effects
ranef_df <- ranef(fit_nlme)
# (Optional) inspect the random effects for trees 8 and 78
ranef_subset <- ranef_df[c("8", "78"), ]
ranef_df <- tibble::rownames_to_column(ranef_df, var = "TreeNo") # row names are the tree IDs
df_sub <- df_sub %>% mutate(TreeNo = as.numeric(TreeNo))
ranef_df <- ranef_df %>% mutate(TreeNo = as.numeric(TreeNo))
df_sub <- df_sub %>% left_join(ranef_df, by = "TreeNo")
# Function to compute merchantable volume from the part-1 (two-stage) equations
calc_vol_merch_m1 <- function(DBH, H, dob) {
B0 <- 0.1045
B1 <- 0.002551
B2 <- -0.281693
B3 <- 1.933546
B4 <- 1.528426
vol_total <- B0 + B1 * (DBH^2 * H)
ratio <- 1 + B2 * (dob^B3) / (DBH^B4)
vol_merch_m1 <- vol_total * ratio
return(vol_merch_m1)
}
df_sub <- df_sub %>%
mutate(Vol_merch_pred_1 = calc_vol_merch_m1(DBH, H, dob))
# Function to compute merchantable volume from the part-2 (single nls) equation
calc_vol_merch_m2 <- function(DBH, H, dob) {
B0 <- 0.05046
B1 <- 0.002843
B2 <- -0.5864
B3 <- 1.826
B4 <- 1.759
vol_total <- B0 + B1 * (DBH^2 * H)
ratio <- 1 + B2 * (dob^B3) / (DBH^B4)
vol_merch_m2 <- vol_total * ratio
return(vol_merch_m2)
}
df_sub <- df_sub %>%
mutate(Vol_merch_pred_2 = calc_vol_merch_m2(DBH, H, dob))
# Function to compute merchantable volume from the mixed model
# (fixed effects plus the tree-specific random effects on B0 and B1)
calc_vol_merch_m3 <- function(DBH, H, dob, B0, B1) {
B0t <- 0.1092154 + B0
B1t <- 0.0028421 + B1
B2 <- -0.577254
B3 <- 1.7625429
B4 <- 1.6850017
vol_total <- B0t + B1t * (DBH^2 * H)
ratio <- 1 + B2 * (dob^B3) / (DBH^B4)
vol_merch_m3 <- vol_total * ratio
return(vol_merch_m3)
}
df_sub <- df_sub %>%
mutate(Vol_merch_pred_3 = calc_vol_merch_m3(DBH, H, dob, B0, B1))
# Plot observed and predicted merchantable volume for each tree
ggplot(df_sub, aes(x = dob)) +
geom_point(aes(y = Vol_merch), color = "blue", size = 3) + # Observed
geom_line(aes(y = Vol_merch_pred_1), color = "red", linewidth = 1) + # Part-1 prediction
geom_line(aes(y = Vol_merch_pred_2), color = "green", linewidth = 1) + # Part-2 prediction
geom_line(aes(y = Vol_merch_pred_3), color = "purple", linewidth = 1) + # Mixed-model prediction
facet_wrap(~ TreeNo, scales = "free") +
labs(title = "Stem Profile: Merchantable Volume vs Diameter",
x = "Diameter over bark (dob)",
y = "Merchantable Volume (cu-ft)",
caption = "Blue = observed; red = part-1 prediction; green = part-2 prediction; purple = mixed-model prediction") +
theme_minimal()
# Residuals (observed minus predicted) for each model
df_sub <- df_sub %>%
mutate(res_pred_1 = Vol_merch - Vol_merch_pred_1,
res_pred_2 = Vol_merch - Vol_merch_pred_2,
res_pred_3 = Vol_merch - Vol_merch_pred_3)
# Relative residuals, as a percentage of the observed merchantable volume
df_sub <- df_sub %>%
mutate(residual_pct_1 = 100 * res_pred_1 / Vol_merch,
residual_pct_2 = 100 * res_pred_2 / Vol_merch,
residual_pct_3 = 100 * res_pred_3 / Vol_merch)
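The relative residuals computed above are not displayed anywhere; one way they could be summarized, by tree and model, is sketched below (mean absolute residual percentage per tree, an illustrative summary rather than part of the original analysis):
df_sub %>%
  group_by(TreeNo) %>%
  summarise(across(c(residual_pct_1, residual_pct_2, residual_pct_3),
                   ~ mean(abs(.x)))) %>%
  kable(caption = "Mean absolute residual (%) by tree and model", digits = 1)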