Growth models help us translate age into
size (or weight).
They are used all over fisheries science, for example to:
In this practical, we will:
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(minpack.lm) # nlsLM (more stable than base nls)
library(brms) # Bayesian non-linear models
library(bayesplot) # MCMC diagnostics
If a package is missing, install it first with:
install.packages("brms")(same logic for others)
Before fitting models to data, we look at what shapes the models can produce.
# Von Bertalanffy classic function
vb_fun <- function(age, Linf, k, t0) {
Linf * (1 - exp(-k * (age - t0)))
}
# Gompertz function
gompertz_fun <- function(age, Linf, k, t0) {
Linf * exp(-exp(-k * (age - t0)))
}
# Logistic growth
logistic_fun <- function(age, Linf, k, t0) {
Linf / (1 + exp(-k * (age - t0)))
}
# Richards function (generalized logistic): extra shape parameter nu (>0)
richards_fun <- function(age, Linf, k, t0, nu) {
Linf / (1 + exp(-k * (age - t0)))^(1 / nu)
}
We pick a few plausible parameter values to show the typical curve shapes.
param_sets <- tribble(
~model, ~Linf, ~k, ~t0, ~nu, ~label,
"VB", 240, 0.12, -0.8, NA, "VB: slower",
"VB", 240, 0.25, -0.8, NA, "VB: faster",
"Gompertz", 240, 0.20, 3.0, NA, "Gompertz",
"Logistic", 240, 0.50, 6.0, NA, "Logistic",
"Richards", 240, 0.40, 6.0, 0.6, "Richards (nu=0.6)",
"Richards", 240, 0.40, 6.0, 2.0, "Richards (nu=2.0)"
)
age_grid <- tibble(Age = seq(0, 25, length.out = 300))
concept_df <- param_sets %>%
mutate(tmp = pmap(list(model, Linf, k, t0, nu),
function(model, Linf, k, t0, nu) {
if (model == "VB") vb_fun(age_grid$Age, Linf, k, t0)
else if (model == "Gompertz") gompertz_fun(age_grid$Age, Linf, k, t0)
else if (model == "Logistic") logistic_fun(age_grid$Age, Linf, k, t0)
else if (model == "Richards") richards_fun(age_grid$Age, Linf, k, t0, nu)
else rep(NA_real_, nrow(age_grid))
})) %>%
select(label, model, tmp) %>%
unnest_longer(tmp, values_to = "Size") %>%
mutate(Age = rep(age_grid$Age, times = nrow(param_sets)))
ggplot(concept_df, aes(Age, Size, colour = label)) +
geom_line(linewidth = 1) +
theme_bw() +
labs(
title = "Conceptual growth curves (different model shapes)",
x = "Age (years)", y = "Size",
colour = "Curve"
)
Here we use a simple (but real) dataset from a large migratory species It is alerady cleaned for missing ages, ageing error, etc.
We simulate ages as integers (typical for age readings).
dat <- read.csv("Ages.csv", sep = ",", header = TRUE)
head(dat, 10)
## Size Sex Age
## 1 190 M 9
## 2 240 F 22
## 3 225 F 16
## 4 217 M 15
## 5 195 M 12
## 6 182 M 9
## 7 212 M 13
## 8 206 M 13
## 9 218 F 14
## 10 229 F 14
dat %>%
summarise(
n = n(),
age_min = min(Age), age_max = max(Age),
size_min = min(Size), size_max = max(Size)
)
## n age_min age_max size_min size_max
## 1 544 4 25 136 252
# Age distribution
ggplot(dat, aes(Age)) +
geom_histogram(bins = 20) +
theme_bw() +
labs(title = "Age distribution", x = "Age (years)", y = "Count")
# Age–Size scatter + smooth
ggplot(dat, aes(Age, Size)) +
geom_point(alpha = 0.2) +
geom_smooth(method = "loess", se = TRUE) +
theme_bw() +
labs(title = "Age–Size data (raw + loess)", x = "Age (years)", y = "Size")
** Class discussion:** - Are there ages with fewer observations? - Is the relationship monotonic (always increasing)?
We will fit multiple models using non-linear regression.
Important: non-linear regression needs starting values.
Linf_start <- max(dat$Size) * 1.05
k_start <- 0.2
t0_start <- -0.5
nu_start <- 1.0
nlsLMfit_vb <- nlsLM(
Size ~ Linf * (1 - exp(-k * (Age - t0))),
data = dat,
start = list(Linf = Linf_start, k = k_start, t0 = t0_start),
control = nls.lm.control(maxiter = 500)
)
fit_gompertz <- nlsLM(
Size ~ Linf * exp(-exp(-k * (Age - t0))),
data = dat,
start = list(Linf = Linf_start, k = k_start, t0 = t0_start),
control = nls.lm.control(maxiter = 500)
)
fit_logistic <- nlsLM(
Size ~ Linf / (1 + exp(-k * (Age - t0))),
data = dat,
start = list(Linf = Linf_start, k = k_start, t0 = t0_start),
control = nls.lm.control(maxiter = 500)
)
fit_richards <- nlsLM(
Size ~ Linf / (1 + exp(-k * (Age - t0)))^(1 / nu),
data = dat,
start = list(Linf = Linf_start, k = k_start, t0 = t0_start, nu = nu_start),
control = nls.lm.control(maxiter = 500)
)
fits <- list(
VB = fit_vb,
Gompertz = fit_gompertz,
Logistic = fit_logistic,
Richards = fit_richards
)
for (nm in names(fits)) {
cat("\n==============================\n")
cat("Model:", nm, "\n")
cat("==============================\n")
print(summary(fits[[nm]]))
}
##
## ==============================
## Model: VB
## ==============================
##
## Formula: Size ~ Linf * (1 - exp(-k * (Age - t0)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Linf 295.238088 11.286524 26.158 < 2e-16 ***
## k 0.061058 0.007406 8.244 1.27e-15 ***
## t0 -6.815973 0.914472 -7.453 3.63e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.979 on 541 degrees of freedom
##
## Number of iterations to convergence: 13
## Achieved convergence tolerance: 1.49e-08
##
##
## ==============================
## Model: Gompertz
## ==============================
##
## Formula: Size ~ Linf * exp(-exp(-k * (Age - t0)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Linf 278.857870 7.501548 37.173 <2e-16 ***
## k 0.088937 0.007594 11.712 <2e-16 ***
## t0 -0.671593 0.228978 -2.933 0.0035 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.963 on 541 degrees of freedom
##
## Number of iterations to convergence: 9
## Achieved convergence tolerance: 1.49e-08
##
##
## ==============================
## Model: Logistic
## ==============================
##
## Formula: Size ~ Linf/(1 + exp(-k * (Age - t0)))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Linf 2.692e+02 5.662e+00 47.54 <2e-16 ***
## k 1.168e-01 7.803e-03 14.97 <2e-16 ***
## t0 2.627e+00 2.337e-01 11.24 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.951 on 541 degrees of freedom
##
## Number of iterations to convergence: 9
## Achieved convergence tolerance: 1.49e-08
##
##
## ==============================
## Model: Richards
## ==============================
##
## Formula: Size ~ Linf/(1 + exp(-k * (Age - t0)))^(1/nu)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## Linf 248.87860 4.89731 50.819 < 2e-16 ***
## k 0.27890 0.07679 3.632 0.000308 ***
## t0 16.12196 1.16033 13.894 < 2e-16 ***
## nu 6.73022 2.65935 2.531 0.011664 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.932 on 540 degrees of freedom
##
## Number of iterations to convergence: 47
## Achieved convergence tolerance: 1.49e-08
Lower AIC = better fit after penalizing complexity.
aic_tab <- data.frame(
Model = names(fits),
AIC = sapply(fits, AIC)
)
aic_tab <- aic_tab[order(aic_tab$AIC), ]
aic_tab$deltaAIC <- aic_tab$AIC - min(aic_tab$AIC)
aic_tab
## Model AIC deltaAIC
## Richards Richards 3932.074 0.000000
## Logistic Logistic 3933.420 1.345569
## Gompertz Gompertz 3934.946 2.871936
## VB VB 3936.863 4.788830
pred_grid <- data.frame(Age = seq(min(dat$Age), max(dat$Age), length.out = 250))
pred_all <- do.call(rbind, lapply(names(fits), function(m) {
data.frame(
Age = pred_grid$Age,
Pred = as.numeric(predict(fits[[m]], newdata = pred_grid)),
Model = m
)
}))
ggplot(dat, aes(Age, Size)) +
geom_point(alpha = 0.15, position = position_jitter(width = 0.1, height = 0)) +
geom_line(data = pred_all, aes(Age, Pred, colour = Model), linewidth = 1.1) +
theme_bw() +
labs(title = "Growth model fitting (frequentist)",
x = "Age (years)", y = "Size", colour = "Model")
Here we only check residuals for the model with best AIC.
best_model <- aic_tab$Model[1]
best_fit <- fits[[best_model]]
resid_best <- residuals(best_fit)
fitted_best <- fitted(best_fit)
ggplot(data.frame(resid_best), aes(x = resid_best)) +
geom_histogram(bins = 30) +
theme_bw() +
labs(
title = paste("Histogram of residuals (best by AIC:", best_model, ")"),
x = "Residual", y = "Count"
)
ggplot(data.frame(fitted_best, resid_best), aes(fitted_best, resid_best)) +
geom_point(alpha = 0.2) +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw() +
labs(
title = paste("Residuals vs fitted (best by AIC:", best_model, ")"),
x = "Fitted", y = "Residual"
)
ggplot(data.frame(resid_best), aes(sample = resid_best)) +
stat_qq(alpha = 0.25) +
stat_qq_line() +
theme_bw() +
labs(title = paste("QQ plot of residuals (best by AIC:", best_model, ")"))
Bayesian models use priors + data likelihood to estimate parameters.
Linf_guess <- max(dat$Size) * 1.10
k_guess <- 0.20
t0_guess <- -0.5
Linf_sd <- 50
k_sd <- 0.20
t0_sd <- 2
n_prior <- 10000
Linf_prior <- rlnorm(n_prior, meanlog = log(Linf_guess), sdlog = 0.20)
k_prior <- rlnorm(n_prior, meanlog = log(k_guess), sdlog = 0.50)
t0_prior <- rnorm(n_prior, mean = t0_guess, sd = t0_sd)
prior_draws <- data.frame(Linf = Linf_prior, k = k_prior, t0 = t0_prior)
prior_long <- pivot_longer(prior_draws,
cols = c(Linf, k, t0),
names_to = "param", values_to = "value")
ggplot(prior_long, aes(value)) +
geom_density(linewidth = 1) +
facet_wrap(~param, scales = "free", ncol = 1) +
theme_bw() +
labs(title = "Priors (visual check)", x = "Value", y = "Density")
brms)vb_formula <- bf(
Size ~ Linf * (1 - exp(-k * (Age - t0))),
Linf ~ 1,
k ~ 1,
t0 ~ 1,
nl = TRUE
)
vb_priors <- c(
set_prior(paste0("normal(", Linf_guess, ",", Linf_sd, ")"), nlpar = "Linf", lb = 0),
set_prior(paste0("normal(", k_guess, ",", k_sd, ")"), nlpar = "k", lb = 0),
set_prior(paste0("normal(", t0_guess, ",", t0_sd, ")"), nlpar = "t0"),
set_prior("exponential(1)", class = "sigma", lb = 0)
)
fit_vb_bayes <- brm(
formula = vb_formula,
data = dat,
family = gaussian(),
prior = vb_priors,
chains = 2,
cores = 1,
iter = 1500,
warmup = 500,
thin = 2,
seed = 123,
control = list(adapt_delta = 0.95)
)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.000531 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 5.31 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 1: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 1: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 1: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 1: Iteration: 501 / 1500 [ 33%] (Sampling)
## Chain 1: Iteration: 650 / 1500 [ 43%] (Sampling)
## Chain 1: Iteration: 800 / 1500 [ 53%] (Sampling)
## Chain 1: Iteration: 950 / 1500 [ 63%] (Sampling)
## Chain 1: Iteration: 1100 / 1500 [ 73%] (Sampling)
## Chain 1: Iteration: 1250 / 1500 [ 83%] (Sampling)
## Chain 1: Iteration: 1400 / 1500 [ 93%] (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 10.315 seconds (Warm-up)
## Chain 1: 21.519 seconds (Sampling)
## Chain 1: 31.834 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.000405 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 4.05 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 2: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 2: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 2: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 2: Iteration: 501 / 1500 [ 33%] (Sampling)
## Chain 2: Iteration: 650 / 1500 [ 43%] (Sampling)
## Chain 2: Iteration: 800 / 1500 [ 53%] (Sampling)
## Chain 2: Iteration: 950 / 1500 [ 63%] (Sampling)
## Chain 2: Iteration: 1100 / 1500 [ 73%] (Sampling)
## Chain 2: Iteration: 1250 / 1500 [ 83%] (Sampling)
## Chain 2: Iteration: 1400 / 1500 [ 93%] (Sampling)
## Chain 2: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 10.766 seconds (Warm-up)
## Chain 2: 16.798 seconds (Sampling)
## Chain 2: 27.564 seconds (Total)
## Chain 2:
fit_vb_bayes
## Family: gaussian
## Links: mu = identity
## Formula: Size ~ Linf * (1 - exp(-k * (Age - t0)))
## Linf ~ 1
## k ~ 1
## t0 ~ 1
## Data: dat (Number of observations: 544)
## Draws: 2 chains, each with iter = 1500; warmup = 500; thin = 2;
## total post-warmup draws = 1000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Linf_Intercept 286.41 8.28 272.51 303.68 1.00 439 477
## k_Intercept 0.07 0.01 0.06 0.08 1.00 443 478
## t0_Intercept -5.98 0.69 -7.37 -4.77 1.00 479 553
##
## Further Distributional Parameters:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma 8.93 0.26 8.47 9.50 1.00 511 519
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
mcmc_trace(
as_draws_df(fit_vb_bayes),
pars = c("b_Linf_Intercept", "b_k_Intercept", "b_t0_Intercept", "sigma")
)
pred_grid_b <- data.frame(Age = seq(min(dat$Age), max(dat$Age), length.out = 200))
fb <- fitted(fit_vb_bayes, newdata = pred_grid_b, probs = c(0.025, 0.975))
preddf_b <- cbind(pred_grid_b, as.data.frame(fb))
ggplot() +
geom_point(data = dat, aes(Age, Size), alpha = 0.15,
position = position_jitter(width = 0.1, height = 0)) +
geom_ribbon(data = preddf_b, aes(x = Age, ymin = Q2.5, ymax = Q97.5), alpha = 0.25) +
geom_line(data = preddf_b, aes(x = Age, y = Estimate), linewidth = 1.1) +
theme_bw() +
labs(
title = "Bayesian Von Bertalanffy fit (with 95% credible interval)",
x = "Age (years)", y = "Size"
)
# Frequentist VB predictions at the same ages
pred_freq <- data.frame(
Age = pred_grid_b$Age,
Pred = as.numeric(predict(fit_vb, newdata = pred_grid_b)),
Curve = "VB (Frequentist)"
)
# Bayesian posterior mean curve
pred_bayes <- data.frame(
Age = preddf_b$Age,
Pred = preddf_b$Estimate,
Curve = "VB (Bayesian)"
)
pred_lines <- rbind(pred_freq, pred_bayes)
ggplot() +
geom_point(
data = dat,
aes(Age, Size),
alpha = 0.12,
position = position_jitter(width = 0.1, height = 0)
) +
geom_line(
data = pred_lines,
aes(Age, Pred, color = Curve),
linewidth = 1.1
) +
theme_bw() +
labs(
title = "Von Bertalanffy: Bayesian vs Frequentist comparison",
x = "Age (years)", y = "Size",
color = "Curve"
)
cat("\nWELL DONE!\n")
##
## WELL DONE!