Why do we model growth?

Growth models help us translate age into size (or weight).
They are used all over fisheries science, for example to:

In this practical, we will:

  1. Visualize growth curves (conceptual shapes)
  2. Explore a simple age–size dataset
  3. Fit and compare several growth models (frequentist)
  4. Fit one model using a Bayesian approach (Von Bertalanffy)

0) Packages

library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(minpack.lm)  # nlsLM (more stable than base nls)
library(brms)        # Bayesian non-linear models
library(bayesplot)   # MCMC diagnostics

If a package is missing, install it first with:
install.packages("brms") (same logic for others)


1) Conceptual visualization of growth model shapes

Before fitting models to data, we look at what shapes the models can produce.

1.1 Define common growth functions

# Von Bertalanffy classic function
vb_fun <- function(age, Linf, k, t0) {
  Linf * (1 - exp(-k * (age - t0)))
}

# Gompertz function
gompertz_fun <- function(age, Linf, k, t0) {
  Linf * exp(-exp(-k * (age - t0)))
}

# Logistic growth
logistic_fun <- function(age, Linf, k, t0) {
  Linf / (1 + exp(-k * (age - t0)))
}

# Richards function (generalized logistic): extra shape parameter nu (>0)
richards_fun <- function(age, Linf, k, t0, nu) {
  Linf / (1 + exp(-k * (age - t0)))^(1 / nu)
}

1.2 Create example curves (different parameter sets)

We pick a few plausible parameter values to show the typical curve shapes.

param_sets <- tribble(
  ~model,      ~Linf, ~k,   ~t0,  ~nu, ~label,
  "VB",         240,  0.12, -0.8,  NA, "VB: slower",
  "VB",         240,  0.25, -0.8,  NA, "VB: faster",
  "Gompertz",   240,  0.20,  3.0,  NA, "Gompertz",
  "Logistic",   240,  0.50,  6.0,  NA, "Logistic",
  "Richards",   240,  0.40,  6.0,  0.6, "Richards (nu=0.6)",
  "Richards",   240,  0.40,  6.0,  2.0, "Richards (nu=2.0)"
)

age_grid <- tibble(Age = seq(0, 25, length.out = 300))

1.3 Calculate curve values and plot

concept_df <- param_sets %>%
  mutate(tmp = pmap(list(model, Linf, k, t0, nu),
                    function(model, Linf, k, t0, nu) {
    if (model == "VB")            vb_fun(age_grid$Age, Linf, k, t0)
    else if (model == "Gompertz") gompertz_fun(age_grid$Age, Linf, k, t0)
    else if (model == "Logistic") logistic_fun(age_grid$Age, Linf, k, t0)
    else if (model == "Richards") richards_fun(age_grid$Age, Linf, k, t0, nu)
    else rep(NA_real_, nrow(age_grid))
  })) %>%
  select(label, model, tmp) %>%
  unnest_longer(tmp, values_to = "Size") %>%
  mutate(Age = rep(age_grid$Age, times = nrow(param_sets)))

ggplot(concept_df, aes(Age, Size, colour = label)) +
  geom_line(linewidth = 1) +
  theme_bw() +
  labs(
    title = "Conceptual growth curves (different model shapes)",
    x = "Age (years)", y = "Size",
    colour = "Curve"
  )

Class discussion:

- Which models look most similar?

- Which model(s) allow extra flexibility?


2) Data exploration

Here we use a simple (but real) dataset from a large migratory species It is alerady cleaned for missing ages, ageing error, etc.

2.1 Load data

We simulate ages as integers (typical for age readings).

dat <- read.csv("Ages.csv", sep = ",", header = TRUE)
head(dat, 10)
##    Size Sex Age
## 1   190   M   9
## 2   240   F  22
## 3   225   F  16
## 4   217   M  15
## 5   195   M  12
## 6   182   M   9
## 7   212   M  13
## 8   206   M  13
## 9   218   F  14
## 10  229   F  14

2.2 Quick summaries

dat %>%
  summarise(
    n = n(),
    age_min = min(Age), age_max = max(Age),
    size_min = min(Size), size_max = max(Size)
  )
##     n age_min age_max size_min size_max
## 1 544       4      25      136      252

2.3 Plots: age distribution + scatter

# Age distribution
ggplot(dat, aes(Age)) +
  geom_histogram(bins = 20) +
  theme_bw() +
  labs(title = "Age distribution", x = "Age (years)", y = "Count")

# Age–Size scatter + smooth
ggplot(dat, aes(Age, Size)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "loess", se = TRUE) +
  theme_bw() +
  labs(title = "Age–Size data (raw + loess)", x = "Age (years)", y = "Size")

** Class discussion:** - Are there ages with fewer observations? - Is the relationship monotonic (always increasing)?


3) Growth modelling (frequentist approach)

We will fit multiple models using non-linear regression.

Important: non-linear regression needs starting values.

3.1 Starting values

Linf_start <- max(dat$Size) * 1.05
k_start    <- 0.2
t0_start   <- -0.5
nu_start   <- 1.0

3.2 Fit models with nlsLM

fit_vb <- nlsLM(
  Size ~ Linf * (1 - exp(-k * (Age - t0))),
  data  = dat,
  start = list(Linf = Linf_start, k = k_start, t0 = t0_start),
  control = nls.lm.control(maxiter = 500)
)

fit_gompertz <- nlsLM(
  Size ~ Linf * exp(-exp(-k * (Age - t0))),
  data  = dat,
  start = list(Linf = Linf_start, k = k_start, t0 = t0_start),
  control = nls.lm.control(maxiter = 500)
)

fit_logistic <- nlsLM(
  Size ~ Linf / (1 + exp(-k * (Age - t0))),
  data  = dat,
  start = list(Linf = Linf_start, k = k_start, t0 = t0_start),
  control = nls.lm.control(maxiter = 500)
)

fit_richards <- nlsLM(
  Size ~ Linf / (1 + exp(-k * (Age - t0)))^(1 / nu),
  data  = dat,
  start = list(Linf = Linf_start, k = k_start, t0 = t0_start, nu = nu_start),
  control = nls.lm.control(maxiter = 500)
)

fits <- list(
  VB = fit_vb,
  Gompertz = fit_gompertz,
  Logistic = fit_logistic,
  Richards = fit_richards
)

3.3 Summaries (parameter estimates)

for (nm in names(fits)) {
  cat("\n==============================\n")
  cat("Model:", nm, "\n")
  cat("==============================\n")
  print(summary(fits[[nm]]))
}
## 
## ==============================
## Model: VB 
## ==============================
## 
## Formula: Size ~ Linf * (1 - exp(-k * (Age - t0)))
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)    
## Linf 295.238088  11.286524  26.158  < 2e-16 ***
## k      0.061058   0.007406   8.244 1.27e-15 ***
## t0    -6.815973   0.914472  -7.453 3.63e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.979 on 541 degrees of freedom
## 
## Number of iterations to convergence: 13 
## Achieved convergence tolerance: 1.49e-08
## 
## 
## ==============================
## Model: Gompertz 
## ==============================
## 
## Formula: Size ~ Linf * exp(-exp(-k * (Age - t0)))
## 
## Parameters:
##        Estimate Std. Error t value Pr(>|t|)    
## Linf 278.857870   7.501548  37.173   <2e-16 ***
## k      0.088937   0.007594  11.712   <2e-16 ***
## t0    -0.671593   0.228978  -2.933   0.0035 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.963 on 541 degrees of freedom
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 1.49e-08
## 
## 
## ==============================
## Model: Logistic 
## ==============================
## 
## Formula: Size ~ Linf/(1 + exp(-k * (Age - t0)))
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## Linf 2.692e+02  5.662e+00   47.54   <2e-16 ***
## k    1.168e-01  7.803e-03   14.97   <2e-16 ***
## t0   2.627e+00  2.337e-01   11.24   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.951 on 541 degrees of freedom
## 
## Number of iterations to convergence: 9 
## Achieved convergence tolerance: 1.49e-08
## 
## 
## ==============================
## Model: Richards 
## ==============================
## 
## Formula: Size ~ Linf/(1 + exp(-k * (Age - t0)))^(1/nu)
## 
## Parameters:
##       Estimate Std. Error t value Pr(>|t|)    
## Linf 248.87860    4.89731  50.819  < 2e-16 ***
## k      0.27890    0.07679   3.632 0.000308 ***
## t0    16.12196    1.16033  13.894  < 2e-16 ***
## nu     6.73022    2.65935   2.531 0.011664 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.932 on 540 degrees of freedom
## 
## Number of iterations to convergence: 47 
## Achieved convergence tolerance: 1.49e-08

3.4 Model comparison (AIC)

Lower AIC = better fit after penalizing complexity.

aic_tab <- data.frame(
  Model = names(fits),
  AIC   = sapply(fits, AIC)
)

aic_tab <- aic_tab[order(aic_tab$AIC), ]
aic_tab$deltaAIC <- aic_tab$AIC - min(aic_tab$AIC)
aic_tab
##             Model      AIC deltaAIC
## Richards Richards 3932.074 0.000000
## Logistic Logistic 3933.420 1.345569
## Gompertz Gompertz 3934.946 2.871936
## VB             VB 3936.863 4.788830

3.5 Overlay plot: all fitted curves

pred_grid <- data.frame(Age = seq(min(dat$Age), max(dat$Age), length.out = 250))

pred_all <- do.call(rbind, lapply(names(fits), function(m) {
  data.frame(
    Age = pred_grid$Age,
    Pred = as.numeric(predict(fits[[m]], newdata = pred_grid)),
    Model = m
  )
}))

ggplot(dat, aes(Age, Size)) +
  geom_point(alpha = 0.15, position = position_jitter(width = 0.1, height = 0)) +
  geom_line(data = pred_all, aes(Age, Pred, colour = Model), linewidth = 1.1) +
  theme_bw() +
  labs(title = "Growth model fitting (frequentist)",
       x = "Age (years)", y = "Size", colour = "Model")


4) Model validation: residuals (best model only)

Here we only check residuals for the model with best AIC.

best_model <- aic_tab$Model[1]
best_fit   <- fits[[best_model]]

resid_best  <- residuals(best_fit)
fitted_best <- fitted(best_fit)

4.1 Histogram of residuals

ggplot(data.frame(resid_best), aes(x = resid_best)) +
  geom_histogram(bins = 30) +
  theme_bw() +
  labs(
    title = paste("Histogram of residuals (best by AIC:", best_model, ")"),
    x = "Residual", y = "Count"
  )

4.2 Residuals vs fitted

ggplot(data.frame(fitted_best, resid_best), aes(fitted_best, resid_best)) +
  geom_point(alpha = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_bw() +
  labs(
    title = paste("Residuals vs fitted (best by AIC:", best_model, ")"),
    x = "Fitted", y = "Residual"
  )

4.3 QQ plot (normality check)

ggplot(data.frame(resid_best), aes(sample = resid_best)) +
  stat_qq(alpha = 0.25) +
  stat_qq_line() +
  theme_bw() +
  labs(title = paste("QQ plot of residuals (best by AIC:", best_model, ")"))


5) Bayesian growth modelling (Von Bertalanffy)

Bayesian models use priors + data likelihood to estimate parameters.

5.1 Set simple priors (manual)

Linf_guess <- max(dat$Size) * 1.10
k_guess    <- 0.20
t0_guess   <- -0.5

Linf_sd <- 50
k_sd    <- 0.20
t0_sd   <- 2

Quick check: plot priors (random draws)

n_prior <- 10000
Linf_prior <- rlnorm(n_prior, meanlog = log(Linf_guess), sdlog = 0.20)
k_prior    <- rlnorm(n_prior, meanlog = log(k_guess),    sdlog = 0.50)
t0_prior   <- rnorm(n_prior, mean = t0_guess, sd = t0_sd)

prior_draws <- data.frame(Linf = Linf_prior, k = k_prior, t0 = t0_prior)

prior_long <- pivot_longer(prior_draws,
                           cols = c(Linf, k, t0),
                           names_to = "param", values_to = "value")

ggplot(prior_long, aes(value)) +
  geom_density(linewidth = 1) +
  facet_wrap(~param, scales = "free", ncol = 1) +
  theme_bw() +
  labs(title = "Priors (visual check)", x = "Value", y = "Density")

5.2 Fit Bayesian VB model (brms)

vb_formula <- bf(
  Size ~ Linf * (1 - exp(-k * (Age - t0))),
  Linf ~ 1,
  k    ~ 1,
  t0   ~ 1,
  nl = TRUE
)

vb_priors <- c(
  set_prior(paste0("normal(", Linf_guess, ",", Linf_sd, ")"), nlpar = "Linf", lb = 0),
  set_prior(paste0("normal(", k_guess,    ",", k_sd,    ")"), nlpar = "k",    lb = 0),
  set_prior(paste0("normal(", t0_guess,   ",", t0_sd,   ")"), nlpar = "t0"),
  set_prior("exponential(1)", class = "sigma", lb = 0)
)

fit_vb_bayes <- brm(
  formula = vb_formula,
  data    = dat,
  family  = gaussian(),
  prior   = vb_priors,
  chains  = 2,
  cores   = 1,
  iter    = 1500,
  warmup  = 500,
  thin    = 2,
  seed    = 123,
  control = list(adapt_delta = 0.95)
)
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000531 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 5.31 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 1: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 1: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 1: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 1: Iteration:  501 / 1500 [ 33%]  (Sampling)
## Chain 1: Iteration:  650 / 1500 [ 43%]  (Sampling)
## Chain 1: Iteration:  800 / 1500 [ 53%]  (Sampling)
## Chain 1: Iteration:  950 / 1500 [ 63%]  (Sampling)
## Chain 1: Iteration: 1100 / 1500 [ 73%]  (Sampling)
## Chain 1: Iteration: 1250 / 1500 [ 83%]  (Sampling)
## Chain 1: Iteration: 1400 / 1500 [ 93%]  (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 10.315 seconds (Warm-up)
## Chain 1:                21.519 seconds (Sampling)
## Chain 1:                31.834 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.000405 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 4.05 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 1500 [  0%]  (Warmup)
## Chain 2: Iteration:  150 / 1500 [ 10%]  (Warmup)
## Chain 2: Iteration:  300 / 1500 [ 20%]  (Warmup)
## Chain 2: Iteration:  450 / 1500 [ 30%]  (Warmup)
## Chain 2: Iteration:  501 / 1500 [ 33%]  (Sampling)
## Chain 2: Iteration:  650 / 1500 [ 43%]  (Sampling)
## Chain 2: Iteration:  800 / 1500 [ 53%]  (Sampling)
## Chain 2: Iteration:  950 / 1500 [ 63%]  (Sampling)
## Chain 2: Iteration: 1100 / 1500 [ 73%]  (Sampling)
## Chain 2: Iteration: 1250 / 1500 [ 83%]  (Sampling)
## Chain 2: Iteration: 1400 / 1500 [ 93%]  (Sampling)
## Chain 2: Iteration: 1500 / 1500 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 10.766 seconds (Warm-up)
## Chain 2:                16.798 seconds (Sampling)
## Chain 2:                27.564 seconds (Total)
## Chain 2:
fit_vb_bayes
##  Family: gaussian 
##   Links: mu = identity 
## Formula: Size ~ Linf * (1 - exp(-k * (Age - t0))) 
##          Linf ~ 1
##          k ~ 1
##          t0 ~ 1
##    Data: dat (Number of observations: 544) 
##   Draws: 2 chains, each with iter = 1500; warmup = 500; thin = 2;
##          total post-warmup draws = 1000
## 
## Regression Coefficients:
##                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Linf_Intercept   286.41      8.28   272.51   303.68 1.00      439      477
## k_Intercept        0.07      0.01     0.06     0.08 1.00      443      478
## t0_Intercept      -5.98      0.69    -7.37    -4.77 1.00      479      553
## 
## Further Distributional Parameters:
##       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sigma     8.93      0.26     8.47     9.50 1.00      511      519
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

5.3 Quick convergence check (trace plots)

mcmc_trace(
  as_draws_df(fit_vb_bayes),
  pars = c("b_Linf_Intercept", "b_k_Intercept", "b_t0_Intercept", "sigma")
)

5.4 Bayesian fitted curve (mean + 95% interval)

pred_grid_b <- data.frame(Age = seq(min(dat$Age), max(dat$Age), length.out = 200))

fb <- fitted(fit_vb_bayes, newdata = pred_grid_b, probs = c(0.025, 0.975))
preddf_b <- cbind(pred_grid_b, as.data.frame(fb))

ggplot() +
  geom_point(data = dat, aes(Age, Size), alpha = 0.15,
             position = position_jitter(width = 0.1, height = 0)) +
  geom_ribbon(data = preddf_b, aes(x = Age, ymin = Q2.5, ymax = Q97.5), alpha = 0.25) +
  geom_line(data = preddf_b, aes(x = Age, y = Estimate), linewidth = 1.1) +
  theme_bw() +
  labs(
    title = "Bayesian Von Bertalanffy fit (with 95% credible interval)",
    x = "Age (years)", y = "Size"
  )


6) Frequentist vs Bayesian VB comparison

# Frequentist VB predictions at the same ages
pred_freq <- data.frame(
  Age  = pred_grid_b$Age,
  Pred = as.numeric(predict(fit_vb, newdata = pred_grid_b)),
  Curve = "VB (Frequentist)"
)

# Bayesian posterior mean curve
pred_bayes <- data.frame(
  Age  = preddf_b$Age,
  Pred = preddf_b$Estimate,
  Curve = "VB (Bayesian)"
)

pred_lines <- rbind(pred_freq, pred_bayes)

ggplot() +
  geom_point(
    data = dat,
    aes(Age, Size),
    alpha = 0.12,
    position = position_jitter(width = 0.1, height = 0)
  ) +
  geom_line(
    data = pred_lines,
    aes(Age, Pred, color = Curve),
    linewidth = 1.1
  ) +
  theme_bw() +
  labs(
    title = "Von Bertalanffy: Bayesian vs Frequentist comparison",
    x = "Age (years)", y = "Size",
    color = "Curve"
  )

Class Exercises:

-Run a few more Bayesian models with different priors. And try one with very strong priors (low SD). What can you conclude from this exercise?

- Run models for males and females separately. Are the estimated parameters and growth curves the same or different? Does that make sense biologically?


cat("\nWELL DONE!\n")
## 
## WELL DONE!

The end