Click here for other works of the author on RPubs

The von Bertalanffy growth function is used to describe body length of fish as a function of its age. It is defined as \(L(t) = L_{\infty}(1 - e^{- K(t - t_0)})\) where \(L_{\infty}\) is the hypothesized (mean) maximum length, \(K\) is growth rate, \(t\) is current age and \(t_0\) is the hypothesized age when body length is 0. In this assignment, we try to model body length of fish using the following regression model: \(L(t) = L_{\infty}(1 - e^{-K(t - t_0)}) * e^{\epsilon}\) with \(\epsilon\) ~ \(N(0, \sigma^2)\) and estimate the parameters using maximum likelihood estimators (MLE).

Load package

library(knitr)
library(ggplot2)

Load data

age = c(1, 2, 3.3, 4.3, 5.3, 6.3, 7.3, 8.3, 9.3, 10.3, 11.3, 12.3, 13.3)
length_m = c(15.4, 26.93, 42.23, 44.59, 47.63, 49.67, 50.87, 52.3, 54.77, 56.43, 55.88)
length_f = c(15.4, 28.03, 41.18, 46.2, 48.23, 50.26, 51.82, 54.27, 56.98, 58.93, 59, 60.91, 61.83)

#show data
kable(cbind(age, length_f, c(length_m, rep("", 2))), col.names = c("Age (year)", "Female mean length (cm)", "Male mean length (cm)"))
Age (year) Female mean length (cm) Male mean length (cm)
1 15.4 15.4
2 28.03 26.93
3.3 41.18 42.23
4.3 46.2 44.59
5.3 48.23 47.63
6.3 50.26 49.67
7.3 51.82 50.87
8.3 54.27 52.3
9.3 56.98 54.77
10.3 58.93 56.43
11.3 59 55.88
12.3 60.91
13.3 61.83

Explore the relationship between body length and age

#prepare data for plotting
dataset = data.frame(c(age, age[-c(12:13)]), c(length_f, length_m), c(rep("Female", 13), rep("Male", 11)))
colnames(dataset) <- c("Age", "Body length", "Sex")

#draw graph
ggplot(dataset, aes(x = Age, y = `Body length`, col = Sex)) +
    geom_point() +
    scale_color_discrete(name = "Sex")

It seems that growth rate declines as age increases and the von Bertalanffy growth function is appropriate.

The von Bertalanffy growth function

Define function VBGF that calculates estimated length using the von Bertalanffy growth function, which would be used in our regression model.

VBGF <-function(x, Linf, K, t0){
    y = Linf * (1 - exp(- K * (x - t0)))
    return(y)
}

Log-likelihood function for the regression

Define function lognormal_like which calculates the log-likelihood of

lognormal_like = function(x, y, Linf, K, t0, Out){
    
    like = numeric(length(y))
    NLL = numeric(length(y))
    ypred = VBGF(x, Linf, K, t0)
    dev2 =(log(y) - log(ypred)) ^ 2
    sigma = sqrt(mean(dev2))
    
    for (i in 1:length(y)){
        like[i] = (1 / (x[i] * sqrt(2 * pi) * sigma)) * exp(-dev2[i] / (2 * sigma ^ 2))
        NLL[i] = -log(like[i])
    }
    
    #show each iteration if Out is set as TRUE
    if(Out == T) cat("NLL=", sum(NLL), " Linf=", Linf, " K=", K, " t0=", t0, "\n", sep = "")
    
    #calculate various informations about the log-likelihood
        Outs <- NULL
        Outs$Pred <- ypred
        Outs$Length <- y
        Outs$Dev2 <- dev2
        Outs$sigma <- sigma
        Outs$Like <- like
        Outs$NLL <- NLL 
        Outs$Obj <- sum(NLL)
        
    return(Outs)
}

Define objective function: sum of negative log-likelihood

obj_function = function(theda, ...){ 
    Outs <- lognormal_like(Linf = theda[1], K = theda[2], t0 = theda[3], ...)
    obj <- Outs$Obj 
    return(obj)
}

Find the MLE of parameters

Using optimization method, we try to find values of the three parameters (\(L_{\infty}, K, t_0\)) that maximizes the likelihood function (=minimize negative log-likelihood function) for our regression model. Female and male data would be fit seperately.

model_f = optim(c(100, 0.2, 0), obj_function, x = age, y = length_f, Out = F, hessian = T)
model_m = optim(c(100, 0.2, 0), obj_function, x = age[-c(12:13)], y = length_m, Out = F, hessian = T)

Display model results

param_f <- model_f$par
param_m <- model_m$par
sd_f <- sqrt(diag(solve(model_f$hessian)))
sd_m <- sqrt(diag(solve(model_m$hessian)))
model_result_f <- cbind(param_f, sd_f)
model_result_m <- cbind(param_m, sd_m)
rownames(model_result_f) <- c("L inf", "K", "t0")
rownames(model_result_m) <- c("L inf", "K", "t0")

kable(model_result_f, col.names = c("Estimated parameters", "Standard deviation"), caption = "Model for female fish body length", digits = 3)
Model for female fish body length
Estimated parameters Standard deviation
L inf 60.271 1.021
K 0.327 0.022
t0 0.092 0.066
kable(model_result_m, col.names = c("Estimated parameters", "Standard deviation"), caption = "Model for male fish body length", digits = 3)
Model for male fish body length
Estimated parameters Standard deviation
L inf 56.069 1.083
K 0.382 0.030
t0 0.166 0.066

Regression model for female fish: Body length = 60.27(1 - exp(-0.33(Age - 0.09)))

Regression model for male fish: Body length = 56.07(1 - exp(-0.38(Age - 0.17)))

Visualize estimations of regression model

#prepare data for plotting
dataset = data.frame(c(age, age[-c(12:13)]), c(length_f, length_m), c(VBGF(age, param_f[1], param_f[2], param_f[3]), VBGF(age[-c(12:13)], param_m[1], param_m[2], param_m[3])), c(rep("Female", 13), rep("Male", 11)))
colnames(dataset) <- c("Age", "Actual length", "Esimated length", "Sex")

#draw graph
ggplot(dataset, aes(x = Age)) +
    geom_point(aes(y = `Actual length`, col = Sex)) +
    geom_line(aes(y = `Esimated length`, col = Sex)) +
    scale_color_discrete(name = "Sex") +
    labs(title = "Regression model using von Bertalanffy growth function", y = "Fish length")