Learning goals

By the end of this practical, you should be able to:

Important idea: Production models are simple (but powerful). They rely on catch + one or more abundance indices, and they estimate overall stock status with uncertainty.


0) Packages

If you do not have JABBA installed, run the install lines below once.

# If needed (run once):
# install.packages("remotes")
# remotes::install_github("jabbamodel/JABBA")

library(JABBA)
library(dplyr)
library(tidyr)
library(ggplot2)

1) Load data (real assessment format)

We will work with three tables:

cpueN  <- read.csv("cpueN_SMA_IOTC_2024.csv")
catchN <- read.csv("catchN_SMA_IOTC_2024.csv")
seN    <- read.csv("seN_SMA_IOTC_2024.csv")

# Quick checks
str(catchN)
## 'data.frame':    73 obs. of  3 variables:
##  $ Year         : int  1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
##  $ SMA_reported : num  54.7 59.3 63 66.6 70.6 ...
##  $ SMA_estimated: num  96.3 177.6 185.1 193.1 235.7 ...
str(cpueN)
## 'data.frame':    56 obs. of  5 variables:
##  $ Year     : int  1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 ...
##  $ EU.PRT   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ EU.ESP   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ JPN      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ USSR.Hist: num  0.233 1.006 1.091 0.477 0.318 ...
str(seN)
## 'data.frame':    56 obs. of  10 variables:
##  $ Year         : int  1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 ...
##  $ EU.PRT       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ EU.ESP       : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ JPN          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ USSR.Hist    : num  0.255 0.231 0.175 0.176 0.443 ...
##  $ X            : logi  NA NA NA NA NA NA ...
##  $ EU.PRT.min   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ EU.ESP.min   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ JPN.min      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ USSR.Hist.min: num  0.255 0.231 0.2 0.2 0.443 ...
head(catchN)
##   Year SMA_reported SMA_estimated
## 1 1950     54.72097      96.32147
## 2 1951     59.34207     177.64983
## 3 1952     62.98779     185.07945
## 4 1953     66.61357     193.12640
## 5 1954     70.61779     235.71398
## 6 1955     74.39779     251.58905
head(cpueN)
##   Year EU.PRT EU.ESP JPN USSR.Hist
## 1 1967     NA     NA  NA 0.2327182
## 2 1968     NA     NA  NA 1.0057239
## 3 1969     NA     NA  NA 1.0910889
## 4 1970     NA     NA  NA 0.4768928
## 5 1971     NA     NA  NA 0.3178783
## 6 1972     NA     NA  NA 0.3173969
head(seN)
##   Year EU.PRT EU.ESP JPN USSR.Hist  X EU.PRT.min EU.ESP.min JPN.min
## 1 1967     NA     NA  NA 0.2549831 NA         NA         NA      NA
## 2 1968     NA     NA  NA 0.2307038 NA         NA         NA      NA
## 3 1969     NA     NA  NA 0.1750145 NA         NA         NA      NA
## 4 1970     NA     NA  NA 0.1757458 NA         NA         NA      NA
## 5 1971     NA     NA  NA 0.4434348 NA         NA         NA      NA
## 6 1972     NA     NA  NA 0.1661442 NA         NA         NA      NA
##   USSR.Hist.min
## 1     0.2549831
## 2     0.2307038
## 3     0.2000000
## 4     0.2000000
## 5     0.4434348
## 6     0.2000000

2) Choose data inputs

2A) Catch series choice

Use either estimated or reported catches.

# Option A (use reported catches)
catch_use <- catchN %>% dplyr::select(Year, Catch = SMA_reported)

# Option B (use estimated catches)
# catch_use <- catchN %>% dplyr::select(Year, Catch = SMA_estimated)

2B) CPUE indices choice

Choose all CPUE indices (default) or a subset.

# Option A (Base model)
cpue_use <- cpueN %>% dplyr::select(Year, EU.ESP, JPN, USSR.Hist)

# Option B (use ALL indices)
#cpue_use <- cpueN %>% dplyr::select(Year, EU.PRT, EU.ESP, JPN, USSR.Hist)

# Option C (use only JPN)
# cpue_use <- cpueN %>% dplyr::select(Year, JPN)

# Option D (use only EU indices)
# cpue_use <- cpueN %>% dplyr::select(Year, EU.PRT, EU.ESP)

2C) CPUE uncertainty (CV) choice

For this dataset, the .min columns already apply a minimum CV rule.

# Option A (use the ".min" columns that already apply a minimum CV rule)
se_use <- seN %>% dplyr::select(
  Year,
  EU.ESP    = EU.ESP.min,
  JPN       = JPN.min,
  USSR.Hist = USSR.Hist.min
)

# Option B (use raw uncertainty columns directly)
# se_use <- seN %>% dplyr::select(Year, EU.ESP, JPN, USSR.Hist)

3) Exploratory plots

Catch time series

ggplot(catch_use, aes(Year, Catch)) +
  geom_line(linewidth = 1) +
  geom_point() +
  theme_bw() +
  labs(title = "Catch time series", y = "Catch", x = "Year")

CPUE indices

cpue_long <- cpue_use %>%
  pivot_longer(-Year, names_to = "Index", values_to = "CPUE")

ggplot(cpue_long, aes(Year, CPUE)) +
  geom_line(na.rm = TRUE) +
  geom_point(size = 1, na.rm = TRUE) +
  facet_wrap(~Index, scales = "free_y") +
  theme_bw() +
  labs(title = "CPUE indices (raw)", y = "CPUE", x = "Year")

Uncertainty (CV)

se_long <- se_use %>%
  pivot_longer(-Year, names_to = "Index", values_to = "SE")

ggplot(se_long, aes(Year, SE)) +
  geom_line(na.rm = TRUE) +
  geom_point(size = 1, na.rm = TRUE) +
  facet_wrap(~Index, scales = "free_y") +
  theme_bw() +
  labs(title = "Index uncertainty (CV)", y = "CV", x = "Year")


4) CPUE handling: scaling

Scaling CPUE is not required (JABBA estimates q for each index), but scaling helps:

cpue_use_scaled <- cpue_use %>%
  mutate(across(-Year, ~ . / mean(., na.rm = TRUE)))

cpue_use_scaled_long <- cpue_use_scaled %>%
  pivot_longer(-Year, names_to = "Index", values_to = "CPUE")

ggplot(cpue_use_scaled_long, aes(Year, CPUE, colour = Index)) +
  geom_line(linewidth = 0.9, na.rm = TRUE) +
  geom_point(size = 1, na.rm = TRUE) +
  theme_bw() +
  labs(title = "CPUE indices (scaled by mean)", y = "Scaled CPUE", x = "Year")


5) Base model: build + fit (Schaefer)

5A) Align years (simple overlap rule)

Here we keep it simple: use the overlapping years between catch, cpue and uncertainty.

start_year <- max(
  min(catch_use$Year, na.rm = TRUE),
  min(cpue_use_scaled$Year, na.rm = TRUE),
  min(se_use$Year, na.rm = TRUE)
)

catch_fit <- catch_use %>% filter(Year >= start_year)
cpue_fit  <- cpue_use_scaled %>% filter(Year >= start_year)
se_fit    <- se_use %>% filter(Year >= start_year)

cat("Start year used for model fitting:", start_year, "\n")
## Start year used for model fitting: 1967

5B) Priors (the key Bayesian concept)

These priors are from the actual Indian Ocean shortfin mako assessment. Prior are known external information. They are not the truth, but represent plausible ranges and help constrain the model.

# Priors (base model)
r_prior   <- c(0.06, 0.15)   # intrinsic growth rate r (species-dependent)
psi_prior <- c(0.90, 0.05)   # initial depletion (beta distribution parameterization in JABBA)
catch_cv  <- 0.20            # catch uncertainty

5C) Build JABBA input object

Input_base <- build_jabba(
  catch = catch_fit,
  cpue  = cpue_fit,
  se    = se_fit,
  assessment = "TP4-JABBA",
  scenario   = "Base",
  model.type = "Schaefer",
  catch.cv   = catch_cv,
  r.dist     = "lnorm",
  r.prior    = r_prior,
  K.prior    = NULL,
  psi.dist   = "beta",
  psi.prior  = psi_prior,
  sigma.proc = TRUE,
  igamma     = c(0.001, 0.001),
  sigma.est  = TRUE,
  fixed.obsE = 0.05,
  sets.var   = 1:ncol(cpue_fit[,-1])
)

5D) Fit base model

For class speed: keep iterations modest.

fit_base <- fit_jabba(
  Input_base,
  jagsdir = getwd(),
  ni = 15000,
  nb = 3000,
  nt = 3,
  nc = 2
)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 186
##    Unobserved stochastic nodes: 390
##    Total graph size: 3817
## 
## Initializing model

6) Key outputs and diagnostic plots

6A) Summary

jbplot_summary(fit_base)
## 
## ><> jbplot_summary()

6B) CPUE fits and residuals

jbplot_cpuefits(fit_base)
## 
## ><> jbplot_cpue() - fits to CPUE <><
jbplot_residuals(fit_base)

## 
## ><> jbplot_cpue() - fits to CPUE <><

jbplot_runstest(fit_base)
## 
## ><> jbplot_runstest()   <><

6C) Posterior distributions + process deviations

jbplot_ppdist(fit_base)
## 
## ><> jbplot_ppist() - prior and posterior distributions  <><
jbplot_procdev(fit_base)

## 
## ><> jbplot_procdev() - Process error diviations on log(biomass)  <><

6D) Management communication plots

jbplot_kobe(fit_base)
## 
## ><> jbplot_kobe() - Stock Status Plot  <><

jbplot_trj(fit_base, type = "BBmsy")
## 
## ><> jbplot_trj() - BBmsy trajectory  <><

jbplot_trj(fit_base, type = "FFmsy")
## 
## ><> jbplot_trj() - FFmsy trajectory  <><


7) One simple sensitivity analysis

We will run a single sensitivity so you understand the purpose quickly:

Input_no_sigma_est <- build_jabba(
  catch = catch_fit,
  cpue  = cpue_fit,
  se    = se_fit,
  assessment = "TP4-JABBA",
  scenario   = "NoSigmaEst",
  model.type = "Schaefer",
  catch.cv   = catch_cv,
  r.dist     = "lnorm",
  r.prior    = r_prior,
  K.prior    = NULL,
  psi.dist   = "beta",
  psi.prior  = psi_prior,
  sigma.proc = TRUE,
  igamma     = c(0.001, 0.001),
  sigma.est  = FALSE,
  fixed.obsE = 0.05,
  sets.var   = 1:ncol(cpue_fit[,-1])
)

fit_no_sigma_est <- fit_jabba(
  Input_no_sigma_est,
  jagsdir = getwd(),
  ni = 15000,
  nb = 3000,
  nt = 3,
  nc = 2
)
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
## Graph information:
##    Observed stochastic nodes: 186
##    Unobserved stochastic nodes: 390
##    Total graph size: 3794
## 
## Initializing model

Compare base vs sensitivity:

jbplot_summary(list(Base = fit_base, NoSigmaEst = fit_no_sigma_est), plotCIs = TRUE)
## 
## ><> jbplot_summary()

!Save fitted models (we need them for TP-5):

save(fit_base, fit_no_sigma_est, file = "TP4_JABBA_fits.RData")

8) Extra: Retrospective analysis (hindcast)

Retrospective analysis removes the last years (“peels”) and refits to check stability.

This can be slow. Keep peels = 1 if you run it.

hc <- hindcast_jabba(Input_base, fit_base, peels = 1:2)
jbplot_retro(hc)
## 
## ><> jbplot_retro() - retrospective analysis <><

##                  B           F        Bmsy        Fmsy       procB        MSY
## 2022    0.16698623 -0.10430186 -0.02800971 -0.08444678 -0.02660319 0.18734278
## 2021   -0.03728153  0.04894497 -0.10546283  0.05660267 -0.03914575 0.09162808
## rho.mu  0.06485235 -0.02767845 -0.06673627 -0.01392206 -0.03287447 0.13948543

Class discussion:

1) Which CPUE index seems most informative? Which one looks noisy?

2) Which other sensitivity analysis would you run first (if you were limited by time)?

3) Which plot would you show to a manager first (and why)?


Class exercises (options):

1) Run models with different configurations and interpret the results

2) Use different r priors (check was was used in the paper).

3) Use a different production function. What can you conclude in terms of stock production?

4) Experiment with a different catch and/or CPUES series combinations


cat("\nWELL DONE! You fitted your first full stock assessment model! \n")
## 
## WELL DONE! You fitted your first full stock assessment model!