1 Setup

## -- Packages -- ####
library(data.table)
library(tidyverse)
library(ggplot2)
library(knitr)
library(taRifx)
library(zoo)
library(mgcv)
library(Metrics) # rmse
library(grid)
library(gridExtra)
library(ggpubr)
library(boot)
library(ggsci)
## -- Functions -- ####
gm_mean = function(x, na.rm=TRUE) {
    exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}

source("code/FUN_multiplot.R")

2 Data

Buffer species
Species for which the exact geographic coordinates of the population abundance records in the Living Planet Database are known. A buffer with 0.25 degree radius was drawn around these coordinates and abundance trends were calculated for suitable habitat inside these buffers only.

This approach yields a much larger total species sample (52), which allows us to calculate a cutoff year which jointly maximizes the species sample size and the temporal scope of the analysis; to generate a consistent set of species over time.

buffer = "050"
version = "v2"
params=list(lpdfile = "data/biodiversity/Africa/LPI_pops_Africa_spec_loc_observed.csv",
            habfile = paste0("outputs/Africa/Predictions/", version,
                             "/Buffer-", buffer,
                             "-Species-Total-Suitable-Area-by-Year.csv"),
            densfile = paste0("outputs/Africa/Predictions/", version,
                              "/Buffer-", buffer,
                              "-Species-Mean-Density-by-Year.csv"),
            abunfile = paste0("outputs/Africa/Predictions/", version,
                              "/Buffer-", buffer,
                              "-Species-Total-Abundance-by-Year.csv"))
## -- Read data -- ####

years   <- c(1992:2013)

## BUFFER
lpd <- read.csv(params$lpdfile, na.strings = "NULL")
trends.hab <- read.csv(params$habfile)
trends.dens <- read.csv(params$densfile)
trends.abun <- read.csv(params$abunfile)

trends.hab <- trends.hab[complete.cases(trends.hab), ]
trends.dens <- trends.dens[complete.cases(trends.dens), ]
trends.abun <- trends.abun[complete.cases(trends.abun), ]

names(trends.hab)[1] <- "Species.ID"
names(trends.dens)[1] <- "Species.ID"
names(trends.abun)[1] <- "Species.ID"

myspecies <- unique(lpd$Species.ID[lpd$Species.ID %in% unique(trends.abun$Species.ID)])

source("code/FUN_interpolate.R")

lpd$myid <- c(1:nrow(lpd))

# Select data only
lpd.data <- lpd %>%
    filter(Species.ID %in% myspecies) %>%
    select(Binomial, Species.ID, Common_name, myid, X1950:X2015)

lpd.data$Binomial <- as.character(lpd.data$Binomial)

lpd.data[, -c(1:4)] <- apply(lpd.data[, -c(1:4)], 2, as.numeric)
colnames(lpd.data) <- c(colnames(lpd.data[1:4]), 1950:2015)

lpd.interpolated <- lpd.data
lpd.interpolated[, -c(1:4)] <- t(apply(lpd.data[, -c(1:4)], 1, 
                                interpolate))

lpd.interpolated[which(lpd.interpolated[, "1992"] == 0), "1992"] <- NA # first year cannot be 0
lpd.interpolated <- lpd.interpolated[!is.na(lpd.interpolated[, "1992"]), ] #first year cannot be NA

lpd.interpolated <- droplevels(lpd.interpolated)
## -- Plot sample size over time ####
studyyears <- as.character(years)

l <- lpd.interpolated[studyyears]

# Get number of years data for each population (long-format)
t <- data.frame(Species.ID = lpd.interpolated$Species.ID, 
                nyears = max.col(is.na(l), "first")-1) # Last data point == First NA-point -1

# Group number of years per population by species; retain maximum
t2 <- t %>%
  group_by(Species.ID) %>%
  dplyr::summarise(maxyear = max(nyears)+1991) # column 1 = 1992; 1 year of data -> maxyear == 1992 
  
df <- data.frame(year = years, ss = NA)
for (i in years) {
  df[df$year == i, "ss"] <- sum(t2$maxyear >= i)
}

ggplot(df, aes(x=year, y=ss))  +
        geom_vline(xintercept = 2005, col = "grey30", lty = 2) +
        geom_hline(yintercept = df[df$year == 2005, "ss"], col = "grey30", lty = 2) +
        # annotate("segment", x1=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        # annotate("rect", xmin=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        geom_point() +
        # geom_smooth(se=F) +
        xlab("Cutoff year") +
        ylab("Number of species in sample") +
        theme_minimal() +
        theme(axis.line.x = element_line(),
              axis.line.y = element_line())
\label{fig:figs}Potential sample size of species for which specific geographic coordinates of observed records are available

Figure 2.1: Potential sample size of species for which specific geographic coordinates of observed records are available

Dashed lines in figure above show cutoffs where year * sample size is maximized. This yields a cutoff at year 2005. There are 35 species that have data at least up to 2005.

# Set years to the new range
# years   <- c(1992:2005)
years   <- c(1992, 1995, 2000, 2005)

# Subset species
lpd.interpolated <- lpd.interpolated[, c(1:3, which(names(lpd.interpolated) %in% years))]
lpd.interpolated <- lpd.interpolated[complete.cases(lpd.interpolated), ]
names(trends.hab)[2:6] <- c("1992", "1995", "2000", "2005", "2010")
names(trends.dens)[2:6] <- c("1992", "1995", "2000", "2005", "2010")
names(trends.abun)[2:6] <- c("1992", "1995", "2000", "2005", "2010")

# Add NA columns for missing years
addyears <- function(x) {
df <- data.frame(Species.ID = x[["Species.ID"]])
        for (y in years) {
                # if (y %in% names(x)) {
                        df[[as.character(y)]] <- x[[as.character(y)]]
                # } else {df[[as.character(y)]] <- NA}
        }
        return(df)
}
trends.hab <- addyears(trends.hab)
trends.dens <- addyears(trends.dens)
trends.abun <- addyears(trends.abun)

# # Interpolate these years
# trends.abun[, -c(1)] <- t(apply(trends.abun[, -c(1)], 1,
#                                 interpolate))
# trends.hab[, -c(1)] <- t(apply(trends.hab[, -c(1)], 1,
#                                 interpolate))

observedspecies <- unique(lpd.interpolated$Species.ID) # have already intersected with unique output species 
k <- length(observedspecies)

4 Validation metrics

4.1 At the species level

colnames(ccfHab) <- seq(-3,3,1)
colnames(ccfDens) <- seq(-3,3,1)

# t-test
pvals <- c()
for (j in 1:ncol(ccfHab)) {
        p <- (t.test(ccfDens[,j], ccfHab[,j])$p.value)
        pvals <- append(pvals, p)
        }

crosscor <- data.frame(ccf = c(ccfHab[,1], ccfHab[,2], ccfHab[,3], 
                               ccfHab[,4], ccfHab[,5], ccfHab[,6], ccfHab[,7], 
                               ccfDens[,1], ccfDens[,2], ccfDens[,3],  
                               ccfDens[,4],  ccfDens[,5],  ccfDens[,6], ccfDens[,7]),
                   model = factor(c(rep("habitat-only", k*7), c(rep("habitat+density", k*7))),
                                  levels = c("habitat-only", "habitat+density")),
                   lag = factor(c(rep(-3, k), rep(-2, k), rep(-1, k), rep(0, k),
                                  rep(1, k), rep(2, k), rep(3, k), 
                                  rep(-3, k), rep(-2, k), rep(-1, k), rep(0, k),
                                  rep(1, k), rep(2, k), rep(3, k)) ))
levels(crosscor$lag) <- c(paste0("lag ", levels(crosscor$lag)))

ggplot(crosscor, aes(x = model, y = ccf, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "#984ea3")) +
        scale_color_manual(values = c("#4daf4a", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Cross correlation coefficient", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        facet_wrap(~ lag, ncol = 7) +
        theme_minimal() +
        theme(axis.text.x = element_blank(),
                      panel.border = element_rect(fill=F),
              strip.text = element_text(size = 12),
              legend.position = "top") +
        annotate("text", x=1.5, y = -1.1, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvals[3:9], 2)))

Root Mean Squared Error
Because correlations have some obscuring symmetries (unsensitive to shifts & scaling), correlation coefficients are are not always good signals of true prediction performance. Root Mean Squared Error is a better metric to track model accuracy.

p-values are calculated between respective models and the habitat-only model.

RMSE <- data.frame(rmse = c(rmseHab, rmseDens, rmseAbun),
                   model = factor(c(rep("Habitat model", k), 
                                    c(rep("Density model", k), 
                                    c(rep("Combined model", k)))),
                                  levels = c("Habitat model", 
                                             "Density model", 
                                             "Combined model")))

# t-test
pvalsD <- t.test(rmseHab, rmseDens)$p.value
pvalsA <- t.test(rmseHab, rmseAbun)$p.value

ggplot(RMSE, aes(x = model, y = rmse, fill = model)) + 
        scale_fill_manual(values = cls3) +
        # scale_y_continuous(limits = (c(0,1))) +
        scale_color_manual(values = cls3) +
        # geom_violin(trim = T, alpha = 0.5) +
        geom_boxplot() +
        labs(y = "Root Mean Squared Error", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        # geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n")+
        annotate("text", x=2, y = 1.75, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsD, 3)))+
        annotate("text", x=3, y = 1.75, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsA, 3)))
\label{fig:figs}Distributions of model root mean squared error

Figure 4.1: Distributions of model root mean squared error

There is 1 species with very poor prediction performance (RMSE > 1; ). This next plot zooms in on RMSE up to 1.00:

pvalsD <- t.test(rmseDens[rmseDens<1], rmseHab[rmseHab<1])$p.value
pvalsA <- t.test(rmseAbun[rmseAbun<1], rmseHab[rmseHab<1])$p.value

ggplot(RMSE, aes(x = model, y = rmse, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        scale_y_continuous(limits = (c(0,1))) +
        scale_color_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Root Mean Squared Error", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n") +
        annotate("text", x=2, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsD, 3)))+
        annotate("text", x=3, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsA, 3)))
\label{fig:figs}Distributions of model root mean squared errors up to RMSE=1

Figure 4.2: Distributions of model root mean squared errors up to RMSE=1

NB: Slight, but not statistically significant, difference between RMSE distributions of two model types.

Mean Absolute Error
RMSE gives a relatively high weight to large errors as it increases with the variance of the frequency distribution of error magnitudes, not with the variance of the errors. Mean Absolute Error (MAE) captures the variance of errors.

RMSE could be more useful when large errors are particularly undesirable (i.e., if being off by 1 is more than twice as bad as being off by 0.5). If not (as is our case I think, as being off 20% in abundance change, is twice as bad as being off 10% in abundance chagne), Mean Absolute Error is more appropriate.

NB: MAE plotted for MAE<1 ().

MAE <- data.frame(mae = c(maeHab, maeDens, maeAbun),
                   model = factor(c(rep("Habitat model", k), 
                                    c(rep("Density model", k), 
                                    c(rep("Combined model", k)))),
                                  levels = c("Habitat model", 
                                             "Density model", 
                                             "Combined model")))

# t-test
pvalsD <- t.test(maeDens[maeDens<1], maeHab[maeHab<1])$p.value
pvalsA <- t.test(maeDens[maeDens<1], maeHab[maeAbun<1])$p.value

ggplot(MAE, aes(x = model, y = mae, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        scale_y_continuous(limits = (c(0,1))) +
        scale_color_manual(values = c("#4daf4a","blue", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Mean Absolute Error", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n") +
        annotate("text", x=2, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsA, 3))) +
        annotate("text", x=3, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsD, 3)))
\label{fig:figs}Distributions of model mean absolute errors; p-value for MAE<1

Figure 4.3: Distributions of model mean absolute errors; p-value for MAE<1

Bias
I also compute bias here, as the difference between obsered-predicted, as a measure of model tendencies to systematically under- or over-predict changes in GMA.

NB: Bias plotted without 3 really bad predictions (). p-values below violins from test whether distributions are different from 0.

BIAS <- data.frame(bias = c(biasHab, biasDens, biasAbun),
                   model = factor(c(rep("Habitat model", k), 
                                    c(rep("Density model", k), 
                                    c(rep("Combined model", k)))),
                                  levels = c("Habitat model", 
                                             "Density model", 
                                             "Combined model")))

# t-test
pvals1 <- wilcox.test(biasDens, biasHab)$p.value
pvals2 <- wilcox.test(biasAbun, biasHab)$p.value
pvalN <- wilcox.test(biasHab)$p.value
pvalD <- wilcox.test(biasDens)$p.value
pvalA <- wilcox.test(biasAbun)$p.value

ggplot(BIAS, aes(x = model, y = bias, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        scale_y_continuous(limits = (c(-1,1))) +
        scale_color_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Bias", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n") +
        annotate("text", x=2, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvals1, 3))) +
        annotate("text", x=3, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvals2, 3))) +
        annotate("text", x=1, y = -0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalN, 3))) +
        annotate("text", x=2, y = -0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalD, 3)))+
        annotate("text", x=3, y = -0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalA, 3)))
\label{fig:figs}Distributions of model bias; p-value for bias<1

Figure 4.4: Distributions of model bias; p-value for bias<1


4.3 All metrics

linear.trends.diff <- data.frame(Modelled.habitat = slopes$Observed.abundance -
                                                         slopes$Modelled.habitat,
                                 Modelled.density = slopes$Observed.abundance -
                                                         slopes$Modelled.density,
                                 Modelled.abundance = slopes$Observed.abundance -
                                                         slopes$Modelled.abundance)

linear.trends.long <- gather(linear.trends.diff, "model", "value")
linear.trends.long$model <- factor(linear.trends.long$model, 
                                   levels = unique(linear.trends.long$model),
                                   labels = lbls[2:4])

names(RMSE)[1] <- "value"
names(BIAS)[1] <- "value"
names(MAE)[1] <- "value"

vm <- rbind(RMSE, BIAS, MAE, linear.trends.long)
vm$metric <- c(rep("RMSE", 105), rep("Bias", 105), rep("MAE", 105), rep("Difference linear slopes", 105))
vm$metric <- factor(vm$metric, levels = c("RMSE", "MAE", "Bias", "Difference linear slopes"))

# vm <- vm[-which.max(vm$value), ]
# vm <- vm[-which.max(vm$value), ]
# vm <- vm[-which.max(vm$value), ]
# vm <- vm[-which.max(vm$value), ]

pvalN <- wilcox.test(biasHab)$p.value
pvalD <- wilcox.test(biasDens)$p.value
pvalA <- wilcox.test(biasAbun)$p.value

# Visualize: Specify the comparisons you want
my_comparisons <- list( c("Habitat model", "Density model"), 
                        c("Density model", "Combined model"),
                        c("Habitat model", "Combined model") )
dat_text <- data.frame(metric = rep("Bias", 3),
                       model = levels(vm$model),
                       label = c(round(pvalN, 2), round(pvalD, 4), round(pvalA, 3)))

ranges <- as.data.frame(rbind(range(filter(vm, metric == "RMSE")$value), 
                   range(filter(vm, metric == "MAE")$value),
                   range(filter(vm, metric == "Bias")$value),
                   range(filter(vm, metric == "Difference linear slopes")$value)))

ranges <- data.frame(ymax = c(2.5, 2, 1, 3), 
                     ymin = c(0, 0, -.75, 0.1),
                     metric = c("RMSE", "MAE", "Bias", "Difference linear slopes"),
                     model = "Habitat model")
ranges <- gather(ranges, "key", "value", -metric, -model) %>% select(., -key)

# level1 <- c(2, 1.6, 0.6, 2.3)
# level2 <- level2-level1*0.05
# 
# x = c(1, 1, 2, 2), c(1, 1, 3, 3), 
# 
# arcs <- data.frame(value = c(level1, level2), 
#                      metric = c("RMSE", "MAE", "Bias", "Difference linear slopes"),
#                      model = lbls[2:4])
# 
# compare_means(data = vm, value~model, method = "wilcox", group.by = "metric",
#                            # ref.group = "habitat model",
#                            label = "p.format",
#                            # method.args = list(alternative = "greater"),
#                            comparisons = my_comparisons) 
        
ggplot(vm, aes(x = model, y = value, color = model),
          xlab = "") +
        geom_boxplot() + 
        geom_jitter(shape = 1) +
        scale_colour_manual(values = cls3, drop = F) +
        geom_hline(aes(yintercept=0), lty = 2) +
        facet_wrap(~metric, scales = "free_y") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1),
              strip.background = element_rect(fill="white", colour = "white"),
              legend.position = "top") +
        scale_y_continuous(name = "") +
        geom_blank(data = ranges) +
        geom_text(data = dat_text, aes(y=-.65, label = paste("p = ", label)), color = 1) +

        stat_compare_means(label.y.npc = 0.9, method = "wilcox",
                           # ref.group = "Habitat model",
                           label = "p.format", 
                           # method.args = list(alternative = "greater"),
                           comparisons = my_comparisons,
                           tip.length = 0.05) # Add pairwise comparisons p-value


4.4 At the index level

These metrics are calculated on the aggregated trends across all species, but I don’t think these tell us much if predictions for individual species are no good.

rsq = function(y,f) { 1 - sum((y-f)^2)/sum((y-mean(y))^2) }
cor0 <- cor(trends$Observed.abundance, trends$Modelled.habitat)
cor1 <- cor(trends$Observed.abundance, trends$Modelled.density)
cor2 <- cor(trends$Observed.abundance, trends$Modelled.abundance)

rsq0 <- rsq(trends$Observed.abundance, trends$Modelled.habitat)
rsq1 <- rsq(trends$Observed.abundance, trends$Modelled.density)
rsq2 <- rsq(trends$Observed.abundance, trends$Modelled.abundance)

rmse0 <- rmse(trends$Observed.abundance, trends$Modelled.habitat)
rmse1 <- rmse(trends$Observed.abundance, trends$Modelled.density)
rmse2 <- rmse(trends$Observed.abundance, trends$Modelled.abundance)

mae0 <- mae(trends$Observed.abundance, trends$Modelled.habitat)
mae1 <- mae(trends$Observed.abundance, trends$Modelled.density)
mae2 <- mae(trends$Observed.abundance, trends$Modelled.abundance)

bias0 <- bias(trends$Observed.abundance, trends$Modelled.habitat)
bias1 <- bias(trends$Observed.abundance, trends$Modelled.density)
bias2 <- bias(trends$Observed.abundance, trends$Modelled.abundance)

# ccf.all <- cbind(ccf(trends$Observed.abundance, 
#                    trends$Modelled.habitat, 
#                    na.action = na.pass, plot = F, 
#                    lag.max = 5)$acf[,,1]^2,
#         ccf(trends$Observed.abundance,
#                    trends$Modelled.abundance, 
#                    na.action = na.pass, plot = F, 
#                    lag.max = 5)$acf[,,1]^2)
## Figs for manuscript
# Linear
ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = lm, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = mods) +
        scale_color_manual(values = c("#ff7f00", "#4daf4a", "#984ea3", "#377eb8", "#e41a1c")) +
        scale_x_continuous(breaks = seq(1992, 2013, 2)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance")  +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "none")
ggsave("figs/Africa/linear.pdf", width = 12, height = 7.5, units = "cm")

# LOESS
ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        scale_color_manual(values = c("#ff7f00", "#4daf4a", "#984ea3", "#377eb8", "#e41a1c")) +
        scale_x_continuous(breaks = seq(1992, 2013, 2)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " Africa mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance")  +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "none")
ggsave("figs/Africa/gam.pdf", width = 12, height = 7.5, units = "cm")

Quantifying the agreement between observed and modelled trends in geometric mean abundance across all species.

Root Mean Squared Error
Observed vs. Habitat-only model: 0.1035566
Observed vs. Density-only model: 0.2849386
Observed vs. Habitat+density model: 0.2511758

Mean Absolute Error
Observed vs. Habitat-only model: 0.0740524
Observed vs. Density-only model: 0.2088959
Observed vs. Habitat+density model: 0.1796566

Bias
Observed vs. Habitat-only model: 0.0740524
Observed vs. Density-only model: 0.2088959
Observed vs. Habitat+density model: 0.1752529

Slope of linear trend
Linear slope observed data: 0.00383
Linear slope Habitat-only model: 0.00323
Linear slope Density-only model: -0.01626
Linear slope Habitat+density model: -0.01567

gamdat <- trends %>%
        select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "model", value = "index",
                          'Modelled.habitat':'Observed.abundance') 
gamdat$model <- factor(gamdat$model)

smooth_diff <- function(model, newdata, f0, f1, var, alpha = 0.05,
                        unconditional = FALSE) {
    xp <- predict(model, newdata = newdata, type = 'lpmatrix')
    c0 <- grepl(f0, colnames(xp))
    c1 <- grepl(f1, colnames(xp))
    r0 <- newdata[[var]] == f0
    r1 <- newdata[[var]] == f1
    ## difference rows of xp for data from comparison
    X <- xp[r1, ] - xp[r0, ]
    ## zero out cols of X related to splines for other lochs
    X[, ! (c0 | c1)] <- 0
    ## zero out the parametric cols
    X[, !grepl('^s\\(', colnames(xp))] <- 0
    dif <- X %*% coef(model)
    se <- sqrt(rowSums((X %*% vcov(model, unconditional = unconditional)) * X))
    crit <- qt(alpha/2, df.residual(model), lower.tail = FALSE)
    upr <- dif + (crit * se)
    lwr <- dif - (crit * se)
    data.frame(pair = paste(f0, f1, sep = ' - '),
               diff = dif,
               se = se,
               upper = upr,
               lower = lwr)
}

SmoothPar <- 5
m <- gam(index ~ s(year, by = model, k = SmoothPar), data = gamdat)
pdat <- expand.grid(year = seq(1992, 2005, length = 400),
                    model = c("Modelled.habitat", "Modelled.abundance", "Observed.abundance"))
# plot(m, shade = T, ylim = c(-.2, .2))

comp1 <- smooth_diff(m, pdat, 'Observed.abundance', 'Modelled.habitat', 'model')
comp2 <- smooth_diff(m, pdat, 'Observed.abundance', 'Modelled.abundance', 'model')
comp <- cbind(date = seq(1992, 2005, length = 400),
              rbind(comp1, comp2))

levels(comp$pair) <- c('Observed data - "Habitat-only" model', 'Observed data - "Habitat+density" model')

ggplot(comp, aes(x = date, y = diff, group = pair)) +
        geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, 
                    fill = c(rep("#4daf4a", 400), rep("#984ea3", 400))) +
        annotate("segment", x = 1992, xend = 2005, y = 0, yend = 0, lty = 2, cex = 0.5) +
        scale_x_continuous(breaks = seq(1992, 2004, by = 2)) +
        ylim(-.2, .2) +
        geom_line(col = c(rep("#4daf4a", 400), rep("#984ea3", 400))) +
        facet_wrap(~ pair, ncol = 2) +
        # coord_cartesian(ylim = c(-.2, .2)) +
        labs(x = NULL, y = 'Difference in GMA trend') +
        theme_minimal() + 
        theme(strip.text = element_text(size = 10)) 
SmoothPar <- 2
gam0 <- gam(index ~ s(year, by = model, k = SmoothPar), 
            data = gamdat[gamdat$model == "Observed.abundance", ])
gam1 <- gam(index ~ s(year, by = model, k = SmoothPar), 
            data = gamdat[gamdat$model == "Modelled.habitat", ])
gam2 <- gam(index ~ s(year, by = model, k = SmoothPar), 
            data = gamdat[gamdat$model == "Modelled.abundance", ])

lm0 <- lm(index ~ year, 
            data = gamdat[gamdat$model == "Observed.abundance", ])
lm1 <- lm(index ~ year,  
            data = gamdat[gamdat$model == "Modelled.habitat", ])
lm2 <- lm(index ~ year, 
            data = gamdat[gamdat$model == "Modelled.abundance", ])

paste0("R-squared [Observed GAM] = ",        round(summary(gam0)$r.sq, 3))
paste0("R-squared [Habitat-only GAM] = ",    round(summary(gam1)$r.sq, 3))
paste0("R-squared [Habitat+density GAM] = ", round(summary(gam2)$r.sq, 3))
paste0("R-squared [Observed LM] = ",         round(summary(lm0)$adj.r.sq, 3))
paste0("R-squared [Habitat-only LM] = ",     round(summary(lm1)$adj.r.sq, 3))
paste0("R-squared [Habitat+density LM] = ",  round(summary(lm2)$adj.r.sq, 3))
# Points only
ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        # geom_smooth(lwd=0.5, se=T, method = lm, data = obs) +
        # geom_smooth(lwd=0.5, se=F, method = lm, data = mods) +
        scale_color_manual(values = c("#ff7f00", "NA", "NA", "NA", "NA")) +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal()

# Observed abundance
tmp3 <- trend_data_long %>%
    filter(Trend.type %in% c("Observed data"))

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "NA", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=T, method = lm, data = tmp3, col = "#ff7f00") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "NA", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = tmp3, col = "#ff7f00") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Habitat only
tmp1 <- trend_data_long %>%
    filter(Trend.type == "Habitat-only model")

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "#4daf4a", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = tmp1, col = "#4daf4a") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "#4daf4a", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp1, col = "#4daf4a") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Precip & Temp
tmp2 <- trend_data_long %>%
    filter(Trend.type %in% c("Precipitation (WQ)"))
tmp5 <- trend_data_long %>%
    filter(Trend.type %in% c("Temperature (SD)"))

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "#4daf4a", "#377eb8", "#e41a1c")) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp2, col = "#377eb8") +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp5, col = "#e41a1c") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Modelled abundance
tmp4 <- trend_data_long %>%
    filter(Trend.type %in% c("Habitat+density model"))

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "#984ea3", "#4daf4a", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = tmp4, col = "#984ea3") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                expression("Change from 1992 (Nt/N"[1992])) +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "#984ea3", "#4daf4a", "#377eb8", "#e41a1c")) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp4, col = "#984ea3") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                expression("Change from 1992 (Nt/N"[1992])) +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# t3 <- ggplot(data=tmp3,
#             aes(x=year, y=index, colour=Trend.type )) +
#     geom_smooth(lwd=1, se=F) +
#     # geom_point(cex=0.5) +
#     # geom_abline(intercept = 1, slope = 0, lty =2) +
#     scale_color_manual(values=c("#ff7f00")) +
#     # geom_line(data=trendRM_data_long, 
#     #           aes(x=year, y=trend, colour=Trend.type )) +
#     scale_x_continuous(breaks = years) +
#     scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
#     # minor_breaks = seq(0.25, 1.75, 0.025),
#     ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
#                    length(unique(lpd.interpolated$Species.ID)), " African mammals"),
#             "Change from 1992 (Nt/'N'[1992])") +
#     theme(plot.title = element_text(hjust = 0.5)) +
#     # scale_color_discrete(name = "Variable") +
#     theme_minimal() +
#     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# print(t3)
# 
# 
# 
# t4 <- ggplot(data=tmp4,
#             aes(x=year, y=index, colour=Trend.type )) +
#     geom_smooth(lwd=1, se=F) +
#     # geom_point(cex=0.5) +
#     # geom_abline(intercept = 1, slope = 0, lty =2) +
#     scale_color_manual(values=c("#984ea3")) +
#     # geom_line(data=trendRM_data_long, 
#     #           aes(x=year, y=trend, colour=Trend.type )) +
#     scale_x_continuous(breaks = years) +
#     scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
#     # minor_breaks = seq(0.25, 1.75, 0.025),
#     ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
#                    length(unique(lpd.interpolated$Species.ID)), " African mammals"),
#             "Change from 1992 (Nt/'N'[1992])") +
#     theme(plot.title = element_text(hjust = 0.5)) +
#     # scale_color_discrete(name = "Variable") +
#     theme_minimal() +
#     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# print(t4)

5 Subsets

5.1 Threat

Threatened by land use change
Species and populations listed as threatened by “Habitat degradation/change” or “Habitat loss” in the Living Planet Database

#Load species traits and taxonomy
# Traits <- read.csv('H:/MyDocuments/Luca-Density/M_Traits.csv')
Traits <- read.csv("data/density/fromLuca/M_Traits.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by.x = 'Spec.ID', by.y = 'tax_id', all.x=TRUE)

# Select data only
lpd.myspecies <- lpd %>%
        filter(Species.ID %in% observedspecies) %>%
        # filter(myid %in% unique(lpd.interpolated$myid)) %>%
        left_join(., Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                  by = c('Species.ID' ='tax_id')) 

lpd.myspecies <- lpd.myspecies[as.numeric(rownames(lpd.interpolated)), ]

lpd.threat <- lpd.myspecies %>%
    filter(Primary_threat %in% c("Habitat degradation/change", "Habitat loss"))

# no exploitation / disease / pollution 
lpd.nootherthreat <- lpd.myspecies %>%
    filter(!Primary_threat %in% c("Exploitation", "Pollution", "Disease"))

lpd.Unthreat <- lpd.myspecies %>%
    filter(!Primary_threat %in% c("Habitat degradation/change", "Habitat loss",
                                 "Exploitation", "Pollution", "Disease"))

threatspecies <- unique(lpd.threat$Species.ID)
Unthreatspecies <- unique(lpd.Unthreat$Species.ID)
No.oth.threatspecies <- unique(lpd.nootherthreat$Species.ID)

## Modelled trends
trends.thr <- data.frame(year = plotyears)
lambdas.thr <- data.frame(year = plotyears)

for (i in c(1:4)) {
    ii <- i+1
    dat <- all.trends[[i]]
    dat <- subset(dat, Species.ID %in% threatspecies)
    trends.thr[, ii] <- apply(dat[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    dat2 <- all.lambdas[[i]]
            dat2 <- subset(dat2, Species.ID %in% threatspecies)
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas.thr[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
        lambdas.thr[is.nan(lambdas.thr)] <- NA
    
}

names(trends.thr) <- nms

obs <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obs$Trend.type <- factor(obs$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

mods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance") 
mods$Trend.type <- factor(mods$Trend.type,
                          levels=lvls[2:4],
                          labels=lbls[2:4])

allmods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance':"Modelled.abundance") 
allmods$Trend.type <- factor(allmods$Trend.type,
                             levels=lvls,
                             labels=lbls)

names(lambdas.thr) <- nms

obslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obslam$Trend.type <- factor(obslam$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

modslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance")
modslam$Trend.type <- factor(modslam$Trend.type,
                                     levels=lvls[2:4],
                               labels=lbls[2:4])

allmodslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                         'Observed.abundance':"Modelled.abundance")
allmodslam$Trend.type <- factor(allmodslam$Trend.type,
                              levels=lvls,
                              labels=lbls)

g <- ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
    geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
    ggtitle(paste0("Species threatened by habitat degradation/change/loss (", 
                   nrow(lpd.threat), 
                   " populations; ",
                   length(threatspecies), " species)"),
            "Geometric mean species abundance, relative to 1992") +
    theme(plot.title = element_text(hjust = 0.5)) +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

# Lambdas
l <- ggplot(data=allmodslam,
       aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obslam) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = modslam) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
 ggtitle("", "Annual multiplicative change (lambda)") +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(g, l, ncol = 2, widths = c(1,1.5))
\label{fig:figs}Species threatened by habitat degradation / change / loss

Figure 5.1: Species threatened by habitat degradation / change / loss


No other threats
Species and populations NOT listed in the Living Planet Database as threatened by other causes (“Exploitation”, “Pollution”, or “Disease”). This set should have greatest agreement between models and observed trends.

## Modelled trends
trends.thr <- data.frame(year = plotyears)
lambdas.thr <- data.frame(year = plotyears)

for (i in c(1:4)) {
        ii <- i+1
        dat <- all.trends[[i]]
        dat <- subset(dat, Species.ID %in% No.oth.threatspecies)
        trends.thr[, ii] <- apply(dat[-c(1:3)],
                                  MARGIN = 2,
                                  FUN = function(x) gm_mean(x))
    dat2 <- all.lambdas[[i]]
        dat2 <- subset(dat2, Species.ID %in% No.oth.threatspecies)
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas.thr[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas.thr[is.nan(lambdas.thr)] <- NA
}

names(trends.thr) <- nms

obs <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obs$Trend.type <- factor(obs$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

mods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance") 
mods$Trend.type <- factor(mods$Trend.type,
                          levels=lvls[2:4],
                          labels=lbls[2:4])

allmods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance':"Modelled.abundance") 
allmods$Trend.type <- factor(allmods$Trend.type,
                             levels=lvls,
                             labels=lbls)

names(lambdas.thr) <- nms

obslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obslam$Trend.type <- factor(obslam$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

modslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance")
modslam$Trend.type <- factor(modslam$Trend.type,
                                     levels=lvls[2:4],
                               labels=lbls[2:4])

allmodslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                         'Observed.abundance':"Modelled.abundance")
allmodslam$Trend.type <- factor(allmodslam$Trend.type,
                              levels=lvls,
                              labels=lbls)

g <- ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        ggtitle(paste0("Species NOT threatened by exploitation/pollution/disease (", 
                       nrow(lpd.nootherthreat), 
                       " populations; ",
                       length(No.oth.threatspecies), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
         ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

l <- ggplot(data=allmodslam,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obslam) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = modslam) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls) + 
        ggtitle("", "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(g, l, ncol = 2, widths = c(1,1.5))
\label{fig:figs}Species not threatened by exploitation / pollution / disease

Figure 5.2: Species not threatened by exploitation / pollution / disease


Not-threatened only
Species and populations listed in the Living Planet Database as NOT threatened by ANY cause (“Habitat degradation/change”, “Habitat loss”, “Exploitation”, “Pollution”, or “Disease”)

## Modelled trends
trends.thr <- data.frame(year = plotyears)
lambdas.thr <- data.frame(year = plotyears)
 
for (i in c(1:4)) {
    ii <- i+1
    dat <- all.trends[[i]]
    dat <- subset(dat, Species.ID %in% Unthreatspecies)
    trends.thr[, ii] <- apply(dat[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    dat2 <- all.lambdas[[i]]
    dat2 <- subset(dat2, Species.ID %in% Unthreatspecies)
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas.thr[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas.thr[is.nan(lambdas.thr)] <- NA
}

names(trends.thr) <- nms

obs <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obs$Trend.type <- factor(obs$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

mods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance") 
mods$Trend.type <- factor(mods$Trend.type,
                          levels=lvls[2:4],
                          labels=lbls[2:4])

allmods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance':"Modelled.abundance") 
allmods$Trend.type <- factor(allmods$Trend.type,
                             levels=lvls,
                             labels=lbls)

names(lambdas.thr) <- nms

obslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obslam$Trend.type <- factor(obslam$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

modslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance")
modslam$Trend.type <- factor(modslam$Trend.type,
                                     levels=lvls[2:4],
                               labels=lbls[2:4])

allmodslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                         'Observed.abundance':"Modelled.abundance")
allmodslam$Trend.type <- factor(allmodslam$Trend.type,
                              levels=lvls,
                              labels=lbls)

g <- ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        ggtitle(paste0("Species NOT threatened by ANY cause (", 
                       nrow(lpd.Unthreat), 
                       " populations; ",
                       length(Unthreatspecies), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
         ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

l <- ggplot(data=allmodslam,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obslam) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = modslam) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls) + 
        ggtitle("", "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(g, l, ncol = 2, widths = c(1,1.5))
\label{fig:figs}Species without listed threat

Figure 5.3: Species without listed threat


5.2 IUCN Red List Status

#Load species traits and taxonomy
IUCNmamms <- read.csv("data/biodiversity/IUCN-RedList/MammalsJune2018.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, IUCNmamms, 
                by.x = 'Spec.ID', by.y = 'Species.ID', all.x=TRUE)

allspecies$qnt<- ifelse(allspecies$Red.List.status == "LC" | allspecies$Red.List.status == "NT", "Least Concern or Near Threatened", ifelse(
        allspecies$Red.List.status == "DD", "DD",  "Vulnerable or worse"))

## Modelled trends
plots <- list()
lplots <- list()
for (j in unique(allspecies$Red.List.status)) {
        trends.set <- data.frame(year = plotyears)
        lambdas.set <- data.frame(year = plotyears)
        set <- subset(allspecies, Red.List.status == j)
        for (i in c(1:4)) {
                ii <- i+1
                dat <- all.trends[[i]]
                datS <- subset(dat, Species.ID %in% set$Spec.ID)
                trends.set[, ii] <- apply(datS[-c(1:3)],
                                          MARGIN = 2,
                                          FUN = function(x) gm_mean(x))
                dat <- all.lambdas[[i]]
                datH <- subset(dat, Species.ID %in% set$Spec.ID)
                lambdas.set[, ii] <- apply(datH[-c(1:3)],
                                           MARGIN = 2,
                                           FUN = function(x) mean(x, na.rm = T))
                    lambdas.set[is.nan(lambdas.set)] <- NA
        }
        names(trends.set) <- nms
        trends.set.long <- gather(trends.set, key = "Trend.type", value = "trend", 
                                  'Observed.abundance':'Modelled.abundance')
        trends.set.long$Trend.type <- factor(trends.set.long$Trend.type, 
                                             levels=lvls,
                                             labels=lbls)
        names(lambdas.set) <- nms
        lambdas.set.long <- gather(lambdas.set, key = "Trend.type", value = "trend", 
                                   'Observed.abundance':'Modelled.abundance')
        lambdas.set.long$Trend.type <- factor(lambdas.set.long$Trend.type, 
                                              levels=lvls,
                                              labels=lbls)

g <- ggplot(data=trends.set.long,
             aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Red List Status: ", j,
                       " (", nrow(datS), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992]))

l <- ggplot(data=lambdas.set.long,
                      aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        labs(y = "lambda") +
        theme(plot.title = element_text(hjust = 0, face = "bold"))

grid.arrange(g, l, ncol = 2, widths = c(1,1.4))

}


5.3 Diet

Subsets for herbivores, carnivores, and omnivores

#Load species traits and taxonomy
# Traits <- read.csv('H:/MyDocuments/Luca-Density/M_Traits.csv')
Traits <- read.csv("data/density/fromLuca/M_Traits.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by.x = 'Spec.ID', by.y = 'tax_id', all.x=TRUE)

# Select data only
lpd.myspecies <- lpd %>%
    filter(Species.ID %in% observedspecies) %>%
    left_join(., Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by = c('Species.ID' ='tax_id'))

Herbs <- allspecies %>%
    filter(Diet == "Herbivore") %>%
    droplevels()
Carne <- allspecies %>%
    filter(Diet == "Carnivore")  %>%
    droplevels()
Omniv <- allspecies %>%
    filter(Diet == "Omnivore")  %>%
    droplevels()

## Modelled trends
trends.herbs <- data.frame(year = plotyears)
trends.carne <- data.frame(year = plotyears)
trends.omniv <- data.frame(year = plotyears)
lambdas.herbs <- data.frame(year = plotyears)

for (i in c(1:4)) {
    ii <- i+1
    dat <- all.trends[[i]]
    datH <- subset(dat, Species.ID %in% Herbs$Spec.ID)
    datC <- subset(dat, Species.ID %in% Carne$Spec.ID)
    datO <- subset(dat, Species.ID %in% Omniv$Spec.ID)
    trends.herbs[, ii] <- apply(datH[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    trends.carne[, ii] <- apply(datC[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    trends.omniv[, ii] <- apply(datO[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    dat <- all.lambdas[[i]]
    datH <- subset(dat, Species.ID %in% Herbs$Spec.ID)
    # datC <- subset(dat, Species.ID %in% Carne$Spec.ID)
    # datO <- subset(dat, Species.ID %in% Omniv$Spec.ID)
    lambdas.herbs[, ii] <- apply(datH[-c(1:3)],
                             MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas.herbs[is.nan(lambdas.herbs)] <- NA
    # lambdas.carne[, ii] <- apply(datC[-c(1:3)],
    #                          MARGIN = 2,
    #                          FUN = function(x) mean(x, na.rm = T))
    # lambdas.omniv[, ii] <- apply(datO[-c(1:3)],
    #                          MARGIN = 2,
    #                          FUN = function(x) mean(x, na.rm = T))
}

names(trends.herbs) <- nms
names(trends.carne) <- nms
names(trends.omniv) <- nms
names(lambdas.herbs) <- nms

trend_long_herbs <- gather(trends.herbs, key = "Trend.type", value = "trend", 
                          'Observed.abundance':'Modelled.abundance')
trend_long_carne <- gather(trends.carne, key = "Trend.type", value = "trend",
                          'Observed.abundance':'Modelled.abundance')
trend_long_omniv <- gather(trends.omniv, key = "Trend.type", value = "trend",
                          'Observed.abundance':'Modelled.abundance')
lambda_long_herbs <- gather(lambdas.herbs, key = "Trend.type", value = "trend",
                          'Observed.abundance':'Modelled.abundance')

trend_long_herbs$Trend.type <- factor(trend_long_herbs$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
trend_long_carne$Trend.type <- factor(trend_long_carne$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
trend_long_omniv$Trend.type <- factor(trend_long_omniv$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
lambda_long_herbs$Trend.type <- factor(lambda_long_herbs$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
Table 5.1: Primary threat by diet group (# of populations)
Climate change Disease Exploitation Habitat degradation/change Habitat loss Invasive spp/genes Pollution
Carnivore 0 1 3 0 1 0 0
Herbivore 0 5 63 19 29 0 1
Omnivore 0 0 0 0 0 0 0
# h <- ggplot(data=trend_long_herbs,
#             aes(x=year, y=trend, colour=Trend.type )) +
#     geom_smooth(se=F, formula=y~x-1) +
#     scale_x_continuous(breaks = years) +
#     scale_y_continuous(minor_breaks = seq(0.25, 1.75, 0.025), 
#                        breaks = seq(0.2, 1.8, 0.1)) +
#     ggtitle(paste0("Trends for ", 
#                    nrow(Herbs), " African herbivores"))+
#     theme(plot.title = element_text(hjust = 0.5)) +
#     scale_color_discrete(name = "Trend type") +
#     theme_minimal()

herbs1 <- trend_long_herbs %>%
    filter(Trend.type %in% c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))
herbs2 <- lambda_long_herbs %>%
    filter(Trend.type %in% c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))

h <- ggplot(data=herbs1,
            aes(x=year, y=trend, colour=Trend.type )) +
    geom_smooth(lwd=0.5, se=T, method = loess) +
    geom_point(cex=0.5) +
    # geom_line(cex=0.1) +
    geom_abline(intercept = 1, slope = 0, lty =2) +
    scale_color_manual(values=cls) +
    ggtitle(paste0("Herbivores (", 
                   nrow(Herbs), " species)"),
           "Geometric mean species abundance, relative to 1992") +
 ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

h2 <- ggplot(data=herbs2,
            aes(x=year, y=trend, colour=Trend.type )) +
    geom_smooth(lwd=0.5, se=T, method = loess) +
    geom_point(cex=0.5) +
    # geom_line(cex=0.1) +
    geom_abline(intercept = 1, slope = 0, lty =2) +
    scale_color_manual(values=cls) +
        ggtitle("", "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(h, h2, ncol = 2, widths = c(1,1.5))

carns1 <- trend_long_carne %>% 
    filter(Trend.type %in%  c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))

c <- ggplot(data=carns1,
            aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Carnivores (", 
                       nrow(Carne), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
        annotate("text", x=2004, y = 1.5, hjust = 1, cex = 3,
                 label = paste0("and ", unique(lpd[lpd$Species.ID %in% Carne$Spec.ID,
                                           "Common_name"])[1], ")")) +
        annotate("text", x=2004, y = 1.75, hjust = 1, cex = 3,
                 label = paste0("(", 
                                unique(lpd[lpd$Species.ID %in% Carne$Spec.ID, 
                                           "Common_name"])[2]))

omns1 <- trend_long_omniv %>%
    filter(Trend.type %in%  c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))

o <- ggplot(data=omns1,
            aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Omnivores (", 
                       nrow(Omniv), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold")) +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
        annotate("text", x=2004, y = 1.15, hjust = 1, cex = 3,
                 label = paste0("(", 
                                lpd[lpd$Species.ID == Omniv$Spec.ID, "Common_name"][1], 
                                ")"))

grid.arrange(c, o, ncol = 2, widths = c(1,1.4))

# k <- median(allspecies$BM_g)

Trends for herbivores are not bad. Especially when inspecting the annual change (lambda), which is relatively on-point, except between 1995-1999. But again, does it make sense to interpret the fit between models and observed data after aggregating to the index, if predictions for individual species are not good?

NB: I didnt plot lambdas for carnivores/omnivores


5.4 Body size

#Load species traits and taxonomy
Traits <- read.csv("data/density/fromLuca/M_Traits.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by.x = 'Spec.ID', by.y = 'tax_id', all.x=TRUE)

props <- seq(0, 1, 0.25)

allspecies$qnt<- cut(allspecies$BM_g , breaks=quantile(allspecies$BM_g, props),
                                    labels=1:(length(props)-1), include.lowest=TRUE)

## Modelled trends
plots <- list()
lplots <- list()
for (j in 1:nlevels(allspecies$qnt)) {
        trends.set <- data.frame(year = plotyears)
        lambdas.set <- data.frame(year = plotyears)
        set <- subset(allspecies, qnt == j)
        for (i in c(1:4)) {
                ii <- i+1
                dat <- all.trends[[i]]
                datS <- subset(dat, Species.ID %in% set$Spec.ID)
                trends.set[, ii] <- apply(datS[-c(1:3)],
                                          MARGIN = 2,
                                          FUN = function(x) gm_mean(x))
                dat <- all.lambdas[[i]]
                datH <- subset(dat, Species.ID %in% set$Spec.ID)
                lambdas.set[, ii] <- apply(datH[-c(1:3)],
                                           MARGIN = 2,
                                           FUN = function(x) mean(x, na.rm = T))
                    lambdas.set[is.nan(lambdas.set)] <- NA
        }
        names(trends.set) <- nms
        trends.set.long <- gather(trends.set, key = "Trend.type", value = "trend", 
                                  'Observed.abundance':'Modelled.abundance')
        trends.set.long$Trend.type <- factor(trends.set.long$Trend.type, 
                                             levels=lvls,
                                             labels=lbls)
        names(lambdas.set) <- nms
        lambdas.set.long <- gather(lambdas.set, key = "Trend.type", value = "trend", 
                                   'Observed.abundance':'Modelled.abundance')
        lambdas.set.long$Trend.type <- factor(lambdas.set.long$Trend.type, 
                                              levels=lvls,
                                              labels=lbls)

g <- ggplot(data=trends.set.long,
             aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Species body mass ",
                       round(quantile(allspecies$BM_g, props)[j]/1000, 0), "-", 
                       round(quantile(allspecies$BM_g, props)[j+1]/1000, 0), 
                       "kg (",
                       nrow(datS), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992]))

l <- ggplot(data=lambdas.set.long,
                      aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        labs(y = "lambda") +
        theme(plot.title = element_text(hjust = 0, face = "bold"))

grid.arrange(g, l, ncol = 2, widths = c(1,1.4))

}


5.5 RMSE

RMSE1 <- RMSE[1:(nrow(RMSE)/3), ]
RMSE1$Spec.ID <- observedspecies
# ids <- as.numeric(rownames(subset(RMSE1, rmse < k)))  
# habspec <- observedspecies[ids[ids < 36]]

cuts <- c(0.05, 0.1, 0.2, 0.5)

## Modelled trends
plots <- list()
lplots <- list()
for (j in 1:4) {
        trends.set <- data.frame(year = plotyears)
        lambdas.set <- data.frame(year = plotyears)
        set <- subset(RMSE1, value < cuts[j])
        for (i in c(1:4)) {
                ii <- i+1
                dat <- all.trends[[i]]
                datS <- subset(dat, Species.ID %in% set$Spec.ID)
                trends.set[, ii] <- apply(datS[-c(1:3)],
                                          MARGIN = 2,
                                          FUN = function(x) gm_mean(x))
                dat <- all.lambdas[[i]]
                datH <- subset(dat, Species.ID %in% set$Spec.ID)
                lambdas.set[, ii] <- apply(datH[-c(1:3)],
                                           MARGIN = 2,
                                           FUN = function(x) mean(x, na.rm = T))        
                lambdas.set[is.nan(lambdas.set)] <- NA
        }
        names(trends.set) <- nms
        trends.set.long <- gather(trends.set, key = "Trend.type", value = "trend", 
                                  'Observed.abundance':'Modelled.abundance')
        trends.set.long$Trend.type <- factor(trends.set.long$Trend.type, 
                                             levels=lvls,
                                             labels=lbls)
        names(lambdas.set) <- nms
        lambdas.set.long <- gather(lambdas.set, key = "Trend.type", value = "trend", 
                                   'Observed.abundance':'Modelled.abundance')
        lambdas.set.long$Trend.type <- factor(lambdas.set.long$Trend.type, 
                                              levels=lvls,
                                              labels=lbls)

g <- ggplot(data=trends.set.long,
             aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Species model RMSE < ",
                       cuts[j], " (",
                       nrow(datS), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992]))

l <- ggplot(data=lambdas.set.long,
                      aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        labs(y = "lambda") +
        theme(plot.title = element_text(hjust = 0, face = "bold"))

grid.arrange(g, l, ncol = 2, widths = c(1,1.4))

}

6 Species accuracy ranking

dt <- data.frame(Species = unique(lpd[lpd$Species.ID %in% observedspecies,
                                           "Common_name"])[-4],
                 Taxid = unique(lpd[lpd$Species.ID %in% observedspecies, 
                                            "Species.ID"]),
                 "LinearSlope_observed" = lmObs,
                 "LinearSlope_habitat" = lmHab,
                 "LinearSlope_density" = lmDens,
                 "LinearSlope_combined" = lmAbun,
                 'RMSE_habitat' = (rmseHab[, 1]),
                 'RMSE_density' = (rmseDens[, 1]),
                 'RMSE_combined' = (rmseAbun[, 1]),
                 "MAE_habitat" = (maeHab[, 1]),
                 'MAE_density' = (maeDens[, 1]),
                 "MAE_combined" = (maeAbun[, 1]))
dt <- dt[order(dt$'RMSE_habitat'), ]
rownames(dt) <- NULL

write.csv(dt, "outputs/Africa/Africa_goodness_of_fit_metrics.csv", row.names = F)

dt[, -c(1,2)] <- lapply(dt[, -c(1,2)], round, 2)
kable(dt, caption = "Mean species abundance error across years ordered by habitat-only RMSE") 
Table 6.1: Mean species abundance error across years ordered by habitat-only RMSE
Species Taxid LinearSlope_observed LinearSlope_habitat LinearSlope_density LinearSlope_combined RMSE_habitat RMSE_density RMSE_combined MAE_habitat MAE_density MAE_combined
Roan antelope 10167 0.01 0.00 0.00 0.01 0.01 0.17 0.18 0.01 0.14 0.15
Burchell’s zebra / Plains zebra / Common zebra 41013 0.01 0.00 -0.02 -0.02 0.02 0.06 0.10 0.01 0.05 0.07
Guereza / Magistrate colobus / Eastern black-and-white colobus 5143 0.01 0.00 0.00 0.00 0.02 0.06 0.07 0.02 0.05 0.06
Grey-cheeked mangabey 12309 0.01 0.00 -0.01 -0.01 0.02 0.05 0.05 0.02 0.04 0.04
African buffalo / Cape buffalo 21251 0.02 0.01 -0.02 -0.02 0.02 0.08 0.10 0.02 0.06 0.06
Giraffe 9194 -0.01 0.00 -0.02 -0.02 0.03 0.04 0.07 0.03 0.03 0.06
Greater kudu 22054 0.02 0.00 -0.02 -0.02 0.03 0.08 0.11 0.02 0.07 0.07
Impala / Black-faced impala 550 0.02 0.00 -0.03 -0.03 0.03 0.09 0.13 0.03 0.08 0.11
Nyala 22052 0.02 0.00 0.02 0.02 0.03 0.09 0.09 0.03 0.09 0.09
Sable antelope 10170 -0.01 0.01 0.00 0.01 0.04 0.19 0.20 0.03 0.17 0.18
Grant’s gazelle 8971 -0.02 0.00 -0.03 -0.03 0.05 0.13 0.13 0.04 0.12 0.12
Blue monkey 4221 0.01 0.00 -0.01 -0.01 0.05 0.12 0.11 0.05 0.09 0.08
Desert warthog / Cape warthog 41767 0.02 0.00 -0.02 -0.02 0.06 0.18 0.17 0.05 0.14 0.13
Southern white rhinoceros / Northern white rhinoceros 4185 0.03 0.00 -0.02 -0.02 0.06 0.09 0.11 0.06 0.08 0.09
White-bearded wildebeest / Blue wildebeest 5229 0.00 0.00 0.01 0.01 0.06 0.18 0.18 0.05 0.17 0.17
Kob 11036 -0.02 0.00 -0.02 -0.02 0.06 0.06 0.06 0.06 0.06 0.06
Common warthog / Desert warthog 41768 0.01 0.01 0.03 0.08 0.06 0.08 0.10 0.06 0.07 0.07
Sitatunga 22050 -0.01 -0.01 -0.01 -0.02 0.07 0.14 0.14 0.07 0.11 0.11
Thomson’s gazelle 8982 0.00 0.00 -0.03 -0.03 0.08 0.20 0.20 0.07 0.17 0.17
Bushpig 41770 0.06 0.00 -0.02 -0.02 0.09 0.17 0.18 0.08 0.12 0.13
Black rhinoceros 6557 -0.03 0.00 0.00 0.00 0.09 0.14 0.12 0.08 0.12 0.10
African lion / Asiatic lion 15951 0.06 0.00 -0.03 -0.03 0.10 0.21 0.21 0.08 0.16 0.15
Giant eland / Derby’s eland 44172 0.09 0.00 0.02 0.02 0.12 0.09 0.09 0.12 0.08 0.08
Mountain gorilla / Eastern lowland gorilla 39994 0.01 0.12 -0.01 0.10 0.13 0.16 0.23 0.12 0.15 0.18
Hartebeest / Swayne’s hartebeest 811 -0.03 0.01 -0.02 -0.02 0.13 0.14 0.16 0.10 0.11 0.13
Waterbuck 11035 0.08 0.01 -0.02 -0.02 0.14 0.19 0.20 0.13 0.17 0.18
Beisa / East African oryx 15571 0.00 0.00 -0.03 -0.03 0.16 0.31 0.30 0.14 0.28 0.27
Steenbok / Steenbuck 19308 -0.03 0.00 -0.01 -0.01 0.17 0.22 0.23 0.17 0.19 0.19
Common eland / Eland 22055 0.05 0.00 -0.02 -0.02 0.19 0.22 0.26 0.18 0.21 0.25
Common duiker / Grey duiker 21203 -0.05 0.00 0.03 0.03 0.19 0.28 0.28 0.18 0.22 0.22
Gerenuk 12142 -0.06 0.00 -0.03 -0.03 0.25 0.16 0.16 0.22 0.14 0.15
Cheetah 219 0.44 0.00 -0.03 -0.03 0.33 0.50 0.49 0.26 0.41 0.40
Bushbuck 22051 0.07 0.01 0.03 0.08 0.38 0.48 0.48 0.34 0.44 0.40
Klipspringer / Western klipspringer 15485 0.79 0.00 -0.03 -0.03 0.45 0.55 0.58 0.32 0.38 0.41
Hippopotamus 10103 1.73 0.09 -0.05 0.02 0.51 1.45 1.45 0.46 1.09 1.17

7 Conclusions so far

From these results, it seems that:

  1. Including density-dynamics does not improve predictions of mammal abundance trends: there is not a great difference between model types (neither are very good). This is potentially due to the specification of the density model itself. Luca reran his model selection procedure on his African data, and this resulted in inclusion of annual mean temperature (amT) and precipitaion of the warmest quarter (Pwq) as environemntal predictors (compared to temperature seasonality for North America). With Africa centered on the equator, amT doesn’t actually vary that much over the study period, resulting in similar trend predictions between the habitat-only and habitat+density models.

  2. There are still some obvious issues with the GLOBIO-4 landuse data (e.g., forests on the border between Egypt and Sudan). PBL will rerun the land allocation, but there isn’t an ETA for this. I may have to manually check whether my population coordinates are in obvious problem areas, but such visual inspection wouldn’t fully put me at ease about this.

  3. I might need to look into what detirmines prediction accuracy for individual species, (i.e., is RMSE predictable from species traits, landscape features, known species history, etc.) Would like to discuss ideas for this…