1 Setup

## -- Packages -- ####
library(data.table)
library(tidyverse)
library(ggplot2)
library(knitr)
library(taRifx)
library(zoo)
library(mgcv)
library(Metrics) # rmse
library(grid)
library(gridExtra)
library(ggpubr)
library(boot)
library(ggsci)

## -- Functions -- ####
gm_mean = function(x, na.rm=TRUE) {
    exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}

source("code/FUN_multiplot.R")

2 Data

Buffer species
Species for which the exact geographic coordinates of the population abundance records in the Living Planet Database are known. A buffer with 0.25 degree radius was drawn around these coordinates and abundance trends were calculated for suitable habitat inside these buffers only.

This approach yields a much larger total species sample (52), which allows us to calculate a cutoff year which jointly maximizes the species sample size and the temporal scope of the analysis; to generate a consistent set of species over time.

buffer = "050"
version = "v2"
params=list(lpdfile = "data/biodiversity/Africa/LPI_pops_Africa_spec_loc_observed.csv",
            habfile = paste0("outputs/Africa/Predictions/", version,
                             "/Buffer-", buffer,
                             "-Species-Total-Suitable-Area-by-Year.csv"),
            densfile = paste0("outputs/Africa/Predictions/", version,
                              "/Buffer-", buffer,
                              "-Species-Mean-Density-by-Year.csv"),
            abunfile = paste0("outputs/Africa/Predictions/", version,
                              "/Buffer-", buffer,
                              "-Species-Total-Abundance-by-Year.csv"))

## -- Read data -- ####

years   <- c(1992:2013)

## BUFFER
lpd <- read.csv(params$lpdfile, na.strings = "NULL")
trends.hab <- read.csv(params$habfile)
trends.dens <- read.csv(params$densfile)
trends.abun <- read.csv(params$abunfile)

trends.hab <- trends.hab[complete.cases(trends.hab), ]
trends.dens <- trends.dens[complete.cases(trends.dens), ]
trends.abun <- trends.abun[complete.cases(trends.abun), ]

names(trends.hab)[1] <- "Species.ID"
names(trends.dens)[1] <- "Species.ID"
names(trends.abun)[1] <- "Species.ID"

myspecies <- unique(lpd$Species.ID[lpd$Species.ID %in% unique(trends.abun$Species.ID)])

source("code/FUN_interpolate.R")

lpd$myid <- c(1:nrow(lpd))

# Select data only
lpd.data <- lpd %>%
    filter(Species.ID %in% myspecies) %>%
    select(Binomial, Species.ID, Common_name, myid, X1950:X2015)

lpd.data$Binomial <- as.character(lpd.data$Binomial)

lpd.data[, -c(1:4)] <- apply(lpd.data[, -c(1:4)], 2, as.numeric)
colnames(lpd.data) <- c(colnames(lpd.data[1:4]), 1950:2015)

lpd.interpolated <- lpd.data
lpd.interpolated[, -c(1:4)] <- t(apply(lpd.data[, -c(1:4)], 1, 
                                interpolate))

lpd.interpolated[which(lpd.interpolated[, "1992"] == 0), "1992"] <- NA # first year cannot be 0
lpd.interpolated <- lpd.interpolated[!is.na(lpd.interpolated[, "1992"]), ] #first year cannot be NA

lpd.interpolated <- droplevels(lpd.interpolated)

## -- Plot sample size over time ####
studyyears <- as.character(years)

l <- lpd.interpolated[studyyears]

# Get number of years data for each population (long-format)
t <- data.frame(Species.ID = lpd.interpolated$Species.ID, 
                nyears = max.col(is.na(l), "first")-1) # Last data point == First NA-point -1

# Group number of years per population by species; retain maximum
t2 <- t %>%
  group_by(Species.ID) %>%
  dplyr::summarise(maxyear = max(nyears)+1991) # column 1 = 1992; 1 year of data -> maxyear == 1992 
  
df <- data.frame(year = years, ss = NA)
for (i in years) {
  df[df$year == i, "ss"] <- sum(t2$maxyear >= i)
}

ggplot(df, aes(x=year, y=ss))  +
        geom_vline(xintercept = 2005, col = "grey30", lty = 2) +
        geom_hline(yintercept = df[df$year == 2005, "ss"], col = "grey30", lty = 2) +
        # annotate("segment", x1=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        # annotate("rect", xmin=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        geom_point() +
        # geom_smooth(se=F) +
        xlab("Cutoff year") +
        ylab("Number of species in sample") +
        theme_minimal() +
        theme(axis.line.x = element_line(),
              axis.line.y = element_line())

$\label{fig:figs}Potential sample size of species for which specific geographic coordinates of observed records are available$

Figure 2.1: Potential sample size of species for which specific geographic coordinates of observed records are available

Dashed lines in figure above show cutoffs where year * sample size is maximized. This yields a cutoff at year 2005. There are 35 species that have data at least up to 2005.

# Set years to the new range
# years   <- c(1992:2005)
years   <- c(1992, 1995, 2000, 2005)

# Subset species
lpd.interpolated <- lpd.interpolated[, c(1:3, which(names(lpd.interpolated) %in% years))]
lpd.interpolated <- lpd.interpolated[complete.cases(lpd.interpolated), ]

names(trends.hab)[2:6] <- c("1992", "1995", "2000", "2005", "2010")
names(trends.dens)[2:6] <- c("1992", "1995", "2000", "2005", "2010")
names(trends.abun)[2:6] <- c("1992", "1995", "2000", "2005", "2010")

# Add NA columns for missing years
addyears <- function(x) {
df <- data.frame(Species.ID = x[["Species.ID"]])
        for (y in years) {
                # if (y %in% names(x)) {
                        df[[as.character(y)]] <- x[[as.character(y)]]
                # } else {df[[as.character(y)]] <- NA}
        }
        return(df)
}
trends.hab <- addyears(trends.hab)
trends.dens <- addyears(trends.dens)
trends.abun <- addyears(trends.abun)

# # Interpolate these years
# trends.abun[, -c(1)] <- t(apply(trends.abun[, -c(1)], 1,
#                                 interpolate))
# trends.hab[, -c(1)] <- t(apply(trends.hab[, -c(1)], 1,
#                                 interpolate))

observedspecies <- unique(lpd.interpolated$Species.ID) # have already intersected with unique output species 
k <- length(observedspecies)

3 Species trends

3.1 Plots for individual species

Trends in species abundance over time relative to 1992. When more than one population was available for a species, the average species trend was calculated as the trend in geometric mean abundance across populations. Values in parentheses refer to number of observed population timeseries (n) available for each species.

Do we expect these to be good for individual species?

trend.obs <- NULL
lambda.obs <- NULL

ccfHab <- NULL
ccfDens <- NULL
ccfAbun <- NULL

rmseHab <- NULL
rmseDens <- NULL
rmseAbun <- NULL

maeHab <- NULL
maeDens <- NULL
maeAbun <- NULL

biasHab <- NULL
biasDens <- NULL
biasAbun <- NULL

lmObs <- NULL
lmHab <- NULL
lmDens <- NULL
lmAbun <- NULL

lmObs.p <- NULL
lmHab.p <- NULL
lmDens.p <- NULL
lmAbun.p <- NULL

plotyears <- c(1992, 1995, 2000, 2005)

# source("code/FUN_getlambdas.R")

out <- list(trends.hab, trends.dens, trends.abun)
nms  <- c("year", 'Observed.abundance', "Modelled.habitat", 
          "Modelled.density", "Modelled.abundance")
lvls <- c("Observed.abundance", "Modelled.habitat", "Modelled.density", "Modelled.abundance")
lbls <- c("Observed trend", "Habitat model", "Density model", "Combined model")
cls  <- c("#ff7f00", "#4daf4a", "#377eb8", "#984ea3")
cls3 <- pal_locuszoom()(4)[c(3,2,4)]
cls1 <- pal_jco()(3)[c(3)]
cls <- c(cls1, cls3)
show_col(cls)

for (sp in observedspecies) {
    ## Modelled trends
    trends <- data.frame(year = years)
    lambdas <- data.frame(year = years)
    
    # Observed trends
    lpd.d <- subset(lpd.interpolated, Species.ID == sp)
    
    trends$'Observed.abundance' = apply(lpd.d[,-c(1,2,3)],
                                        MARGIN = 2,
                                        FUN = function(x) {
                                                # gm_mean accross populations:
                                                # gm_mean(x)/gm_mean(y) == gm_mean(x/y)
                                  if (all(is.na(x))) return(NA) else gm_mean(x/lpd.d[,4]) 
                                  })
    
    ## Observed lambdas
    # lpd.dat <- subset(lpd.interpolated, Species.ID == sp)
    # dat <- select(lpd.dat, 1950:2015)
    # names(dat) <- 1950:2015
    temp <- list()
    for (pop in 1:nrow(lpd.d)){
        datpop <- as.numeric(lpd.d[pop, -c(1,2,3)])
        # temp[[pop]] <- as.numeric(getlambdas(datpop))
        temp[[pop]] <- diff(log10(datpop))
    }
    test <- as.data.frame(do.call(rbind, temp))
    # names(test) <- 1993:2005
    names(test) <- plotyears[2:length(plotyears)]
    lambdas$'Observed.abundance' <- c(NA, colMeans(test, na.rm = T))
    
    for (i in 1:length(out)) {
        ii <- i+2
        
        dat <- as.data.frame(out[[i]])
        # dat$Species.ID <- species
        dat <- subset(dat, Species.ID == sp)
        
        trends[, ii] <- apply(dat[-which(names(dat)=="Species.ID")],
                              MARGIN = 2, # over each year (i.e. column)
                              FUN = function(x) x/dat[,2]) # gm_mean is superfluous here
        
        dat <- dat[-which(names(dat)=="Species.ID")]
        if (all(is.na(dat))) {
            lambdas[, ii] <- rep(NA, 4) # 14 for full window
        } else {
            names(dat) <- years
            dat <- apply(dat, 2, as.numeric)
            lambda <- diff(log10((dat)))
            lambdas[, ii] <- c(NA, lambda)
        }
    }
    
    names(trends) <- nms
    names(lambdas) <- nms

    # # This to verify lambdas compound back to the trends
    # indices <- data.frame(year = years)
    # for (i in 1:3){
    #     ii <- i+1
    #     indices[1, ii] <- 1
    #     for (t in c(2:14)) {
    #         indices[t, ii] <- indices[(t-1), ii]*(10^(lambdas[t, ii]))
    #     }}
    # names(indices) <- c("year", #"Temperature.(SD)", "Precipitation.(WQ)",
    #                        "Modelled.habitat", "Modelled.abundance",
    #                        "Observed.abundance")
    
    trends[is.na(trends$Observed.abundance), 3:5] <- NA
    lambdas[is.na(lambdas$Observed.abundance), 3:5] <- NA
    
    # Key to not compare interpolated trends (can only be linear)
    trends <- subset(trends, year %in% plotyears)
    lambdas <- subset(lambdas, year %in% plotyears)
    
    ## Row bind trends for all species
    t1 <- cbind(lpd.d[1, c(1,2,3)], t(trends$Observed.abundance))
    t2 <- cbind(lpd.d[1, c(1,2,3)], t(trends$Modelled.habitat))
    t3 <- cbind(lpd.d[1, c(1,2,3)], t(trends$Modelled.density))
    t4 <- cbind(lpd.d[1, c(1,2,3)], t(trends$Modelled.abundance))
    # t4 <- cbind(lpd.d[1,c(1,2,3)], t(trends$`Temperature.(SD)`))
    # t5 <- cbind(lpd.d[1,c(1,2,3)], t(trends$`Precipitation.(WQ)`))
    
    if (is.null(trend.obs)) {
        trend.obs <- t1
        trend.hab <- t2
        trend.dens <- t3
        trend.abun <- t4
        # trend.Tsd <- t4
        # trend.Pwq <- t5
    } else {
        trend.obs <- rbind(trend.obs, t1)
        trend.hab <- rbind(trend.hab, t2)
        trend.dens <- rbind(trend.dens, t3)
        trend.abun <- rbind(trend.abun, t4)
        # trend.Tsd <- rbind(trend.Tsd, t4)
        # trend.Pwq <- rbind(trend.Pwq, t5)
    }
    
    # ## Row bind lambdas for all species
    l1 <- cbind(lpd.d[1,c(1,2,3)], t(lambdas$Observed.abundance))
    l2 <- cbind(lpd.d[1,c(1,2,3)], t(lambdas$Modelled.habitat))
    l3 <- cbind(lpd.d[1,c(1,2,3)], t(lambdas$Modelled.density))
    l4 <- cbind(lpd.d[1,c(1,2,3)], t(lambdas$Modelled.abundance))
    # l4 <- cbind(lpd.d[1,c(1,2,3)], t(lambdas$`Temperature.(SD)`))
    # l5 <- cbind(lpd.d[1,c(1,2,3)], t(lambdas$`Precipitation.(WQ)`))
    
    if (is.null(lambda.obs)) {
        lambda.obs <- l1
        lambda.hab <- l2
        lambda.dens <- l3
        lambda.abun <- l4
    } else {
        lambda.obs <- rbind(lambda.obs, l1)
        lambda.hab <- rbind(lambda.hab, l2)
        lambda.dens <- rbind(lambda.dens, l3)
        lambda.abun <- rbind(lambda.abun, l4)
    }
    
    # # Cross-correlations: estimates the correlation between x[t+k] and y[t]
    # ccfHab.i <- t(ccf(trends$Observed.abundance, 
    #                    trends$Modelled.habitat, 
    #                    na.action = na.pass, plot = F, 
    #                    lag.max = 5)$acf[,,1])
    # ccfDens.i <- t(ccf(trends$Observed.abundance,
    #                    trends$Modelled.abundance,
    #                    na.action = na.pass, plot = F,
    #                    lag.max = 5)$acf[,,1])
    # 
    # if (is.null(ccfHab)) {
    #     ccfHab <- ccfHab.i
    #     ccfDens <- ccfDens.i
    # } else {
    #     ccfHab <- rbind(ccfHab, ccfHab.i)
    #     ccfDens <- rbind(ccfDens, ccfDens.i)
    # }

    llambdas <- lambdas[2:4, ]  
    
    if (is.null(biasHab)) {
            biasHab <- -bias(llambdas$Observed.abundance, llambdas$Modelled.habitat)
            biasDens <- -bias(llambdas$Observed.abundance, llambdas$Modelled.density)
            biasAbun <- -bias(llambdas$Observed.abundance, llambdas$Modelled.abundance)
    } else {
            biasHab <- rbind(biasHab, 
                             -bias(llambdas$Observed.abundance, llambdas$Modelled.habitat))
            biasDens <- rbind(biasDens, 
                              -bias(llambdas$Observed.abundance, llambdas$Modelled.density))
            biasAbun <- rbind(biasAbun, 
                              -bias(llambdas$Observed.abundance, llambdas$Modelled.abundance))
    }
    
    if (is.null(maeHab)) {
            maeHab <- mae(llambdas$Observed.abundance, llambdas$Modelled.habitat)
            maeDens <- mae(llambdas$Observed.abundance, llambdas$Modelled.density)
            maeAbun <- mae(llambdas$Observed.abundance, llambdas$Modelled.abundance)
    } else {
            maeHab <- rbind(maeHab, 
                            mae(llambdas$Observed.abundance, llambdas$Modelled.habitat))
            maeDens <- rbind(maeDens, 
                             mae(llambdas$Observed.abundance, llambdas$Modelled.density))
            maeAbun <- rbind(maeAbun, 
                             mae(llambdas$Observed.abundance, llambdas$Modelled.abundance))
    }
    
    if (is.null(rmseHab)) {
            rmseHab <- rmse(llambdas$Observed.abundance, llambdas$Modelled.habitat)
            rmseDens <- rmse(llambdas$Observed.abundance, llambdas$Modelled.density)
            rmseAbun <- rmse(llambdas$Observed.abundance, llambdas$Modelled.abundance)
    } else {
            rmseHab <- rbind(rmseHab, 
                             rmse(llambdas$Observed.abundance, llambdas$Modelled.habitat))
            rmseDens <- rbind(rmseDens, 
                              rmse(llambdas$Observed.abundance, llambdas$Modelled.density))
            rmseAbun <- rbind(rmseAbun, 
                              rmse(llambdas$Observed.abundance, llambdas$Modelled.abundance))
    }
    
    if (is.null(lmObs)) {
            lmObs <- lm(trends$Observed.abundance~plotyears)$coefficients[[2]]
            lmHab <- lm(trends$Modelled.habitat~plotyears)$coefficients[[2]]
            lmDens <- lm(trends$Modelled.density~plotyears)$coefficients[[2]]
            lmAbun <- lm(trends$Modelled.abundance~plotyears)$coefficients[[2]]
    } else {
            lmObs <- rbind(lmObs,
                           lm(trends$Observed.abundance~plotyears)$coefficients[[2]])
            lmHab <- rbind(lmHab,
                           lm(trends$Modelled.habitat~plotyears)$coefficients[[2]])
            lmDens <- rbind(lmDens,
                            lm(trends$Modelled.density~plotyears)$coefficients[[2]])
            lmAbun <- rbind(lmAbun,
                           lm(trends$Modelled.abundance~plotyears)$coefficients[[2]])
    }
    
    if (is.null(lmObs.p)) {
            lmObs.p <- summary(lm(trends$Observed.abundance~plotyears))$coefficients[2,4]
            lmHab.p <- summary(lm(trends$Modelled.habitat~plotyears))$coefficients[2,4]
            lmDens.p <- summary(lm(trends$Modelled.density~plotyears))$coefficients[2,4]
            lmAbun.p <- summary(lm(trends$Modelled.abundance~plotyears))$coefficients[2,4]
    } else {
            lmObs.p <- rbind(lmObs.p,
                           summary(lm(trends$Observed.abundance~plotyears))$coefficients[2,4])
            lmHab.p <- rbind(lmHab.p,
                           summary(lm(trends$Modelled.habitat~plotyears))$coefficients[2,4])
            lmDens.p <- rbind(lmDens.p,
                            summary(lm(trends$Modelled.habitat~plotyears))$coefficients[2,4])
            lmAbun.p <- rbind(lmAbun.p,
                           summary(lm(trends$Modelled.habitat~plotyears))$coefficients[2,4])
    }
    
    ## Trends
    trends_data_long <- gather(trends, key = "Trend.type", value = "index",
                               'Observed.abundance':'Modelled.abundance')

    trends_data_long$Trend.type <- factor(trends_data_long$Trend.type,
                                          levels=lvls,
                                          labels=lbls)

    g <- ggplot(data=trends_data_long,
                aes(x=year, y=index, colour=Trend.type)) +
            geom_smooth(lwd=0.5, se=F, method = loess) +
            geom_point(cex=0.5) +
            scale_color_manual(values = cls) +
            # scale_x_continuous(breaks = years) +
            ggtitle(paste0(lpd.d$Common_name[1], " (n = ", #lpd.d$Binomial[1],
                           nrow(lpd.d), ")"),
                    "Geometric Mean Species Abundance relative to 1992") +
            labs(y = expression(GMA[t]~"/"~GMA[1992])) +
            theme_minimal() +
            theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

    ## Rate of change (lambda)
    lambdas_data_long <- gather(lambdas, key = "Lambda.type", value = "lambda",
                               'Observed.abundance':'Modelled.abundance')

    lambdas_data_long$Lambda.type <- factor(lambdas_data_long$Lambda.type,
                                            levels=lvls,
                                            labels=lbls)

    l <- ggplot(data=lambdas_data_long,
                aes(x=year, y=lambda, colour=Lambda.type)) +
    geom_smooth(lwd=0.5, se=F, method = loess) +
    geom_point(cex=0.5) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), 
              legend.position = "right") +
            labs(colour = "Trend type")
    
    grid.arrange(g, l, ncol = 2, widths = c(1,1.5))

    # ## Rate of change (lambda)
    # indices_long <- gather(indices, key = "Index.type", value = "index",
    #                        'Modelled.habitat':'Observed.abundance')
    # indices_long$Index.type <- factor(indices_long$Index.type,
    #                                   levels=c("Observed.abundance",
    #                                            "Modelled.abundance",
    #                                            "Modelled.habitat"),
    #                                   labels=c("Observed abundance",
    #                                            "Modelled abundance",
    #                                            "Modelled habitat"))

    # idx <- ggplot(data=indices_long,
    #               aes(x=year, y=index, colour=Index.type )) +
    #     geom_smooth(lwd=0.5, se=F) +
    #     geom_point(cex=0.5) +
    #     scale_x_continuous(breaks = years) +
    #     # scale_y_continuous(minor_breaks = seq(0.25, 1.75, 0.025),
    #     #                    breaks = seq(0.2, 1.8, 0.1)) +
    #     ggtitle(paste0(lpd.d$Common_name[1]," (", lpd.d$Binomial[1],
    #                    ") n =", nrow(lpd.d)),
    #             "Change from 1992 (index calculated from lambdas)") +
    #     theme(plot.title = element_text(hjust = 0.5)) +
    #     scale_color_discrete(name = "Index") +
    #     theme_minimal()
    # print(idx)
}
    
all.trends  <- list(trend.obs, trend.hab, trend.dens, trend.abun)
all.lambdas <- list(lambda.obs, lambda.hab, lambda.dens, lambda.abun)

3.2 Index across all species

Trends in geometric mean abundance across all species
With bootstrapped confidence intervals (10000 random samples of species)

# Bootstrap 95% CI for regression coefficients 

# function to obtain regression weights 
bs <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  gma <- apply(d[, -c(1:4)],
                          MARGIN = 2,
                          FUN = function(x) gm_mean(x))
  return(gma) 
} 

# # bootstrapping with 1000 replications 
# results <- boot(data=dat, statistic=bs, R=1000)
# 
# # get 95% confidence intervals 
# ci.1995 <- boot.ci(results, type="norm", index=1)$normal[2:3]
# ci.2000 <- boot.ci(results, type="norm", index=2)$normal[2:3]
# ci.2005 <- boot.ci(results, type="norm", index=2)$normal[2:3]

is.nan.data.frame <- function(x) {
do.call(cbind, lapply(x, is.nan)) }

R <- 1000

## Modelled trends
trends <- data.frame(year = plotyears)
std.err <- data.frame(year = plotyears)
lambdas <- data.frame(year = plotyears)
betas <- data.frame(row.names = lvls, "index" = rep(NA, 4), 
                    "lower" = rep(NA, 4), "upper" = rep(NA, 4))
ci.lower <- data.frame(year = plotyears)
ci.upper <- data.frame(year = plotyears)

# indices <- data.frame(year = years) 

for (i in c(1:4)) {
    dat <- all.trends[[i]]
    dat <- subset(dat, Species.ID %in% observedspecies)
    names(dat)[4:7] <- c(1992, 1995, 2000, 2005)
    ii <- i+1
    
    # bootstrapping with 1000 replications 
    results <- boot(data=dat, statistic=bs, R=R)
    
    # get 95% confidence intervals 
    ci.lower[1, ii] <- 1
    ci.upper[1, ii] <- 1
    for (y in 1:length(results$t0)) {
            ci.lower[y+1, ii] <- boot.ci(results, type="norm", index=y)$normal[2]
            ci.upper[y+1, ii] <- boot.ci(results, type="norm", index=y)$normal[3]
            }
    
    trends[1, ii] <- 1
    trends[2:4, ii] <- results$t0
    
    std.err[1, ii] <- 0
    std.err[2:4, ii] <- apply(results$t, 2, sd)
    
    # get 95% slopes
    beta.tmp <- apply(results$t, 1, FUN = function(x) coefficients(lm(c(1, x)~plotyears))[[2]])
    # take 2.5 and 97.5 quantile
    betas[i, 2:3] <- quantile(beta.tmp, probs = c(0.025, 0.975))
    betas[i, "index"] <- coefficients(lm(c(1, results$t0)~plotyears))[[2]]
    
    dat2 <- all.lambdas[[i]]
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas[is.nan(lambdas)] <- NA

    # indices[1, ii] <- 1
    # for (t in c(2:14)) {
    #     indices[t, ii] <- indices[(t-1), ii]*(10^(lambdas[t, ii]))
    # }
}

names(trends) <- nms
names(std.err) <- nms
names(lambdas) <- nms
names(ci.lower) <- nms
names(ci.upper) <- nms

simtrends <- function (series) {
        sims <- data.frame(row.names = 1:R)
        for (y in years) {
                sims[, as.character(y)] <- rnorm(n = R, 
                             mean = log(trends[trends$year == y, series]), 
                             sd = std.err[trends$year == y, series]/trends[trends$year == y, series])
        }
        # sims <- exp(sims)
        # return(sims)
        
        fits <- t(apply(sims, 1, FUN = function(x) {
                lm <- loess(x~years, span = 1)
                pred <- predict(lm, newdata = c(1992:2005))#, se=T)
                # mtcars1$lwl <- pred$fit-1.96*pred$se.fit
                # mtcars1$upl <- pred$fit+1.96*pred$se.fit
                }))
        
        # return(c(exp(colMeans(fits))), apply(fits, 2, sd)*exp(colMeans(fits)))
        
        return(fits)
}

sims <- lapply(nms[2:5], simtrends)
names(sims) <- nms[2:5]

fit <- data.frame(year = as.character(c(1992:2005)))
se <- data.frame(year = as.character(c(1992:2005)))
for (x in 1:4){
        fit[, nms[x+1]] <- exp(colMeans(sims[[x]]))
        se[, nms[x+1]] <- apply(sims[[x]], 2, sd)*exp(colMeans(sims[[x]]))}

sims_long <- gather(fit, key = "Trend.type", value = "index",
                               'Observed.abundance':'Modelled.abundance')
sims_long$Trend.type <- factor(sims_long$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
std.err_long <- gather(se, key = "Trend.type", value = "se",
                               'Observed.abundance':'Modelled.abundance')
std.err_long$Trend.type <- factor(std.err_long$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
sims_long$se <- std.err_long$se

sims_long$year <- as.numeric(as.character(sims_long$year))

# Transpose lambdas and trend values to long
# sims_long <- gather(sims[[1]], key = "year", value = "Observed.abundance", -"sim")
# for (i in nms[3:5]){
# sims_long[[i]] <- gather(sims[[2]], key = "year", value = "tmp", -"sim")$tmp}
# sims_long <- gather(sims_long, key = "Trend.type", value = "index",
#                                'Observed.abundance':'Modelled.abundance')
# sims_long$Trend.type <- factor(sims_long$Trend.type,
#                                      levels=lvls,
#                                      labels=lbls)

trend_data_long <- gather(trends, key = "Trend.type", value = "index",
                               'Observed.abundance':'Modelled.abundance')
trend_data_long$Trend.type <- factor(trend_data_long$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
std.err_long <- gather(std.err, key = "Trend.type", value = "se",
                               'Observed.abundance':'Modelled.abundance')
std.err_long$Trend.type <- factor(std.err_long$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
trend_data_long$se <- std.err_long$se



lambda_data_long <- gather(lambdas, key = "Lambda.type", value = "lambda",
                               'Observed.abundance':'Modelled.abundance')
lambda_data_long$Lambda.type <- factor(lambda_data_long$Lambda.type,
                                     levels=lvls,
                                     labels=lbls)

# Transpose Confidence Intervals to long format
ci.low.long <- gather(ci.lower, key = "Trend.type", value = "index",
                               'Observed.abundance':'Modelled.abundance')
ci.low.long$Trend.type <- factor(ci.low.long$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
ci.upp.long <- gather(ci.upper, key = "Trend.type", value = "index",
                               'Observed.abundance':'Modelled.abundance')
ci.upp.long$Trend.type <- factor(ci.upp.long$Trend.type,
                                     levels=lvls,
                                     labels=lbls)

names(ci.low.long) <- c("year", "Trend.type", "lower")
names(ci.upp.long) <- c("year", "Trend.type", "upper")
trend_data_long <- cbind(trend_data_long, ci.low.long$lower, ci.upp.long$upper)
names(trend_data_long) <- c("year", "Trend.type", "index", "se", "lower", "upper")

# obs <- trends %>%
#         select(year, Observed.abundance) %>%
#         gather(key = "Trend.type", value = "index",
#                           'Observed.abundance') 
# obs$Trend.type <- factor(obs$Trend.type,
#                                      levels=c("Observed.abundance"),
#                                      labels=c("Observed trend"))
# 
# mods <- trends %>%
#         select(year, Modelled.habitat:Modelled.abundance) %>%
#         gather(key = "Trend.type", value = "index",
#                           "Modelled.habitat":"Modelled.abundance") 
# mods$Trend.type <- factor(mods$Trend.type,
#                                      levels=lvls[2:4],
#                                      labels=lbls[2:4])
# 
# allmods <- trends %>%
#         select(year, Observed.abundance:Modelled.abundance) %>%
#         gather(key = "Trend.type", value = "index",
#                           'Observed.abundance':"Modelled.abundance") 
# allmods$Trend.type <- factor(allmods$Trend.type,
#                                      levels=lvls,
#                                      labels=lbls)
# 
# lamsobs <- lambdas %>%
#         select(year, Observed.abundance) %>%
#         gather(key = "Lambda.type", value = "lambda",
#                           'Observed.abundance')
# lamsobs$Lambda.type <- factor(lamsobs$Lambda.type,
#                                      levels=c("Observed.abundance"),
#                                      labels=c("Observed trend"))
# 
# lamsmods <- lambdas %>%
#         select(year, Modelled.habitat:Modelled.abundance) %>%
#         gather(key = "Lambda.type", value = "lambda",
#                           "Modelled.habitat":"Modelled.abundance")
# lamsmods$Lambda.type <- factor(lamsmods$Lambda.type,
#                                      levels=lvls[2:4],
#                                      labels=lbls[2:4])
# alllams <- lambdas %>%
#         select(year, Observed.abundance:Modelled.abundance) %>%
#         gather(key = "Lambda.type", value = "lambda",
#                           'Observed.abundance':"Modelled.abundance")
# alllams$Lambda.type <- factor(alllams$Lambda.type,
#                                      levels=lvls,
#                                      labels=lbls)

t <- trend_data_long %>% filter(Trend.type == "Observed trend") %>%
        select(., -Trend.type)
s <- sims_long %>% filter(Trend.type == "Observed trend") %>%
         select(., -Trend.type)

trend_data_long <- trend_data_long %>% filter(Trend.type != "Observed trend")
sims_long <- sims_long %>% filter(Trend.type != "Observed trend")

Linear trends

# Linear  
polys <- data.frame(Trend.type = rep(lvls, 2),
                    year = c(rep(1992, 4), rep(2005, 4)),
                    lower = c(1,1,1,1,
                          1-(1992*betas$lower)+betas$lower*2005),
                    upper = c(1,1,1,1,
                          1-(1992*betas$upper)+betas$upper*2005))
polys$Trend.type <- factor(polys$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
lns <- data.frame(Trend.type = rep(lvls, 2),
                    year = c(rep(1992, 4), rep(2005, 4)),
                    index = c(1,1,1,1,
                          1-(1992*betas$index)+betas$index*2005))
lns$Trend.type <- factor(lns$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
l <- lns %>% filter(Trend.type == "Observed trend") %>%
        select(., -Trend.type)
p <- polys %>% filter(Trend.type == "Observed trend") %>%
        select(., -Trend.type)
lns <- lns %>% filter(Trend.type != "Observed trend")
polys <- polys %>% filter(Trend.type != "Observed trend")


ggplot(data = trend_data_long,
       aes(x=year)) +

        # # observed geom line
        geom_point(data = t, aes(y=index), color = cls1) +
        geom_errorbar(data = t,
                      aes(ymin=index-se, ymax=index+se, width = 0.2),
                          color = cls1) +
        geom_line(data = l, aes(y=index), lwd=0.5, color = cls1) +
        geom_ribbon(data = p, lwd=0.5,
                    aes(ymin = lower,
                        ymax = upper),
                    color = alpha(cls1, 0),
                    fill = alpha(cls1, 0.2)) +
        
        geom_point(aes(colour=Trend.type, x=year, y=index, group=Trend.type)) +
        geom_errorbar(aes(ymin=index-se, ymax=index+se, width = 0.2,
                          colour=Trend.type, group=Trend.type), data = trend_data_long) +
        geom_line(lwd=0.5, data = lns, aes(colour=Trend.type, group=Trend.type, y = index)) +
        geom_ribbon(data = polys, lwd=0.5,
                    mapping = aes(ymin = lower,
                                  ymax = upper,
                                  fill = Trend.type,
                                  group = Trend.type,
                                  colour = Trend.type),
                    color = alpha(1, 0)) +
        
        scale_colour_manual(values = cls, drop = F) +
        scale_fill_manual(values = alpha(cls, 0.2), drop = F) +
        facet_wrap(~Trend.type) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Geometric mean species abundance, relative to 1992") +
        # ylab(expression(GMA[t]~"/"~GMA[1992])) +
        ylab("Multi-species index") +
        # scale_x_continuous(breaks = c(1992:2005)) +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "top")

# Linear  
polys <- data.frame(Trend.type = rep(lvls, 2),
                    x = c(rep(1992, 4), rep(2005, 4)),
                    ymin = c(1,1,1,1,
                          1-(1992*betas$lower)+betas$lower*2005),
                    ymax = c(1,1,1,1,
                          1-(1992*betas$upper)+betas$upper*2005))
polys$Trend.type <- factor(polys$Trend.type,
                                     levels=lvls,
                                     labels=lbls)
lns <- data.frame(Trend.type = rep(lvls, 2),
                    x = c(rep(1992, 4), rep(2005, 4)),
                    y = c(1,1,1,1,
                          1-(1992*betas$index)+betas$index*2005))
lns$Trend.type <- factor(lns$Trend.type,
                                     levels=lvls,
                                     labels=lbls)

g <- ggplot(data=trend_data_long,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        # geom_smooth(lwd=0.5, se=T, stat = "smooth") +
        # geom_smooth(lwd=0.5, se=T, method = lm) +
        geom_polygon(data = polys, aes(x=x, y=y, fill = Trend.type,
                                       colour = Trend.type ),
                     color = alpha(rep(cls, 3), 0)) +
        geom_line(data = lns, aes(x=x, y=y, colour=Trend.type)) +
        scale_color_manual(values = cls) +
        scale_fill_manual(values = alpha(cls, 0.2)) +
        scale_x_continuous(breaks = plotyears) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Geometric mean species abundance, relative to 1992") +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
        theme_minimal() +
        # theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")
        theme(plot.title = element_text(hjust = 0, face = "bold"))

# # Lambdas
# l <- ggplot(data=alllams,
#             aes(x=year, y=lambda, colour=Lambda.type )) +
#     geom_point(cex=0.5) +
#     geom_smooth(lwd=0.5, se=F, method = lm, data = lamsobs) +
#     geom_smooth(lwd=0.5, se=F, method = lm, data = lamsmods) +
#         scale_color_manual(values = cls) +
#     scale_x_continuous(breaks = plotyears) +
#     ggtitle("", "Annual multiplicative change (lambda)") +
#     theme_minimal() +
#     theme(plot.title = element_text(hjust = 0))

# grid.arrange(g, l, ncol = 2, widths = c(1,1.5))
grid.arrange(g, ncol = 1, widths = c(1))

g <- ggplot(data=trend_data_long,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_errorbar(aes(x=year, ymin=index-se, ymax=index+se, colour=Trend.type, width = 0.2))+
        geom_line(lwd=0.5, data = trend_data_long) +
        geom_ribbon(lwd=0.5,
                    aes(x = trend_data_long$year,
                        ymin = trend_data_long$lower,
                        ymax = trend_data_long$upper,
                        fill = Trend.type,
                        group = Trend.type,
                        colour = Trend.type),
                    color = alpha(1, 0)) +
        # geom_smooth(lwd=0.5, se=T, method = loess, data = trend_data_long) +
        scale_colour_manual(values = cls, aesthetics = c("colour"), drop = F) +
        scale_fill_manual(values = alpha(cls, 0.2), aesthetics = c("fill"), drop = F) +
        # scale_alpha_manual(values = cls) +
        scale_x_continuous(breaks = plotyears) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Geometric mean species abundance, relative to 1992") +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
        theme_minimal() +
        # theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")
        theme(plot.title = element_text(hjust = 0, face = "bold"))

# # Lambdas
# l <- ggplot(data=alllams,
#             aes(x=year, y=lambda, colour=Lambda.type )) +
#     geom_point(cex=0.5) +
#     geom_smooth(lwd=0.5, se=T, method = loess, data = lamsobs) +
#     geom_smooth(lwd=0.5, se=T, method = loess, data = lamsmods) +
#         scale_color_manual(values = cls) +
#     scale_x_continuous(breaks = plotyears) +
#     ggtitle("", "Annual multiplicative change (lambda)") +
#     theme_minimal() +
#     theme(plot.title = element_text(hjust = 0))

# grid.arrange(g, l, ncol = 2, widths = c(1,1.5))
grid.arrange(g, ncol = 1, widths = c(1))

Loess

ggplot(data = trend_data_long,
       aes(x=year, y=index)) +

        # # observed geom smooth
        geom_point(data = t, color = cls1) +
        geom_errorbar(data = t,
                      aes(ymin=index-se, ymax=index+se, width = 0.2),
                          color = cls1) +
        geom_line(data = s, lwd=0.5, color = cls1) +
        geom_ribbon(data = t, lwd=0.5,
                    aes(x = year,
                        ymin = index-1.96*se,
                        ymax = index+1.96*se),
                    color = alpha(cls1, 0),
                    fill = alpha(cls1, 0.2)) +
        
        geom_point(aes(colour=Trend.type, group=Trend.type)) +
        geom_errorbar(aes(ymin=index-se, ymax=index+se, width = 0.2,
        colour=Trend.type, group=Trend.type), data = trend_data_long) +
        geom_line(lwd=0.5, data = sims_long, aes(colour=Trend.type, group=Trend.type)) +
        geom_ribbon(data = sims_long, lwd=0.5,
                    aes(x = year,
                        ymin = index-1.96*se,
                        ymax = index+1.96*se,
                        fill = Trend.type,
                        group = Trend.type,
                        colour = Trend.type),
                    color = alpha(1, 0)) +
        
        scale_colour_manual(values = cls, drop = F) +
        scale_fill_manual(values = alpha(cls, 0.2), drop = F) +
        facet_wrap(~Trend.type) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Geometric mean species abundance, relative to 1992") +
        # ylab(expression(GMA[t]~"/"~GMA[1992])) +
        ylab("Multi-species index") +
        # scale_x_continuous(breaks = c(1992:2005)) +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "top")

4 Validation metrics

4.1 At the species level

colnames(ccfHab) <- seq(-3,3,1)
colnames(ccfDens) <- seq(-3,3,1)

# t-test
pvals <- c()
for (j in 1:ncol(ccfHab)) {
        p <- (t.test(ccfDens[,j], ccfHab[,j])$p.value)
        pvals <- append(pvals, p)
        }

crosscor <- data.frame(ccf = c(ccfHab[,1], ccfHab[,2], ccfHab[,3], 
                               ccfHab[,4], ccfHab[,5], ccfHab[,6], ccfHab[,7], 
                               ccfDens[,1], ccfDens[,2], ccfDens[,3],  
                               ccfDens[,4],  ccfDens[,5],  ccfDens[,6], ccfDens[,7]),
                   model = factor(c(rep("habitat-only", k*7), c(rep("habitat+density", k*7))),
                                  levels = c("habitat-only", "habitat+density")),
                   lag = factor(c(rep(-3, k), rep(-2, k), rep(-1, k), rep(0, k),
                                  rep(1, k), rep(2, k), rep(3, k), 
                                  rep(-3, k), rep(-2, k), rep(-1, k), rep(0, k),
                                  rep(1, k), rep(2, k), rep(3, k)) ))
levels(crosscor$lag) <- c(paste0("lag ", levels(crosscor$lag)))

ggplot(crosscor, aes(x = model, y = ccf, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "#984ea3")) +
        scale_color_manual(values = c("#4daf4a", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Cross correlation coefficient", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        facet_wrap(~ lag, ncol = 7) +
        theme_minimal() +
        theme(axis.text.x = element_blank(),
                      panel.border = element_rect(fill=F),
              strip.text = element_text(size = 12),
              legend.position = "top") +
        annotate("text", x=1.5, y = -1.1, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvals[3:9], 2)))

Root Mean Squared Error
Because correlations have some obscuring symmetries (unsensitive to shifts & scaling), correlation coefficients are are not always good signals of true prediction performance. Root Mean Squared Error is a better metric to track model accuracy.

p-values are calculated between respective models and the habitat-only model.

RMSE <- data.frame(rmse = c(rmseHab, rmseDens, rmseAbun),
                   model = factor(c(rep("Habitat model", k), 
                                    c(rep("Density model", k), 
                                    c(rep("Combined model", k)))),
                                  levels = c("Habitat model", 
                                             "Density model", 
                                             "Combined model")))

# t-test
pvalsD <- t.test(rmseHab, rmseDens)$p.value
pvalsA <- t.test(rmseHab, rmseAbun)$p.value

ggplot(RMSE, aes(x = model, y = rmse, fill = model)) + 
        scale_fill_manual(values = cls3) +
        # scale_y_continuous(limits = (c(0,1))) +
        scale_color_manual(values = cls3) +
        # geom_violin(trim = T, alpha = 0.5) +
        geom_boxplot() +
        labs(y = "Root Mean Squared Error", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        # geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n")+
        annotate("text", x=2, y = 1.75, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsD, 3)))+
        annotate("text", x=3, y = 1.75, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsA, 3)))

$\label{fig:figs}Distributions of model root mean squared error$

Figure 4.1: Distributions of model root mean squared error

There is 1 species with very poor prediction performance (RMSE > 1; ). This next plot zooms in on RMSE up to 1.00:

pvalsD <- t.test(rmseDens[rmseDens<1], rmseHab[rmseHab<1])$p.value
pvalsA <- t.test(rmseAbun[rmseAbun<1], rmseHab[rmseHab<1])$p.value

ggplot(RMSE, aes(x = model, y = rmse, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        scale_y_continuous(limits = (c(0,1))) +
        scale_color_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Root Mean Squared Error", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n") +
        annotate("text", x=2, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsD, 3)))+
        annotate("text", x=3, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsA, 3)))

$\label{fig:figs}Distributions of model root mean squared errors up to RMSE=1$

Figure 4.2: Distributions of model root mean squared errors up to RMSE=1

NB: Slight, but not statistically significant, difference between RMSE distributions of two model types.

Mean Absolute Error
RMSE gives a relatively high weight to large errors as it increases with the variance of the frequency distribution of error magnitudes, not with the variance of the errors. Mean Absolute Error (MAE) captures the variance of errors.

RMSE could be more useful when large errors are particularly undesirable (i.e., if being off by 1 is more than twice as bad as being off by 0.5). If not (as is our case I think, as being off 20% in abundance change, is twice as bad as being off 10% in abundance chagne), Mean Absolute Error is more appropriate.

NB: MAE plotted for MAE<1 ().

MAE <- data.frame(mae = c(maeHab, maeDens, maeAbun),
                   model = factor(c(rep("Habitat model", k), 
                                    c(rep("Density model", k), 
                                    c(rep("Combined model", k)))),
                                  levels = c("Habitat model", 
                                             "Density model", 
                                             "Combined model")))

# t-test
pvalsD <- t.test(maeDens[maeDens<1], maeHab[maeHab<1])$p.value
pvalsA <- t.test(maeDens[maeDens<1], maeHab[maeAbun<1])$p.value

ggplot(MAE, aes(x = model, y = mae, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        scale_y_continuous(limits = (c(0,1))) +
        scale_color_manual(values = c("#4daf4a","blue", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Mean Absolute Error", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n") +
        annotate("text", x=2, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsA, 3))) +
        annotate("text", x=3, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalsD, 3)))

$\label{fig:figs}Distributions of model mean absolute errors; p-value for MAE<1$

Figure 4.3: Distributions of model mean absolute errors; p-value for MAE<1

Bias
I also compute bias here, as the difference between obsered-predicted, as a measure of model tendencies to systematically under- or over-predict changes in GMA.

NB: Bias plotted without 3 really bad predictions (). p-values below violins from test whether distributions are different from 0.

BIAS <- data.frame(bias = c(biasHab, biasDens, biasAbun),
                   model = factor(c(rep("Habitat model", k), 
                                    c(rep("Density model", k), 
                                    c(rep("Combined model", k)))),
                                  levels = c("Habitat model", 
                                             "Density model", 
                                             "Combined model")))

# t-test
pvals1 <- wilcox.test(biasDens, biasHab)$p.value
pvals2 <- wilcox.test(biasAbun, biasHab)$p.value
pvalN <- wilcox.test(biasHab)$p.value
pvalD <- wilcox.test(biasDens)$p.value
pvalA <- wilcox.test(biasAbun)$p.value

ggplot(BIAS, aes(x = model, y = bias, fill = model)) + 
        scale_fill_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        scale_y_continuous(limits = (c(-1,1))) +
        scale_color_manual(values = c("#4daf4a", "blue", "#984ea3")) +
        geom_violin(trim = T, alpha = 0.5) +
        labs(y = "Bias", x = "") +
        # geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0.5) +
        geom_jitter(shape=16, position=position_jitter(0.2)) +
        theme_minimal() +
        theme(panel.border = element_rect(fill=F),
              # axis.text.x = element_blank(),
              strip.text = element_text(size = 12),
              legend.position = "n") +
        annotate("text", x=2, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvals1, 3))) +
        annotate("text", x=3, y = 0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvals2, 3))) +
        annotate("text", x=1, y = -0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalN, 3))) +
        annotate("text", x=2, y = -0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalD, 3)))+
        annotate("text", x=3, y = -0.9, hjust = 0.5, cex = 4,
                 label = paste0("p = ", round(pvalA, 3)))

$\label{fig:figs}Distributions of model bias; p-value for bias<1$

Figure 4.4: Distributions of model bias; p-value for bias<1

4.2 Sign of Linear Trends

Proportion of corresponding signs

linear.trends <- as.data.frame(cbind(lmObs, lmHab, lmDens, lmAbun))
row.names(linear.trends) <- observedspecies
colnames(linear.trends)  <- nms[2:5]
linear.pvals <- as.data.frame(cbind(lmObs.p, lmHab.p, lmDens.p, lmAbun.p))
row.names(linear.pvals) <- observedspecies
colnames(linear.pvals)  <- nms[2:5]

slopes <- linear.trends
linear.trends[linear.pvals > 0.05] <- 0

signs <- sign(linear.trends)

signs$cathab <- ifelse(sign(linear.trends$Observed.abundance) == sign(linear.trends$Modelled.habitat), "Correct",
                      ifelse(sign(linear.trends$Observed.abundance)*-1 ==  sign(linear.trends$Modelled.habitat), "Opposing", 
                             ifelse(sign(linear.trends$Observed.abundance) == 1 & sign(linear.trends$Modelled.habitat) == 0, "Missed Positive", 
                                    ifelse(sign(linear.trends$Observed.abundance) == -1 & sign(linear.trends$Modelled.habitat) == 0, "Missed Negative", 
                                           ifelse(sign(linear.trends$Observed.abundance) == 0 & sign(linear.trends$Modelled.habitat) == 1, "False Positive", "False Negative")))))

signs$catdens <- ifelse(sign(linear.trends$Observed.abundance) == sign(linear.trends$Modelled.density), "Correct",
                      ifelse(sign(linear.trends$Observed.abundance)*-1 ==  sign(linear.trends$Modelled.density), "Opposing", 
                             ifelse(sign(linear.trends$Observed.abundance) == 1 & sign(linear.trends$Modelled.density) == 0, "Missed Positive", 
                                    ifelse(sign(linear.trends$Observed.abundance) == -1 & sign(linear.trends$Modelled.density) == 0, "Missed Negative", 
                                           ifelse(sign(linear.trends$Observed.abundance) == 0 & sign(linear.trends$Modelled.density) == 1, "False Positive", "False Negative")))))

signs$catabun <- ifelse(sign(linear.trends$Observed.abundance) == sign(linear.trends$Modelled.abundance), "Correct",
                      ifelse(sign(linear.trends$Observed.abundance)*-1 ==  sign(linear.trends$Modelled.abundance), "Opposing", 
                             ifelse(sign(linear.trends$Observed.abundance) == 1 & sign(linear.trends$Modelled.abundance) == 0, "Missed Positive", 
                                    ifelse(sign(linear.trends$Observed.abundance) == -1 & sign(linear.trends$Modelled.abundance) == 0, "Missed Negative", 
                                           ifelse(sign(linear.trends$Observed.abundance) == 0 & sign(linear.trends$Modelled.abundance) == 1, "False Positive", "False Negative")))))

sign.comparison <- rbind(round(table(signs$cathab)/length(lmObs), 2),
                         round(table(signs$catdens)/length(lmObs), 2),
                         round(table(signs$catabun)/length(lmObs), 2))

row.names(sign.comparison) <- c("Habitat-only model", "Density-only model", "Combined model")

kable(sign.comparison, caption = "Sign comparison between linear trends (proportion)")

Table 4.1: Sign comparison between linear trends (proportion)
	Correct	False Negative	False Positive	Missed Negative	Missed Positive	Opposing
Habitat-only model	0.57	0.03	0.09	0.11	0.17	0.03
Density-only model	0.54	0.09	0.03	0.11	0.17	0.06
Combined model	0.57	0.06	0.06	0.11	0.17	0.03

4.3 All metrics

linear.trends.diff <- data.frame(Modelled.habitat = slopes$Observed.abundance -
                                                         slopes$Modelled.habitat,
                                 Modelled.density = slopes$Observed.abundance -
                                                         slopes$Modelled.density,
                                 Modelled.abundance = slopes$Observed.abundance -
                                                         slopes$Modelled.abundance)

linear.trends.long <- gather(linear.trends.diff, "model", "value")
linear.trends.long$model <- factor(linear.trends.long$model, 
                                   levels = unique(linear.trends.long$model),
                                   labels = lbls[2:4])

names(RMSE)[1] <- "value"
names(BIAS)[1] <- "value"
names(MAE)[1] <- "value"

vm <- rbind(RMSE, BIAS, MAE, linear.trends.long)
vm$metric <- c(rep("RMSE", 105), rep("Bias", 105), rep("MAE", 105), rep("Difference linear slopes", 105))
vm$metric <- factor(vm$metric, levels = c("RMSE", "MAE", "Bias", "Difference linear slopes"))

# vm <- vm[-which.max(vm$value), ]
# vm <- vm[-which.max(vm$value), ]
# vm <- vm[-which.max(vm$value), ]
# vm <- vm[-which.max(vm$value), ]

pvalN <- wilcox.test(biasHab)$p.value
pvalD <- wilcox.test(biasDens)$p.value
pvalA <- wilcox.test(biasAbun)$p.value

# Visualize: Specify the comparisons you want
my_comparisons <- list( c("Habitat model", "Density model"), 
                        c("Density model", "Combined model"),
                        c("Habitat model", "Combined model") )
dat_text <- data.frame(metric = rep("Bias", 3),
                       model = levels(vm$model),
                       label = c(round(pvalN, 2), round(pvalD, 4), round(pvalA, 3)))

ranges <- as.data.frame(rbind(range(filter(vm, metric == "RMSE")$value), 
                   range(filter(vm, metric == "MAE")$value),
                   range(filter(vm, metric == "Bias")$value),
                   range(filter(vm, metric == "Difference linear slopes")$value)))

ranges <- data.frame(ymax = c(2.5, 2, 1, 3), 
                     ymin = c(0, 0, -.75, 0.1),
                     metric = c("RMSE", "MAE", "Bias", "Difference linear slopes"),
                     model = "Habitat model")
ranges <- gather(ranges, "key", "value", -metric, -model) %>% select(., -key)

# level1 <- c(2, 1.6, 0.6, 2.3)
# level2 <- level2-level1*0.05
# 
# x = c(1, 1, 2, 2), c(1, 1, 3, 3), 
# 
# arcs <- data.frame(value = c(level1, level2), 
#                      metric = c("RMSE", "MAE", "Bias", "Difference linear slopes"),
#                      model = lbls[2:4])
# 
# compare_means(data = vm, value~model, method = "wilcox", group.by = "metric",
#                            # ref.group = "habitat model",
#                            label = "p.format",
#                            # method.args = list(alternative = "greater"),
#                            comparisons = my_comparisons) 
        
ggplot(vm, aes(x = model, y = value, color = model),
          xlab = "") +
        geom_boxplot() + 
        geom_jitter(shape = 1) +
        scale_colour_manual(values = cls3, drop = F) +
        geom_hline(aes(yintercept=0), lty = 2) +
        facet_wrap(~metric, scales = "free_y") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1),
              strip.background = element_rect(fill="white", colour = "white"),
              legend.position = "top") +
        scale_y_continuous(name = "") +
        geom_blank(data = ranges) +
        geom_text(data = dat_text, aes(y=-.65, label = paste("p = ", label)), color = 1) +

        stat_compare_means(label.y.npc = 0.9, method = "wilcox",
                           # ref.group = "Habitat model",
                           label = "p.format", 
                           # method.args = list(alternative = "greater"),
                           comparisons = my_comparisons,
                           tip.length = 0.05) # Add pairwise comparisons p-value

4.4 At the index level

These metrics are calculated on the aggregated trends across all species, but I don’t think these tell us much if predictions for individual species are no good.

rsq = function(y,f) { 1 - sum((y-f)^2)/sum((y-mean(y))^2) }
cor0 <- cor(trends$Observed.abundance, trends$Modelled.habitat)
cor1 <- cor(trends$Observed.abundance, trends$Modelled.density)
cor2 <- cor(trends$Observed.abundance, trends$Modelled.abundance)

rsq0 <- rsq(trends$Observed.abundance, trends$Modelled.habitat)
rsq1 <- rsq(trends$Observed.abundance, trends$Modelled.density)
rsq2 <- rsq(trends$Observed.abundance, trends$Modelled.abundance)

rmse0 <- rmse(trends$Observed.abundance, trends$Modelled.habitat)
rmse1 <- rmse(trends$Observed.abundance, trends$Modelled.density)
rmse2 <- rmse(trends$Observed.abundance, trends$Modelled.abundance)

mae0 <- mae(trends$Observed.abundance, trends$Modelled.habitat)
mae1 <- mae(trends$Observed.abundance, trends$Modelled.density)
mae2 <- mae(trends$Observed.abundance, trends$Modelled.abundance)

bias0 <- bias(trends$Observed.abundance, trends$Modelled.habitat)
bias1 <- bias(trends$Observed.abundance, trends$Modelled.density)
bias2 <- bias(trends$Observed.abundance, trends$Modelled.abundance)

# ccf.all <- cbind(ccf(trends$Observed.abundance, 
#                    trends$Modelled.habitat, 
#                    na.action = na.pass, plot = F, 
#                    lag.max = 5)$acf[,,1]^2,
#         ccf(trends$Observed.abundance,
#                    trends$Modelled.abundance, 
#                    na.action = na.pass, plot = F, 
#                    lag.max = 5)$acf[,,1]^2)

## Figs for manuscript
# Linear
ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = lm, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = mods) +
        scale_color_manual(values = c("#ff7f00", "#4daf4a", "#984ea3", "#377eb8", "#e41a1c")) +
        scale_x_continuous(breaks = seq(1992, 2013, 2)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance")  +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "none")
ggsave("figs/Africa/linear.pdf", width = 12, height = 7.5, units = "cm")

# LOESS
ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        scale_color_manual(values = c("#ff7f00", "#4daf4a", "#984ea3", "#377eb8", "#e41a1c")) +
        scale_x_continuous(breaks = seq(1992, 2013, 2)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " Africa mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance")  +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "none")
ggsave("figs/Africa/gam.pdf", width = 12, height = 7.5, units = "cm")

Quantifying the agreement between observed and modelled trends in geometric mean abundance across all species.

Root Mean Squared Error
Observed vs. Habitat-only model: 0.1035566
Observed vs. Density-only model: 0.2849386
Observed vs. Habitat+density model: 0.2511758

Mean Absolute Error
Observed vs. Habitat-only model: 0.0740524
Observed vs. Density-only model: 0.2088959
Observed vs. Habitat+density model: 0.1796566

Bias
Observed vs. Habitat-only model: 0.0740524
Observed vs. Density-only model: 0.2088959
Observed vs. Habitat+density model: 0.1752529

Slope of linear trend
Linear slope observed data: 0.00383
Linear slope Habitat-only model: 0.00323
Linear slope Density-only model: -0.01626
Linear slope Habitat+density model: -0.01567

gamdat <- trends %>%
        select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "model", value = "index",
                          'Modelled.habitat':'Observed.abundance') 
gamdat$model <- factor(gamdat$model)

smooth_diff <- function(model, newdata, f0, f1, var, alpha = 0.05,
                        unconditional = FALSE) {
    xp <- predict(model, newdata = newdata, type = 'lpmatrix')
    c0 <- grepl(f0, colnames(xp))
    c1 <- grepl(f1, colnames(xp))
    r0 <- newdata[[var]] == f0
    r1 <- newdata[[var]] == f1
    ## difference rows of xp for data from comparison
    X <- xp[r1, ] - xp[r0, ]
    ## zero out cols of X related to splines for other lochs
    X[, ! (c0 | c1)] <- 0
    ## zero out the parametric cols
    X[, !grepl('^s\\(', colnames(xp))] <- 0
    dif <- X %*% coef(model)
    se <- sqrt(rowSums((X %*% vcov(model, unconditional = unconditional)) * X))
    crit <- qt(alpha/2, df.residual(model), lower.tail = FALSE)
    upr <- dif + (crit * se)
    lwr <- dif - (crit * se)
    data.frame(pair = paste(f0, f1, sep = ' - '),
               diff = dif,
               se = se,
               upper = upr,
               lower = lwr)
}

SmoothPar <- 5
m <- gam(index ~ s(year, by = model, k = SmoothPar), data = gamdat)
pdat <- expand.grid(year = seq(1992, 2005, length = 400),
                    model = c("Modelled.habitat", "Modelled.abundance", "Observed.abundance"))
# plot(m, shade = T, ylim = c(-.2, .2))

comp1 <- smooth_diff(m, pdat, 'Observed.abundance', 'Modelled.habitat', 'model')
comp2 <- smooth_diff(m, pdat, 'Observed.abundance', 'Modelled.abundance', 'model')
comp <- cbind(date = seq(1992, 2005, length = 400),
              rbind(comp1, comp2))

levels(comp$pair) <- c('Observed data - "Habitat-only" model', 'Observed data - "Habitat+density" model')

ggplot(comp, aes(x = date, y = diff, group = pair)) +
        geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2, 
                    fill = c(rep("#4daf4a", 400), rep("#984ea3", 400))) +
        annotate("segment", x = 1992, xend = 2005, y = 0, yend = 0, lty = 2, cex = 0.5) +
        scale_x_continuous(breaks = seq(1992, 2004, by = 2)) +
        ylim(-.2, .2) +
        geom_line(col = c(rep("#4daf4a", 400), rep("#984ea3", 400))) +
        facet_wrap(~ pair, ncol = 2) +
        # coord_cartesian(ylim = c(-.2, .2)) +
        labs(x = NULL, y = 'Difference in GMA trend') +
        theme_minimal() + 
        theme(strip.text = element_text(size = 10))

SmoothPar <- 2
gam0 <- gam(index ~ s(year, by = model, k = SmoothPar), 
            data = gamdat[gamdat$model == "Observed.abundance", ])
gam1 <- gam(index ~ s(year, by = model, k = SmoothPar), 
            data = gamdat[gamdat$model == "Modelled.habitat", ])
gam2 <- gam(index ~ s(year, by = model, k = SmoothPar), 
            data = gamdat[gamdat$model == "Modelled.abundance", ])

lm0 <- lm(index ~ year, 
            data = gamdat[gamdat$model == "Observed.abundance", ])
lm1 <- lm(index ~ year,  
            data = gamdat[gamdat$model == "Modelled.habitat", ])
lm2 <- lm(index ~ year, 
            data = gamdat[gamdat$model == "Modelled.abundance", ])

paste0("R-squared [Observed GAM] = ",        round(summary(gam0)$r.sq, 3))
paste0("R-squared [Habitat-only GAM] = ",    round(summary(gam1)$r.sq, 3))
paste0("R-squared [Habitat+density GAM] = ", round(summary(gam2)$r.sq, 3))
paste0("R-squared [Observed LM] = ",         round(summary(lm0)$adj.r.sq, 3))
paste0("R-squared [Habitat-only LM] = ",     round(summary(lm1)$adj.r.sq, 3))
paste0("R-squared [Habitat+density LM] = ",  round(summary(lm2)$adj.r.sq, 3))

# Points only
ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        # geom_smooth(lwd=0.5, se=T, method = lm, data = obs) +
        # geom_smooth(lwd=0.5, se=F, method = lm, data = mods) +
        scale_color_manual(values = c("#ff7f00", "NA", "NA", "NA", "NA")) +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal()

# Observed abundance
tmp3 <- trend_data_long %>%
    filter(Trend.type %in% c("Observed data"))

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "NA", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=T, method = lm, data = tmp3, col = "#ff7f00") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "NA", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = tmp3, col = "#ff7f00") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Habitat only
tmp1 <- trend_data_long %>%
    filter(Trend.type == "Habitat-only model")

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "#4daf4a", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = tmp1, col = "#4daf4a") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "#4daf4a", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp1, col = "#4daf4a") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Precip & Temp
tmp2 <- trend_data_long %>%
    filter(Trend.type %in% c("Precipitation (WQ)"))
tmp5 <- trend_data_long %>%
    filter(Trend.type %in% c("Temperature (SD)"))

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "NA", "#4daf4a", "#377eb8", "#e41a1c")) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp2, col = "#377eb8") +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp5, col = "#e41a1c") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# Modelled abundance
tmp4 <- trend_data_long %>%
    filter(Trend.type %in% c("Habitat+density model"))

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "#984ea3", "#4daf4a", "NA", "NA")) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = tmp4, col = "#984ea3") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                expression("Change from 1992 (Nt/N"[1992])) +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplot(data=trend_data_long,
       aes(x=year, y=index, colour=Trend.type )) +
        annotate("segment", x = 1992, xend = 2013, y = 1, yend = 1, lty = 2, cex = 0.5) +
        geom_point(cex=0.5) +
        scale_color_manual(values = c("#ff7f00", "#984ea3", "#4daf4a", "#377eb8", "#e41a1c")) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = tmp4, col = "#984ea3") +
        scale_x_continuous(breaks = years) +
        scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), " African mammals"),
                expression("Change from 1992 (Nt/N"[1992])) +
        ylab("Geometric Mean Abundance") +
        theme(plot.title = element_text(hjust = 0.5)) +
        theme_minimal() +
        theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

# t3 <- ggplot(data=tmp3,
#             aes(x=year, y=index, colour=Trend.type )) +
#     geom_smooth(lwd=1, se=F) +
#     # geom_point(cex=0.5) +
#     # geom_abline(intercept = 1, slope = 0, lty =2) +
#     scale_color_manual(values=c("#ff7f00")) +
#     # geom_line(data=trendRM_data_long, 
#     #           aes(x=year, y=trend, colour=Trend.type )) +
#     scale_x_continuous(breaks = years) +
#     scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
#     # minor_breaks = seq(0.25, 1.75, 0.025),
#     ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
#                    length(unique(lpd.interpolated$Species.ID)), " African mammals"),
#             "Change from 1992 (Nt/'N'[1992])") +
#     theme(plot.title = element_text(hjust = 0.5)) +
#     # scale_color_discrete(name = "Variable") +
#     theme_minimal() +
#     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# print(t3)
# 
# 
# 
# t4 <- ggplot(data=tmp4,
#             aes(x=year, y=index, colour=Trend.type )) +
#     geom_smooth(lwd=1, se=F) +
#     # geom_point(cex=0.5) +
#     # geom_abline(intercept = 1, slope = 0, lty =2) +
#     scale_color_manual(values=c("#984ea3")) +
#     # geom_line(data=trendRM_data_long, 
#     #           aes(x=year, y=trend, colour=Trend.type )) +
#     scale_x_continuous(breaks = years) +
#     scale_y_continuous(limits=c(0.7, 1.2), breaks = seq(0.8, 1.2, 0.1)) +
#     # minor_breaks = seq(0.25, 1.75, 0.025),
#     ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
#                    length(unique(lpd.interpolated$Species.ID)), " African mammals"),
#             "Change from 1992 (Nt/'N'[1992])") +
#     theme(plot.title = element_text(hjust = 0.5)) +
#     # scale_color_discrete(name = "Variable") +
#     theme_minimal() +
#     theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
# print(t4)

5 Subsets

5.1 Threat

Threatened by land use change
Species and populations listed as threatened by “Habitat degradation/change” or “Habitat loss” in the Living Planet Database

#Load species traits and taxonomy
# Traits <- read.csv('H:/MyDocuments/Luca-Density/M_Traits.csv')
Traits <- read.csv("data/density/fromLuca/M_Traits.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by.x = 'Spec.ID', by.y = 'tax_id', all.x=TRUE)

# Select data only
lpd.myspecies <- lpd %>%
        filter(Species.ID %in% observedspecies) %>%
        # filter(myid %in% unique(lpd.interpolated$myid)) %>%
        left_join(., Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                  by = c('Species.ID' ='tax_id')) 

lpd.myspecies <- lpd.myspecies[as.numeric(rownames(lpd.interpolated)), ]

lpd.threat <- lpd.myspecies %>%
    filter(Primary_threat %in% c("Habitat degradation/change", "Habitat loss"))

# no exploitation / disease / pollution 
lpd.nootherthreat <- lpd.myspecies %>%
    filter(!Primary_threat %in% c("Exploitation", "Pollution", "Disease"))

lpd.Unthreat <- lpd.myspecies %>%
    filter(!Primary_threat %in% c("Habitat degradation/change", "Habitat loss",
                                 "Exploitation", "Pollution", "Disease"))

threatspecies <- unique(lpd.threat$Species.ID)
Unthreatspecies <- unique(lpd.Unthreat$Species.ID)
No.oth.threatspecies <- unique(lpd.nootherthreat$Species.ID)

## Modelled trends
trends.thr <- data.frame(year = plotyears)
lambdas.thr <- data.frame(year = plotyears)

for (i in c(1:4)) {
    ii <- i+1
    dat <- all.trends[[i]]
    dat <- subset(dat, Species.ID %in% threatspecies)
    trends.thr[, ii] <- apply(dat[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    dat2 <- all.lambdas[[i]]
            dat2 <- subset(dat2, Species.ID %in% threatspecies)
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas.thr[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
        lambdas.thr[is.nan(lambdas.thr)] <- NA
    
}

names(trends.thr) <- nms

obs <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obs$Trend.type <- factor(obs$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

mods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance") 
mods$Trend.type <- factor(mods$Trend.type,
                          levels=lvls[2:4],
                          labels=lbls[2:4])

allmods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance':"Modelled.abundance") 
allmods$Trend.type <- factor(allmods$Trend.type,
                             levels=lvls,
                             labels=lbls)

names(lambdas.thr) <- nms

obslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obslam$Trend.type <- factor(obslam$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

modslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance")
modslam$Trend.type <- factor(modslam$Trend.type,
                                     levels=lvls[2:4],
                               labels=lbls[2:4])

allmodslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                         'Observed.abundance':"Modelled.abundance")
allmodslam$Trend.type <- factor(allmodslam$Trend.type,
                              levels=lvls,
                              labels=lbls)

g <- ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
    geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
    ggtitle(paste0("Species threatened by habitat degradation/change/loss (", 
                   nrow(lpd.threat), 
                   " populations; ",
                   length(threatspecies), " species)"),
            "Geometric mean species abundance, relative to 1992") +
    theme(plot.title = element_text(hjust = 0.5)) +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

# Lambdas
l <- ggplot(data=allmodslam,
       aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obslam) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = modslam) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
 ggtitle("", "Annual multiplicative change (lambda)") +
    theme_minimal() +
    theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(g, l, ncol = 2, widths = c(1,1.5))

$\label{fig:figs}Species threatened by habitat degradation / change / loss$

Figure 5.1: Species threatened by habitat degradation / change / loss

No other threats
Species and populations NOT listed in the Living Planet Database as threatened by other causes (“Exploitation”, “Pollution”, or “Disease”). This set should have greatest agreement between models and observed trends.

## Modelled trends
trends.thr <- data.frame(year = plotyears)
lambdas.thr <- data.frame(year = plotyears)

for (i in c(1:4)) {
        ii <- i+1
        dat <- all.trends[[i]]
        dat <- subset(dat, Species.ID %in% No.oth.threatspecies)
        trends.thr[, ii] <- apply(dat[-c(1:3)],
                                  MARGIN = 2,
                                  FUN = function(x) gm_mean(x))
    dat2 <- all.lambdas[[i]]
        dat2 <- subset(dat2, Species.ID %in% No.oth.threatspecies)
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas.thr[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas.thr[is.nan(lambdas.thr)] <- NA
}

names(trends.thr) <- nms

obs <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obs$Trend.type <- factor(obs$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

mods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance") 
mods$Trend.type <- factor(mods$Trend.type,
                          levels=lvls[2:4],
                          labels=lbls[2:4])

allmods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance':"Modelled.abundance") 
allmods$Trend.type <- factor(allmods$Trend.type,
                             levels=lvls,
                             labels=lbls)

names(lambdas.thr) <- nms

obslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obslam$Trend.type <- factor(obslam$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

modslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance")
modslam$Trend.type <- factor(modslam$Trend.type,
                                     levels=lvls[2:4],
                               labels=lbls[2:4])

allmodslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                         'Observed.abundance':"Modelled.abundance")
allmodslam$Trend.type <- factor(allmodslam$Trend.type,
                              levels=lvls,
                              labels=lbls)

g <- ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        ggtitle(paste0("Species NOT threatened by exploitation/pollution/disease (", 
                       nrow(lpd.nootherthreat), 
                       " populations; ",
                       length(No.oth.threatspecies), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
         ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

l <- ggplot(data=allmodslam,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obslam) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = modslam) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls) + 
        ggtitle("", "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(g, l, ncol = 2, widths = c(1,1.5))

$\label{fig:figs}Species not threatened by exploitation / pollution / disease$

Figure 5.2: Species not threatened by exploitation / pollution / disease

Not-threatened only
Species and populations listed in the Living Planet Database as NOT threatened by ANY cause (“Habitat degradation/change”, “Habitat loss”, “Exploitation”, “Pollution”, or “Disease”)

## Modelled trends
trends.thr <- data.frame(year = plotyears)
lambdas.thr <- data.frame(year = plotyears)
 
for (i in c(1:4)) {
    ii <- i+1
    dat <- all.trends[[i]]
    dat <- subset(dat, Species.ID %in% Unthreatspecies)
    trends.thr[, ii] <- apply(dat[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    dat2 <- all.lambdas[[i]]
    dat2 <- subset(dat2, Species.ID %in% Unthreatspecies)
    dat2[is.nan(dat2)] <- NA
    dat2[dat2 == Inf] <- NA
    lambdas.thr[, ii] <- apply(dat2[-c(1:3)],
                           MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas.thr[is.nan(lambdas.thr)] <- NA
}

names(trends.thr) <- nms

obs <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obs$Trend.type <- factor(obs$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

mods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance") 
mods$Trend.type <- factor(mods$Trend.type,
                          levels=lvls[2:4],
                          labels=lbls[2:4])

allmods <- trends.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance':"Modelled.abundance") 
allmods$Trend.type <- factor(allmods$Trend.type,
                             levels=lvls,
                             labels=lbls)

names(lambdas.thr) <- nms

obslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          'Observed.abundance') 
obslam$Trend.type <- factor(obslam$Trend.type,
                                     levels=c("Observed.abundance"),
                                     labels=c("Observed trend"))

modslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                          "Modelled.habitat":"Modelled.abundance")
modslam$Trend.type <- factor(modslam$Trend.type,
                                     levels=lvls[2:4],
                               labels=lbls[2:4])

allmodslam <- lambdas.thr %>%
        # select(year, Modelled.habitat:Observed.abundance) %>%
        gather(key = "Trend.type", value = "index",
                         'Observed.abundance':"Modelled.abundance")
allmodslam$Trend.type <- factor(allmodslam$Trend.type,
                              levels=lvls,
                              labels=lbls)

g <- ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        ggtitle(paste0("Species NOT threatened by ANY cause (", 
                       nrow(lpd.Unthreat), 
                       " populations; ",
                       length(Unthreatspecies), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        scale_color_manual(values = cls, 
                           name = "Trend type") +
         ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

l <- ggplot(data=allmodslam,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obslam) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = modslam) +
        # scale_x_continuous(breaks = years) +
        scale_color_manual(values = cls) + 
        ggtitle("", "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(g, l, ncol = 2, widths = c(1,1.5))

$\label{fig:figs}Species without listed threat$

Figure 5.3: Species without listed threat

5.2 IUCN Red List Status

#Load species traits and taxonomy
IUCNmamms <- read.csv("data/biodiversity/IUCN-RedList/MammalsJune2018.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, IUCNmamms, 
                by.x = 'Spec.ID', by.y = 'Species.ID', all.x=TRUE)

allspecies$qnt<- ifelse(allspecies$Red.List.status == "LC" | allspecies$Red.List.status == "NT", "Least Concern or Near Threatened", ifelse(
        allspecies$Red.List.status == "DD", "DD",  "Vulnerable or worse"))

## Modelled trends
plots <- list()
lplots <- list()
for (j in unique(allspecies$Red.List.status)) {
        trends.set <- data.frame(year = plotyears)
        lambdas.set <- data.frame(year = plotyears)
        set <- subset(allspecies, Red.List.status == j)
        for (i in c(1:4)) {
                ii <- i+1
                dat <- all.trends[[i]]
                datS <- subset(dat, Species.ID %in% set$Spec.ID)
                trends.set[, ii] <- apply(datS[-c(1:3)],
                                          MARGIN = 2,
                                          FUN = function(x) gm_mean(x))
                dat <- all.lambdas[[i]]
                datH <- subset(dat, Species.ID %in% set$Spec.ID)
                lambdas.set[, ii] <- apply(datH[-c(1:3)],
                                           MARGIN = 2,
                                           FUN = function(x) mean(x, na.rm = T))
                    lambdas.set[is.nan(lambdas.set)] <- NA
        }
        names(trends.set) <- nms
        trends.set.long <- gather(trends.set, key = "Trend.type", value = "trend", 
                                  'Observed.abundance':'Modelled.abundance')
        trends.set.long$Trend.type <- factor(trends.set.long$Trend.type, 
                                             levels=lvls,
                                             labels=lbls)
        names(lambdas.set) <- nms
        lambdas.set.long <- gather(lambdas.set, key = "Trend.type", value = "trend", 
                                   'Observed.abundance':'Modelled.abundance')
        lambdas.set.long$Trend.type <- factor(lambdas.set.long$Trend.type, 
                                              levels=lvls,
                                              labels=lbls)

g <- ggplot(data=trends.set.long,
             aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Red List Status: ", j,
                       " (", nrow(datS), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992]))

l <- ggplot(data=lambdas.set.long,
                      aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        labs(y = "lambda") +
        theme(plot.title = element_text(hjust = 0, face = "bold"))

grid.arrange(g, l, ncol = 2, widths = c(1,1.4))

}

5.3 Diet

Subsets for herbivores, carnivores, and omnivores

#Load species traits and taxonomy
# Traits <- read.csv('H:/MyDocuments/Luca-Density/M_Traits.csv')
Traits <- read.csv("data/density/fromLuca/M_Traits.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by.x = 'Spec.ID', by.y = 'tax_id', all.x=TRUE)

# Select data only
lpd.myspecies <- lpd %>%
    filter(Species.ID %in% observedspecies) %>%
    left_join(., Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by = c('Species.ID' ='tax_id'))

Herbs <- allspecies %>%
    filter(Diet == "Herbivore") %>%
    droplevels()
Carne <- allspecies %>%
    filter(Diet == "Carnivore")  %>%
    droplevels()
Omniv <- allspecies %>%
    filter(Diet == "Omnivore")  %>%
    droplevels()

## Modelled trends
trends.herbs <- data.frame(year = plotyears)
trends.carne <- data.frame(year = plotyears)
trends.omniv <- data.frame(year = plotyears)
lambdas.herbs <- data.frame(year = plotyears)

for (i in c(1:4)) {
    ii <- i+1
    dat <- all.trends[[i]]
    datH <- subset(dat, Species.ID %in% Herbs$Spec.ID)
    datC <- subset(dat, Species.ID %in% Carne$Spec.ID)
    datO <- subset(dat, Species.ID %in% Omniv$Spec.ID)
    trends.herbs[, ii] <- apply(datH[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    trends.carne[, ii] <- apply(datC[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    trends.omniv[, ii] <- apply(datO[-c(1:3)],
                             MARGIN = 2,
                             FUN = function(x) gm_mean(x))
    dat <- all.lambdas[[i]]
    datH <- subset(dat, Species.ID %in% Herbs$Spec.ID)
    # datC <- subset(dat, Species.ID %in% Carne$Spec.ID)
    # datO <- subset(dat, Species.ID %in% Omniv$Spec.ID)
    lambdas.herbs[, ii] <- apply(datH[-c(1:3)],
                             MARGIN = 2,
                           FUN = function(x) mean(x, na.rm = T))
    lambdas.herbs[is.nan(lambdas.herbs)] <- NA
    # lambdas.carne[, ii] <- apply(datC[-c(1:3)],
    #                          MARGIN = 2,
    #                          FUN = function(x) mean(x, na.rm = T))
    # lambdas.omniv[, ii] <- apply(datO[-c(1:3)],
    #                          MARGIN = 2,
    #                          FUN = function(x) mean(x, na.rm = T))
}

names(trends.herbs) <- nms
names(trends.carne) <- nms
names(trends.omniv) <- nms
names(lambdas.herbs) <- nms

trend_long_herbs <- gather(trends.herbs, key = "Trend.type", value = "trend", 
                          'Observed.abundance':'Modelled.abundance')
trend_long_carne <- gather(trends.carne, key = "Trend.type", value = "trend",
                          'Observed.abundance':'Modelled.abundance')
trend_long_omniv <- gather(trends.omniv, key = "Trend.type", value = "trend",
                          'Observed.abundance':'Modelled.abundance')
lambda_long_herbs <- gather(lambdas.herbs, key = "Trend.type", value = "trend",
                          'Observed.abundance':'Modelled.abundance')

trend_long_herbs$Trend.type <- factor(trend_long_herbs$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
trend_long_carne$Trend.type <- factor(trend_long_carne$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
trend_long_omniv$Trend.type <- factor(trend_long_omniv$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)
lambda_long_herbs$Trend.type <- factor(lambda_long_herbs$Trend.type, 
                                     levels=lvls,
                                     labels=lbls)

Table 5.1: Primary threat by diet group (# of populations)
	Disease	Exploitation	Habitat degradation/change	Habitat loss	Pollution
Carnivore	1	3	0	1	0
Herbivore	5	63	19	29	1
Omnivore	0	0	0	0	0

# h <- ggplot(data=trend_long_herbs,
#             aes(x=year, y=trend, colour=Trend.type )) +
#     geom_smooth(se=F, formula=y~x-1) +
#     scale_x_continuous(breaks = years) +
#     scale_y_continuous(minor_breaks = seq(0.25, 1.75, 0.025), 
#                        breaks = seq(0.2, 1.8, 0.1)) +
#     ggtitle(paste0("Trends for ", 
#                    nrow(Herbs), " African herbivores"))+
#     theme(plot.title = element_text(hjust = 0.5)) +
#     scale_color_discrete(name = "Trend type") +
#     theme_minimal()

herbs1 <- trend_long_herbs %>%
    filter(Trend.type %in% c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))
herbs2 <- lambda_long_herbs %>%
    filter(Trend.type %in% c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))

h <- ggplot(data=herbs1,
            aes(x=year, y=trend, colour=Trend.type )) +
    geom_smooth(lwd=0.5, se=T, method = loess) +
    geom_point(cex=0.5) +
    # geom_line(cex=0.1) +
    geom_abline(intercept = 1, slope = 0, lty =2) +
    scale_color_manual(values=cls) +
    ggtitle(paste0("Herbivores (", 
                   nrow(Herbs), " species)"),
           "Geometric mean species abundance, relative to 1992") +
 ylab(expression(GMA[t]~"/"~GMA[1992])) +
    theme_minimal()+
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")

h2 <- ggplot(data=herbs2,
            aes(x=year, y=trend, colour=Trend.type )) +
    geom_smooth(lwd=0.5, se=T, method = loess) +
    geom_point(cex=0.5) +
    # geom_line(cex=0.1) +
    geom_abline(intercept = 1, slope = 0, lty =2) +
    scale_color_manual(values=cls) +
        ggtitle("", "Annual multiplicative change (lambda)") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0)) +
        labs(y = "lambda")

grid.arrange(h, h2, ncol = 2, widths = c(1,1.5))

carns1 <- trend_long_carne %>% 
    filter(Trend.type %in%  c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))

c <- ggplot(data=carns1,
            aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Carnivores (", 
                       nrow(Carne), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
        annotate("text", x=2004, y = 1.5, hjust = 1, cex = 3,
                 label = paste0("and ", unique(lpd[lpd$Species.ID %in% Carne$Spec.ID,
                                           "Common_name"])[1], ")")) +
        annotate("text", x=2004, y = 1.75, hjust = 1, cex = 3,
                 label = paste0("(", 
                                unique(lpd[lpd$Species.ID %in% Carne$Spec.ID, 
                                           "Common_name"])[2]))

omns1 <- trend_long_omniv %>%
    filter(Trend.type %in%  c("Observed trend", "Habitat model",
                             "Density model", "Combined model"))

o <- ggplot(data=omns1,
            aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Omnivores (", 
                       nrow(Omniv), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold")) +
        ylab(expression(GMA[t]~"/"~GMA[1992])) +
        annotate("text", x=2004, y = 1.15, hjust = 1, cex = 3,
                 label = paste0("(", 
                                lpd[lpd$Species.ID == Omniv$Spec.ID, "Common_name"][1], 
                                ")"))

grid.arrange(c, o, ncol = 2, widths = c(1,1.4))

# k <- median(allspecies$BM_g)

Trends for herbivores are not bad. Especially when inspecting the annual change (lambda), which is relatively on-point, except between 1995-1999. But again, does it make sense to interpret the fit between models and observed data after aggregating to the index, if predictions for individual species are not good?

NB: I didnt plot lambdas for carnivores/omnivores

5.4 Body size

#Load species traits and taxonomy
Traits <- read.csv("data/density/fromLuca/M_Traits.csv")

Spec   <- data.frame(Spec.ID = observedspecies)
allspecies <- merge(Spec, Traits[,c('tax_id', 'Order', 'Family', 'BM_g', 'Diet')], 
                by.x = 'Spec.ID', by.y = 'tax_id', all.x=TRUE)

props <- seq(0, 1, 0.25)

allspecies$qnt<- cut(allspecies$BM_g , breaks=quantile(allspecies$BM_g, props),
                                    labels=1:(length(props)-1), include.lowest=TRUE)

## Modelled trends
plots <- list()
lplots <- list()
for (j in 1:nlevels(allspecies$qnt)) {
        trends.set <- data.frame(year = plotyears)
        lambdas.set <- data.frame(year = plotyears)
        set <- subset(allspecies, qnt == j)
        for (i in c(1:4)) {
                ii <- i+1
                dat <- all.trends[[i]]
                datS <- subset(dat, Species.ID %in% set$Spec.ID)
                trends.set[, ii] <- apply(datS[-c(1:3)],
                                          MARGIN = 2,
                                          FUN = function(x) gm_mean(x))
                dat <- all.lambdas[[i]]
                datH <- subset(dat, Species.ID %in% set$Spec.ID)
                lambdas.set[, ii] <- apply(datH[-c(1:3)],
                                           MARGIN = 2,
                                           FUN = function(x) mean(x, na.rm = T))
                    lambdas.set[is.nan(lambdas.set)] <- NA
        }
        names(trends.set) <- nms
        trends.set.long <- gather(trends.set, key = "Trend.type", value = "trend", 
                                  'Observed.abundance':'Modelled.abundance')
        trends.set.long$Trend.type <- factor(trends.set.long$Trend.type, 
                                             levels=lvls,
                                             labels=lbls)
        names(lambdas.set) <- nms
        lambdas.set.long <- gather(lambdas.set, key = "Trend.type", value = "trend", 
                                   'Observed.abundance':'Modelled.abundance')
        lambdas.set.long$Trend.type <- factor(lambdas.set.long$Trend.type, 
                                              levels=lvls,
                                              labels=lbls)

g <- ggplot(data=trends.set.long,
             aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Species body mass ",
                       round(quantile(allspecies$BM_g, props)[j]/1000, 0), "-", 
                       round(quantile(allspecies$BM_g, props)[j+1]/1000, 0), 
                       "kg (",
                       nrow(datS), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992]))

l <- ggplot(data=lambdas.set.long,
                      aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_line(cex=0.1) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        labs(y = "lambda") +
        theme(plot.title = element_text(hjust = 0, face = "bold"))

grid.arrange(g, l, ncol = 2, widths = c(1,1.4))

}

5.5 RMSE

RMSE1 <- RMSE[1:(nrow(RMSE)/3), ]
RMSE1$Spec.ID <- observedspecies
# ids <- as.numeric(rownames(subset(RMSE1, rmse < k)))  
# habspec <- observedspecies[ids[ids < 36]]

cuts <- c(0.05, 0.1, 0.2, 0.5)

## Modelled trends
plots <- list()
lplots <- list()
for (j in 1:4) {
        trends.set <- data.frame(year = plotyears)
        lambdas.set <- data.frame(year = plotyears)
        set <- subset(RMSE1, value < cuts[j])
        for (i in c(1:4)) {
                ii <- i+1
                dat <- all.trends[[i]]
                datS <- subset(dat, Species.ID %in% set$Spec.ID)
                trends.set[, ii] <- apply(datS[-c(1:3)],
                                          MARGIN = 2,
                                          FUN = function(x) gm_mean(x))
                dat <- all.lambdas[[i]]
                datH <- subset(dat, Species.ID %in% set$Spec.ID)
                lambdas.set[, ii] <- apply(datH[-c(1:3)],
                                           MARGIN = 2,
                                           FUN = function(x) mean(x, na.rm = T))        
                lambdas.set[is.nan(lambdas.set)] <- NA
        }
        names(trends.set) <- nms
        trends.set.long <- gather(trends.set, key = "Trend.type", value = "trend", 
                                  'Observed.abundance':'Modelled.abundance')
        trends.set.long$Trend.type <- factor(trends.set.long$Trend.type, 
                                             levels=lvls,
                                             labels=lbls)
        names(lambdas.set) <- nms
        lambdas.set.long <- gather(lambdas.set, key = "Trend.type", value = "trend", 
                                   'Observed.abundance':'Modelled.abundance')
        lambdas.set.long$Trend.type <- factor(lambdas.set.long$Trend.type, 
                                              levels=lvls,
                                              labels=lbls)

g <- ggplot(data=trends.set.long,
             aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle(paste0("Species model RMSE < ",
                       cuts[j], " (",
                       nrow(datS), " species)"),
                "Geometric mean species abundance, relative to 1992") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n") +
        ylab(expression(GMA[t]~"/"~GMA[1992]))

l <- ggplot(data=lambdas.set.long,
                      aes(x=year, y=trend, colour=Trend.type )) +
        geom_smooth(lwd=0.5, se=T, method = loess) +
        geom_point(cex=0.5) +
        geom_abline(intercept = 1, slope = 0, lty =2) +
        scale_color_manual(values=cls) +
        ggtitle("",
                "Annual multiplicative change (lambda)") +
        theme_minimal() +
        labs(y = "lambda") +
        theme(plot.title = element_text(hjust = 0, face = "bold"))

grid.arrange(g, l, ncol = 2, widths = c(1,1.4))

}

6 Species accuracy ranking

dt <- data.frame(Species = unique(lpd[lpd$Species.ID %in% observedspecies,
                                           "Common_name"])[-4],
                 Taxid = unique(lpd[lpd$Species.ID %in% observedspecies, 
                                            "Species.ID"]),
                 "LinearSlope_observed" = lmObs,
                 "LinearSlope_habitat" = lmHab,
                 "LinearSlope_density" = lmDens,
                 "LinearSlope_combined" = lmAbun,
                 'RMSE_habitat' = (rmseHab[, 1]),
                 'RMSE_density' = (rmseDens[, 1]),
                 'RMSE_combined' = (rmseAbun[, 1]),
                 "MAE_habitat" = (maeHab[, 1]),
                 'MAE_density' = (maeDens[, 1]),
                 "MAE_combined" = (maeAbun[, 1]))
dt <- dt[order(dt$'RMSE_habitat'), ]
rownames(dt) <- NULL

write.csv(dt, "outputs/Africa/Africa_goodness_of_fit_metrics.csv", row.names = F)

dt[, -c(1,2)] <- lapply(dt[, -c(1,2)], round, 2)
kable(dt, caption = "Mean species abundance error across years ordered by habitat-only RMSE")

Table 6.1: Mean species abundance error across years ordered by habitat-only RMSE
Species	Taxid	LinearSlope_observed	LinearSlope_habitat	LinearSlope_density	LinearSlope_combined	RMSE_habitat	RMSE_density	RMSE_combined	MAE_habitat	MAE_density	MAE_combined
Roan antelope	10167	0.01	0.00	0.00	0.01	0.01	0.17	0.18	0.01	0.14	0.15
Burchell’s zebra / Plains zebra / Common zebra	41013	0.01	0.00	-0.02	-0.02	0.02	0.06	0.10	0.01	0.05	0.07
Guereza / Magistrate colobus / Eastern black-and-white colobus	5143	0.01	0.00	0.00	0.00	0.02	0.06	0.07	0.02	0.05	0.06
Grey-cheeked mangabey	12309	0.01	0.00	-0.01	-0.01	0.02	0.05	0.05	0.02	0.04	0.04
African buffalo / Cape buffalo	21251	0.02	0.01	-0.02	-0.02	0.02	0.08	0.10	0.02	0.06	0.06
Giraffe	9194	-0.01	0.00	-0.02	-0.02	0.03	0.04	0.07	0.03	0.03	0.06
Greater kudu	22054	0.02	0.00	-0.02	-0.02	0.03	0.08	0.11	0.02	0.07	0.07
Impala / Black-faced impala	550	0.02	0.00	-0.03	-0.03	0.03	0.09	0.13	0.03	0.08	0.11
Nyala	22052	0.02	0.00	0.02	0.02	0.03	0.09	0.09	0.03	0.09	0.09
Sable antelope	10170	-0.01	0.01	0.00	0.01	0.04	0.19	0.20	0.03	0.17	0.18
Grant’s gazelle	8971	-0.02	0.00	-0.03	-0.03	0.05	0.13	0.13	0.04	0.12	0.12
Blue monkey	4221	0.01	0.00	-0.01	-0.01	0.05	0.12	0.11	0.05	0.09	0.08
Desert warthog / Cape warthog	41767	0.02	0.00	-0.02	-0.02	0.06	0.18	0.17	0.05	0.14	0.13
Southern white rhinoceros / Northern white rhinoceros	4185	0.03	0.00	-0.02	-0.02	0.06	0.09	0.11	0.06	0.08	0.09
White-bearded wildebeest / Blue wildebeest	5229	0.00	0.00	0.01	0.01	0.06	0.18	0.18	0.05	0.17	0.17
Kob	11036	-0.02	0.00	-0.02	-0.02	0.06	0.06	0.06	0.06	0.06	0.06
Common warthog / Desert warthog	41768	0.01	0.01	0.03	0.08	0.06	0.08	0.10	0.06	0.07	0.07
Sitatunga	22050	-0.01	-0.01	-0.01	-0.02	0.07	0.14	0.14	0.07	0.11	0.11
Thomson’s gazelle	8982	0.00	0.00	-0.03	-0.03	0.08	0.20	0.20	0.07	0.17	0.17
Bushpig	41770	0.06	0.00	-0.02	-0.02	0.09	0.17	0.18	0.08	0.12	0.13
Black rhinoceros	6557	-0.03	0.00	0.00	0.00	0.09	0.14	0.12	0.08	0.12	0.10
African lion / Asiatic lion	15951	0.06	0.00	-0.03	-0.03	0.10	0.21	0.21	0.08	0.16	0.15
Giant eland / Derby’s eland	44172	0.09	0.00	0.02	0.02	0.12	0.09	0.09	0.12	0.08	0.08
Mountain gorilla / Eastern lowland gorilla	39994	0.01	0.12	-0.01	0.10	0.13	0.16	0.23	0.12	0.15	0.18
Hartebeest / Swayne’s hartebeest	811	-0.03	0.01	-0.02	-0.02	0.13	0.14	0.16	0.10	0.11	0.13
Waterbuck	11035	0.08	0.01	-0.02	-0.02	0.14	0.19	0.20	0.13	0.17	0.18
Beisa / East African oryx	15571	0.00	0.00	-0.03	-0.03	0.16	0.31	0.30	0.14	0.28	0.27
Steenbok / Steenbuck	19308	-0.03	0.00	-0.01	-0.01	0.17	0.22	0.23	0.17	0.19	0.19
Common eland / Eland	22055	0.05	0.00	-0.02	-0.02	0.19	0.22	0.26	0.18	0.21	0.25
Common duiker / Grey duiker	21203	-0.05	0.00	0.03	0.03	0.19	0.28	0.28	0.18	0.22	0.22
Gerenuk	12142	-0.06	0.00	-0.03	-0.03	0.25	0.16	0.16	0.22	0.14	0.15
Cheetah	219	0.44	0.00	-0.03	-0.03	0.33	0.50	0.49	0.26	0.41	0.40
Bushbuck	22051	0.07	0.01	0.03	0.08	0.38	0.48	0.48	0.34	0.44	0.40
Klipspringer / Western klipspringer	15485	0.79	0.00	-0.03	-0.03	0.45	0.55	0.58	0.32	0.38	0.41
Hippopotamus	10103	1.73	0.09	-0.05	0.02	0.51	1.45	1.45	0.46	1.09	1.17

7 Conclusions so far

From these results, it seems that:

Including density-dynamics does not improve predictions of mammal abundance trends: there is not a great difference between model types (neither are very good). This is potentially due to the specification of the density model itself. Luca reran his model selection procedure on his African data, and this resulted in inclusion of annual mean temperature (amT) and precipitaion of the warmest quarter (Pwq) as environemntal predictors (compared to temperature seasonality for North America). With Africa centered on the equator, amT doesn’t actually vary that much over the study period, resulting in similar trend predictions between the habitat-only and habitat+density models.
There are still some obvious issues with the GLOBIO-4 landuse data (e.g., forests on the border between Egypt and Sudan). PBL will rerun the land allocation, but there isn’t an ETA for this. I may have to manually check whether my population coordinates are in obvious problem areas, but such visual inspection wouldn’t fully put me at ease about this.
I might need to look into what detirmines prediction accuracy for individual species, (i.e., is RMSE predictable from species traits, landscape features, known species history, etc.) Would like to discuss ideas for this…

Predicting mammal population trends using GLOBIO-4 in Africa

in 0.50 degree buffer around specific LPI coordinates – v2

Judith Ament

Monday 22 April 2019