1 Setup

## -- Packages -- ####
library(data.table)
library(tidyverse)
library(ggplot2)
library(knitr)
library(taRifx)
library(zoo)
library(mgcv)
library(Metrics) # rmse
library(grid)
library(gridExtra)
library(ggpubr)
library(boot)
library(ggsci)
library(ggsignif)

## -- Functions -- ####
gm_mean = function(x, na.rm=TRUE) {
    exp(sum(log(x[x > 0]), na.rm = na.rm) / length(x))
}

source("code/FUN_multiplot.R")

2 Data

Predictions in buffer around observed location
Species for which the exact geographic coordinates of the population abundance records in the Living Planet Database are known. A buffer with 0.5 degree radius was drawn around these coordinates and abundance trends were calculated for suitable habitat inside these buffers only.

buffer = "050"
# version = "v2-by-popid"
version = ""
# dir <- "outputs/Africa/Predictions/ESA-CCI-Buffer-05-byPop-SpeciesSpecific"
dir <- "outputs/Africa/Predictions/ESA-CCI-Buffer-05-byPop-SpeciesSpecific/"
params=list(lpdfile = "data/biodiversity/Africa/LPI_pops_Africa_spec_loc_observed.csv",
            habfile = paste0(dir, version,
                             "/Buffer-", buffer,
                             "-Total-Suitable-Area-by-Year-InSuit.csv"),
            densfile_insuit = paste0(dir, version,
                              "/Buffer-", buffer,
                              "-Mean-Density-by-Year-InSuit.csv"),
            abunfile_insuit = paste0(dir, version,
                              "/Buffer-", buffer,
                              "-Total-Abundance-by-Year-InSuit.csv"),
            densfile_allLUC = paste0(dir, version,
                              "/Buffer-", buffer,
                              "-Mean-Density-by-Year-allLcc.csv"),
            abunfile_allLUC = paste0(dir, version,
                              "/Buffer-", buffer,
                              "-Total-Abundance-by-Year-allLcc.csv"))
rm(buffer, version, dir)

## -- Read data -- ####
years <- c(1992:2013)
source("code/FUN_interpolate.R")
lpd <- read.csv(params$lpdfile, na.strings = "NULL")
lpd$myid <- c(1:nrow(lpd))

## BUFFER
raw.hab <- read.csv(params$habfile)
raw.dens.suit <- read.csv(params$densfile_insuit)
raw.abun.suit <- read.csv(params$abunfile_insuit)
raw.dens.allLU <- read.csv(params$densfile_allLUC)
raw.abun.allLU <- read.csv(params$abunfile_allLUC)

# trends.hab <- trends.hab[complete.cases(trends.hab), ]
# trends.dens.suit  <- trends.dens.suit [complete.cases(trends.dens.suit ), ]
# trends.abun.suit  <- trends.abun.suit [complete.cases(trends.abun.suit ), ]
# trends.dens.allLU <- trends.dens.allLU[complete.cases(trends.dens.allLU), ]
# trends.abun.allLU <- trends.abun.allLU[complete.cases(trends.abun.allLU), ]

# First column contains pop.id from LPI
names(raw.hab)[1] <- "Pop.ID"
names(raw.dens.suit )[1] <- "Pop.ID"
names(raw.abun.suit )[1] <- "Pop.ID"
names(raw.dens.allLU)[1] <- "Pop.ID"
names(raw.abun.allLU)[1] <- "Pop.ID"

# Populations for which we have all predictions
mypops <- raw.hab[raw.hab$X1992 >0, "Pop.ID"]
mypops <- mypops[mypops %in% raw.dens.suit [raw.dens.suit $X1992 >0, "Pop.ID"]]
mypops <- mypops[mypops %in% raw.abun.suit [raw.abun.suit $X1992 >0, "Pop.ID"]]
mypops <- mypops[mypops %in% raw.dens.allLU[raw.dens.allLU$X1992 >0, "Pop.ID"]]
mypops <- mypops[mypops %in% raw.abun.allLU[raw.abun.allLU$X1992 >0, "Pop.ID"]]

myspecies <- lpd$Species.ID[lpd$ID %in% mypops]

raw.hab        <- raw.hab %>% filter(Pop.ID %in% mypops) %>% mutate(Species.ID = myspecies)
raw.dens.suit  <- raw.dens.suit %>% filter(Pop.ID %in% mypops) %>% mutate(Species.ID = myspecies)
raw.abun.suit  <- raw.abun.suit %>% filter(Pop.ID %in% mypops) %>% mutate(Species.ID = myspecies)
raw.dens.allLU <- raw.dens.allLU %>% filter(Pop.ID %in% mypops) %>% mutate(Species.ID = myspecies)
raw.abun.allLU <- raw.abun.allLU %>% filter(Pop.ID %in% mypops) %>% mutate(Species.ID = myspecies)

# Select data only
lpd.data <- lpd %>%
    # filter(Species.ID %in% myspecies) %>%
    dplyr::select(ID, Binomial, Species.ID, Common_name, myid, X1950:X2015)

lpd.data$Binomial <- as.character(lpd.data$Binomial)

lpd.data[, -c(1:5)] <- apply(lpd.data[, -c(1:5)], 2, as.numeric)
colnames(lpd.data) <- c(colnames(lpd.data[1:5]), 1950:2015)

lpd.interpolated <- lpd.data
lpd.interpolated[, -c(1:5)] <- t(apply(lpd.data[, -c(1:5)], 1, 
                                interpolate))

lpd.interpolated[which(lpd.interpolated[, "1992"] == 0), "1992"] <- NA # first year cannot be 0
lpd.interpolated <- lpd.interpolated[!is.na(lpd.interpolated[, "1992"]), ] #first year cannot be NA

lpd.interpolated <- droplevels(lpd.interpolated)

# subset species
lpd.interpolated <- lpd.interpolated[lpd.interpolated$Species.ID %in% myspecies, ]

## -- Plot sample size over time ####
studyyears <- as.character(years)

l <- lpd.interpolated[studyyears]

# Get number of years data for each population (long-format)
t <- data.frame(Species.ID = lpd.interpolated$Species.ID, 
                # Pop.ID = lpd.interpolated$ID, 
                # nyears = rowSums(!is.na(l))) # Last data point == First NA-point -1
                nyears = max.col(is.na(l), "first")-1) # Last data point == First NA-point -1

# Group number of years per population by species; retain maximum
t2 <- t %>%
  dplyr::group_by(Species.ID) %>%
  dplyr::summarise(maxyear = max(nyears)+1991) # column 1 = 1992; 1 year of data -> maxyear == 1992 
  
df <- data.frame(year = years, ss = NA)
for (i in years) {
  df[df$year == i, "ss"] <- sum(t2$maxyear >= i)
}

ggplot(df, aes(x=year, y=ss))  +
        geom_vline(xintercept = 2005, col = "grey30", lty = 2) +
        geom_hline(yintercept = df[df$year == 2005, "ss"], col = "grey30", lty = 2) +
        # annotate("segment", x1=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        # annotate("rect", xmin=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        geom_point() +
        # geom_smooth(se=F) +
        xlab("Cutoff year") +
        ylab("Number of species in sample") +
        theme_minimal() +
        theme(axis.line.x = element_line(),
              axis.line.y = element_line())

## -- Plot sample size over time ####
studyyears <- as.character(years)

l <- lpd.interpolated[studyyears]

# Get number of years data for each population (long-format)
t <- data.frame(Pop.ID = lpd.interpolated$ID, 
                # Pop.ID = lpd.interpolated$ID, 
                # nyears = rowSums(!is.na(l))) # Last data point == First NA-point -1
                nyears = max.col(is.na(l), "first")-1) # Last data point == First NA-point -1

# Group number of years per population by species; retain maximum
t2 <- t %>%
  dplyr::group_by(Pop.ID) %>%
  dplyr::summarise(maxyear = max(nyears)+1991) # column 1 = 1992; 1 year of data -> maxyear == 1992 
  
df <- data.frame(year = years, ss = NA)
for (i in years) {
  df[df$year == i, "ss"] <- sum(t2$maxyear >= i)
}

ggplot(df, aes(x=year, y=ss))  +
        geom_vline(xintercept = 2005, col = "grey30", lty = 2) +
        geom_hline(yintercept = df[df$year == 2005, "ss"], col = "grey30", lty = 2) +
        # annotate("segment", x1=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        # annotate("rect", xmin=1992, xmax=2005, ymin=0, ymax=35, fill="grey30", alpha=0.5) +
        geom_point() +
        # geom_smooth(se=F) +
        xlab("Cutoff year") +
        ylab("Number of populations in sample") +
        theme_minimal() +
        theme(axis.line.x = element_line(),
              axis.line.y = element_line())

# Set years to the new range
years   <- c(1992:2005)
# years   <- c(1992, 1995, 2000, 2005)

# Subset years
lpd.interpolated <- lpd.interpolated[, c(1:4, which(names(lpd.interpolated) %in% years))]

lpd.interpolated <- lpd.interpolated[complete.cases(lpd.interpolated), ]

observedspecies <- unique(lpd.interpolated$Species.ID) # have already intersected with unique output species 
observedpops   <- unique(lpd.interpolated$ID)

selectyears <- function(x) {
df <- data.frame(Species.ID = x[["Species.ID"]], Pop.ID = x[["Pop.ID"]])
        for (y in years) {
                # if (y %in% names(x)) {
                        df[[as.character(y)]] <- x[[paste0("X", as.character(y))]]
                # } else {df[[as.character(y)]] <- NA}
        }
        return(df)
}

raw.hab        <- selectyears(raw.hab)
raw.dens.suit  <- selectyears(raw.dens.suit )
raw.abun.suit  <- selectyears(raw.abun.suit )
raw.dens.allLU <- selectyears(raw.dens.allLU)
raw.abun.allLU <- selectyears(raw.abun.allLU)

raw.hab        <- raw.hab %>% filter(Pop.ID %in% observedpops)
raw.dens.suit  <- raw.dens.suit  %>% filter(Pop.ID %in% observedpops) 
raw.abun.suit  <- raw.abun.suit  %>% filter(Pop.ID %in% observedpops)
raw.dens.allLU <- raw.dens.allLU %>% filter(Pop.ID %in% observedpops) 
raw.abun.allLU <- raw.abun.allLU %>% filter(Pop.ID %in% observedpops)

finalpops <- unique(raw.hab$Pop.ID)
finalspecies <- unique(raw.hab$Species.ID)

lpd.interpolated <- filter(lpd.interpolated, ID %in% finalpops)

3 Predictions of population trends

index.obs <- NULL
lambda.obs <- NULL
  
rmseHab <- NULL
rmseDens.s <- NULL
rmseAbun.s <- NULL
rmseDens.a <- NULL
rmseAbun.a <- NULL

biasHab <- NULL
biasDens.s <- NULL
biasAbun.s <- NULL
biasDens.a <- NULL
biasAbun.a <- NULL

lmObs <- NULL
lmHab <- NULL
lmDens.s <- NULL
lmAbun.s <- NULL
lmDens.a <- NULL
lmAbun.a <- NULL

lmObs.p <- NULL
lmHab.p <- NULL
lmDens.p.s <- NULL
lmAbun.p.s <- NULL
lmDens.p.a <- NULL
lmAbun.p.a <- NULL

plotyears <- c(1992:2005)

# source("code/FUN_getlambdas.R")

out <- list(raw.hab, raw.dens.allLU, raw.dens.suit, raw.abun.suit)
nms  <- c("year", 'Observed.abundance', 
          "Modelled.habitat", "Modelled.dens.allLU", 
          "Modelled.dens.insuit", "Modelled.abun.insuit") 
#, "Modelled.abun.allLU"
lvls <- c('Observed.abundance', 
          "Modelled.habitat", "Modelled.dens.allLU", 
          "Modelled.dens.insuit", "Modelled.abun.insuit") #, "Modelled.abun.allLU"
lbls <- c("Observed", 
          "ESH", "Density", 
          "Density in ESH", "Abundance in ESH") #, "Abundance model (all LU classes)"
# cls  <- c("#ff7f00", "#377eb8", "#4daf4a", "#984ea3", "#fc8d59", "#1f78b4")
cls3 <- pal_locuszoom()(4)[c(3,2,4)]
cls1 <- pal_jco()(3)[c(3)]
cls5 <- c(cls3, "#ef6548", "#1f78b4")
cls <- c(cls1, cls3, "#ef6548", "#1f78b4")[c(1,2,5,3,4)]
# scales::show_col(cls)

for (sp in finalpops) {         # sp = selected population
        
        ## Modelled trends
        
        index <- data.frame(year = years)
        lambdas <- data.frame(year = years)
        # print(sp)
        
        # OBSERVED TIMESERIES
        
        # lpd.d <- subset(lpd.interpolated, Species.ID == sp)
        lpd.d <- subset(lpd.interpolated, ID == sp)
        
        index$'Observed.abundance' = apply(lpd.d[,-c(1,2,3,4)],
                                            MARGIN = 2,
                                            FUN = function(x) {
                                                    # gm_mean accross populations:
                                                    # gm_mean(x)/gm_mean(y) == gm_mean(x/y)
                                                    if (all(is.na(x))) return(NA) 
                                                    else gm_mean(x/lpd.d[,5]) 
                                            })
        
        ## Observed lambdas
        temp <- list()
        for (pop in 1:nrow(lpd.d)){
                datpop <- as.numeric(lpd.d[pop, -c(1,2,3,4)])
                # temp[[pop]] <- as.numeric(getlambdas(datpop))
                temp[[pop]] <- diff(log10(datpop))
        }
        test <- as.data.frame(do.call(rbind, temp))
        names(test) <- plotyears[2:length(plotyears)]
        lambdas$'Observed.abundance' <- c(NA, colMeans(test, na.rm = T))
        
        # MODELLED TIMESERIES
        
        for (i in 1:length(out)) {
                ii <- i+2
                
                dat <- as.data.frame(out[[i]])
                dat <- subset(dat, Pop.ID == sp)
                
                index[, ii] <- apply(dat[-which(names(dat)%in% c("Species.ID", "Pop.ID"))],
                                      MARGIN = 2, # over each year (i.e. column)
                                      FUN = function(x) gm_mean(x/dat[,3]))
                
                dat <- dat[-which(names(dat)%in% c("Species.ID", "Pop.ID"))]
                temp <- list()
                for (p in 1:nrow(dat)) {
                        if (all(is.na(dat[p, ]))) {
                                temp[[p]] <- rep(NA, 4) # 14 for full window
                        } else {
                                names(dat[p, ]) <- years
                                datpop <- apply(dat[p, ], 2, as.numeric)
                                temp[[p]] <- diff(log10(datpop))
                        }
                }
                test <- as.data.frame(do.call(rbind, temp))
                # names(test) <- 1993:2005
                names(test) <- plotyears[2:length(plotyears)]
                lambdas[, ii] <- c(NA, colMeans(test, na.rm = T))
        }
        
        names(index) <- nms
        names(lambdas) <- nms
        
        index[is.na(index$Observed.abundance), 3:length(index)] <- NA
        lambdas[is.na(lambdas$Observed.abundance), 3:length(index)] <- NA
        
        # Key to not compare interpolated trends (can only be linear)
        index <- subset(index, year %in% plotyears)
        lambdas <- subset(lambdas, year %in% plotyears)
        
        ## Row bind trends for all species
        t1 <- cbind(lpd.d[1, c(1,2,3,4)], t(index$Observed.abundance))
        t2 <- cbind(lpd.d[1, c(1,2,3,4)], t(index$Modelled.habitat))
        t3 <- cbind(lpd.d[1, c(1,2,3,4)], t(index$Modelled.dens.insuit))
        t4 <- cbind(lpd.d[1, c(1,2,3,4)], t(index$Modelled.abun.insuit))
        t5 <- cbind(lpd.d[1, c(1,2,3,4)], t(index$Modelled.dens.allLU))
        # t6 <- cbind(lpd.d[1, c(1,2,3,4)], t(index$Modelled.abun.allLU))
        # t4 <- cbind(lpd.d[1,c(1,2,3)], t(trends$`Temperature.(SD)`))
        # t5 <- cbind(lpd.d[1,c(1,2,3)], t(trends$`Precipitation.(WQ)`))
        
        if (is.null(index.obs)) {
                index.obs <- t1
                index.hab <- t2
                index.dens.insuit <- t3
                index.abun.insuit <- t4
                index.dens.allLU <- t5
                # index.abun.allLU <- t6
        } else {
                index.obs               <- rbind(index.obs, t1)
                index.hab               <- rbind(index.hab, t2)
                index.dens.insuit       <- rbind(index.dens.insuit, t3)
                index.abun.insuit       <- rbind(index.abun.insuit, t4)
                index.dens.allLU        <- rbind(index.dens.allLU, t5)
                # index.abun.allLU        <- rbind(index.abun.allLU, t6)
        }
        
        ## Row bind lambdas for all species
        l1 <- cbind(lpd.d[1, c(1,2,3,4)], t(lambdas$Observed.abundance))
        l2 <- cbind(lpd.d[1, c(1,2,3,4)], t(lambdas$Modelled.habitat))
        l3 <- cbind(lpd.d[1, c(1,2,3,4)], t(lambdas$Modelled.dens.insuit))
        l4 <- cbind(lpd.d[1, c(1,2,3,4)], t(lambdas$Modelled.abun.insuit))
        l5 <- cbind(lpd.d[1, c(1,2,3,4)], t(lambdas$Modelled.dens.allLU))
        # l6 <- cbind(lpd.d[1, c(1,2,3,4)], t(lambdas$Modelled.abun.allLU))
        
        if (is.null(lambda.obs)) {
                lambda.obs <- l1
                lambda.hab <- l2
                lambda.dens.insuit <- l3
                lambda.abun.insuit <- l4
                lambda.dens.allLU <- l5
                # lambda.abun.allLU <- l6
        } else {
                lambda.obs <- rbind(lambda.obs, l1)
                lambda.hab <- rbind(lambda.hab, l2)
                lambda.dens.insuit <- rbind(lambda.dens.insuit, l3)
                lambda.abun.insuit <- rbind(lambda.abun.insuit, l4)
                lambda.dens.allLU  <- rbind(lambda.dens.allLU, l5)
                # lambda.abun.allLU  <- rbind(lambda.abun.allLU, l6)
        }
        
        # -- calculate validation metrics
        
        llambdas <- lambdas[-1, ]  # first year doesnt have lambda
        
        if (is.null(biasHab)) {
                biasHab     <- -bias(llambdas$Observed.abundance, llambdas$Modelled.habitat)
                biasDens.s  <- -bias(llambdas$Observed.abundance, llambdas$Modelled.dens.insuit)
                biasAbun.s  <- -bias(llambdas$Observed.abundance, llambdas$Modelled.abun.insuit)
                biasDens.a  <- -bias(llambdas$Observed.abundance, llambdas$Modelled.dens.allLU)
                # biasAbun.a  <- -bias(llambdas$Observed.abundance, llambdas$Modelled.abun.allLU)
        } else {
                biasHab    <- c(biasHab,    -bias(llambdas$Observed.abundance, llambdas$Modelled.habitat))
                biasDens.s <- c(biasDens.s, -bias(llambdas$Observed.abundance, llambdas$Modelled.dens.insuit))
                biasAbun.s <- c(biasAbun.s, -bias(llambdas$Observed.abundance, llambdas$Modelled.abun.insuit))
                biasDens.a <- c(biasDens.a, -bias(llambdas$Observed.abundance, llambdas$Modelled.dens.allLU))
                # biasAbun.a <- c(biasAbun.a, -bias(llambdas$Observed.abundance, llambdas$Modelled.abun.allLU))
        }
        
        if (is.null(rmseHab)) {
                rmseHab     <- rmse(llambdas$Observed.abundance, llambdas$Modelled.habitat)
                rmseDens.s  <- rmse(llambdas$Observed.abundance, llambdas$Modelled.dens.insuit)
                rmseAbun.s  <- rmse(llambdas$Observed.abundance, llambdas$Modelled.abun.insuit)
                rmseDens.a  <- rmse(llambdas$Observed.abundance, llambdas$Modelled.dens.allLU)
                # rmseAbun.a  <- rmse(llambdas$Observed.abundance, llambdas$Modelled.abun.allLU)
        } else {
                rmseHab    <- c(rmseHab,    rmse(llambdas$Observed.abundance, llambdas$Modelled.habitat))
                rmseDens.s <- c(rmseDens.s, rmse(llambdas$Observed.abundance, llambdas$Modelled.dens.insuit))
                rmseAbun.s <- c(rmseAbun.s, rmse(llambdas$Observed.abundance, llambdas$Modelled.abun.insuit))
                rmseDens.a <- c(rmseDens.a, rmse(llambdas$Observed.abundance, llambdas$Modelled.dens.allLU))
                # rmseAbun.a <- c(rmseAbun.a, rmse(llambdas$Observed.abundance, llambdas$Modelled.abun.allLU))
        }
        
        # if (is.null(rmseHab)) {
        #         rmseHab     <- rmse(index$Observed.abundance, index$Modelled.habitat)
        #         rmseDens.s  <- rmse(index$Observed.abundance, index$Modelled.dens.insuit)
        #         rmseAbun.s  <- rmse(index$Observed.abundance, index$Modelled.abun.insuit)
        #         rmseDens.a  <- rmse(index$Observed.abundance, index$Modelled.dens.allLU)
        #         # rmseAbun.a  <- rmse(index$Observed.abundance, index$Modelled.abun.allLU)
        # } else {
        #         rmseHab    <- c(rmseHab,    rmse(index$Observed.abundance, index$Modelled.habitat))
        #         rmseDens.s <- c(rmseDens.s, rmse(index$Observed.abundance, index$Modelled.dens.insuit))
        #         rmseAbun.s <- c(rmseAbun.s, rmse(index$Observed.abundance, index$Modelled.abun.insuit))
        #         rmseDens.a <- c(rmseDens.a, rmse(index$Observed.abundance, index$Modelled.dens.allLU))
        #         # rmseAbun.a <- c(rmseAbun.a, rmse(index$Observed.abundance, index$Modelled.abun.allLU))
        # }
        
        # -- linear trends
        if (is.null(lmObs)) {
                lmObs    <- lm(index$Observed.abundance~plotyears)$coefficients[[2]]
                lmHab    <- lm(index$Modelled.habitat~plotyears)$coefficients[[2]]
                lmDens.s <- lm(index$Modelled.dens.insuit~plotyears)$coefficients[[2]]
                lmAbun.s <- lm(index$Modelled.abun.insuit~plotyears)$coefficients[[2]]
                lmDens.a <- lm(index$Modelled.dens.allLU~plotyears)$coefficients[[2]]
                # lmAbun.a <- lm(index$Modelled.abun.allLU~plotyears)$coefficients[[2]]
                 
        } else {
                lmObs    <- c(lmObs,    lm(index$Observed.abundance~plotyears)$coefficients[[2]])
                lmHab    <- c(lmHab,    lm(index$Modelled.habitat~plotyears)$coefficients[[2]])
                lmDens.s <- c(lmDens.s, lm(index$Modelled.dens.insuit~plotyears)$coefficients[[2]])
                lmAbun.s <- c(lmAbun.s, lm(index$Modelled.abun.insuit~plotyears)$coefficients[[2]])
                lmDens.a <- c(lmDens.a, lm(index$Modelled.dens.allLU~plotyears)$coefficients[[2]])
                # lmAbun.a <- c(lmAbun.a, lm(index$Modelled.abun.allLU~plotyears)$coefficients[[2]])
        }
        
        # -- p-values of linear trends
        if (is.null(lmObs.p)) {
                lmObs.p    <- summary(lm(index$Observed.abundance~plotyears))$coefficients[2,4]
                lmHab.p    <- summary(lm(index$Modelled.habitat~plotyears))$coefficients[2,4]
                lmDens.p.s <- summary(lm(index$Modelled.dens.insuit~plotyears))$coefficients[2,4]
                lmAbun.p.s <- summary(lm(index$Modelled.abun.insuit~plotyears))$coefficients[2,4]
                lmDens.p.a <- summary(lm(index$Modelled.dens.allLU~plotyears))$coefficients[2,4]
                # lmAbun.p.a <- summary(lm(index$Modelled.abun.allLU~plotyears))$coefficients[2,4]
        } else {
                lmObs.p    <- c(lmObs.p,    summary(lm(index$Observed.abundance~plotyears))$coefficients[2,4])
                lmHab.p    <- c(lmHab.p,    summary(lm(index$Modelled.habitat~plotyears))$coefficients[2,4])
                lmDens.p.s <- c(lmDens.p.s, summary(lm(index$Modelled.dens.insuit~plotyears))$coefficients[2,4])
                lmAbun.p.s <- c(lmAbun.p.s, summary(lm(index$Modelled.abun.insuit~plotyears))$coefficients[2,4])
                lmDens.p.a <- c(lmDens.p.a, summary(lm(index$Modelled.dens.allLU~plotyears))$coefficients[2,4])
                # lmAbun.p.a <- c(lmAbun.p.a, summary(lm(index$Modelled.abun.allLU~plotyears))$coefficients[2,4])
        }
        
        ## Trends
        index_long <- gather(index, key = "Index.type", value = "index",
                                   nms[2:6])
        
        index_long$Index.type <- factor(index_long$Index.type,
                                              levels=lvls,
                                              labels=lbls)
        
        g <- ggplot(data=index_long,
                    aes(x=year, y=index, colour=Index.type)) +
                geom_smooth(lwd=0.5, se=F, method = loess) +
                geom_point(cex=0.5) +
                scale_color_manual(values = cls) +
                # scale_x_continuous(breaks = years) +
                ggtitle(paste0(lpd.d$Common_name[1]),#" (n = ", #lpd.d$Binomial[1],
                               # nrow(lpd.d), ")"),
                        "Population index relative to 1992") +
                labs(y = "Index") +
                # labs(y = expression(GMA[t]~"/"~GMA[1992])) +
                theme_minimal() +
                theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "n")
        
        ## Rate of change (lambda)
        lambdas_data_long <- gather(lambdas, key = "Lambda.type", value = "lambda",
                                    nms[2:6])
        
        lambdas_data_long$Lambda.type <- factor(lambdas_data_long$Lambda.type,
                                                levels=lvls,
                                                labels=lbls)
        
        l <- ggplot(data=lambdas_data_long,
                    aes(x=year, y=lambda, colour=Lambda.type)) +
                geom_smooth(lwd=0.5, se=F, method = loess) +
                geom_point(cex=0.5) +
                # scale_x_continuous(breaks = years) +
                scale_color_manual(values = cls) +
                ggtitle("",
                        expression("Annual multiplicative change"~(lambda))) +
                labs(y = expression(lambda)) +
                theme_minimal() +
                theme(plot.title = element_text(hjust = 0, face = "bold"), 
                      legend.position = "right") +
                labs(colour = "")
        
        grid.arrange(g, l, ncol = 2, widths = c(1,1.5))
}
    
all.indices <- list(index.obs, index.hab, index.dens.allLU, 
                    index.dens.insuit, index.abun.insuit)
all.lambdas <- list(lambda.obs, lambda.hab, lambda.dens.allLU, 
                    lambda.dens.insuit, lambda.abun.insuit)

rm(l, g, dat, index, lambdas, sp, lpd.d, temp, test, datpop, i, ii, llambdas, 
   lambdas_data_long, index_long, p, pop)
rm(index.obs, index.hab, index.dens.allLU, index.abun.insuit, index.dens.insuit)
rm(lambda.obs, lambda.hab, lambda.dens.allLU, lambda.abun.insuit, lambda.dens.insuit)
rm(l1, l2, l3, l4, l5)
rm(t1, t2, t3, t4, t5)
rm(params, cls1, cls3, cls5)

4 Aggregating to multi-species index

Trends in geometric mean abundance across all species
With bootstrapped confidence intervals (10000 random samples of species)

# Bootstrap 95% CI for regression coefficients 

# function to obtain regression weights 
bs <- function(data, indices) {
  d <- data[indices,] # allows boot to select sample 
  gma <- apply(d[, -c(1,2)],
                          MARGIN = 2,
                          FUN = function(x) mean(x))
  return(gma) 
}

# is.nan.data.frame <- function(x) {
# do.call(cbind, lapply(x, is.nan)) }

R <- 10000 # number of bootstrap samples

## Modelled trends
lambdas <- data.frame(year = plotyears)
indices <- data.frame(year = years)
std.err <- data.frame(year = plotyears)
indices.low <- data.frame(year = plotyears)
indices.upp <- data.frame(year = plotyears)
lambdas.low <- data.frame(year = plotyears)
lambdas.upp <- data.frame(year = plotyears)

# betas <- data.frame(row.names = lvls, "index" = NA, 
#                     "lower" = NA, "upper" = NA, rwnms = lvls)
# betas <- betas[, -which(names(betas) == "rwnms")]

for (i in c(1:length(all.lambdas))) {
    dat <- all.lambdas[[i]]
    # dat <- subset(dat, Species.ID %in% observedspecies)
    # dat <- subset(dat, ID %in% finalpops)
    names(dat)[5:length(dat)] <- years
    ii <- i+1
    
    # Average lambdas by species: average across populations
    dat.avg <- dat %>% group_by(Species.ID) %>% summarise_at(vars(as.character(years)), mean, na.rm=T)
    
    # Bootstrapping lambdas with 1000 replications: average across species
    results <- boot(data=dat.avg, statistic=bs, R=R)
    
    lambdas[1, ii] <- 0
    lambdas[2:14, ii] <- results$t0
    
    indices[1, ii] <- 1
    for (t in c(2:14)) {
            indices[t, ii] <- indices[(t-1), ii]*(10^(lambdas[t, ii]))
    }
    
    # get 95% CI of the mean lambda across speies
    lambdas.low[1, ii] <- 0
    lambdas.upp[1, ii] <- 0
    for (y in 1:13) {
            lambdas.low[y+1, ii] <- boot.ci(results, type="norm", index=y)$normal[2]
            lambdas.upp[y+1, ii] <- boot.ci(results, type="norm", index=y)$normal[3]
    }
    
    # Convert 95% CI to index scale
    indices.low[1, ii] <- 1
    indices.upp[1, ii] <- 1
    for (t in c(2:14)) {
           indices.low[t, ii] <- indices.low[(t-1), ii]*(10^(lambdas.low[t, ii]))
           indices.upp[t, ii] <- indices.upp[(t-1), ii]*(10^(lambdas.upp[t, ii]))
    }
    
    std.err.lambda <- c(0, apply(results$t, 2, sd))
    std.err[1, ii] <- 1
    for (t in c(2:14)) {
            std.err[t, ii] <- std.err[(t-1), ii]*(10^(std.err.lambda[t]))
    }
}

names(lambdas) <- nms
names(indices) <- nms
names(std.err) <- nms
names(indices.low) <- nms
names(indices.upp) <- nms
names(lambdas.low) <- nms
names(lambdas.upp) <- nms

indices_long <- gather(indices, key = "key", value = "value", nms[2:6])
indices_long$key <- factor(indices_long$key, levels=lvls, labels=lbls)

std.err_long <- gather(std.err, key = "key", value = "value", nms[2:6])
std.err_long$key <- factor(std.err_long$key, levels=lvls, labels=lbls)

# Transpose Confidence Intervals to long format
ci.low.long <- gather(indices.low, key = "key", value = "value", nms[2:6])
ci.low.long$key <- factor(ci.low.long$key, levels=lvls, labels=lbls)

ci.upp.long <- gather(indices.upp, key = "key", value = "value", nms[2:6])
ci.upp.long$key <- factor(ci.upp.long$key, levels=lvls, labels=lbls)

indices_long <- cbind(indices_long, std.err_long$value, ci.low.long$value, ci.upp.long$value)
names(indices_long) <- c("year", "key", "value", "se", "lower", "upper")

allmods <- indices %>% gather(key = "key", value = "index", nms[2:6])
allmods$key <- factor(allmods$key, levels=lvls, labels=lbls)

indices.observed <- indices_long %>% filter(key == "Observed") %>%
        dplyr::select(., -key)

indices.modelled <- indices_long %>% filter(key != "Observed")

rm(ci.upp.long, ci.low.long, std.err_long)

dt.plot.obs <- indices.observed
dt.plot.mod <- indices.modelled

ggplot(data = dt.plot.mod, aes(x=year, y=value)) +
  
        # OBSERVED
        geom_point(data = dt.plot.obs, color = cls[1]) +
        geom_errorbar(data = dt.plot.obs, color = cls[1], width = 0.2,
                      aes(ymin = value-((lower-value)/(-1.96)), 
                          ymax = value+((upper-value)/(1.96)))) +
        geom_line(data = dt.plot.obs, lwd=0.5, color = cls[1]) +
        geom_ribbon(data = dt.plot.obs, lwd=0.5,
                    aes(x = year,
                        ymin = lower,
                        ymax = upper),
                    # ymin = index-1.96*se,
                    # ymax = index+1.96*se),
                    color = alpha(cls[1], 0),
                    fill = alpha(cls[1], 0.2)) +
        
        # MODELS
        geom_point(aes(colour=key, group=key)) +
        # geom_errorbar(aes(ymin=value-((lower-value)/(-1.96)),
        #                   ymax=value+((upper-value)/(1.96)),
        #                   width = 0.2, colour=key, group=key)) +
        # geom_errorbar(aes(ymin=index-se, ymax=index+se))
        geom_line(lwd=0.5, aes(colour=key, group=key)) +
        # geom_ribbon(lwd=0.5,
        #             aes(x = year,
        #                 ymin = lower,
        #                 ymax = upper,
        #                 fill = key,
        #                 group = key,
        #                 colour = key),
        #             color = alpha(1, 0)) +
        # ylim(c(0, 2.5)) +

        scale_colour_manual(values = cls, drop = F) +
        scale_fill_manual(values = alpha(cls, 0.2), drop = F) +
        facet_wrap(~key) +

        ggtitle(paste0("Change in ", length(finalpops)," populations of ",
                       length(observedspecies),
                       " African mammals"),
                "Geometric mean species abundance, relative to 1992") +
        # ylab(expression(GMA[t]~"/"~GMA[1992])) +
        ylab("Multi-species index") +
        # scale_x_continuous(breaks = c(1992:2005)) +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0, face = "bold"), legend.position = "top") +
        labs(colour = "", fill = "")

5 Validation metrics

RMSE <- rbind(data.frame(value = rmseHab,    model = lbls[2], popid = observedpops),
              data.frame(value = rmseDens.a, model = lbls[3], popid = observedpops),
              data.frame(value = rmseDens.s, model = lbls[4], popid = observedpops),
              data.frame(value = rmseAbun.s, model = lbls[5], popid = observedpops))

pvalsDa <- t.test(filter(RMSE, model == lbls[2])$value, 
                  filter(RMSE, model == lbls[3])$value)$p.value
pvalsDs <- t.test(filter(RMSE, model == lbls[2])$value, 
                  filter(RMSE, model == lbls[4])$value)$p.value
pvalsAs <- t.test(filter(RMSE, model == lbls[2])$value, 
                  filter(RMSE, model == lbls[5])$value)$p.value

rm(list=ls(pattern = "rmse"))

BIAS <- rbind(data.frame(value = biasHab,    model = lbls[2], popid = observedpops),
              data.frame(value = biasDens.a, model = lbls[3], popid = observedpops),
              data.frame(value = biasDens.s, model = lbls[4], popid = observedpops),
              data.frame(value = biasAbun.s, model = lbls[5], popid = observedpops))

pvals1 <- wilcox.test(filter(BIAS, model == lbls[3])$value, 
                      filter(BIAS, model == lbls[2])$value)$p.value
pvals2 <- wilcox.test(filter(BIAS, model == lbls[4])$value, 
                      filter(BIAS, model == lbls[2])$value)$p.value
pvals3 <- wilcox.test(filter(BIAS, model == lbls[5])$value, 
                      filter(BIAS, model == lbls[2])$value)$p.value

pvalsH  <- wilcox.test(filter(BIAS, model == lbls[2])$value)$p.value
pvalsDa <- wilcox.test(filter(BIAS, model == lbls[3])$value)$p.value
pvalsDs <- wilcox.test(filter(BIAS, model == lbls[4])$value)$p.value
pvalsAs <- wilcox.test(filter(BIAS, model == lbls[5])$value)$p.value

rm(list=ls(pattern = "bias"))

RMSE$metric <- "RMSE"
BIAS$metric <- "Bias"

vm <- rbind(RMSE, BIAS)
 
# Visualize: Specify the comparisons you want
my_comparisons <- list( c(lbls[2], lbls[3]),
                        c(lbls[2], lbls[4]), 
                        c(lbls[2], lbls[5]),
                        c(lbls[2], lbls[5])) # add one to overcome plotting issue

ytmp <- as.data.frame(rbind(range(filter(vm, metric == "RMSE")$value),
                   range(filter(vm, metric == "Bias")$value)))
yrange <- (ytmp[, 2]-ytmp[, 1])

ymax <- c(max(filter(vm, metric == "RMSE")$value),
                    max(filter(vm, metric == "Bias")$value))
ymin <- c(min(filter(vm, metric == "RMSE")$value),
                    min(filter(vm, metric == "Bias")$value))

newymax <- round(ymax+(0.01*yrange), 1)
newymin <- round(ymin-(0.1*yrange), 1)
laby <- ymin-(0.05*yrange)

dat_text <- data.frame(metric = c(rep("RMSE", 4), rep("Bias", 4)), 
                       model = rep(levels(vm$model), 2),
                       label = c(wilcox.test(filter(RMSE, model == lbls[2])$value)$p.value,
                                 wilcox.test(filter(RMSE, model == lbls[3])$value)$p.value,
                                 wilcox.test(filter(RMSE, model == lbls[4])$value)$p.value,
                                 wilcox.test(filter(RMSE, model == lbls[5])$value)$p.value,
                                 wilcox.test(filter(BIAS, model == lbls[2])$value)$p.value,
                                 wilcox.test(filter(BIAS, model == lbls[3])$value)$p.value,
                                 wilcox.test(filter(BIAS, model == lbls[4])$value)$p.value,
                                 wilcox.test(filter(BIAS, model == lbls[5])$value)$p.value),
                       value = c(rbind(laby, laby, laby, laby)))

ranges <- data.frame(ymax = newymax,
                     # ymin = c(0.5, 0, -.75, 0.1),
                     ymin = newymin,
                     metric = c("RMSE", "Bias"),
                     model = "ESH")
ranges <- gather(ranges, "key", "value", -metric, -model) %>% dplyr::select(., -key)

tip <- yrange*0.1

Bias and RMSE

ggplot(vm, aes(x = model, y = value, color = model)) +
        facet_wrap(~metric, scales = "free_y") +
        xlab("") +
        geom_boxplot() + 
        geom_jitter(shape = 1) +
        scale_colour_manual(values = cls[2:5], drop = F) +
        geom_hline(aes(yintercept=0), lty = 2) +
        theme_bw() +
        theme(axis.text.x = element_blank(), 
              # axis.text.x = element_text(angle = 0, hjust = .5), 
              axis.title.x = element_blank(),
              strip.background = element_rect(fill="white", colour = "white"),
              legend.position = "top") +
        scale_y_continuous(name = "") +
        
        geom_text(data = dat_text, aes(y=value, label = sprintf("p = %.2g", label)),
                  color = 1) +
        geom_blank(data = ranges) +
        
        geom_signif(comparisons = my_comparisons, test = "wilcox.test",
                    test.args = list(
                            # alternative = "less",
                            paired = T),
                    # label = "p.format",
                    step_increase = 0.15,
                    margin_top = 0.1, tip_length = 0.05,
                    color = rep(c(
                            alpha(rep("black", 3), 1),
                            alpha(rep("black", 3), 1),
                            alpha(rep("black", 3), 1),
                            alpha(rep("black", 3), 0)), 2),
                    size = 0.25, vjust = -0.5,
                    map_signif_level=function(p)sprintf("p = %.2g", p))

        # stat_compare_means(comparisons = my_comparisons, method = "wilcox.test",
        #                    # ref.group = "Habitat model",
        #                    label = "p.format",
        #                    inherit.aes = F,
        #                    paired = T,
        #                    aes(color = rep(c(
        #                            rep("black", 3),
        #                            rep("black", 3),
        #                            alpha(rep("black", 3), 0),
        #                            rep("black", 3)), 4)),
        #                    # method.args = list(alternative = "greater"),
        #                    tip.length = 0.05, bracket.size = 0.25)

Sign of Linear Trends

linear.trends <- as.data.frame(cbind(lmObs, lmHab, 
                                     lmDens.a, lmDens.s, lmAbun.s))
# row.names(linear.trends) <- observedspecies
row.names(linear.trends) <- finalpops
colnames(linear.trends)  <- nms[2:6]
linear.pvals <- as.data.frame(cbind(lmObs.p, lmHab.p,
                                    lmDens.p.a, lmDens.p.s, lmAbun.p.s))
# row.names(linear.pvals) <- observedspecies
row.names(linear.pvals) <- finalpops
colnames(linear.pvals)  <- nms[2:6]
 
slopes <- linear.trends
# linear.trends[linear.pvals > 0.05] <- 0

signs <- sign(linear.trends)

S <- data.frame(row.names = c("Correct", 
                              "Underestimate", "Missed +", "False -", "Opposing -", 
                              "Overestimate",  "Missed -", "False +", "Opposing +"))
S[[nms[3]]] <- NA
S[[nms[4]]] <- NA
S[[nms[5]]] <- NA
S[[nms[6]]] <- NA

for (m in nms[3:6]) {
  S["Correct", m] <- sum(signs[[nms[2]]] == signs[[m]])/length(observedpops)
  S["Underestimate", m] <- sum(signs[[nms[2]]] > signs[[m]])/length(observedpops)
  S["Missed +", m] <- sum(signs[[nms[2]]] == 1 & signs[[m]] == 0)/length(observedpops)
  S["False -", m] <- sum(signs[[nms[2]]] == 0 & signs[[m]] == -1)/length(observedpops)
  S["Opposing -", m] <- sum(signs[[nms[2]]] == 1 & signs[[m]] == -1)/length(observedpops)
  S["Overestimate", m] <- sum(signs[[nms[2]]] < signs[[m]])/length(observedpops)
  S["Missed -", m] <- sum(signs[[nms[2]]] == -1 & signs[[m]] == 0)/length(observedpops)
  S["False +", m] <- sum(signs[[nms[2]]] == 0 & signs[[m]] == 1)/length(observedpops)
  S["Opposing +", m] <- sum(signs[[nms[2]]] == -1 & signs[[m]] == 1)/length(observedpops)
}

names(S) <- lbls[2:5]
 
kable(round(S[c(1,2,6), ],2), caption = "Simple classification of sign of population-level linear trends (proportion)")

Table 5.1: Simple classification of sign of population-level linear trends (proportion)
	ESH	Density	Density in ESH	Abundance in ESH
Correct	0.37	0.47	0.47	0.43
Underestimate	0.33	0.13	0.13	0.13
Overestimate	0.30	0.40	0.40	0.43

# kable(round(S,2), caption = "Full classification of sign of population-level linear trends (proportion)")

write.csv(S, "outputs/Africa/SpeciesSpecific-linear-slopes-sign-ESACCI.csv")

Trends significant at p<0.05 only

linear.trends[linear.pvals > 0.05] <- 0

signs <- sign(linear.trends)

S <- data.frame(row.names = c("Correct", 
                              "Underestimate", "Missed +", "False -", "Opposing -", 
                              "Overestimate",  "Missed -", "False +", "Opposing +"))
S[[nms[3]]] <- NA
S[[nms[4]]] <- NA
S[[nms[5]]] <- NA
S[[nms[6]]] <- NA

for (m in nms[3:6]) {
  S["Correct", m] <- sum(signs[[nms[2]]] == signs[[m]])/length(observedpops)
  S["Underestimate", m] <- sum(signs[[nms[2]]] > signs[[m]])/length(observedpops)
  S["Missed +", m] <- sum(signs[[nms[2]]] == 1 & signs[[m]] == 0)/length(observedpops)
  S["False -", m] <- sum(signs[[nms[2]]] == 0 & signs[[m]] == -1)/length(observedpops)
  S["Opposing -", m] <- sum(signs[[nms[2]]] == 1 & signs[[m]] == -1)/length(observedpops)
  S["Overestimate", m] <- sum(signs[[nms[2]]] < signs[[m]])/length(observedpops)
  S["Missed -", m] <- sum(signs[[nms[2]]] == -1 & signs[[m]] == 0)/length(observedpops)
  S["False +", m] <- sum(signs[[nms[2]]] == 0 & signs[[m]] == 1)/length(observedpops)
  S["Opposing +", m] <- sum(signs[[nms[2]]] == -1 & signs[[m]] == 1)/length(observedpops)
}

names(S) <- lbls[2:5]
 
kable(round(S[c(1,2,6), ],2), caption = "Simple classification of sign of population-level linear trends (proportion)")

Table 5.2: Simple classification of sign of population-level linear trends (proportion)
	ESH	Density	Density in ESH	Abundance in ESH
Correct	0.37	0.2	0.17	0.1
Underestimate	0.33	0.3	0.33	0.4
Overestimate	0.30	0.5	0.50	0.5

kable(round(S,2), caption = "Full classification of sign of population-level linear trends (proportion)")

Table 5.2: Full classification of sign of population-level linear trends (proportion)
	ESH	Density	Density in ESH	Abundance in ESH
Correct	0.37	0.2	0.17	0.10
Underestimate	0.33	0.3	0.33	0.40
Missed +	0.03	0.3	0.30	0.30
False -	0.07	0.0	0.00	0.07
Opposing -	0.23	0.0	0.03	0.03
Overestimate	0.30	0.5	0.50	0.50
Missed -	0.00	0.5	0.50	0.50
False +	0.03	0.0	0.00	0.00
Opposing +	0.27	0.0	0.00	0.00

## Figs for manuscript
# Linear
ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = lm, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = lm, data = mods) +
        scale_color_manual(values = c("#ff7f00", "#4daf4a", "#984ea3", "#377eb8", "#e41a1c")) +
        scale_x_continuous(breaks = seq(1992, 2013, 2)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " African mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance")  +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "none")
ggsave("figs/Africa/linear.pdf", width = 12, height = 7.5, units = "cm")

# LOESS
ggplot(data=allmods,
            aes(x=year, y=index, colour=Trend.type )) +
        geom_point(cex=0.5) +
        geom_smooth(lwd=0.5, se=T, method = loess, data = obs) +
        geom_smooth(lwd=0.5, se=F, method = loess, data = mods) +
        scale_color_manual(values = c("#ff7f00", "#4daf4a", "#984ea3", "#377eb8", "#e41a1c")) +
        scale_x_continuous(breaks = seq(1992, 2013, 2)) +
        ggtitle(paste0("Change in ", nrow(lpd.interpolated)," populations of ",
                       length(unique(lpd.interpolated$Species.ID)), 
                       " Africa mammals"),
                "Change from 1992 (Nt/'N'[1992])") +
        ylab("Geometric Mean Abundance")  +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = "none")
ggsave("figs/Africa/gam.pdf", width = 12, height = 7.5, units = "cm")

dt <- left_join(vm, lpd[, c("ID", "Species.ID", "Common_name")], by = c("popid" = "ID"))

write.csv(dt, "outputs/Africa/Africa-ESACCI-SpeciesSpecific_goodness_of_fit_metrics.csv", row.names = F)

Species-specific density models

ESA-CCI landcover data

Judith Ament

Wednesday 16 October 2019

1 Setup

2 Data

3 Predictions of population trends

4 Aggregating to multi-species index

5 Validation metrics