Chapter 1 Workflow

Step 1 - Calculating Lambdas for each population

library(plyr)
library(taRifx)
library(mgcv)
library(zoo)

Subsetting the data to exclude fish, populations in marine systems and those without specific locations

LPI<-read.csv("LPI_pops_20160523_edited.csv")

LPI_pop<-subset(LPI, Specific_location==1 & System !="Marine" & Class != "Actinopterygii"& Class != "Cephalaspidomorphi")

ID<-LPI_pop$ID

pop_data<- LPI_pop[,c(1,65:130)]


pop_datab <- (pop_data [,2:57] !="NULL")
points_per_pop1950_2005 = rowSums(pop_datab)
length_id <- data.frame(ID,points_per_pop1950_2005)

LPI_pop<-merge(length_id, LPI_pop, by = "ID")
LPI_pop<-subset(LPI_pop, points_per_pop1950_2005 >=2)


LPI_pop2<-LPI_pop[,c(1:3,66:131)]
LPI_pop2[LPI_pop2 == 'NULL'] = NA

nrow(LPI_pop2)

## [1] 3686

Calculating lambdas

library(taRifx)
library(zoo)

doFit = function(sp_name) {
  spid2 = subset(LPI_pop2, ID == sp_name)   #subsetting the population data by each population 
  spid = spid2[,4:59]                     #subsetting only the dates
  colnames(spid)<-1950:2005              #renaming the date column names as R doesn't like numbered column names
  
  name<-spid2$Binomial
  id<-spid2$ID
  points<-spid2$points_per_pop1950_2005
  name_id<-paste(name, id, sep="_") #creating id for naming files of plots
  Date<-as.numeric(colnames(spid))
  spidt<-destring(t(spid))
  time<-length(min(which(!is.na(spidt))):max(which(!is.na(spidt))))
  missing<-time-points
  
  Year<-Date[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
  Population<-spidt[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
  Population[Population == 0] <- mean(Population, na.rm=TRUE)*0.01 #if a population is zero one year thhis is replaced with 1% of the average population estimate - because you can log zeros
  
  df<-data.frame(Year,Population)
  
  if (sum(na.omit(df$Population<1))>0) {
    df$Population<-df$Population+1
  } 
    
  
  if (points >=6) {           
    PopN = log10(df$Population)
    if (length(na.omit(PopN)) >=6) {
      SmoothParm = round(length(na.omit(PopN))/2)    
    } else {
      SmoothParm=3
    }

    mg2<-mgcv:::gam(PopN ~ s(Year, k=SmoothParm), fx=TRUE)
    pv2 <- predict(mg2,df,type="response",se=TRUE) 
    R_sq2<-summary(mg2)$r.sq
    model<-1
    pv2$fit[pv2$fit <= 0] <- NA
    lambda2<-diff(pv2$fit)
    lambda_sum2<-sum(lambda2, na.rm=TRUE)
    lambda_mean2<-mean(lambda2, na.rm=TRUE)

  } else {
    SmoothParm<-NA
    PopN = log10(df$Population)
    ml2<-lm(PopN~df$Year)
    R_sq2<-summary(ml2)$r.sq
    model<-0
    Pop_interp2<-na.approx(PopN)
    Pop_interp2[Pop_interp2<=0] <- NA
    lambda2<-diff(Pop_interp2)
    lambda_sum2<-sum(lambda2, na.rm=TRUE)
    lambda_mean2<-mean(lambda2, na.rm=TRUE)
 
  }

res_df = data.frame(sp_name=sp_name, points=points, SmoothParm=SmoothParm, r_sq=R_sq2, model=model,lambda_sum=lambda_sum2,lambda_mean=lambda_mean2,time=time, missing=missing)

#print(res_df)
return(res_df)
}

all_df_list <- lapply(unique(LPI_pop2$ID), doFit)

## Warning in summary.lm(ml2): essentially perfect fit: summary may be
## unreliable

## Warning in summary.lm(ml2): essentially perfect fit: summary may be
## unreliable

all_matrix <- matrix(unlist(all_df_list, use.names =FALSE), ncol=9, byrow=TRUE)
all_df <- data.frame(all_matrix)
colnames(all_df) <- c("ID", "points","SmoothParm", "r_sq", "model", "lambda_sum","lambda_mean", "length_time", "missing_years")

#write.csv(all_df, "Global_Population_Trends_Rsq_Lambda_16_08_16.csv") ##updated LPI

Step 2 - Extracting the Climate data for each population

library(raster)
library(doParallel)
library(lubridate)
library(reshape2)

CR30s<-brick("cru_ts3.23.1931.1940.tmp.dat.nc")

## Loading required namespace: ncdf4

CR40s<-brick("cru_ts3.23.1941.1950.tmp.dat.nc")
CR50s<-brick("cru_ts3.23.1951.1960.tmp.dat.nc")
CR60s<-brick("cru_ts3.23.1961.1970.tmp.dat.nc")
CR70s<-brick("cru_ts3.23.1971.1980.tmp.dat.nc")
CR80s<-brick("cru_ts3.23.1981.1990.tmp.dat.nc")
CR90s<-brick("cru_ts3.23.1991.2000.tmp.dat.nc")
CR00s<-brick("cru_ts3.23.2001.2010.tmp.dat.nc")
CR10s<-brick("cru_ts3.23.2011.2014.tmp.dat.nc")

plot(CR50s[[11]], main="Mean Temperature, Novermber 1951")

Subsetting the data to exclude fish, populations in marine systems and those without specific locations

LPI<-read.csv("LPI_pops_20160523_edited.csv")

LPIsp<-subset(LPI, Specific_location==1 & System !="Marine" & Class != "Actinopterygii"& Class != "Cephalaspidomorphi" )

CR<-stack(CR30s,CR40s,CR50s,CR60s,CR70s,CR80s,CR90s,CR00s,CR10s)

plot(CR[[1]], "Location of LPI Populations")
points(LPIsp$Longitude, LPIsp$Latitude)

xy<-data.frame(LPIsp$Longitude, LPIsp$Latitude)

xy<-unique(xy)     #identifying unique locations to extract climate data from 

xy_df<-data.frame(xy)
colnames(xy_df)<-c("lon", "lat")
coordinates(xy_df) <- c("lon", "lat")

print("number of populations remaining after subsetting")

## [1] "number of populations remaining after subsetting"

length(xy_df)

## [1] 1367

n<-2#number of cores to use - not sure how many I can go up to - 6 at UCL
cl<-makeCluster(n)
registerDoParallel(cl)  
getDoParWorkers()

buff<- function (year,b)  {
  
  rasterex<-raster:::extract(CR[[year]], xy_df, buff=b, fun=mean, na.rm=TRUE)
  return(rasterex)
  
}

diam<-0 #size of buffer to use - in metres
lyr<-1:nlayers(CR)

stime <- system.time({
  sr <- foreach(1, .combine = cbind) %dopar% mapply(buff,lyr,diam)
})
stime

stopCluster(cl)

dates<-seq(ymd('1931-01-16'),ymd('2014-12-16'), by = 'months')

datesr<-rep(dates, each=length(xy[,1]))   #each number should be number of pops

srm<-melt(sr)

lon<-xy[,1]
lat<-xy[,2]

srm2<-cbind(lon,lat,datesr, srm)

srm3<-srm2[,c(1,2,3,6)]

colnames(srm3)<-c("Longitude", "Latitude", "Date", "Mean_T")

LPI_ID<-LPIsp[,c("ID", "Longitude", "Latitude")]

LPIclim<-merge(srm3, LPI_ID, by=c("Longitude", "Latitude"))

#write.csv(LPIclim, "Global_Mean_Temp_All_LPI_nobuff_1931_moreLPI.csv")

LPIclim<-read.csv("Global_Mean_Temp_All_LPI_nobuff_1931_moreLPI.csv")
  
LPIclim$Date<-as.Date(LPIclim$Date, "%Y-%m-%d")
LPIclim$Year<-format(LPIclim$Date, "%Y")

LPIclim<-na.omit(LPIclim)

LPI<-read.csv("LPI_pops_20160523_edited.csv")

LPIsp<-subset(LPI, Specific_location==1 & System !="Marine" & Class != "Actinopterygii"& Class != "Cephalaspidomorphi" & ID !=4438)

pop_data<- LPIsp[,c(1,65:120)]
ID<-LPIsp$ID

pop_datab <- (pop_data [,2:57] !="NULL")
points_per_pop1950_2005 = rowSums(pop_datab)
length_id <- data.frame(ID,points_per_pop1950_2005)

LPIsp2<-merge(length_id, LPIsp, by = "ID")
LPIsp2<-subset(LPIsp2, points_per_pop1950_2005 >=2)

LPIsp2<-LPIsp2[,c(1:3,66:121)]
LPIsp2[LPIsp2 == 'NULL'] = NA

nrow(LPIsp2)

## [1] 3686

LPIclim<-LPIclim[LPIclim$ID %in% LPIsp2$ID,]

LPIsp2<-LPIsp2[LPIsp2$ID %in% LPIclim$ID,]

library(broom)
library(plyr)
library(taRifx)

lag<-0 #number of years before start date of pop records which I want climate data from

doMean = function(sp_name) {
  spid2 = subset(LPIsp2, ID == sp_name)   #subsetting the population data by each population
  spid = spid2[,4:59]                     #subsetting only the dates
  nid<-matrix(rep (NA, 10), nrow=1)                
  colnames(nid)<-1940:1949
  colnames(spid)<-1950:2005
  spid<-cbind(nid, spid)#renaming the date column names as R doesn't like numbered column names
  climid=subset(LPIclim, ID == sp_name)  #subsetting the climate data by each population
  
  year_temp <- ddply(climid, "Year", summarise,          #calculating the annual mean for max temp, min temp and precipitation
                     mean_mean = mean(na.omit(Mean_T)))
  name<-spid2$Binomial
  id<-spid2$ID
  points<-spid2$points_per_pop1950_2005
  name_id<-paste(name, id, sep="_") #creating id for naming files of plots
  Date<-as.numeric(colnames(spid))
  spidt<-destring(t(spid))

  Year<-Date[(min(which(!is.na(spidt)))-lag):max(which(!is.na(spidt)))]
  Population<-spidt[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]

  Mean<-year_temp$mean_mean[(min(which(!is.na(spidt)))-lag):max(which(!is.na(spidt)))]
  
  mean_change<-sum(diff(Mean))
  
  Mean_mon<-climid[climid$Year %in% Year, ]$Mean_T
  year_temp$Year<-as.numeric(year_temp$Year)

  if (sum(is.nan(Mean))!=length(Mean)){
    
    lm_mean<-lm(Mean~Year)
    #lm_mean<-lm(year_temp$mean_mean~year_temp$Year)
    lm_mean_df<-tidy(lm_mean)[2,]  
    mean_df<-cbind(id,mean_change, lm_mean_df)
    
  } else{
    
    mean_df<-matrix(c(id,mean_change,NA,NA,NA,NA,NA), nrow=1, ncol=6)
    colnames(mean_df)<-c("id","mean_change" ,"term", "estimate", "std.error", "statistic", "p.value")
    #mean_df<-data.frame(mean_df)
  }
  
  #print(mean_df)  
  return(mean_df)
}

all_df_list <- lapply(unique(LPIsp2$ID), doMean)

all_matrix <- matrix(unlist(all_df_list), ncol=7, byrow=TRUE)
mean_df <- data.frame(all_matrix)
colnames(mean_df) <- c("ID", "Sum_Mean_Change","Term","Estimate","SE","Statistic","p.val")

mean_df$Estimate<-as.numeric(as.character(mean_df$Estimate))

#write.csv(mean_df, "All_LPI_All_Years_Nobuff_1931_moreLPI_end2005.csv")#to go with hyde

Calculating land use change

#install.packages("devtools")
library(devtools)
#install_github("timnewbold/GISOperations")
library(GISOperations)
library(raster)

crop1940<-raster("crop1940AD.asc")
gras1940<-raster("gras1940AD.asc")

crop1950<-raster("crop1950AD.asc")
gras1950<-raster("gras1950AD.asc")

crop1960<-raster("crop1960AD.asc")
gras1960<-raster("gras1960AD.asc")

crop1970<-raster("crop1970AD.asc")
gras1970<-raster("gras1970AD.asc")

crop1980<-raster("crop1980AD.asc")
gras1980<-raster("gras1980AD.asc")

crop1990<-raster("crop1990AD.asc")
gras1990<-raster("gras1990AD.asc")

crop2000<-raster("crop2000AD.asc")
gras2000<-raster("gras2000AD.asc")

crop2005<-raster("crop2005AD.asc")
gras2005<-raster("gras2005AD.asc")


rlist<-list(crop1940,crop1950,crop1960,crop1970,crop1980,crop1990,crop2000,crop2005,gras1940,gras1950,gras1960,gras1970,gras1980,gras1990,gras2000,gras2005)
cellareas <- DegreeCellAreaKM(lat=coordinates(crop1940)[,2],height=res(crop1940)[2],width=res(crop1940)[1])



for (map in rlist){
  
  map_pcnt<-signif(values(map),digits=4)/cellareas
  map_pcnt_m<-matrix(map_pcnt, nrow=nrow(map), ncol=ncol(map), byrow=TRUE)
  map_pcnt_r<-raster(map_pcnt_m, xmn=map@extent[1], xmx=map@extent[2], ymn=map@extent[3], ymx=map@extent[4])
  writeRaster(map_pcnt_r, paste(names(map),"raster.tif", sep="_"),overwrite=TRUE)
  #print(names(map))
}


crop_s<-stack(list.files(path = getwd(), pattern = "^.*crop*.*.tif$"))
gras_s<-stack(list.files(path = getwd(), pattern = "^.*gras*.*.tif$"))


nat_1940<-(1 - (crop_s[[1]] + gras_s[[1]]))
nat_2005<-(1 - (crop_s[[7]] + gras_s[[7]]))
plot(nat_2005 - nat_1940, main="Change in Natural Land Use 1940 - 2005")

anth_1940<- (crop_s[[1]] + gras_s[[1]])
anth_2005<-(crop_s[[7]] + gras_s[[7]])

plot(anth_2005 - anth_1940, main="Change in Cropland and Pasture Land Use 1940 - 2005")

###########LPI data

LPI_LUC<-read.csv("LPI_pops_20160523_edited.csv")

LPI_pop<-subset(LPI_LUC, Specific_location==1 & System !="Marine" & Class != "Actinopterygii"& Class != "Cephalaspidomorphi")

ID<-LPI_pop$ID
pop_data<- LPI_pop[,c(1,65:130)]

pop_datab <- (pop_data [,2:67] !="NULL")
points_per_pop1950_2012 = rowSums(pop_datab)
length_id <- data.frame(ID,points_per_pop1950_2012)

LPI_LUC<-merge(length_id, LPI_pop, by = "ID")
LPI_LUC<-subset(LPI_pop, points_per_pop1950_2012 >=2)


id<-LPI_LUC$ID
latlong<-cbind(LPI_LUC$Longitude,LPI_LUC$Latitude)

x<-latlong[,1]
y<-latlong[,2]

xy<-cbind(id,x,y) #creating the grid around the population - 5km either side gives a grid of 121km^2
xy<-na.omit(xy)
id<-xy[,1]

points<-SpatialPoints(cbind(xy[,2], xy[,3]))


xmin<- colFromX(crop_s[[1]], points[1:length(points),]) - 1
xmax<- colFromX(crop_s[[1]], points[1:length(points),]) + 1
ymin<- rowFromY(crop_s[[1]], points[1:length(points),]) - 1
ymax<- rowFromY(crop_s[[1]], points[1:length(points),]) + 1    


grid_crop<-data.frame(ID=id,xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)   #setting extents of grid to extract

grid_crop2<-merge(grid_crop,LPI_LUC, by="ID")

grid_crop2<-subset(grid_crop2, Specific_location ==1) 

nrow(grid_crop2)

## [1] 3848

library(taRifx)
library(zoo)

result <- data.frame() #empty result dataframe

for (i in 1:length(grid_crop2$ID)){
  
  ID<-grid_crop2[i,1]
  Binomial<-as.character(grid_crop2[i,6])
  spid = grid_crop2[i,69:134]                     #subsetting only the dates
  colnames(spid)<-1950:2015
  
  Date<-as.numeric(colnames(spid))
  spidt<-destring(t(spid))
  
  Year<-Date[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
  Population<-spidt[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]

  crop_check<-crop(crop_s, extent(crop_s, grid_crop2[i,4],grid_crop2[i,5],grid_crop2[i,2],grid_crop2[i,3]))
  gras_check<-crop(gras_s, extent(gras_s, grid_crop2[i,4],grid_crop2[i,5],grid_crop2[i,2],grid_crop2[i,3]))
  
  crop_df<-data.frame(as.matrix(crop_check))
  gras_df<-data.frame(as.matrix(gras_check))
  
  decs<-c("1940","1950","1960","1970","1980","1990","2000","2005")
  colnames(crop_df)<-decs
  colnames(gras_df)<-decs
  
  mean_crop_df<-colMeans(crop_df)
  mean_crop_df2<-data.frame(mean_crop_df, decs)
  colnames(mean_crop_df2)<-c("mean_crop", "Year")
  
  mean_gras_df<-colMeans(gras_df)
  mean_gras_df2<-data.frame(mean_gras_df, decs)
  colnames(mean_gras_df2)<-c("mean_grass", "Year")
  
  Year_na<-rep(NA, length(1940:2005))
  Year_all<-1940:2005
  Yr<-cbind(Year_all, Year_na)
  colnames(Yr)<-c("Year", "Year_NA")
  
  Yr_intrp_crop<-merge(Yr, mean_crop_df2, by="Year", all=T)
  Yr_intrp_gras<-merge(Yr, mean_gras_df2, by="Year", all=T)
  
  min_yr<-min(Year)
  max_yr<-max(Year)
  
  
  if (min_yr != max_yr & min_yr < 2005 & sum(is.na(Yr_intrp_crop$mean_crop))<60) {
    
    Yr_intrp_crop$mean_crop_int<-na.approx(Yr_intrp_crop$mean_crop)
    Yr_intrp_gras$mean_gras_int<-na.approx(Yr_intrp_gras$mean_grass)
    Yr_intrp_crop$both<-Yr_intrp_crop$mean_crop_int + Yr_intrp_gras$mean_gras_int
    
    crop_change<-mean(diff(subset(Yr_intrp_crop, Year >= min_yr & Year <= max_yr)$mean_crop_int))
    gras_change<-mean(diff(subset(Yr_intrp_gras, Year >= min_yr & Year <= max_yr)$mean_gras_int))
    both_change<-mean(diff(subset(Yr_intrp_crop, Year >= min_yr & Year <= max_yr)$both))
    both_sum<-sum(diff(subset(Yr_intrp_crop, Year >= min_yr & Year <= max_yr)$both))
  }else{
    crop_change<-NA
    gras_change<-NA
    both_change<-NA
    both_sum<-NA
  }
  
  years<-max(Year)-min(Year)
  final<-cbind(ID,Binomial,crop_change, gras_change, both_change, both_sum, years) 
  result<-rbind(final,result)
  print(final)
  
}

#write.csv(result, "Hyde_crop_pasture_annual_change_sum.csv")

lpi<-read.csv("LPI_pops_20160523_edited.csv")
bm<-read.csv("Amniote_Database_Aug_2015.csv")
bm$Binomial<-paste(bm$genus, bm$species, sep="_")

lpibm<-merge(lpi, bm, by="Binomial")

lpibm<-data.frame(lpibm$Binomial, lpibm$ID, lpibm$adult_body_mass_g)
head(lpibm)

##                lpibm.Binomial lpibm.ID lpibm.adult_body_mass_g
## 1        Abrothrix_longipilis     4887                  185.00
## 2         Abrothrix_olivaceus     4600                   27.00
## 3         Abrothrix_olivaceus     4601                   27.00
## 4        Acanthiza_reguloides     8782                    7.70
## 5       Acanthiza_uropygialis     8723                    6.35
## 6 Acanthodactylus_scutellatus    10408                    7.10

colnames(lpibm)<-c("Binomial", "ID", "Body_mass_g")

lpibm$Log_Body_Mass_g<-log10(lpibm$Body_mass_g)

lpibm2<-lpibm[lpibm$Body_mass_g !=-999,]


lpibm2<-unique(lpibm2)

lpibm2$Log_Body_Mass_g<-log10(lpibm2$Body_mass_g)

#write.csv(lpibm2, "LPI_BodyMass_Amniote_Database.csv")

Setting up the modelling

temp<-read.csv("All_LPI_All_Years_Nobuff_1931_moreLPI_end2005.csv")

body4<-read.csv("LPI_BodyMass_Amniote_Database.csv")

body4<-body4[,-3]
hyde<-read.csv("Hyde_crop_pasture_annual_change.csv")

hyde2<-read.csv("Hyde_crop_pasture_annual_change_sum.csv")

temp<-temp[,c("ID", "Estimate")]
temp<-merge(temp, hyde[,c(2,3)], by="ID")

LPI<-read.csv("LPI_pops_20160523_edited.csv")

pop<-read.csv("Global_Population_Trends_Rsq_Lambda_07_10_16.csv")



LPI<-LPI[,c("ID","Binomial","Common_name", "Order","Family", "Protected_status", "Country","Region", "System", "Class","Specific_location", "Longitude", "Latitude", "Primary_threat", "Secondary_threat", "Tertiary_threat", "Migratory", "Forest")]

df<-merge(merge(temp,body4[,c(2:4)], by="Binomial"), merge(LPI, pop, by="ID"),by="ID")


dfd<-merge(df, hyde[,c(-1,-3)], by="ID")

head(dfd)

##   ID            Binomial.x   Estimate Body_mass_g Log_Body_Mass_g
## 1  4 Copsychus_sechellarum 0.01184524          71        1.851258
## 2  4 Copsychus_sechellarum 0.01184524          71        1.851258
## 3  4 Copsychus_sechellarum 0.01184524          71        1.851258
## 4  4 Copsychus_sechellarum 0.01184524          71        1.851258
## 5  4 Copsychus_sechellarum 0.01184524          71        1.851258
## 6  4 Copsychus_sechellarum 0.01184524          71        1.851258
##              Binomial.y             Common_name         Order       Family
## 1 Copsychus_sechellarum Seychelles magpie robin Passeriformes Muscicapidae
## 2 Copsychus_sechellarum Seychelles magpie robin Passeriformes Muscicapidae
## 3 Copsychus_sechellarum Seychelles magpie robin Passeriformes Muscicapidae
## 4 Copsychus_sechellarum Seychelles magpie robin Passeriformes Muscicapidae
## 5 Copsychus_sechellarum Seychelles magpie robin Passeriformes Muscicapidae
## 6 Copsychus_sechellarum Seychelles magpie robin Passeriformes Muscicapidae
##   Protected_status    Country Region      System Class Specific_location
## 1               No Seychelles Africa Terrestrial  Aves                 1
## 2               No Seychelles Africa Terrestrial  Aves                 1
## 3               No Seychelles Africa Terrestrial  Aves                 1
## 4               No Seychelles Africa Terrestrial  Aves                 1
## 5               No Seychelles Africa Terrestrial  Aves                 1
## 6               No Seychelles Africa Terrestrial  Aves                 1
##   Longitude Latitude Primary_threat   Secondary_threat Tertiary_threat
## 1  55.93333 -4.58333   Habitat loss Invasive spp/genes            NULL
## 2  55.93333 -4.58333   Habitat loss Invasive spp/genes            NULL
## 3  55.93333 -4.58333   Habitat loss Invasive spp/genes            NULL
## 4  55.93333 -4.58333   Habitat loss Invasive spp/genes            NULL
## 5  55.93333 -4.58333   Habitat loss Invasive spp/genes            NULL
## 6  55.93333 -4.58333   Habitat loss Invasive spp/genes            NULL
##   Migratory Forest X points SmoothParm      r_sq model lambda_sum
## 1         0      1 2     14          7 0.8027547     1  -0.319924
## 2         0      1 2     14          7 0.8027547     1  -0.319924
## 3         0      1 2     14          7 0.8027547     1  -0.319924
## 4         0      1 2     14          7 0.8027547     1  -0.319924
## 5         0      1 2     14          7 0.8027547     1  -0.319924
## 6         0      1 2     14          7 0.8027547     1  -0.319924
##   lambda_mean length_time missing_years crop_change gras_change
## 1 -0.02285171          15             1          NA          NA
## 2 -0.02285171          15             1          NA          NA
## 3 -0.02285171          15             1          NA          NA
## 4 -0.02285171          15             1          NA          NA
## 5 -0.02285171          15             1          NA          NA
## 6 -0.02285171          15             1          NA          NA
##   both_change
## 1          NA
## 2          NA
## 3          NA
## 4          NA
## 5          NA
## 6          NA

nrow(df)

## [1] 58904

nrow(dfd)

## [1] 58904

dfu<-unique(dfd)

df2<-subset(dfu, !is.na(Estimate) & r_sq >= 0.4999999  &length_time >=5& System!="Marine" 
            &Specific_location == 1 &!is.na(both_change) & !is.na(Log_Body_Mass_g)& Class=="Aves")
nrow(df2)

## [1] 410

colnames(df2)[2]<-"Binomial"

#df2[is.na(df2$lambda_mean),]$lambda_mean<-0

nrow(df2)

## [1] 410

library(plyr)
sp_dups<-data.frame(ddply(df2,.(Longitude,Latitude),nrow))
sp_dups$loc_id<-1:length(sp_dups$Longitude)
sp_dups_df<-merge(sp_dups, df2, by=c("Longitude","Latitude"))

library(data.table)
dt = as.data.table(sp_dups_df)

parm_df<-sp_dups_df[,c("ID","Estimate", "both_change", "Log_Body_Mass_g")]  ##ID, land use, and climate  use "LUC_dist" or "Nat_change" for purely annual change in summed primary, secondary and other

parm_mat<-as.matrix(parm_df)
parm_scale<-scale(parm_mat[,c("Estimate", "both_change", "Log_Body_Mass_g")])       #use the scaling factors at the bottom of these to scale the rasters

parm_id<-parm_mat[,"ID"]

parm_df_scale<-data.frame(parm_id,parm_scale)

colnames(parm_df_scale)<-c("ID","mean_slope_scale", "change_rate_scale", "Bodymass_scale")

sp_df_scale<-merge(sp_dups_df, parm_df_scale, by="ID")

dt<-data.table(sp_df_scale)

length(unique(dt$loc_id))

## [1] 148

nrow(dt)

## [1] 410

#write.csv(dt, "Mammals_scaled_ready_for_models.csv")

source("rsquaredglmm.R")

  library(lme4) 
  
  m0<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+change_rate_scale:mean_slope_scale+Bodymass_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m0f<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+change_rate_scale:mean_slope_scale+Bodymass_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)

  m0a<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+Bodymass_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m0af<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+Bodymass_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  
  m0b<-lmer(lambda_mean ~ change_rate_scale+Bodymass_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m0bf<-lmer(lambda_mean ~ change_rate_scale+Bodymass_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  
  m0c<-lmer(lambda_mean ~ mean_slope_scale+Bodymass_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m0cf<-lmer(lambda_mean ~ mean_slope_scale+Bodymass_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  
  m0d<-lmer(lambda_mean ~ Bodymass_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m0df<-lmer(lambda_mean ~ Bodymass_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  
  m1<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+change_rate_scale:mean_slope_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m1f<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+change_rate_scale:mean_slope_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)

  m1a<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m1af<-lmer(lambda_mean ~ change_rate_scale+mean_slope_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)

  m1b<-lmer(lambda_mean ~ change_rate_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m1bf<-lmer(lambda_mean ~ change_rate_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)  

  m1c<-lmer(lambda_mean ~ mean_slope_scale+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  m1cf<-lmer(lambda_mean ~ mean_slope_scale+Family+(1|Binomial)+(1|loc_id),data=dt, REML=F)
  mnull<-lmer(lambda_mean ~ 1+(1|Binomial)+(1|loc_id),data=dt, REML=F)

Model Comparison

  # #Weights
  library(MuMIn)
  library(lme4)  

  #msAICc <- model.sel(m1,m1a,m1b,m1c,mnull)
  msAICc <- model.sel(m0,m0a,m0b,m0c,m0d,m1,m1a,m1b,m1c,mnull)

  msAICc$model<-rownames(msAICc)
  msAICc<-data.frame(msAICc)
  msAICc

##       X.Intercept. Bodymass_scale change_rate_scale mean_slope_scale
## m1c   -0.012653385             NA                NA      -0.02019462
## m1a   -0.012661606             NA      -0.004277443      -0.01982774
## m0c   -0.010971022   -0.004278069                NA      -0.02008137
## m0a   -0.011081721   -0.004022805      -0.004026615      -0.01974469
## m1    -0.012612407             NA      -0.004248786      -0.02043261
## m0    -0.010965013   -0.004188318      -0.003982162      -0.02040480
## mnull -0.008661420             NA                NA               NA
## m1b   -0.008797728             NA      -0.005980260               NA
## m0d   -0.006849765   -0.004785007                NA               NA
## m0b   -0.007134453   -0.004379378      -0.005680850               NA
##       change_rate_scale.mean_slope_scale df   logLik      AICc     delta
## m1c                                   NA  5 438.5185 -866.8885  0.000000
## m1a                                   NA  6 438.8074 -865.4063  1.482218
## m0c                                   NA  6 438.7887 -865.3690  1.519573
## m0a                                   NA  7 439.0458 -863.8131  3.075483
## m1                          -0.001339526  7 438.9034 -863.5282  3.360294
## m0                          -0.001463867  8 439.1611 -861.9631  4.925432
## mnull                                 NA  4 431.2716 -854.4444 12.444126
## m1b                                   NA  5 431.8345 -853.5206 13.367958
## m0d                                   NA  5 431.5929 -853.0373 13.851210
## m0b                                   NA  6 432.1034 -851.9984 14.890183
##             weight model
## m1c   0.4105815849   m1c
## m1a   0.1956770714   m1a
## m0c   0.1920562079   m0c
## m0a   0.0882199333   m0a
## m1    0.0765104894    m1
## m0    0.0349828757    m0
## mnull 0.0008150641 mnull
## m1b   0.0005135517   m1b
## m0d   0.0004033177   m0d
## m0b   0.0002399039   m0b

  AIC(m0,m0a,m0b,m0c,m1,m1a,m1b,m1c,mnull)

##       df       AIC
## m0     8 -862.3222
## m0a    7 -864.0917
## m0b    6 -852.2068
## m0c    6 -865.5774
## m1     7 -863.8069
## m1a    6 -865.6148
## m1b    5 -853.6691
## m1c    5 -867.0371
## mnull  4 -854.5432

  #Rsq
  models_list<-list(m0,m0a,m0b,m0c,m0d,m1,m1a,m1b,m1c,mnull)
  modelsR<-lapply(models_list,rsquared.glmm)
  modelsRsq <- matrix(unlist(modelsR), ncol=6, byrow=T)
  rownames(modelsRsq)<-c("m0","m0a","m0b","m0c","m0d","m1","m1a","m1b","m1c","mnull")

  modelsRsq

##       [,1] [,2] [,3]        [,4]      [,5]      [,6]
## m0       1    1    1 0.044922570 0.6528140 -862.3222
## m0a      1    1    1 0.043981291 0.6524740 -864.0917
## m0b      1    1    1 0.005718271 0.6491038 -852.2068
## m0c      1    1    1 0.044118917 0.6554501 -865.5774
## m0d      1    1    1 0.002381116 0.6497789 -853.1858
## m1       1    1    1 0.042217264 0.6569938 -863.8069
## m1a      1    1    1 0.041539922 0.6565978 -865.6148
## m1b      1    1    1 0.003704818 0.6543594 -853.6691
## m1c      1    1    1 0.041560473 0.6599727 -867.0371
## mnull    1    1    1 0.000000000 0.6555752 -854.5432

  library(MuMIn)
  var_imp<-summary(model.avg(models_list))

  #mav<-model.avg(models_list, subset = cumsum(weight) <= 0.95)
  mav<-model.avg(models_list, subset = delta <= 6)
  
  smav<-summary(mav)
  
  coef_av<-smav$coefmat.subset[1:5,"Estimate"]
  coef_df<-data.frame(coef_av)
  coef_df$lowCI<-confint(mav)[1:5,1]
  coef_df$highCI<-confint(mav)[1:5,2]
  coef_df

##                                         coef_av        lowCI       highCI
## (Intercept)                        -0.012122135 -0.028058183  0.003813912
## mean_slope_scale                   -0.020086735 -0.030387264 -0.009786205
## change_rate_scale                  -0.004193031 -0.015209828  0.006823765
## Bodymass_scale                     -0.004197568 -0.015575700  0.007180563
## change_rate_scale:mean_slope_scale -0.001378540 -0.007379583  0.004622504

  coef_pcnt<-data.frame(((10^coef_df) - 1)*100)
  coef_pcnt

##                                       coef_av     lowCI    highCI
## (Intercept)                        -2.7526301 -6.256359  0.882053
## mean_slope_scale                   -4.5198120 -6.757752 -2.228159
## change_rate_scale                  -0.9608353 -3.441575  1.583639
## Bodymass_scale                     -0.9618699 -3.522887  1.667130
## change_rate_scale:mean_slope_scale -0.3169172 -1.684857  1.070055

  coef_pcnt$Var_name<-rownames(coef_pcnt)
  library(plotrix)
  
  plotCI(1:5, y=coef_pcnt$coef_av, ui=coef_pcnt$highCI, li=coef_pcnt$lowCI, ylab="Annual Population Change (%)", xlab="" ,xaxt = "n", 
         main="Birds", lwd=1, ylim=c(min(coef_pcnt$lowCI*1.1), max(coef_pcnt$highCI*1.2)))
  axis(1, at=1:5, labels=rownames(coef_pcnt), las=2)
  abline(h=0, col="red", lty =2)

  #AIC
  AICs<-c(AIC(m0),AIC(m0a),AIC(m0b),AIC(m0c),AIC(m0d),AIC(m1),AIC(m1a),AIC(m1b),AIC(m1c),AIC(mnull))
  mnames<-c("LUC*MTC+BM","LUC+MTC+BM", "LUC+BM", "MTC+BM","BM","LUC*MTC","LUC+MTC", "LUC", "MTC", "NULL")
  
  AIC_diff<-AICs - AIC(mnull)
  del_AIC_df<-data.frame(AIC_diff, mnames)
  del_AIC_df<-del_AIC_df[order(del_AIC_df$AIC_diff),]
  del_AIC_df

##       AIC_diff     mnames
## 9  -12.4938759        MTC
## 7  -11.0715798    LUC+MTC
## 4  -11.0342246     MTC+BM
## 2   -9.5484846 LUC+MTC+BM
## 6   -9.2636744    LUC*MTC
## 1   -7.7790313 LUC*MTC+BM
## 10   0.0000000       NULL
## 8    0.8740824        LUC
## 5    1.3573346         BM
## 3    2.3363849     LUC+BM

library(coefplot)
library(ggplot2)

coef_pcnt$val<-1:5
coef_pcnt$var_name <- factor(coef_pcnt$Var_name, levels = coef_pcnt$Var_name[order(coef_pcnt$val)])
  
  p1<-ggplot(coef_pcnt)
  p1<- p1 + geom_hline(yintercept = 0, colour=gray(1/2), lty=2)
  p1<- p1 + geom_linerange(aes(x=Var_name, ymin=lowCI, ymax=highCI), lwd=1.5, position = position_dodge(width=1/2))
  p1<- p1 + geom_pointrange(aes(x= Var_name, y=coef_av, ymin=lowCI, ymax=highCI), lwd=1, position=position_dodge(width=1/2), shape=21, fill="White")
  p1<- p1 + scale_y_continuous(breaks=seq(-8, 4, 2)) +theme_bw() + labs(y = "Annual Population Change (%)", x = "Variable") + theme(legend.title=element_blank(), text = element_text(size=20),axis.title.x = element_text(margin = unit(c(5, 5, 0, 0), "mm")))
  p1<-p1 + coord_flip()
  print(p1)

#coef_pcnt$Class<-"Birds"  
#coef_pcnt$Class<-"Mammals"  

#coef_pcntb<-coef_pcnt
#coef_pcntm<-coef_pcnt

Chapter 1 Workflow

Fiona Spooner

19 October 2017