install.packages('climates', repos='http://www.rforge.net/')
install.packages("dismo")
install.packages("maps")
install.packages("mapdata")
install.packages("popbio")
install.packages("demoniche", repos="http://R-Forge.R-project.org")library(dismo)
#library(demoniche)
library(maps)
library(mapdata)
library(rgeos)
library(zoo)
library(raster)
library(mgcv)
library(randomForest)
library(rgdal)
#library(doParallel)
library(ggplot2)
library(dplyr)
library(data.table)
library(reshape2)
library(taRifx)
library(plyr)
library(sp)
#library(lhs)
source("demoniche_setup_me.R")
source("demoniche_model_me.R")
source("ensemble_evaluate.R")
source("demoniche_population_function.R")library(raster)
wd<-getwd()
genus<-"Cervus"
species<-"elaphus"
Europe_only<-FALSE
binomial<-paste(genus, species, sep="_")
min_lat<- 35 #one degree more/less than the iucn range extent
max_lat<- 68
min_lon<- -11
max_lon<- 33
e_iucn<-extent(min_lon, max_lon, min_lat, max_lat)
bioclim_layers<-c(2,4,5,6,7,10,11)
bioclim_names<-paste("Bio_", bioclim_layers, "_2006_2016_average", sep="")
binomial<-paste(genus, species, sep="_")
k<-4
years<-1950:2005
spin_years<-1850:1949
lpi<-read.csv("LPI_pops_20160523_edited.csv")
species_directory<-paste(wd, "/",binomial, "_bias", sep="")
dir.create(species_directory)
## Warning in dir.create(species_directory): 'D:\Fiona\Git_Method\Git_Method\Cervus_elaphus_bias' already exists
sdm_folder<-paste(species_directory, "SDM_folder_bias", sep = "/")
dir.create(sdm_folder)
## Warning in dir.create(sdm_folder): 'D:\Fiona\Git_Method\Git_Method\Cervus_elaphus_bias\SDM_folder_bias' already exists
demoniche_folder<-paste(species_directory, "Demoniche_Output_bias", sep = "/")
dir.create(demoniche_folder)
## Warning in dir.create(demoniche_folder): 'D:\Fiona\Git_Method\Git_Method\Cervus_elaphus_bias\Demoniche_Output_bias' already exists
no_yrs_mine<-1
prob_scenario<-c(0.5,0.5) #need to check this
noise<-0.90
env_stochas_type<-"normal" #can also be "lognormal"
wd<- getwd()
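# A rough, hypothetical illustration (not the demoniche internals, which are in
# demoniche_model_me.R) of what the "normal" vs "lognormal" env_stochas_type
# options mean: yearly environmental-stochasticity multipliers drawn from a
# normal or a lognormal distribution. sd_env is a made-up spread parameter,
# not the 'noise' value above.
sd_env <- 0.1
set.seed(1)
env_mult <- if (env_stochas_type == "normal") {
  rnorm(length(years), mean = 1, sd = sd_env)
} else {
  rlnorm(length(years), meanlog = 0, sdlog = sd_env)
}
summary(env_mult)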
capra <-gbif(genus =genus, species = species)
capgeo <- subset(capra, !is.na(lon) & !is.na(lat) & !is.na(year)) # keep only records with coordinates and a collection year
dups <- duplicated(capgeo[, c("lon", "lat")])
capg <-capgeo[!dups, ]
capg2 <- capg[capg$lon > min_lon & capg$lon< max_lon & capg$lat > min_lat & capg$lat < max_lat & capg$year>=2006 & capg$coordinateUncertaintyInMeters <1000, ]
capg2<-data.frame(capg2$lon,capg2$lat)
colnames(capg2)<-c("Longitude", "Latitude")
pyr<-subset(lpi, Binomial == binomial & Specific_location==1 ) #10716 v.near two other populations #record 11470 had wrong longitude - in Russia!
pyrs<-pyr[,c("Longitude","Latitude")]
capg2<-rbind(capg2, pyrs)
capg2<-na.omit(capg2)
capg2$presence<-rep(1)
capg2$ID<-1:nrow(capg2)
capc<-as.matrix(capg2[ , c( "Longitude","Latitude", "presence")])
capc<-matrix(capc[complete.cases(capc)], ncol=3)
xy<-as.matrix(capc[,c(1,2)])
df<-data.frame(capc[,3])
write.csv(df, paste(species_directory,"/", binomial,"_gbif.csv", sep=""), row.names = FALSE)
write.csv(xy, paste(species_directory,"/",binomial, "_locs.csv", sep=""), row.names=FALSE)
library(sp)
library(raster)
df<-read.csv(paste(species_directory, "/",binomial, "_gbif.csv", sep=""))
xy<-read.csv(paste(species_directory,"/" ,binomial, "_locs.csv", sep=""))
sp<-sp:::SpatialPointsDataFrame(coords=xy, data=df)
sp<-sp[sp@coords[,1] > min_lon & sp@coords[,1]< max_lon &sp@coords[,2] >min_lat & sp@coords[,2] <max_lat,]
e<-raster:::extent(sp)
e_big<-e+2
maps:::map('world', col="light grey", fill=T, xlim=c(e_big[1],e_big[2]), ylim=c(e_big[3],e_big[4]))
points(sp, col="red", pch=20)if (Europe_only){
lf<-list.files(paste(wd, "/Bioclim/", sep=""))
} else{
lf<-list.files(paste(wd, "/Bioclim/Global", sep=""))
}
first<-which(grepl("2006_bioclim", lf)) #was initially 1985
last<-which(grepl("2016_bioclim", lf))
if (Europe_only){
all_years<-stack(paste(wd, "/Bioclim/",lf[first:last], sep=""))
} else{
all_years<-stack(paste(wd, "/Bioclim/Global/",lf[first:last], sep=""))
}
bios<-seq(1,nlayers(all_years), by=19)
cellStats(all_years[[bios]], stat="mean")
## X2006_bioclim_variable_stack.1 X2007_bioclim_variable_stack.1
## 8.420860 8.533780
## X2008_bioclim_variable_stack.1 X2009_bioclim_variable_stack.1
## 8.958709 8.426517
## X2010_bioclim_variable_stack.1 X2011_bioclim_variable_stack.1
## 8.716551 9.016502
## X2012_bioclim_variable_stack.1 X2013_bioclim_variable_stack.1
## 8.794914 8.714704
## X2014_bioclim_variable_stack.1 X2015_bioclim_variable_stack.1
## 9.044250 9.075093
## X2016_bioclim_variable_stack.1
## 9.069219
#creating a 2006-2016 average of each bioclim variable
for (i in 1:19){
layers<-bios
bio_layer<-mean(all_years[[layers]])
if (Europe_only){
writeRaster(bio_layer, paste(wd, "/Bioclim/Bio_",i,"_2006_2016_average.tif",sep=""), overwrite=TRUE)
} else {
writeRaster(bio_layer, paste(wd, "/Bioclim/Global/Bio_",i,"_2006_2016_average.tif",sep=""), overwrite=TRUE)
}
bios<-bios+1
#print(layers)
}
#pred_nf<-stack(paste(wd, "/Bioclim/Bio_", bio_layer_pred,"_1985_2016_average.tif",sep="" ))
hyde<-stack(paste(wd,"/Hyde_Interpolated.tif", sep=""))
names(hyde)<-paste("Hyde_Year", 1950:2005, sep="_")
hyde_resample<-raster:::resample(hyde, all_years)
if (Europe_only){
writeRaster(hyde_resample, "hyde_resample_europe.tif")
} else {
writeRaster(hyde_resample, "hyde_resample_global.tif")
}
Selecting out the bioclim layers I'm interested in:
if (Europe_only){
hyde_files<-list.files(paste(wd,"/hyde_europe/", sep=""))
hyde_resample<-stack(paste(wd,"/hyde_europe/", hyde_files,sep=""))
pred_nf<-stack(paste(wd, "/Bioclim/Bio_", bioclim_layers,"_2006_2016_average.tif",sep="" ))
} else {
hyde_files<-list.files(paste(wd,"/hyde_global/", sep=""))
hyde_resample<-stack(paste(wd,"/hyde_global/", hyde_files,sep=""))
pred_nf<-stack(paste(wd, "/Bioclim/Global/Bio_", bioclim_layers,"_2006_2016_average.tif",sep="" ))
}
pred_nf<-stack(pred_nf, hyde_resample[[nlayers(hyde_resample)]])
pred_crop<-crop(pred_nf, e_iucn)
plot(pred_crop)
pred_ex<-extract(pred_crop, sp)
bad_sp<-which(is.na(pred_ex[,1]))
spdf<-data.frame(sp)
if (length(bad_sp) > 0) spdf<-spdf[-bad_sp,] # drop presence points with missing predictor values
Creating the testing and training datasets
Using K=4 so 75% of points are used for training and 25% for testing.
library(dismo)
set.seed(10)
group_pres<-kfold(spdf, k)
if (!file.exists(paste(species_directory, "k_folds_presence_2006.csv", sep="/"))){
write.csv(group_pres, paste(species_directory, "k_folds_presence_2006.csv", sep="/"))
}
group_pres<-read.csv(paste(species_directory, "k_folds_presence_2006.csv", sep="/"))
group_pres<-group_pres[,-1] #all presence points
sp_train<-sp[group_pres!=1,] #presence points split into training and test
sp_test<-sp[group_pres ==1,]
library(rgbif) # occ_search() comes from the rgbif package
targetRecords <- occ_search(
scientificName=c(
'Dama dama',
'Alces alces',
'Capreolus capreolus',
'Sus scrofa',
'Martes martes',
'Meles meles',
'Vulpes vulpes',
'Mustela erminea',
'Lutra lutra'
),
hasCoordinate=TRUE,
year='2000,2016',
hasGeospatialIssue=FALSE,
fields='minimal',
decimalLongitude='-11,33',
decimalLatitude='35,68',
limit=11000
)
if (exists('targetSites')) rm(targetSites)
for (i in 1:length(targetRecords)) {
thisGenusCoords <- data.frame(
longitude=targetRecords[[i]]$data$decimalLongitude,
latitude=targetRecords[[i]]$data$decimalLatitude
)
targetSites <- if (exists('targetSites')) {
rbind(targetSites, thisGenusCoords)
} else {
thisGenusCoords
}
}
targetEnv <- as.data.frame(extract(pred_crop, targetSites))
outside <- which(is.na(rowSums(targetEnv)))
targetSites <- targetSites[-outside, ]
targetEnv <- targetEnv[-outside, ]
targetBg <- cbind(targetSites, targetEnv)
names(targetBg)[1:2] <- c('longitude', 'latitude')
nrow(targetBg)
targetBg <- targetBg[sample(1:nrow(targetBg), 10000), ]
nrow(targetBg)
#write.csv(targetBg, paste(species_directory,"background_data_et_ungulates.csv", sep="/"))
bg_pse<-read.csv(paste(species_directory,"background_data_et_ungulates.csv", sep="/"))
bg_pse<-bg_pse[,-1]
k<-4
targetBg_k<-kfold(bg_pse, k)
#write.csv(targetBg_k, paste(species_directory, "kfolds_background_data_et_ungulates.csv", sep="/"))
group_back<-read.csv(paste(species_directory,"kfolds_background_data_et_ungulates.csv", sep="/"))
group_back<-group_back[,-1]
sp_backg_train <- bg_pse[group_back != 1,1:2] #background points split into training and test
sp_backg_test <- bg_pse[group_back == 1,1:2]
colnames(sp_backg_train)<-c("lon", "lat")
colnames(sp_backg_test)<-c("lon", "lat")
sp_train<-sp_train@coords #presence training points
colnames(sp_train)<-c("lon", "lat")
train <- rbind(sp_train, sp_backg_train) #presence and background training points combined
sp_test<-sp_test@coords #presence test points
colnames(sp_test)<-c("lon", "lat")
test<-rbind(sp_test, sp_backg_test) #presence and background test points combined
pb_train <- c(rep(1, nrow(sp_train)), rep(0, nrow(sp_backg_train)))
envtrain <- extract(pred_nf, train)
envtrain <- data.frame( cbind(pa=pb_train, envtrain) ) #training points with environmental variables
envtrain<-na.omit(envtrain)
pb_test<-c(rep(1, nrow(sp_test)), rep(0, nrow(sp_backg_test)))
envtest <- extract(pred_nf, test)
envtest <- data.frame( cbind(pa=pb_test, envtest) ) #test points with environmental variables
envtest<-na.omit(envtest)
env_all<-rbind(envtrain, envtest) #all of the training and test data with environmental variables
env_pres<-extract(pred_nf, sp)
pa<-rep(1, nrow(env_pres))
env_pres<-data.frame(pa, env_pres) #presence points with environmental variables
colnames(env_pres)<-c("pa", bioclim_names, "Hyde_Year_2005")
env_pres_xy<-data.frame(sp@coords, env_pres)
env_pres_xy<-na.omit(env_pres_xy)
env_back<-extract(pred_nf, bg_pse[,1:2])
ap<-rep(0, nrow(env_back))
env_back<-data.frame(ap, env_back) #background points with environmental variables
colnames(env_back)<-c("pa",bioclim_names, "Hyde_Year_2005")
env_back_xy<-data.frame(bg_pse[,1:2], env_back)
env_back_xy<-na.omit(env_back_xy)
group_pres<-group_pres[1:nrow(env_pres_xy)]
group_back<-group_back[1:nrow(env_back_xy)]
Bioclim
Running the Bioclim envelope model with k-fold cross-validation to get AUC values; thresholds based on kappa, no omission and the true skill statistic can also be derived from the same evaluations.
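The loop below stores the cross-validation AUCs; as a hedged sketch, the three thresholds named above (and the maximum true skill statistic) can be pulled from each dismo ModelEvaluation object afterwards, e.g.:
# Hedged sketch: thresholds and max TSS from a dismo evaluation object 'e'
# (for example e <- evl_bc[[1]] once the loop below has been run).
get_thresholds <- function(e) {
  c(kappa       = threshold(e, stat = "kappa"),        # maximises Cohen's kappa
    no_omission = threshold(e, stat = "no_omission"),  # highest threshold with no omission of presences
    spec_sens   = threshold(e, stat = "spec_sens"))    # maximises sensitivity + specificity (max TSS)
}
max_tss <- function(e) max(e@TPR + e@TNR - 1)           # true skill statistic (Allouche et al. 2006)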
evl_bc<- list()
tss_bc<-list()
for (i in 1:k){
pres_train<-sp[group_pres!=i,]
pres_test<-sp[group_pres==i,]
backg_test<-bg_pse[group_back==i,1:2]
bc <- bioclim(pred_nf,pres_train)
evl_bc[[i]] <- dismo:::evaluate(pres_test, backg_test, bc,pred_nf,type="response")#test presence, test absence, model, predictor variables
#print(i)
}
auc_bc <- sapply( evl_bc, function(x){slot(x, "auc")} )
print(auc_bc)
## [1] 0.6220573 0.6098527 0.6265069 0.6392110
bc_auc<-mean(auc_bc)
GAM
Running a GAM and evaluating it in the same way to get AUC values.
#library(mgcv)
evl_gam<- list()
tss_gam<-list()
for (i in 1:k){
pres_train<-env_pres_xy[group_pres!=i ,-c(1,2)]
pres_test<-env_pres_xy[(group_pres==i) ,-c(1,2)]
back_test<-env_back_xy[(group_back==i),-c(1,2)]
back_train<-env_back_xy[(group_back!=i),-c(1,2)]
envtrain<-rbind(pres_train, back_train)
gm1<-mgcv:::gam(pa~ s(Bio_2_2006_2016_average)+s(Bio_4_2006_2016_average)+ s(Bio_5_2006_2016_average)+
s(Bio_6_2006_2016_average)+ s(Bio_7_2006_2016_average)+ s(Bio_10_2006_2016_average)+
s(Bio_11_2006_2016_average) + s(Hyde_Year_2005), family = binomial(link = "logit"),data=envtrain)
evl_gam[[i]] <- dismo:::evaluate(p = pres_test, a = back_test,model= gm1,type="response")
}
auc_gam <- sapply( evl_gam, function(x){slot(x, "auc")} )
print(auc_gam)
## [1] 0.8656110 0.8649390 0.8606699 0.8603572
gam_auc<-mean(auc_gam)
Random Forest
Running a random forest model and evaluating it in the same way to get AUC values.
model<-pa ~ Bio_2_2006_2016_average+Bio_4_2006_2016_average+
Bio_5_2006_2016_average+ Bio_6_2006_2016_average+ Bio_7_2006_2016_average+
Bio_10_2006_2016_average+ Bio_11_2006_2016_average + Hyde_Year_2005
evl_rf<- list()
for (i in 1:k){
pres_train<-env_pres_xy[group_pres!=i,-c(1,2)]
pres_test<-env_pres_xy[(group_pres==i) ,-c(1,2)]
back_test<-env_back_xy[(group_back==i),-c(1,2)]
back_train<-env_back_xy[group_back !=i,-c(1,2)]
envtrain<-rbind(pres_train, back_train)
rf1 <- randomForest(model, data=envtrain)
evl_rf[[i]] <- dismo:::evaluate(pres_test, back_test, rf1,type="response")
}
auc_rf <- sapply( evl_rf, function(x){slot(x, "auc")} )
print(auc_rf)
## [1] 0.9225918 0.9254292 0.9267000 0.9205677
rf_auc<-mean(auc_rf)
Ensemble Model
Creating a weighted (based on AUC) ensemble average suitability model for 2006-2016 from the Bioclim, GAM and Random Forest models. This will be used to predict habitat suitability for red deer for each year 1950-2016.
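As a rough worked example of the AUC-based weighting used below, plugging in the mean cross-validation AUCs printed above (roughly 0.62, 0.86 and 0.92) shows the random forest getting just over half of the total weight:
# Worked example of the (AUC - 0.5)^2 weights; values approximate the means printed above
auc_example <- c(bioclim = 0.624, gam = 0.863, rf = 0.924)
w_example   <- (auc_example - 0.5)^2
round(w_example / sum(w_example), 2)   # roughly 0.05 (bioclim), 0.40 (gam), 0.55 (random forest)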
#total models
library(randomForest)
library(boot)
bc <- bioclim(pred_crop, sp)
gm1<-gam(pa~ s(Bio_2_2006_2016_average)+s(Bio_4_2006_2016_average)+ s(Bio_5_2006_2016_average)+
s(Bio_6_2006_2016_average)+ s(Bio_7_2006_2016_average)+ s(Bio_10_2006_2016_average)+
s(Bio_11_2006_2016_average) + s(Hyde_Year_2005), family = binomial(link = "logit"),data=env_all)
rf1 <- randomForest(model, data=env_all)
pb <- predict(pred_crop, bc, ext=e_iucn, progress='') #bioclim predict
pg <- predict(pred_crop, gm1, ext=e_iucn) #gam predict
pgl<-raster:::calc(pg, fun=function(x){ exp(x)/(1+exp(x))}) #backtransforming from logit space
pr <- predict(pred_crop, rf1, ext=e_iucn) #random forest predict
models <- stack(pb, pgl, pr)
names(models) <- c("bioclim", "gam", "random forest")
plot(models)
auc<-c(bc_auc, gam_auc, rf_auc)
w <- (auc-0.5)^2
wm <- weighted.mean( models[[c("bioclim", "gam", "random.forest")]], w)
plot(wm, main="Ensemble model")
points(sp)
Weighted Threshold
abs_xy<-env_back_xy[,c(1,2)]
ens<-ensemble_evaluate(sp,abs_xy , wm)
thresh<-threshold(ens, stat="spec_sens")
thresh
## [1] 0.4061065
Annual Habitat Suitability Predictions
Using the ensemble model to predict suitability for each year (1950-2016) and, for each year, creating a binary presence/absence map based on three different thresholding techniques. I will go forward using the threshold that maximises sensitivity + specificity (i.e. the true skill statistic), following Allouche et al. 2006.
bc <- bioclim(pred_nf, sp)
gm1<-mgcv:::gam(pa~ s(Bio_2_2006_2016_average)+ s(Bio_4_2006_2016_average)+ s(Bio_5_2006_2016_average)+ s(Bio_6_2006_2016_average)+ s(Bio_7_2006_2016_average)+s(Bio_10_2006_2016_average)+ s(Bio_11_2006_2016_average) + s(Hyde_Year_2005), family = binomial(link = "logit"), data=env_all) # binomial family added so the logit back-transform below is valid
rf1 <- randomForest:::randomForest(model, data=env_all)
for (i in 1:length(years)){
if (Europe_only){
pred_nf<-stack(paste(wd, "/Bioclim/", years[i], "_bioclim_variable_stack.tif", sep="" ))
}else{
pred_nf<-stack(paste(wd, "/Bioclim/Global/", years[i], "_bioclim_variable_stack.tif", sep="" ))
}
pred_nf<-pred_nf[[bioclim_layers]]
names(pred_nf)<-bioclim_names
hyde_yr<-raster(paste(wd, "/hyde/Hyde_Year_", years[i], ".tif",sep=""))
names(hyde_yr)<-"Hyde_Year_2005" # renamed to match the predictor name used when fitting the models
pred_nf<-stack(pred_nf, hyde_yr)
pb <- predict(pred_nf, bc, ext=e_big, progress='')
pg <- predict(pred_nf, gm1, ext=e_big) #gam predict
pgl<-raster:::calc(pg, fun=function(x){ exp(x)/(1+exp(x))})
pr <- predict(pred_nf, rf1, ext=e_big)
models <- stack(pb, pgl, pr)
names(models) <- c("bioclim", "gam", "random forest")
wm <- weighted.mean( models[[c("bioclim", "gam", "random.forest")]], w)
pa_sss<-wm>thresh
writeRaster(wm , paste(sdm_folder, "/hyde_weighted_ensemble_sdm_", years[i], ".tif", sep=""), overwrite=TRUE)
writeRaster(pa_sss , paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years[i], ".tif", sep=""), overwrite=TRUE)
plot(pa_sss, main=years[i])
}
Trends in habitat suitability 1950-2016
Patches
Plots of the number of cells with predicted presence and number of patches (contiguous presence) over 1950-2016.
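The patch-summary code itself is not shown here; as a hedged sketch, these plots could be produced from the yearly presence/absence rasters written in the prediction loop above (raster::clump requires the igraph package):
# Sketch: yearly counts of presence cells and of contiguous presence patches
library(igraph)   # required by raster::clump
pa_files <- paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years, ".tif", sep = "")
pa_stack <- stack(pa_files)
n_cells <- cellStats(pa_stack, stat = "sum")   # number of cells predicted present each year
n_patches <- sapply(1:nlayers(pa_stack), function(i) {
  cl <- clump(pa_stack[[i]], directions = 8)   # label contiguous presence patches
  length(na.omit(unique(values(cl))))          # number of distinct patch IDs
})
plot(years, n_cells, type = "l", xlab = "Year", ylab = "Cells predicted present")
plot(years, n_patches, type = "l", xlab = "Year", ylab = "Number of patches")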
Demoniche
Formatting the population occurrence points
Formatting the LPI population data for use in Demoniche; these populations start as the "seed" populations. It might be better to use GBIF data for this, since it is unrealistic that the LPI populations are the only existing populations in 1950; alternatively, a historical map of where red deer were in 1950 could be used.
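A hedged sketch of the GBIF alternative mentioned above, reusing the occurrence coordinates saved earlier; the starting abundance of 10 is only a placeholder, not an estimate:
# Alternative seed populations from the GBIF occurrences (not used below)
gbif_xy <- read.csv(paste(species_directory, "/", binomial, "_locs.csv", sep = ""))
gbif_seeds <- data.frame(ID = 1:nrow(gbif_xy),
                         X = gbif_xy$Longitude,
                         Y = gbif_xy$Latitude,
                         abundance = 10)  # placeholder starting abundance
head(gbif_seeds)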
xy<-read.csv(paste(species_directory,"/" ,binomial, "_locs.csv", sep=""))
#patch<-stack(paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years[1],".tif", sep=""))
sdm_stack<-stack(paste(sdm_folder, "/hyde_weighted_ensemble_sdm_", years,".tif", sep=""))
patch_stack<-stack(paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years,".tif", sep=""))
pyr<-subset(lpi, Binomial ==binomial & Specific_location==1& Region =="Europe") #record 11470 had wrong longitude - in Russia!
pops<-pyr[,c(65:120)]
pop_counts <- (pops !="NULL")
points_per_pop1950_2005 = rowSums(pop_counts)
pyr<-pyr[-which(points_per_pop1950_2005<5),]
pops<-pops[-which(points_per_pop1950_2005<5),]
pops<-data.frame(pyr$ID, pops)
colnames(pops)<-c("ID", 1950:2005)
#formatting the lpi data for use in demoniche
pyrs<-pyr[,c("ID","Longitude","Latitude")]
xy_lpi<-data.frame(pyrs$Longitude, pyrs$Latitude)
sdm_lpi<-extract(sdm_stack, xy_lpi)
colnames(sdm_lpi)<-1950:2005
sdm_lpi_melt<-melt(sdm_lpi)
colnames(sdm_lpi_melt)<-c("ID", "Year", "HSI")
sdm_lpi_melt$ID<-rep(pyrs$ID, length(1950:2005))
ggplot(data = sdm_lpi_melt, aes(x = Year, y= HSI, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
lambda<-function(x){
lambdas<-diff(log10(as.numeric(x)))
}
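# For example, on a made-up HSI series the function returns year-to-year log10
# ratios, directly comparable to the observed population lambdas computed below:
lambda(c(0.20, 0.25, 0.25)) # log10(0.25/0.20) ~ 0.097, then log10(1) = 0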
sdm_lambdas<-t(apply(sdm_lpi,1,lambda))
colnames(sdm_lambdas)<-1950:2004
sdm_lambdas_melt<-melt(sdm_lambdas)
colnames(sdm_lambdas_melt)<-c("ID", "Year", "HSI")
sdm_lambdas_melt$ID<-rep(pyrs$ID, length(1950:2004))
ggplot(data = sdm_lambdas_melt, aes(x = Year, y= HSI, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
Adding in the LPI data
colnames(pops)[2:ncol(pops)]<-1950:2005
melt_lpi<-data.table:::melt(pops, id.vars = "ID")
## Warning: attributes are not identical across measure variables; they will be dropped
colnames(melt_lpi)<-c("ID", "Year","HSI")
melt_lpi$Year<-as.numeric(as.character(melt_lpi$Year))
melt_lpi$HSI<-as.numeric(melt_lpi$HSI)
## Warning: NAs introduced by coercion
ggplot(data = melt_lpi, aes(x = Year, y= HSI, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 188 rows containing non-finite values (stat_smooth).
## (Further repeated loess warnings omitted: with so few data points per population
## the smoothing span is too small, so simpleLoess/predLoess fall back to a pseudoinverse.)
## Warning: Removed 152 rows containing missing values (geom_path).
library(taRifx)
library(plyr)
library(mgcv)
pops<-pyr[,c(1,65:120)]
colnames(pops)[2:ncol(pops)]<-paste("Year", 1950:2005, sep="_")
pops[pops=="NULL"]<-NA
#popsm<-as.matrix(pops)
lambda_lpi<-function(x){
#subsetting the population data by each population
spid = x[2:(length(x))] #subsetting only the dates
names(spid)<-1950:2005 #renaming the date column names as R doesn't like numbered column names
spid<-as.numeric(spid)
pop_datab <- (!is.na(spid) )
points = sum(pop_datab)
id<-x[1]
id<-as.numeric(id)
Date<-1950:2005
spidt<-destring(t(spid))
time<-length(min(which(!is.na(spidt))):max(which(!is.na(spidt))))
missing<-time-points
Year<-Date[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
Population<-spidt[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
Population[Population == 0] <- mean(Population, na.rm=TRUE)*0.01 #if a population is zero one year this is replaced with 1% of the average population estimate - because you can't log zeros
df<-data.frame(Year,Population)
PopN = log10(df$Population)
if (points >=6) {
if (length(na.omit(PopN)) >=6) {
SmoothParm = round(length(na.omit(PopN))/2)
} else {
SmoothParm=3
}
mg2<-mgcv:::gam(PopN ~ s(Year, k=SmoothParm, fx=TRUE)) # fx=TRUE (fixed-d.f. regression spline) belongs inside s()
pv2 <- predict(mg2,df,type="response",se=TRUE)
R_sq2<-summary(mg2)$r.sq
model<-1
pv2$fit[pv2$fit <= 0] <- NA
lambda2<-diff(pv2$fit)
ial<-data.frame(id, Year[-length(Year)], lambda2)
colnames(ial)<-c("ID", "Year", "Lambda")
} else{
lint<-na.approx(PopN)
lint[lint<=0] <- NA
lambda2<-diff(lint)
ial<-data.frame(id, Year[-length(Year)], lambda2)
colnames(ial)<-c("ID", "Year", "Lambda")
}
return(ial)
}
lambda_lpi_r<-apply(pops, 1, lambda_lpi)
lambda_r<-do.call( "rbind", lambda_lpi_r)
lambda_r<-lambda_r[lambda_r$Year <=2005,]
fill<-data.frame(rep(pops$ID, each=length(1950:2005)), 1950:2005)
colnames(fill)<-c("ID", "Year")
all_year_ab<-join(fill, lambda_r, type="left")
## Joining by: ID, Year
ggplot(data = all_year_ab, aes(x = Year, y= Lambda, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 157 rows containing non-finite values (stat_smooth).
## Warning: Removed 157 rows containing missing values (geom_path).
Predicted and Observed together
ggplot()+
geom_smooth(data = sdm_lambdas_melt, aes(x = Year, y= HSI, group=ID), colour = "black")+
geom_smooth(data = all_year_ab, aes(x = Year, y= Lambda, group=ID), colour="red")+
#geom_line(data = gam_r_lambda, aes(x =Year, y = Abundance, group=sp_lpi.ID), colour = "blue" )+
facet_grid(.~ID)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 157 rows containing non-finite values (stat_smooth).