install.packages('climates', repos='http://www.rforge.net/')
install.packages("dismo")
install.packages("maps")
install.packages("mapdata")
install.packages("popbio")
install.packages("demoniche", repos="http://R-Forge.R-project.org")library(dismo)
#library(demoniche)
library(maps)
library(mapdata)
library(rgeos)
library(zoo)
library(raster)
library(mgcv)
library(randomForest)
library(rgdal)
#library(doParallel)
library(ggplot2)
library(dplyr)
library(data.table)
library(reshape2)
library(taRifx)
library(plyr)
library(sp)
#library(lhs)
source("demoniche_setup_me.R")
source("demoniche_model_me.R")
source("ensemble_evaluate.R")
source("demoniche_population_function.R")library(raster)
wd<-getwd()
genus<-"Cervus"
species<-"elaphus"
Europe_only<-FALSE
binomial<-paste(genus, species, sep="_")
min_lat<- 35 #one degree more/less than the iucn range extent
max_lat<- 68
min_lon<- -11
max_lon<- 33
e_iucn<-extent(min_lon, max_lon, min_lat, max_lat)
bioclim_layers<-c(2,4,5,6,7,10,11)
bioclim_names<-paste("Bio_", bioclim_layers, "_2006_2016_average", sep="")
binomial<-paste(genus, species, sep="_")
k<-4
years<-1950:2005
spin_years<-1850:1949
lpi<-read.csv("LPI_pops_20160523_edited.csv")
species_directory<-paste(wd, "/",binomial, "_bias", sep="")
dir.create(species_directory)
## Warning in dir.create(species_directory): 'D:\Fiona\Git_Method\Git_Method\Cervus_elaphus_bias' already exists
sdm_folder<-paste(species_directory, "SDM_folder_bias", sep = "/")
dir.create(sdm_folder)
## Warning in dir.create(sdm_folder): 'D:\Fiona\Git_Method\Git_Method\Cervus_elaphus_bias\SDM_folder_bias' already exists
demoniche_folder<-paste(species_directory, "Demoniche_Output_bias", sep = "/")
dir.create(demoniche_folder)
## Warning in dir.create(demoniche_folder): 'D:\Fiona\Git_Method\Git_Method\Cervus_elaphus_bias\Demoniche_Output_bias' already exists
no_yrs_mine<-1
prob_scenario<-c(0.5,0.5) #need to check this
noise<-0.90
env_stochas_type<-"normal" #can also be "lognormal"
wd<- getwd()
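# A rough, hypothetical illustration (not the demoniche internals, which are in
# demoniche_model_me.R) of what the "normal" vs "lognormal" env_stochas_type
# options mean: yearly environmental-stochasticity multipliers drawn from a
# normal or a lognormal distribution. sd_env is a made-up spread parameter,
# not the 'noise' value above.
sd_env <- 0.1
set.seed(1)
env_mult <- if (env_stochas_type == "normal") {
  rnorm(length(years), mean = 1, sd = sd_env)
} else {
  rlnorm(length(years), meanlog = 0, sdlog = sd_env)
}
summary(env_mult)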
capra <-gbif(genus =genus, species = species)
capgeo <- subset(capra, !is.na(lon) & !is.na(lat) & !is.na(year)) # keep only records with coordinates and a collection year
dups <- duplicated(capgeo[, c("lon", "lat")])
capg <-capgeo[!dups, ]
capg2 <- capg[capg$lon > min_lon & capg$lon< max_lon & capg$lat > min_lat & capg$lat < max_lat & capg$year>=2006 & capg$coordinateUncertaintyInMeters <1000, ]
capg2<-data.frame(capg2$lon,capg2$lat)
colnames(capg2)<-c("Longitude", "Latitude")
pyr<-subset(lpi, Binomial == binomial & Specific_location==1 ) #10716 v.near two other populations #record 11470 had wrong longitude - in Russia!
pyrs<-pyr[,c("Longitude","Latitude")]
capg2<-rbind(capg2, pyrs)
capg2<-na.omit(capg2)
capg2$presence<-rep(1)
capg2$ID<-1:nrow(capg2)
capc<-as.matrix(capg2[ , c( "Longitude","Latitude", "presence")])
capc<-matrix(capc[complete.cases(capc)], ncol=3)
xy<-as.matrix(capc[,c(1,2)])
df<-data.frame(capc[,3])
write.csv(df, paste(species_directory,"/", binomial,"_gbif.csv", sep=""), row.names = FALSE)
write.csv(xy, paste(species_directory,"/",binomial, "_locs.csv", sep=""), row.names=FALSE)
library(sp)
library(raster)
df<-read.csv(paste(species_directory, "/",binomial, "_gbif.csv", sep=""))
xy<-read.csv(paste(species_directory,"/" ,binomial, "_locs.csv", sep=""))
sp<-sp:::SpatialPointsDataFrame(coords=xy, data=df)
sp<-sp[sp@coords[,1] > min_lon & sp@coords[,1]< max_lon &sp@coords[,2] >min_lat & sp@coords[,2] <max_lat,]
e<-raster:::extent(sp)
e_big<-e+2
maps:::map('world', col="light grey", fill=T, xlim=c(e_big[1],e_big[2]), ylim=c(e_big[3],e_big[4]))
points(sp, col="red", pch=20)if (Europe_only){
lf<-list.files(paste(wd, "/Bioclim/", sep=""))
} else{
lf<-list.files(paste(wd, "/Bioclim/Global", sep=""))
}
first<-which(grepl("2006_bioclim", lf)) #was initially 1985
last<-which(grepl("2016_bioclim", lf))
if (Europe_only){
all_years<-stack(paste(wd, "/Bioclim/",lf[first:last], sep=""))
} else{
all_years<-stack(paste(wd, "/Bioclim/Global/",lf[first:last], sep=""))
}
bios<-seq(1,nlayers(all_years), by=19)
cellStats(all_years[[bios]], stat="mean")
## X2006_bioclim_variable_stack.1 X2007_bioclim_variable_stack.1
## 8.420860 8.533780
## X2008_bioclim_variable_stack.1 X2009_bioclim_variable_stack.1
## 8.958709 8.426517
## X2010_bioclim_variable_stack.1 X2011_bioclim_variable_stack.1
## 8.716551 9.016502
## X2012_bioclim_variable_stack.1 X2013_bioclim_variable_stack.1
## 8.794914 8.714704
## X2014_bioclim_variable_stack.1 X2015_bioclim_variable_stack.1
## 9.044250 9.075093
## X2016_bioclim_variable_stack.1
## 9.069219
#creating a 2006-2016 average of each bioclim variable
for (i in 1:19){
layers<-bios
bio_layer<-mean(all_years[[layers]])
if (Europe_only){
writeRaster(bio_layer, paste(wd, "/Bioclim/Bio_",i,"_2006_2016_average.tif",sep=""), overwrite=TRUE)
} else {
writeRaster(bio_layer, paste(wd, "/Bioclim/Global/Bio_",i,"_2006_2016_average.tif",sep=""), overwrite=TRUE)
}
bios<-bios+1
#print(layers)
}
#pred_nf<-stack(paste(wd, "/Bioclim/Bio_", bio_layer_pred,"_1985_2016_average.tif",sep="" ))
hyde<-stack(paste(wd,"/Hyde_Interpolated.tif", sep=""))
names(hyde)<-paste("Hyde_Year", 1950:2005, sep="_")
hyde_resample<-raster:::resample(hyde, all_years)
if (Europe_only){
writeRaster(hyde_resample, "hyde_resample_europe.tif")
} else {
writeRaster(hyde_resample, "hyde_resample_global.tif")
}
Selecting out the bioclim layers I'm interested in:
if (Europe_only){
hyde_files<-list.files(paste(wd,"/hyde_europe/", sep=""))
hyde_resample<-stack(paste(wd,"/hyde_europe/", hyde_files,sep=""))
pred_nf<-stack(paste(wd, "/Bioclim/Bio_", bioclim_layers,"_2006_2016_average.tif",sep="" ))
} else {
hyde_files<-list.files(paste(wd,"/hyde_global/", sep=""))
hyde_resample<-stack(paste(wd,"/hyde_global/", hyde_files,sep=""))
pred_nf<-stack(paste(wd, "/Bioclim/Global/Bio_", bioclim_layers,"_2006_2016_average.tif",sep="" ))
}
pred_nf<-stack(pred_nf, hyde_resample[[nlayers(hyde_resample)]])
pred_crop<-crop(pred_nf, e_iucn)
plot(pred_crop)
pred_ex<-extract(pred_crop, sp)
bad_sp<-which(is.na(pred_ex[,1]))
spdf<-data.frame(sp)
if (length(bad_sp) > 0) spdf<-spdf[-bad_sp,] # drop presence points with missing predictor values
Creating the testing and training datasets
Using K=4 so 75% of points are used for training and 25% for testing.
library(dismo)
set.seed(10)
group_pres<-kfold(spdf, k)
if (!file.exists(paste(species_directory, "k_folds_presence_2006.csv", sep="/"))){
write.csv(group_pres, paste(species_directory, "k_folds_presence_2006.csv", sep="/"))
}
group_pres<-read.csv(paste(species_directory, "k_folds_presence_2006.csv", sep="/"))
group_pres<-group_pres[,-1] #all presence points
sp_train<-sp[group_pres!=1,] #presence points split into training and test
sp_test<-sp[group_pres ==1,]
library(rgbif) # occ_search() comes from the rgbif package
targetRecords <- occ_search(
scientificName=c(
'Dama dama',
'Alces alces',
'Capreolus capreolus',
'Sus scrofa',
'Martes martes',
'Meles meles',
'Vulpes vulpes',
'Mustela erminea',
'Lutra lutra'
),
hasCoordinate=TRUE,
year='2000,2016',
hasGeospatialIssue=FALSE,
fields='minimal',
decimalLongitude='-11,33',
decimalLatitude='35,68',
limit=11000
)
if (exists('targetSites')) rm(targetSites)
for (i in 1:length(targetRecords)) {
thisGenusCoords <- data.frame(
longitude=targetRecords[[i]]$data$decimalLongitude,
latitude=targetRecords[[i]]$data$decimalLatitude
)
targetSites <- if (exists('targetSites')) {
rbind(targetSites, thisGenusCoords)
} else {
thisGenusCoords
}
}
targetEnv <- as.data.frame(extract(pred_crop, targetSites))
outside <- which(is.na(rowSums(targetEnv)))
targetSites <- targetSites[-outside, ]
targetEnv <- targetEnv[-outside, ]
targetBg <- cbind(targetSites, targetEnv)
names(targetBg)[1:2] <- c('longitude', 'latitude')
nrow(targetBg)
targetBg <- targetBg[sample(1:nrow(targetBg), 10000), ]
nrow(targetBg)
#write.csv(targetBg, paste(species_directory,"background_data_et_ungulates.csv", sep="/"))
bg_pse<-read.csv(paste(species_directory,"background_data_et_ungulates.csv", sep="/"))
bg_pse<-bg_pse[,-1]
k<-4
targetBg_k<-kfold(bg_pse, k)
#write.csv(targetBg_k, paste(species_directory, "kfolds_background_data_et_ungulates.csv", sep="/"))
group_back<-read.csv(paste(species_directory,"kfolds_background_data_et_ungulates.csv", sep="/"))
group_back<-group_back[,-1]
sp_backg_train <- bg_pse[group_back != 1,1:2] #background points split into training and test
sp_backg_test <- bg_pse[group_back == 1,1:2]
colnames(sp_backg_train)<-c("lon", "lat")
colnames(sp_backg_test)<-c("lon", "lat")
sp_train<-sp_train@coords #presence training points
colnames(sp_train)<-c("lon", "lat")
train <- rbind(sp_train, sp_backg_train) #presence and background training points combined
sp_test<-sp_test@coords #presence test points
colnames(sp_test)<-c("lon", "lat")
test<-rbind(sp_test, sp_backg_test) #presence and background test points combined
pb_train <- c(rep(1, nrow(sp_train)), rep(0, nrow(sp_backg_train)))
envtrain <- extract(pred_nf, train)
envtrain <- data.frame( cbind(pa=pb_train, envtrain) ) #training points with environmental variables
envtrain<-na.omit(envtrain)
pb_test<-c(rep(1, nrow(sp_test)), rep(0, nrow(sp_backg_test)))
envtest <- extract(pred_nf, test)
envtest <- data.frame( cbind(pa=pb_test, envtest) ) #test points with environmental variables
envtest<-na.omit(envtest)
env_all<-rbind(envtrain, envtest) #all of the training and test data with environmental variables
env_pres<-extract(pred_nf, sp)
pa<-rep(1, nrow(env_pres))
env_pres<-data.frame(pa, env_pres) #presence points with environmental variables
colnames(env_pres)<-c("pa", bioclim_names, "Hyde_Year_2005")
env_pres_xy<-data.frame(sp@coords, env_pres)
env_pres_xy<-na.omit(env_pres_xy)
env_back<-extract(pred_nf, bg_pse[,1:2])
ap<-rep(0, nrow(env_back))
env_back<-data.frame(ap, env_back) #background points with environmental variables
colnames(env_back)<-c("pa",bioclim_names, "Hyde_Year_2005")
env_back_xy<-data.frame(bg_pse[,1:2], env_back)
env_back_xy<-na.omit(env_back_xy)
group_pres<-group_pres[1:nrow(env_pres_xy)]
group_back<-group_back[1:nrow(env_back_xy)]
Bioclim
Running the Bioclim envelope model with k-fold cross-validation to get AUC values; thresholds based on kappa, no omission and the true skill statistic can also be derived from the same evaluations.
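The loop below stores the cross-validation AUCs; as a hedged sketch, the three thresholds named above (and the maximum true skill statistic) can be pulled from each dismo ModelEvaluation object afterwards, e.g.:
# Hedged sketch: thresholds and max TSS from a dismo evaluation object 'e'
# (for example e <- evl_bc[[1]] once the loop below has been run).
get_thresholds <- function(e) {
  c(kappa       = threshold(e, stat = "kappa"),        # maximises Cohen's kappa
    no_omission = threshold(e, stat = "no_omission"),  # highest threshold with no omission of presences
    spec_sens   = threshold(e, stat = "spec_sens"))    # maximises sensitivity + specificity (max TSS)
}
max_tss <- function(e) max(e@TPR + e@TNR - 1)           # true skill statistic (Allouche et al. 2006)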
evl_bc<- list()
tss_bc<-list()
for (i in 1:k){
pres_train<-sp[group_pres!=i,]
pres_test<-sp[group_pres==i,]
backg_test<-bg_pse[group_back==i,1:2]
bc <- bioclim(pred_nf,pres_train)
evl_bc[[i]] <- dismo:::evaluate(pres_test, backg_test, bc,pred_nf,type="response")#test presence, test absence, model, predictor variables
#print(i)
}
auc_bc <- sapply( evl_bc, function(x){slot(x, "auc")} )
print(auc_bc)
## [1] 0.6220573 0.6098527 0.6265069 0.6392110
bc_auc<-mean(auc_bc)
GAM
Running a GAM and evaluating it in the same way to get AUC values.
#library(mgcv)
evl_gam<- list()
tss_gam<-list()
for (i in 1:k){
pres_train<-env_pres_xy[group_pres!=i ,-c(1,2)]
pres_test<-env_pres_xy[(group_pres==i) ,-c(1,2)]
back_test<-env_back_xy[(group_back==i),-c(1,2)]
back_train<-env_back_xy[(group_back!=i),-c(1,2)]
envtrain<-rbind(pres_train, back_train)
gm1<-mgcv:::gam(pa~ s(Bio_2_2006_2016_average)+s(Bio_4_2006_2016_average)+ s(Bio_5_2006_2016_average)+
s(Bio_6_2006_2016_average)+ s(Bio_7_2006_2016_average)+ s(Bio_10_2006_2016_average)+
s(Bio_11_2006_2016_average) + s(Hyde_Year_2005), family = binomial(link = "logit"),data=envtrain)
evl_gam[[i]] <- dismo:::evaluate(p = pres_test, a = back_test,model= gm1,type="response")
}
auc_gam <- sapply( evl_gam, function(x){slot(x, "auc")} )
print(auc_gam)
## [1] 0.8656110 0.8649390 0.8606699 0.8603572
gam_auc<-mean(auc_gam)
Random Forest
Running a random forest model and evaluating it in the same way to get AUC values.
model<-pa ~ Bio_2_2006_2016_average+Bio_4_2006_2016_average+
Bio_5_2006_2016_average+ Bio_6_2006_2016_average+ Bio_7_2006_2016_average+
Bio_10_2006_2016_average+ Bio_11_2006_2016_average + Hyde_Year_2005
evl_rf<- list()
for (i in 1:k){
pres_train<-env_pres_xy[group_pres!=i,-c(1,2)]
pres_test<-env_pres_xy[(group_pres==i) ,-c(1,2)]
back_test<-env_back_xy[(group_back==i),-c(1,2)]
back_train<-env_back_xy[group_back !=i,-c(1,2)]
envtrain<-rbind(pres_train, back_train)
rf1 <- randomForest(model, data=envtrain)
evl_rf[[i]] <- dismo:::evaluate(pres_test, back_test, rf1,type="response")
}
auc_rf <- sapply( evl_rf, function(x){slot(x, "auc")} )
print(auc_rf)
## [1] 0.9225918 0.9254292 0.9267000 0.9205677
rf_auc<-mean(auc_rf)
Ensemble Model
Creating a weighted (based on AUC) ensemble average suitability model for 2006-2016 from the Bioclim, GAM and Random Forest models. This will be used to predict habitat suitability for red deer for each year 1950-2016.
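As a rough worked example of the AUC-based weighting used below, plugging in the mean cross-validation AUCs printed above (roughly 0.62, 0.86 and 0.92) shows the random forest getting just over half of the total weight:
# Worked example of the (AUC - 0.5)^2 weights; values approximate the means printed above
auc_example <- c(bioclim = 0.624, gam = 0.863, rf = 0.924)
w_example   <- (auc_example - 0.5)^2
round(w_example / sum(w_example), 2)   # roughly 0.05 (bioclim), 0.40 (gam), 0.55 (random forest)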
#total models
library(randomForest)
library(boot)
bc <- bioclim(pred_crop, sp)
gm1<-gam(pa~ s(Bio_2_2006_2016_average)+s(Bio_4_2006_2016_average)+ s(Bio_5_2006_2016_average)+
s(Bio_6_2006_2016_average)+ s(Bio_7_2006_2016_average)+ s(Bio_10_2006_2016_average)+
s(Bio_11_2006_2016_average) + s(Hyde_Year_2005), family = binomial(link = "logit"),data=env_all)
rf1 <- randomForest(model, data=env_all)
pb <- predict(pred_crop, bc, ext=e_iucn, progress='') #bioclim predict
pg <- predict(pred_crop, gm1, ext=e_iucn) #gam predict
pgl<-raster:::calc(pg, fun=function(x){ exp(x)/(1+exp(x))}) #backtransforming from logit space
pr <- predict(pred_crop, rf1, ext=e_iucn) #random forest predict
models <- stack(pb, pgl, pr)
names(models) <- c("bioclim", "gam", "random forest")
plot(models)
auc<-c(bc_auc, gam_auc, rf_auc)
w <- (auc-0.5)^2
wm <- weighted.mean( models[[c("bioclim", "gam", "random.forest")]], w)
plot(wm, main="Ensemble model")
points(sp)
Weighted Threshold
abs_xy<-env_back_xy[,c(1,2)]
ens<-ensemble_evaluate(sp,abs_xy , wm)
thresh<-threshold(ens, stat="spec_sens")
thresh
## [1] 0.4061065
Annual Habitat Suitability Predictions
Using the ensemble model to predict suitability for each year (1950-2016) and, for each year, creating a binary presence/absence map based on three different thresholding techniques. I will go forward using the threshold that maximises sensitivity + specificity (i.e. the true skill statistic), following Allouche et al. 2006.
bc <- bioclim(pred_nf, sp)
gm1<-mgcv:::gam(pa~ s(Bio_2_2006_2016_average)+ s(Bio_4_2006_2016_average)+ s(Bio_5_2006_2016_average)+ s(Bio_6_2006_2016_average)+ s(Bio_7_2006_2016_average)+s(Bio_10_2006_2016_average)+ s(Bio_11_2006_2016_average) + s(Hyde_Year_2005), family = binomial(link = "logit"), data=env_all) # binomial family added so the logit back-transform below is valid
rf1 <- randomForest:::randomForest(model, data=env_all)
for (i in 1:length(years)){
if (Europe_only){
pred_nf<-stack(paste(wd, "/Bioclim/", years[i], "_bioclim_variable_stack.tif", sep="" ))
}else{
pred_nf<-stack(paste(wd, "/Bioclim/Global/", years[i], "_bioclim_variable_stack.tif", sep="" ))
}
pred_nf<-pred_nf[[bioclim_layers]]
names(pred_nf)<-bioclim_names
hyde_yr<-raster(paste(wd, "/hyde/Hyde_Year_", years[i], ".tif",sep=""))
names(hyde_yr)<-"Hyde_Year_2005" # renamed to match the predictor name used when fitting the models
pred_nf<-stack(pred_nf, hyde_yr)
pb <- predict(pred_nf, bc, ext=e_big, progress='')
pg <- predict(pred_nf, gm1, ext=e_big) #gam predict
pgl<-raster:::calc(pg, fun=function(x){ exp(x)/(1+exp(x))})
pr <- predict(pred_nf, rf1, ext=e_big)
models <- stack(pb, pgl, pr)
names(models) <- c("bioclim", "gam", "random forest")
wm <- weighted.mean( models[[c("bioclim", "gam", "random.forest")]], w)
pa_sss<-wm>thresh
writeRaster(wm , paste(sdm_folder, "/hyde_weighted_ensemble_sdm_", years[i], ".tif", sep=""), overwrite=TRUE)
writeRaster(pa_sss , paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years[i], ".tif", sep=""), overwrite=TRUE)
plot(pa_sss, main=years[i])
}
Trends in habitat suitability 1950-2016
Patches
Plots of the number of cells with predicted presence and number of patches (contiguous presence) over 1950-2016.
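The patch-summary code itself is not shown here; as a hedged sketch, these plots could be produced from the yearly presence/absence rasters written in the prediction loop above (raster::clump requires the igraph package):
# Sketch: yearly counts of presence cells and of contiguous presence patches
library(igraph)   # required by raster::clump
pa_files <- paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years, ".tif", sep = "")
pa_stack <- stack(pa_files)
n_cells <- cellStats(pa_stack, stat = "sum")   # number of cells predicted present each year
n_patches <- sapply(1:nlayers(pa_stack), function(i) {
  cl <- clump(pa_stack[[i]], directions = 8)   # label contiguous presence patches
  length(na.omit(unique(values(cl))))          # number of distinct patch IDs
})
plot(years, n_cells, type = "l", xlab = "Year", ylab = "Cells predicted present")
plot(years, n_patches, type = "l", xlab = "Year", ylab = "Number of patches")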
Demoniche
Formatting the population occurrence points
Formatting the LPI population data for use in Demoniche; these populations start as the "seed" populations. It might be better to use GBIF data for this, since it is unrealistic that the LPI populations are the only existing populations in 1950; alternatively, a historical map of where red deer were in 1950 could be used.
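A hedged sketch of the GBIF alternative mentioned above, reusing the occurrence coordinates saved earlier; the starting abundance of 10 is only a placeholder, not an estimate:
# Alternative seed populations from the GBIF occurrences (not used below)
gbif_xy <- read.csv(paste(species_directory, "/", binomial, "_locs.csv", sep = ""))
gbif_seeds <- data.frame(ID = 1:nrow(gbif_xy),
                         X = gbif_xy$Longitude,
                         Y = gbif_xy$Latitude,
                         abundance = 10)  # placeholder starting abundance
head(gbif_seeds)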
xy<-read.csv(paste(species_directory,"/" ,binomial, "_locs.csv", sep=""))
#patch<-stack(paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years[1],".tif", sep=""))
sdm_stack<-stack(paste(sdm_folder, "/hyde_weighted_ensemble_sdm_", years,".tif", sep=""))
patch_stack<-stack(paste(sdm_folder, "/hyde_pres_abs_sss_weighted_ensemble_sdm_", years,".tif", sep=""))
pyr<-subset(lpi, Binomial ==binomial & Specific_location==1& Region =="Europe") #record 11470 had wrong longitude - in Russia!
pops<-pyr[,c(65:120)]
pop_counts <- (pops !="NULL")
points_per_pop1950_2005 = rowSums(pop_counts)
pyr<-pyr[-which(points_per_pop1950_2005<5),]
pops<-pops[-which(points_per_pop1950_2005<5),]
pops<-data.frame(pyr$ID, pops)
colnames(pops)<-c("ID", 1950:2005)
#formatting the lpi data for use in demoniche
pyrs<-pyr[,c("ID","Longitude","Latitude")]
xy_lpi<-data.frame(pyrs$Longitude, pyrs$Latitude)
sdm_lpi<-extract(sdm_stack, xy_lpi)
colnames(sdm_lpi)<-1950:2005
sdm_lpi_melt<-melt(sdm_lpi)
colnames(sdm_lpi_melt)<-c("ID", "Year", "HSI")
sdm_lpi_melt$ID<-rep(pyrs$ID, length(1950:2005))
ggplot(data = sdm_lpi_melt, aes(x = Year, y= HSI, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
lambda<-function(x){
lambdas<-diff(log10(as.numeric(x)))
}
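# For example, on a made-up HSI series the function returns year-to-year log10
# ratios, directly comparable to the observed population lambdas computed below:
lambda(c(0.20, 0.25, 0.25)) # log10(0.25/0.20) ~ 0.097, then log10(1) = 0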
sdm_lambdas<-t(apply(sdm_lpi,1,lambda))
colnames(sdm_lambdas)<-1950:2004
sdm_lambdas_melt<-melt(sdm_lambdas)
colnames(sdm_lambdas_melt)<-c("ID", "Year", "HSI")
sdm_lambdas_melt$ID<-rep(pyrs$ID, length(1950:2004))
ggplot(data = sdm_lambdas_melt, aes(x = Year, y= HSI, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
Adding in the LPI data
colnames(pops)[2:ncol(pops)]<-1950:2005
melt_lpi<-data.table:::melt(pops, id.vars = "ID")
## Warning: attributes are not identical across measure variables; they will be dropped
colnames(melt_lpi)<-c("ID", "Year","HSI")
melt_lpi$Year<-as.numeric(as.character(melt_lpi$Year))
melt_lpi$HSI<-as.numeric(melt_lpi$HSI)
## Warning: NAs introduced by coercion
ggplot(data = melt_lpi, aes(x = Year, y= HSI, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 188 rows containing non-finite values (stat_smooth).
## (Further repeated loess warnings omitted: with so few data points per population
## the smoothing span is too small, so simpleLoess/predLoess fall back to a pseudoinverse.)
## Warning: Removed 152 rows containing missing values (geom_path).
library(taRifx)
library(plyr)
library(mgcv)
pops<-pyr[,c(1,65:120)]
colnames(pops)[2:ncol(pops)]<-paste("Year", 1950:2005, sep="_")
pops[pops=="NULL"]<-NA
#popsm<-as.matrix(pops)
lambda_lpi<-function(x){
#subsetting the population data by each population
spid = x[2:(length(x))] #subsetting only the dates
names(spid)<-1950:2005 #renaming the date column names as R doesn't like numbered column names
spid<-as.numeric(spid)
pop_datab <- (!is.na(spid) )
points = sum(pop_datab)
id<-x[1]
id<-as.numeric(id)
Date<-1950:2005
spidt<-destring(t(spid))
time<-length(min(which(!is.na(spidt))):max(which(!is.na(spidt))))
missing<-time-points
Year<-Date[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
Population<-spidt[min(which(!is.na(spidt))):max(which(!is.na(spidt)))]
Population[Population == 0] <- mean(Population, na.rm=TRUE)*0.01 #if a population is zero one year this is replaced with 1% of the average population estimate - because you can't log zeros
df<-data.frame(Year,Population)
PopN = log10(df$Population)
if (points >=6) {
if (length(na.omit(PopN)) >=6) {
SmoothParm = round(length(na.omit(PopN))/2)
} else {
SmoothParm=3
}
mg2<-mgcv:::gam(PopN ~ s(Year, k=SmoothParm, fx=TRUE)) # fx=TRUE (fixed-d.f. regression spline) belongs inside s()
pv2 <- predict(mg2,df,type="response",se=TRUE)
R_sq2<-summary(mg2)$r.sq
model<-1
pv2$fit[pv2$fit <= 0] <- NA
lambda2<-diff(pv2$fit)
ial<-data.frame(id, Year[-length(Year)], lambda2)
colnames(ial)<-c("ID", "Year", "Lambda")
} else{
lint<-na.approx(PopN)
lint[lint<=0] <- NA
lambda2<-diff(lint)
ial<-data.frame(id, Year[-length(Year)], lambda2)
colnames(ial)<-c("ID", "Year", "Lambda")
}
return(ial)
}
lambda_lpi_r<-apply(pops, 1, lambda_lpi)
lambda_r<-do.call( "rbind", lambda_lpi_r)
lambda_r<-lambda_r[lambda_r$Year <=2005,]
fill<-data.frame(rep(pops$ID, each=length(1950:2005)), 1950:2005)
colnames(fill)<-c("ID", "Year")
all_year_ab<-join(fill, lambda_r, type="left")
## Joining by: ID, Year
ggplot(data = all_year_ab, aes(x = Year, y= Lambda, group=ID))+
geom_line()+
geom_smooth()+
facet_grid(.~ID)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 157 rows containing non-finite values (stat_smooth).
## Warning: Removed 157 rows containing missing values (geom_path).
Predicted and Observed together
ggplot()+
geom_smooth(data = sdm_lambdas_melt, aes(x = Year, y= HSI, group=ID), colour = "black")+
geom_smooth(data = all_year_ab, aes(x = Year, y= Lambda, group=ID), colour="red")+
#geom_line(data = gam_r_lambda, aes(x =Year, y = Abundance, group=sp_lpi.ID), colour = "blue" )+
facet_grid(.~ID)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 157 rows containing non-finite values (stat_smooth).