Bootstrapping the full data with REM measures.
Version 12
This script looks at effort for number of days AND cams AND squares. Output is in three 3D arrays: stat_lcl, stat_mean and stat_ucl
#Define the size of the bootstrap, how often data are sampled, and the number of repeats, in multiples of 10
rm(list=ls())
nboots<-500
allrepeat <-1000 # this is the number of times the subsample is taken
#Set the working directory, and the number of sites and cameras to keep in the dataset input.
setwd("XXX")
nsquares <- XX
ncams <- XX
dd <- read.csv("XXX.csv",header=T,stringsAsFactors=F)
colnames(dd)[8]<-"theta"
#Add to this dataframe the increment of Density from the REM attributable to each capture
# time: 24 hours per camera day
tm<- XX
# velocity (Hare movement per day in km?)
v <- XX
dens <- (dd$captures/tm)*pi/(v*dd$r*(2+dd$theta))
# convert NA density to zero
dens[is.na(dens)]<- 0
# add to original dataframe
d2 <- cbind(dd,dens)
# drop the incomplete(?) squares numbers >13
d2<- d2[d2$square <= nsquares,]
head(d2)
# summary : check mean dens for sanity
print(summary(d2))
#There can be repeated rows of d2 for the same date,square and camera. Take the mean of these for use in bootstrapping. Need means for some things, sums for others
z1<-aggregate(x=d2[,2:9],by=list(date2=d2$date,sq=d2$square,camera=d2$cam),mean,na.rm=TRUE)
z2<-aggregate(x=d2[,2:9],by=list(date2=d2$date,sq=d2$square,camera=d2$cam),sum,na.rm=TRUE)
head(z1);head(z2)
# keep captures and density from the sum aggregate (z2)
z3<-z1;z3[,c("captures","dens")]<-z2[,c("captures","dens")]
# Check out the data
for(i in unique(z3$sq)){
print(i)
print(dim(z3[z3$sq==i,]))
print(z3[z3$sq==i,])
print("_________________________________")
}
z4 <- cbind(z3,dayofyear=julian(as.Date(z3$date,'%d/%m/%Y'),origin = as.Date("2013-01-01")),month=months(as.Date(z3$date,'%d/%m/%Y')) )
z5<- z4[order(z4$dayofyear),]
lookup<-rep(NA,310)
lookup[unique(z5$dayofyear)]<-order(unique(z5$dayofyear))
z6<-cbind(z5,dayseq=lookup[z5$dayofyear])
# this is the main dataframe used by the bootstrapping process, below
dframe <- z6
# total number of days sampled in the fieldwork
workdays <- max(dframe$dayseq)
# only bootstrap up to X days effort
ndays <- 1
# the objective is to bootstrap the number of days on CI 2.5% & 97.5%
# i.e. number of days within the week for which the cameras are out
# assign storage for the output statistics in 4-D array
stat_mean<-array( dim=c(ndays,ncams,nsquares),
dimnames=c("xdays","xcams","xsquares") )
stat_lcl<-array( dim=c(ndays,ncams,nsquares),
dimnames=c("xdays","xcams","xsquares") )
stat_ucl<-array( dim=c(ndays,ncams,nsquares),
dimnames=c("xdays","xcams","xsquares") )
# storage for the sample reps
reps <- matrix(nrow=allrepeat,ncol=3); colnames(reps)<-c("lcl025","mn","ucl975")
boots <- 1:nboots
numdaytable<-table(dframe[,c("square","cam")])
numday=cbind(rep(1:13,21),sort(rep(1:21,13)),stack(as.data.frame(numdaytable)))[,1:3]
colnames(numday)<-c("square","cam","days")
#=======================================
# xdays out of ndays: this is the size of the subsample to gauge CI dependency on #days
for( xdays in 1:ndays) {
print(paste("xdays =",xdays))
ndTF <- numday$days >= xdays
dfextract <- dframe[dframe$square %in% numday[ndTF,"square"] & dframe$cam %in% numday[ndTF,"cam"],]
nr <- nrow(dfextract)
# xsquares out of nsquares: this is the size of the subsample to gauge CI dependency on #squares
for( xsquares in 1:nsquares) {
print(paste("xsquares =",xsquares))
# xcams out of ncams: same but on #cameras
for( xcams in 1:ncams ) {
print(paste("xcam =",xcams))
sampsize <- xdays*xsquares*xcams
if(sampsize <= nr) {
# do the sampling 'allrepeat' times
for(isub in 1:allrepeat) {
#print(paste("isub =",isub))
#extract a constrained sample of days (must be consecutive days)
#NB: we need one of these for each camera and each square
#lastpossday <- workdays-xdays+1
#startday <- sample(1:lastpossday,1)
#finishday <- startday+xdays-1
#day_indices <- startday:finishday
#snip out the matching bit of dataframe$dens
#extract xdays x xsquares x xcams ROWS of data
extract1 <- dfextract[sample(1:nr,sampsize),]
#print("data extracted for bootstrapping (extract1):")
#print(dim(extract1))
#print(extract1)
haredens <- extract1$dens
#print("haredens =")
#print(haredens)
#bootstrap the hare density: resample with replacement nboots times
lharedens<-length(haredens)
for(xboot in 1:nboots) {
sam_nhares <- sample(haredens,lharedens,replace=T)
boots[xboot] <- mean(sam_nhares)
}
#print(boots)
mn <- mean(boots)
lcl025 <- boots[order(boots)][nboots*0.025]
ucl975 <- boots[order(boots)][nboots*0.975]
reps[isub,] <- c(lcl025,mn,ucl975)
}
stat_lcl[xdays,xcams,xsquares] <- mean(reps[,"lcl025"])
stat_mean[xdays,xcams,xsquares] <- mean(reps[,"mn"])
stat_ucl[xdays,xcams,xsquares] <- mean(reps[,"ucl975"])
print( stat_lcl[xdays,xcams,xsquares])
print(stat_mean[xdays,xcams,xsquares])
print( stat_ucl[xdays,xcams,xsquares])
}
}
}
}
write.csv(stat_lcl, file="stat_lcl.csv")
write.csv(stat_mean, file="stat_mean.csv")
write.csv(stat_ucl, file="stat_ucl.csv")
install.packages("spatstat", dependencies = TRUE)
require(spatstat)
par(mfrow=c(3,1))
plot(im(stat_lcl[7,,])) # change the first dimension to any of 1..7 to get that number of days
plot(im(stat_mean[7,,]))
plot(im(stat_ucl[7,,]))
# end
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.