the vector d is the distance to water (meters) value for a randomly selected site Each value corresponds to one of the 26 10x10-meters cells that make up this site
npudistbw() - computes a bandwidth object for a kernel cumulative distribution estimator
npudist() - computes kernel unconditional cumulative distribution estimates on evaluation data
## Warning: package 'np' was built under R version 3.1.3
d <- c(21.3,31.9, 42.5, 15.0, 23.8, 33.6, 43.8, 54.2, 23.8, 30.1, 38.3, 47.6, 57.3,
30.1, 38.3, 45.1, 53.2, 62.0, 38.3, 45.1, 53.2, 60.2,68.1, 53.2, 60.2, 68.1)
d <- round(d,0)
bwDist <- npudistbw(d) # compute bandwidth for CDF
fDist <- npudist(bws=bwDist) # fit bandwidth to d
pDist <- fitted(fDist) # get the fitted values for d < x
print(pDist) # print fitted cumulative probabilities
## [1] 0.11478027 0.27101183 0.46013709 0.06085512 0.15029552 0.30621965
## [7] 0.50052552 0.69880705 0.15029552 0.23761346 0.38109036 0.58154037
## [13] 0.75302246 0.23761346 0.38109036 0.52080736 0.67993579 0.83296168
## [19] 0.38109036 0.52080736 0.67993579 0.80277094 0.90704296 0.67993579
## [25] 0.80277094 0.90704296
plot(bwDist) # plot CDF
# inefficient routine to simualte data points for +/- 10 std devs on either side of site data mean
meanX1 <- round(mean(d),1) # get mean
sdX1 <- round(sd(d),1) # get SD
simX1v <- meanX1 # start simulated data with mean
for(i in 1:10){ # loop to add 10 positive StdDevs
simX1 <- meanX1 + sdX1*i
simX1v <- c(simX1v, simX1)# add sim data to right of mean
}
simX1vneg <- NULL
for(i in 10:1){ # loop to get 10 negative StdDevs
simX1 <- meanX1 - sdX1*i
simX1vneg <- c(simX1vneg, simX1)
}
simData <- c(simX1vneg, simX1v)# add sim data to left of mean
simDataMin <- simData-2 # create interval for min data
simDataMax <- simData+2 # create interval for max data
#
print(simDataMin) # this is what it looks like
## [1] -105.3 -90.6 -75.9 -61.2 -46.5 -31.8 -17.1 -2.4 12.3 27.0
## [11] 41.7 56.4 71.1 85.8 100.5 115.2 129.9 144.6 159.3 174.0
## [21] 188.7
print(simDataMax) # this is what it looks like
## [1] -101.3 -86.6 -71.9 -57.2 -42.5 -27.8 -13.1 1.6 16.3 31.0
## [11] 45.7 60.4 75.1 89.8 104.5 119.2 133.9 148.6 163.3 178.0
## [21] 192.7
# min value fit
fDistSIMmin <- npudist(bws=bwDist, edat=simDataMin) # fit CDF to sim data using empirical bws object
pDistSIMmin <- fitted(fDistSIMmin) # get fitted probabilities for sim data
# max value fit
fDistSIMmax <- npudist(bws=bwDist, edat=simDataMax) # fit CDF to sim data using empirical bws object
pDistSIMmax <- fitted(fDistSIMmax) # get fitted probabilities for sim data
print(pDistSIMmin)
## [1] 0.000000e+00 0.000000e+00 6.405133e-18 1.624278e-13 7.044434e-10
## [6] 5.839376e-07 9.888771e-05 3.879141e-03 4.366113e-02 1.913758e-01
## [11] 4.541132e-01 7.425020e-01 9.351615e-01 9.938397e-01 9.998380e-01
## [16] 9.999990e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## [21] 1.000000e+00
print(pDistSIMmax)
## [1] 0.000000e+00 0.000000e+00 1.259676e-16 1.873068e-12 5.155760e-09
## [6] 2.750621e-06 3.080042e-04 8.386330e-03 7.064183e-02 2.540723e-01
## [11] 5.350114e-01 8.090113e-01 9.618188e-01 9.973721e-01 9.999532e-01
## [16] 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## [21] 1.000000e+00
simPlot <- data.frame(simMin = simDataMin, simMax = simDataMax,
Min = pDistSIMmin, max = pDistSIMmax)
# plot the CDFs of min (blue) and max (orange), red line is empirical mean
ggplot(simPlot, aes(x = simMin, y=pDistSIMmin)) +
geom_line(color = "blue") + geom_vline(xintercept = meanX1, color="red") +
geom_line(aes(x = simMax, y = pDistSIMmin), color="orange")
# plot the probability of Loi realization across all simulated Loi observations
prob <- pDistSIMmax - pDistSIMmin
simProb <- data.frame(simData = simData, Probability = prob)
ggplot(simProb, aes(x = simData, y=prob)) + geom_line() + geom_vline(xintercept = meanX1, color="red")
# loop the fitting of simulated Loi observations acorss shifts from 1 to 10 integers
shifts <- 10
Probs <- matrix(ncol = shifts, nrow = length(simData))
cdfVecs <- NULL
for(i in 1:shifts){
shiftFactor = i
simDataMin <- simData - shiftFactor # create interval for min data
simDataMax <- simData + shiftFactor # create interval for max data
pDistSIMmin <- fitted(npudist(bws=bwDist, edat=simDataMin))
pDistSIMmax <- fitted(npudist(bws=bwDist, edat=simDataMax))
prob <- pDistSIMmax - pDistSIMmin
Probs[,i] <- prob
cdf <- cbind(rep(i,length(simData)),pDistSIMmin,pDistSIMmax, simData)
cdfVecs <- rbind(cdfVecs, cdf)
}
# Plot probability of simulated Loi at different shifts
probsMelt <- melt(Probs)
colnames(probsMelt) <- c("index", "shift", "Probability")
ggplot(probsMelt, aes(x = rep(simData, times=shifts), y=Probability, group=factor(shift), color=factor(shift))) +
geom_line() + geom_vline(xintercept = mean(simData), color="red")
# Plot CDFs of simulated Loi at different shifts
cdfVecs <- data.frame(cdfVecs)
colnames(cdfVecs) <- c("shift", "cdfMin", "cdfMax", "simData")
ggplot(cdfVecs, aes(x=simData, y = cdfMin, group=shift, color=factor(shift) )) +
geom_line() +
geom_line(aes(x = simData, y = cdfMax,group=shift, color=factor(shift))) +
geom_vline(xintercept = mean(simData), color="red")