npudist

Run npudistbw() & npudist()

the vector d is the distance to water (meters) value for a randomly selected site Each value corresponds to one of the 26 10x10-meters cells that make up this site

npudistbw() - computes a bandwidth object for a kernel cumulative distribution estimator

npudist() - computes kernel unconditional cumulative distribution estimates on evaluation data

## Warning: package 'np' was built under R version 3.1.3
d <-  c(21.3,31.9, 42.5, 15.0, 23.8, 33.6, 43.8, 54.2, 23.8, 30.1, 38.3, 47.6, 57.3,
        30.1, 38.3, 45.1, 53.2, 62.0, 38.3, 45.1, 53.2, 60.2,68.1, 53.2, 60.2, 68.1)
d <- round(d,0)
bwDist <- npudistbw(d)        # compute bandwidth for CDF
fDist <- npudist(bws=bwDist)  # fit bandwidth to d
pDist <- fitted(fDist)        # get the fitted values for d < x
print(pDist)                  # print fitted cumulative probabilities
##  [1] 0.11478027 0.27101183 0.46013709 0.06085512 0.15029552 0.30621965
##  [7] 0.50052552 0.69880705 0.15029552 0.23761346 0.38109036 0.58154037
## [13] 0.75302246 0.23761346 0.38109036 0.52080736 0.67993579 0.83296168
## [19] 0.38109036 0.52080736 0.67993579 0.80277094 0.90704296 0.67993579
## [25] 0.80277094 0.90704296
plot(bwDist)                  # plot CDF

Simulate a bunch of data points (Loi) that lie at standard deviation distances on either side of site data mean. Then use a shift of 2 for min and max values

# inefficient routine to simualte data points for +/- 10 std devs on either side of site data mean
meanX1 <- round(mean(d),1)  # get mean
sdX1 <- round(sd(d),1)      # get SD
simX1v <- meanX1            # start simulated data with mean
for(i in 1:10){             # loop to add 10 positive StdDevs
  simX1 <- meanX1 + sdX1*i
  simX1v <- c(simX1v, simX1)# add sim data to right of mean
}
simX1vneg <- NULL
for(i in 10:1){             # loop to get 10 negative StdDevs
  simX1 <- meanX1 - sdX1*i
  simX1vneg <- c(simX1vneg, simX1)
}
simData <- c(simX1vneg, simX1v)# add sim data to left of mean 
simDataMin <- simData-2  # create interval for min data
simDataMax <- simData+2  # create interval for max data
#
print(simDataMin) # this is what it looks like
##  [1] -105.3  -90.6  -75.9  -61.2  -46.5  -31.8  -17.1   -2.4   12.3   27.0
## [11]   41.7   56.4   71.1   85.8  100.5  115.2  129.9  144.6  159.3  174.0
## [21]  188.7
print(simDataMax) # this is what it looks like
##  [1] -101.3  -86.6  -71.9  -57.2  -42.5  -27.8  -13.1    1.6   16.3   31.0
## [11]   45.7   60.4   75.1   89.8  104.5  119.2  133.9  148.6  163.3  178.0
## [21]  192.7

Get fitted values of density and cumulative probability using bws objects created above and sim data

# min value fit
fDistSIMmin <- npudist(bws=bwDist, edat=simDataMin) # fit CDF to sim data using empirical bws object
pDistSIMmin <- fitted(fDistSIMmin)                  # get fitted probabilities for sim data
# max value fit
fDistSIMmax <- npudist(bws=bwDist, edat=simDataMax) # fit CDF to sim data using empirical bws object
pDistSIMmax <- fitted(fDistSIMmax)                  # get fitted probabilities for sim data

print(pDistSIMmin)
##  [1] 0.000000e+00 0.000000e+00 6.405133e-18 1.624278e-13 7.044434e-10
##  [6] 5.839376e-07 9.888771e-05 3.879141e-03 4.366113e-02 1.913758e-01
## [11] 4.541132e-01 7.425020e-01 9.351615e-01 9.938397e-01 9.998380e-01
## [16] 9.999990e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## [21] 1.000000e+00
print(pDistSIMmax)
##  [1] 0.000000e+00 0.000000e+00 1.259676e-16 1.873068e-12 5.155760e-09
##  [6] 2.750621e-06 3.080042e-04 8.386330e-03 7.064183e-02 2.540723e-01
## [11] 5.350114e-01 8.090113e-01 9.618188e-01 9.973721e-01 9.999532e-01
## [16] 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## [21] 1.000000e+00
simPlot <- data.frame(simMin = simDataMin, simMax = simDataMax,
                      Min = pDistSIMmin, max = pDistSIMmax)
# plot the CDFs of min (blue) and max (orange), red line is empirical mean
ggplot(simPlot, aes(x = simMin, y=pDistSIMmin)) + 
  geom_line(color = "blue") + geom_vline(xintercept = meanX1, color="red") +
  geom_line(aes(x = simMax, y = pDistSIMmin), color="orange")

# plot the probability of Loi realization across all simulated Loi observations
prob <- pDistSIMmax - pDistSIMmin
simProb <- data.frame(simData = simData, Probability = prob)
ggplot(simProb, aes(x = simData, y=prob)) + geom_line() + geom_vline(xintercept = meanX1, color="red")

See what different sensitivities do for the probability

# loop the fitting of simulated Loi observations acorss shifts from 1 to 10 integers
shifts <- 10
Probs <- matrix(ncol = shifts, nrow = length(simData))
cdfVecs <- NULL
for(i in 1:shifts){
  shiftFactor = i
  simDataMin <- simData - shiftFactor  # create interval for min data
  simDataMax <- simData + shiftFactor  # create interval for max data
  pDistSIMmin <- fitted(npudist(bws=bwDist, edat=simDataMin))                  
  pDistSIMmax <- fitted(npudist(bws=bwDist, edat=simDataMax)) 
  prob <- pDistSIMmax - pDistSIMmin
  Probs[,i] <- prob
  cdf <- cbind(rep(i,length(simData)),pDistSIMmin,pDistSIMmax, simData)
  cdfVecs <- rbind(cdfVecs, cdf)
}

# Plot probability of simulated Loi at different shifts
probsMelt <- melt(Probs)
colnames(probsMelt) <- c("index", "shift", "Probability")
ggplot(probsMelt, aes(x = rep(simData, times=shifts), y=Probability, group=factor(shift), color=factor(shift))) + 
  geom_line() + geom_vline(xintercept = mean(simData), color="red")

# Plot CDFs of simulated Loi at different shifts
cdfVecs <- data.frame(cdfVecs)
colnames(cdfVecs) <- c("shift", "cdfMin", "cdfMax", "simData")
ggplot(cdfVecs, aes(x=simData, y = cdfMin, group=shift, color=factor(shift) )) + 
  geom_line()  + 
  geom_line(aes(x = simData, y = cdfMax,group=shift, color=factor(shift))) +
  geom_vline(xintercept = mean(simData), color="red")