Michael S. Czahor

Non-Parametric Bootstrapping With Continuous Weights via Direchlet Distribution

Mirror/Packages


options(repos = structure(c(CRAN = 
"http://streaming.stat.iastate.edu/CRAN/")))

install.packages("gtools")
## 
##   There is a binary version available (and will be installed) but
##   the source version is later:
##        binary source
## gtools  2.7.1  3.3.0
## 
## 
## The downloaded binary packages are in
##  /var/folders/s7/b77ffsr9155dls85f90m83lc0000gn/T//RtmpMlH0UX/downloaded_packages
library(gtools)
install.packages("bootstrap")
## 
##   There is a binary version available (and will be installed) but
##   the source version is later:
##              binary    source
## bootstrap 2012.04-0 2012.04-1
## 
## 
## The downloaded binary packages are in
##  /var/folders/s7/b77ffsr9155dls85f90m83lc0000gn/T//RtmpMlH0UX/downloaded_packages
library(bootstrap)
install.packages("ggplot2")
## 
## The downloaded binary packages are in
##  /var/folders/s7/b77ffsr9155dls85f90m83lc0000gn/T//RtmpMlH0UX/downloaded_packages
library(ggplot2)

Volume Vector with mean From Meeker Hahn Statistical Intervals Chapter 14

Volume=matrix(c(0.149, 0.086, 0.149, 0.194, 0.044, 0.104, 0.156, 0.122, 
0.117, 0.079, 0.179, 0.307, 0.049,0.165, 0.043, 0.079, 0.109, 0.102,
0.195, 0.063, 0.068, 0.029, 0.079, 0.124, 0.151, 0.115, 0.023, 0.016, 0.067),1,29)
Volmean=mean(Volume)
Volmean
## [1] 0.1091

n=29 Dirichlet Weights for Volume

x=29*t(rdirichlet(1,rep(1,29)))
x
##          [,1]
##  [1,] 3.58004
##  [2,] 0.15786
##  [3,] 2.95257
##  [4,] 1.38164
##  [5,] 0.21304
##  [6,] 0.54298
##  [7,] 0.28698
##  [8,] 0.37153
##  [9,] 0.62114
## [10,] 0.28399
## [11,] 1.56792
## [12,] 0.01032
## [13,] 0.43189
## [14,] 1.44956
## [15,] 0.41586
## [16,] 0.01475
## [17,] 0.42835
## [18,] 2.16480
## [19,] 0.57255
## [20,] 0.46819
## [21,] 1.92395
## [22,] 2.05585
## [23,] 1.62374
## [24,] 0.13698
## [25,] 0.73290
## [26,] 2.20108
## [27,] 1.34061
## [28,] 0.37181
## [29,] 0.69713
sum(x) #Test to equal 29
## [1] 29

A sample mean from One bootstrap Sample Using Dirichlet Weights

dim(x)
## [1] 29  1
dim(Volume)
## [1]  1 29
(Volume%*%x)/29  #Sample Mean From one fraction weight bootstrap sample
##        [,1]
## [1,] 0.1124

Theta Assignment for Bootstrap Function

theta <- function(x){(Volume%*%x)}
results <- bootstrap(x,100,theta)
results
## $thetastar
##   [1] 4.304 3.053 4.202 3.189 3.479 2.310 3.302 2.291 3.039 2.960 4.372
##  [12] 2.767 2.801 2.247 3.359 3.516 3.509 3.678 4.524 2.774 3.065 2.279
##  [23] 3.253 3.143 3.225 3.663 2.772 3.443 3.393 2.185 3.720 2.620 3.155
##  [34] 3.939 2.409 2.568 4.021 3.049 3.949 3.301 1.913 3.834 3.893 3.100
##  [45] 3.179 2.404 3.018 3.357 2.256 3.460 2.106 2.483 4.855 3.553 3.063
##  [56] 3.255 4.072 2.676 2.799 3.630 2.784 4.042 2.980 2.912 3.529 3.464
##  [67] 2.828 3.407 3.574 1.838 3.965 2.732 2.675 3.907 2.758 2.323 2.197
##  [78] 3.676 2.420 2.417 5.021 2.812 3.689 1.992 2.566 3.963 2.208 3.707
##  [89] 2.816 3.137 3.698 2.813 3.037 3.317 3.470 2.670 2.787 4.493 4.126
## [100] 3.262
## 
## $func.thetastar
## NULL
## 
## $jack.boot.val
## NULL
## 
## $jack.boot.se
## NULL
## 
## $call
## bootstrap(x = x, nboot = 100, theta = theta)

Use Results thetastar vector to find each bootstrap sample mean. B=100 Samples

Fraction=results$thetastar/29
Ordfrac=sort(Fraction)
shapiro.test(Ordfrac)
## 
##  Shapiro-Wilk normality test
## 
## data:  Ordfrac 
## W = 0.9878, p-value = 0.4954
Ordfrac
##   [1] 0.06336 0.06595 0.06869 0.07263 0.07533 0.07575 0.07614 0.07747
##   [9] 0.07778 0.07858 0.07899 0.07965 0.08012 0.08289 0.08307 0.08334
##  [17] 0.08344 0.08564 0.08850 0.08855 0.09035 0.09207 0.09225 0.09227
##  [25] 0.09421 0.09510 0.09542 0.09559 0.09564 0.09599 0.09610 0.09653
##  [33] 0.09658 0.09698 0.09699 0.09711 0.09751 0.10041 0.10206 0.10275
##  [41] 0.10406 0.10474 0.10481 0.10515 0.10526 0.10561 0.10568 0.10688
##  [49] 0.10816 0.10837 0.10880 0.10961 0.10998 0.11121 0.11218 0.11225
##  [57] 0.11248 0.11383 0.11387 0.11438 0.11577 0.11583 0.11699 0.11749
##  [65] 0.11872 0.11930 0.11944 0.11967 0.11996 0.12100 0.12124 0.12167
##  [73] 0.12251 0.12325 0.12518 0.12631 0.12675 0.12682 0.12720 0.12752
##  [81] 0.12783 0.12828 0.13219 0.13426 0.13474 0.13584 0.13618 0.13667
##  [89] 0.13672 0.13866 0.13938 0.14042 0.14228 0.14491 0.14840 0.15077
##  [97] 0.15494 0.15599 0.16740 0.17313

Plot Continuous Weight B=100 Samples

hist(Ordfrac)

plot of chunk unnamed-chunk-7

Manually Calculate Mean B=100 Samples

allsums=sum(results$thetastar)
allsums
## [1] 317.7
FracMean=allsums/2900  
FracMean    #Bootstrap Estimate Mean with 100 samples
## [1] 0.1096
            #Note Volume mean=0.109069

** .025 and .975 Quantile Estimation**

perc025 <- function(x){quantile(x, .025)}
perc975 <- function(x){quantile(x, .975)}

Create A Confidence Interval e.g. 95%

results=bootstrap(x,100,theta, func=perc025)
results
## $thetastar
##   [1] 4.409 2.116 3.296 3.549 3.694 3.049 3.195 3.609 2.355 2.865 3.024
##  [12] 3.052 3.505 3.889 3.715 4.485 3.456 2.396 2.752 2.642 3.910 1.781
##  [23] 2.753 2.477 2.973 2.339 2.853 2.172 2.785 3.096 3.099 3.123 2.981
##  [34] 4.045 2.632 3.142 3.678 3.056 3.560 3.239 2.902 4.254 2.889 2.602
##  [45] 2.607 2.489 2.438 1.902 3.606 3.613 3.674 3.697 3.278 2.955 3.867
##  [56] 2.288 3.820 2.519 2.033 3.240 2.746 2.251 3.214 2.891 2.318 3.419
##  [67] 2.342 1.998 3.504 2.515 3.001 2.675 2.979 3.326 2.800 2.764 2.493
##  [78] 3.624 3.555 3.373 3.250 3.526 2.340 2.364 3.085 2.738 4.197 2.702
##  [89] 2.617 2.937 4.123 3.671 2.229 3.224 3.845 4.882 4.864 3.363 2.956
## [100] 2.074
## 
## $func.thetastar
##  2.5% 
## 2.015 
## 
## $jack.boot.val
##  [1] 2.066 2.303 2.116 2.198 2.124 2.224 1.902 2.010 1.976 2.123 1.986
## [12] 2.316 2.346 2.123 2.243 2.144 2.023 1.996 1.863 2.015 2.035 1.914
## [23] 2.026 2.340 1.875 1.881 1.887 1.998 2.066
## 
## $jack.boot.se
## [1] 0.7551
## 
## $call
## bootstrap(x = x, nboot = 100, theta = theta, func = perc025)
lowerconf=results$func.thetastar/29    #Lower Bound of 95%CI for Mean

results=bootstrap(x,100,theta, func=perc975)
results
## $thetastar
##   [1] 3.641 2.708 3.395 3.710 2.072 2.668 4.411 3.171 3.301 3.306 3.234
##  [12] 2.930 2.594 4.130 3.483 2.922 2.792 2.864 2.343 3.908 3.063 3.170
##  [23] 3.836 2.540 3.390 3.054 3.264 4.814 2.837 2.270 2.769 2.631 2.658
##  [34] 3.174 4.560 3.499 3.291 4.033 4.241 2.969 3.268 3.547 2.772 3.302
##  [45] 2.681 3.602 3.309 2.430 2.779 3.577 3.111 3.061 3.188 3.580 1.928
##  [56] 2.991 4.036 4.122 3.840 3.377 3.776 2.741 3.908 3.037 3.804 3.423
##  [67] 3.806 2.524 4.359 3.510 3.671 3.474 2.537 3.906 2.524 2.905 2.440
##  [78] 2.613 3.484 2.333 3.510 2.592 2.378 3.545 3.694 3.387 3.619 2.935
##  [89] 4.696 2.376 2.342 3.442 2.831 2.501 2.443 2.878 3.563 3.062 4.387
## [100] 3.685
## 
## $func.thetastar
## 97.5% 
## 4.489 
## 
## $jack.boot.val
##  [1] 3.632 4.441 4.385 4.256 4.702 4.437 4.387 4.393 4.405 4.429 3.967
## [12] 4.731 4.598 3.956 4.439 4.385 4.287 4.199 4.699 4.143 4.232 4.362
## [23] 4.441 4.221 4.584 4.475 4.130 4.731 4.567
## 
## $jack.boot.se
## [1] 1.293
## 
## $call
## bootstrap(x = x, nboot = 100, theta = theta, func = perc975)
upperconf=results$func.thetastar/29    #Upper Bound of 95%CI for Mean

Confidence Interval–Boot Mean—Original Mean

CI=c(lowerconf,upperconf) #95 percent confidence interval
CI   #95 percent Confidence Interval
##    2.5%   97.5% 
## 0.06947 0.15480

Fracsummary=matrix(,1,4)
Fracsummary[1:4]=c(lowerconf,upperconf,FracMean,Volmean)
colnames(Fracsummary)=c(".025Quantile",".975Quantile",
                        "B=100Mean","SampleMean")
rownames(Fracsummary)="n=100"

Fracsummary
##       .025Quantile .975Quantile B=100Mean SampleMean
## n=100      0.06947       0.1548    0.1096     0.1091

Graphical Representation

Ordgraph=hist(Ordfrac, breaks = 10, col = "red", xlab = "Volume", 
    main = "Continuous Weights B=100")
xfit <- seq(min(Ordfrac), max(Ordfrac), length = 40)
yfit <- dnorm(xfit, mean = mean(Ordfrac), sd = sd(Ordfrac)) 
lines(xfit, yfit, col = "blue", lwd = 2)

plot of chunk unnamed-chunk-12

Ordgraph2 <- density(Ordfrac)  ###Filled Density
plot(Ordgraph2, main = "Continuous Weights B=100",xlim=c(.03,.2),xlab="Volume")
polygon(Ordgraph2, col = "Red", border = "blue")

plot of chunk unnamed-chunk-12