## knitr configuration: http://yihui.name/knitr/options#chunk_options
opts_chunk$set(comment = "", error= TRUE, warning = FALSE, message = FALSE,
tidy = FALSE, cache = F, echo = T,
fig.width = 5, fig.height = 5)
## R configuration
options(width = 116, scipen = 5)
Sample with 6 individuals
X <- c(1,5,6,7,8,9)
Sample median
median(X)
[1] 6.5
20 replications of bootstrap samples
resampled <- matrix(sample(X, size = length(X) * 20, replace = TRUE), nrow = length(X))
resampled
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20]
[1,] 8 6 6 8 7 6 7 1 6 7 9 8 8 7 1 8 6 6 6 8
[2,] 7 7 5 1 9 7 5 8 5 6 8 9 1 8 1 9 1 6 9 1
[3,] 7 5 6 1 5 1 9 8 5 8 6 7 5 8 8 1 8 6 1 1
[4,] 9 5 6 1 9 6 9 5 6 6 6 8 1 6 8 8 6 6 1 9
[5,] 1 8 5 5 8 6 8 9 5 7 6 6 7 7 5 7 1 8 5 6
[6,] 8 7 9 8 7 1 9 6 6 8 9 7 9 6 6 6 6 1 9 5
Calculate median for each replicate
medians <- apply(resampled, 2, median)
summary(medians)
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.00 5.88 6.25 6.40 7.12 8.50
library(lattice)
densityplot(medians, width = 2)
95% confidence interval by quantile method
quantile(medians, c(0.025, 0.975))
2.5% 97.5%
4.188 8.025
1000 replications
resampled <- matrix(sample(X, size = length(X) * 1000, replace = TRUE), nrow = length(X))
medians <- apply(resampled, 2, median)
summary(medians)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 6.00 6.50 6.41 7.00 9.00
densityplot(medians, width = 2)
## 95% confidence interval for median by bootstrap quantile method
quantile(medians, c(0.025, 0.975))
2.5% 97.5%
3.0 8.5
## 95% confidence interval for median by asymptotic method
median(X) + c(-1,1) * qnorm(0.975) * sd(medians)
[1] 4.035 8.965
Load package
library(boot)
Define a median function for bootstrapping Define a function that takes the original data as the first argument and a vector of indices as the second argument.
fun.boot <- function(x, i) {
median(x[i])
}
res.boot <- boot(data = X, statistic = fun.boot, R = 1000)
Plot result object
plot(res.boot)
Bootstrap confidence intervals
These are the first order normal approximation (norm), the basic bootstrap interval (basic), the studentized bootstrap interval (stud), the bootstrap percentile interval (perc), and the adjusted bootstrap percentile interval (bca).
In this example studentized interval does not work.
## Normal approximation interval
boot.ci(boot.out = res.boot, conf = 0.95, type = "norm")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot, conf = 0.95, type = "norm")
Intervals :
Level Normal
95% ( 4.214, 9.174 )
Calculations and Intervals on Original Scale
## Basic interval
boot.ci(boot.out = res.boot, conf = 0.95, type = "basic")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot, conf = 0.95, type = "basic")
Intervals :
Level Basic
95% ( 4.5, 10.0 )
Calculations and Intervals on Original Scale
## Percentile interval
boot.ci(boot.out = res.boot, conf = 0.95, type = "perc")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot, conf = 0.95, type = "perc")
Intervals :
Level Percentile
95% ( 3.0, 8.5 )
Calculations and Intervals on Original Scale
## Adjusted percentile interval
boot.ci(boot.out = res.boot, conf = 0.95, type = "bca")
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 1000 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot, conf = 0.95, type = "bca")
Intervals :
Level BCa
95% ( 3, 8 )
Calculations and Intervals on Original Scale
Some BCa intervals may be unstable
URL: http://cran.r-project.org/doc/contrib/Fox-Companion/appendix-bootstrapping.pdf
Load package and dataset
library(car)
library(MASS)
data(Duncan)
head(Duncan)
type income education prestige
accountant prof 62 86 82
pilot prof 72 76 83
architect prof 75 92 90
author prof 55 90 76
chemist prof 64 86 90
minister prof 21 84 87
Fit a linear model by robust regression using an M estimator The coefficient standard errors reported by rlm rely on asymptotic approximations, and may not be trustworthy in a sample of size 45.
mod.duncan.hub <- rlm(prestige ~ income + education, data=Duncan)
summary(mod.duncan.hub, cor = T)
Call: rlm(formula = prestige ~ income + education, data = Duncan)
Residuals:
Min 1Q Median 3Q Max
-30.12 -6.89 1.29 4.59 38.60
Coefficients:
Value Std. Error t value
(Intercept) -7.111 3.881 -1.832
income 0.701 0.109 6.452
education 0.485 0.089 5.438
Residual standard error: 9.89 on 42 degrees of freedom
Correlation of Coefficients:
(Intercept) income
income -0.297
education -0.359 -0.725
Create a bootstrap function
boot.huber <- function(X, i, maxit = 30) {
## Select observations by row numbers
X <- X[i, ]
## Fit model
res.rlm <- rlm(prestige ~ income + education, data = X, maxit = maxit)
## Return coefficient vector
coefficients(res.rlm)
}
Perform bootstrap
res.boot.rlm <- boot(data = Duncan, statistic = boot.huber, R = 100)
res.boot.rlm
ORDINARY NONPARAMETRIC BOOTSTRAP
Call:
boot(data = Duncan, statistic = boot.huber, R = 100)
Bootstrap Statistics :
original bias std. error
t1* -7.1107 -0.276687 3.2831
t2* 0.7014 -0.004200 0.1498
t3* 0.4854 0.003643 0.1207
summary(res.boot.rlm)
R original bootBias bootSE bootMed
1 100 -7.111 -0.27669 3.283 -7.361
2 100 0.701 -0.00420 0.150 0.720
3 100 0.485 0.00364 0.121 0.483
## Coefficient for intercept
plot(res.boot.rlm, index = 1)
## Coefficient for income
plot(res.boot.rlm, index = 2)
## Coefficient for education
plot(res.boot.rlm, index = 3)
Show normal approximation and percentile confidence intervals
## Bootstrap confidence interval for intercept
boot.ci(res.boot.rlm, index = 1, type = c("norm","perc"))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 100 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot.rlm, type = c("norm", "perc"), index = 1)
Intervals :
Level Normal Percentile
95% (-13.269, -0.399 ) (-13.730, -0.124 )
Calculations and Intervals on Original Scale
Some percentile intervals may be unstable
## Bootstrap confidence interval for income
boot.ci(res.boot.rlm, index = 2, type = c("norm","perc"))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 100 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot.rlm, type = c("norm", "perc"), index = 2)
Intervals :
Level Normal Percentile
95% ( 0.4121, 0.9992 ) ( 0.4179, 0.9429 )
Calculations and Intervals on Original Scale
Some percentile intervals may be unstable
## Bootstrap confidence interval for education
boot.ci(res.boot.rlm, index = 3, type = c("norm","perc"))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 100 bootstrap replicates
CALL :
boot.ci(boot.out = res.boot.rlm, type = c("norm", "perc"), index = 3)
Intervals :
Level Normal Percentile
95% ( 0.2452, 0.7184 ) ( 0.2670, 0.7184 )
Calculations and Intervals on Original Scale
Some percentile intervals may be unstable