Douglas Curran-Everett
Explorations in statistics: the bootstrap
Advan Physiol Educ 2009 33:286-292;
doi:10.1152/advan.00062.2009
#
# -- Advances_Statistics_Code_Boot.R: R code for Explorations in Statistics ----
# -- Load the boot package and set up data graphics ----------------------------
#
library ( boot ) # load the boot package
par ( mfrow = c( 1, 1 ) ) # 1 graphic on 1 screen
par ( las = 1 ) # make axis numbers horizontal
par ( adj = 0.5 ) # center titles and labels
# -- Define population parameters, sample numbers, and plotting ranges ---------
#
PopMean <- 0 # population mean
PopSD <- 1 # population standard deviation
PlotFactor <- 1000 # scaling factor for horizontal axes
ybarPlotMin <- PlotFactor * -6 # sample mean: minimum for plotting
ybarPlotMax <- PlotFactor * 6 # sample mean: maximum for plotting
Alpha <- 0.10 # critical significance level
Conf_Level <- 1 - Alpha # confidence level
nBoot <- 10000 # number of bootstrap replications
mean.boot <- function ( x, i ) mean ( x[i] ) # necessary for bootstrap
# -- The Bootstrap :: Table 1 -------------------------------------------------- # -- Table 1: first line
#
Sample_1_Data <- c ( 0.422, 1.103, 1.006, 1.034, 0.285, -0.647, 1.235, 0.912 ,1.825 )
Sample_1_Data
## [1] 0.422 1.103 1.006 1.034 0.285 -0.647 1.235 0.912 1.825
nObs <- length ( Sample_1_Data ) # number of observations in the sample
# -- Generate and print 3 bootstrap samples ---------------------------------
#
Bootstrap_Data <- matrix ( nrow = nObs, ncol = 2 ) # matrix for bootstrap data
for ( i in 1:3 )
{ TheData <- trunc ( runif ( nObs, min = 1, max = nObs+1 ) )
for ( j in 1:nObs )
{ Bootstrap_Data [ j, 1 ] <- TheData [ j ]
Bootstrap_Data [ j, 2 ] <- Sample_1_Data [ TheData [ j ] ]
}
print ( Bootstrap_Data [, 2 ] )
Sample_Mean <- round ( mean ( Bootstrap_Data [, 2 ] ), 3 )
print ( Sample_Mean )
} # -- Table 1: last line
## [1] 1.235 1.006 0.422 1.235 0.422 1.825 1.825 1.235 1.103
## [1] 1.145
## [1] 0.422 1.103 0.285 0.422 1.103 1.235 0.422 1.103 1.034
## [1] 0.792
## [1] 1.103 1.103 0.912 1.235 1.034 1.103 0.285 1.825 1.034
## [1] 1.07
# -- The Bootstrap :: Figure 1 ------------------------------------------------- # -- Figure 1: first line
#
Sample_1_Bootstrap <- boot( Sample_1_Data, mean.boot, nBoot )
# -- Calculate percentiles of bootstrap distribution ---------------------------
#
L99 <- round ( quantile ( Sample_1_Bootstrap$t, probs = c ( 0.005 ) ), 3 )
L95 <- round ( quantile ( Sample_1_Bootstrap$t, probs = c ( 0.025 ) ), 3 )
L90 <- round ( quantile ( Sample_1_Bootstrap$t, probs = c ( 0.05 ) ), 3 )
U90 <- round ( quantile ( Sample_1_Bootstrap$t, probs = c ( 0.95 ) ), 3 )
U95 <- round ( quantile ( Sample_1_Bootstrap$t, probs = c ( 0.975 ) ), 3 )
U99 <- round ( quantile ( Sample_1_Bootstrap$t, probs = c ( 0.995 ) ), 3 )
hist ( Sample_1_Bootstrap$t, # plot bootstrap distribution
axes = FALSE,
xlab = "Bootstrap sample mean",
ylab = " ",
main = paste ( "Distribution of", nBoot, "bootstrap means" ),
)
axis ( 1 )
#
segments ( L99, 0, L99, nBoot, col = "gray50" ) # 99th percentiles
segments ( U99, 0, U99, nBoot, col = "gray50" )
segments ( L95, 0, L95, nBoot, col = "gray50" ) # 95th percentiles
segments ( U95, 0, U95, nBoot, col = "gray50" )
segments ( L90, 0, L90, nBoot, col = "gray50" ) # 90th percentiles
segments ( U90, 0, U90, nBoot, col = "gray50" )
Bootstrap_Example <- recordPlot ( ) # store data graphic # -- Figure 1: last line
# -- Get average and standard deviation of bootstrap replications --------------
#
print ( round ( mean ( Sample_1_Bootstrap$t ), 3 ) ) # Ave { bootstrap means }
## [1] 0.799
print ( round ( sd ( Sample_1_Bootstrap$t ), 3 ) ) # SD { bootstrap means }
## [1] 0.223
# -- The Bootstrap :: Figure 2 ------------------------------------------------- # -- Figure 2: first line
#
par ( mfrow = c( 2, 2 ) ) # 4 graphics on 1 screen
nObs <- 100 # sample size
xy <- c ( ybarPlotMin:ybarPlotMax ) / PlotFactor # generate theoretical distribution
N_0_1 <- dnorm ( xy, mean = PopMean, sd = PopSD ) # normal distribution
L_0_1 <- dlnorm ( xy, mean = PopMean, sd = PopSD ) # log normal distribution
N_x_Min <- -4 * PopSD # minimum x for normal distribution
N_x_Max <- 4 * PopSD # maximum x for normal distribution
N_y_Max <- max ( N_0_1 ) # maximum y for normal distribution
L_y_Max <- max ( L_0_1 ) # maximum y for log normal distribution
# -- The Bootstrap :: Figure 2 ... normal distribution -------------------------
#
TheData <- round ( rnorm ( nObs, mean = PopMean, sd = PopSD ), 4 )
plot ( 0, 0, # normal distribution
axes = FALSE,
frame.plot = FALSE,
main = "Normal distribution",
xlab = " ",
ylab = " ",
xlim = c ( N_x_Min, N_x_Max ),
ylim = c ( 0, N_y_Max ),
type = "n"
)
axis ( 1 )
lines ( xy, N_0_1, col = "black" )
qqnorm ( TheData, # normal distribution: normal quantile plot
main = paste ( "Normal distribution: n = ", nObs ),
xlab = "Theoretical quantiles",
ylab = "Empirical quantiles",
pch = 19,
frame.plot = FALSE,
)
qqline ( TheData )
# -- The Bootstrap :: Figure 2 ... nonnormal distribution ----------------------
#
LogData <- round ( rlnorm ( nObs, mean = PopMean, sd = PopSD ), 4 )
plot ( 0, 0, # nonnormal distribution: log normal distribution
axes = FALSE,
frame.plot = FALSE,
main = "Nonnormal distribution",
xlab = " ",
ylab = " ",
xlim = c ( 0, N_x_Max ),
ylim = c ( 0, L_y_Max ),
type = "n"
)
axis ( 1 )
lines ( xy, L_0_1, col = "black" )
qqnorm ( LogData, # nonnormal distribution: normal quantile plot
main = paste ( "Nonnormal distribution: n = ", nObs ),
xlab = "Theoretical quantiles",
ylab = "Empirical quantiles",
pch = 19,
frame.plot = FALSE,
)
qqline ( LogData )
Normal_Quantile_Plots <- recordPlot ( ) # store data graphic # -- Figure 2: last line
# -- The Bootstrap :: Figure 3 ------------------------------------------------- # -- Figure 3: first line
#
par ( mfrow = c( 1, 1 ) ) # 1 graphic on 1 screen
qqnorm ( Sample_1_Bootstrap$t,
main = paste ( "Normal quantile plot: sample 1" ),
xlab = "Theoretical quantiles",
ylab = "Empirical quantiles",
frame.plot = FALSE,
)
qqline ( Sample_1_Bootstrap$t )
Sample_1_Normal_Quantile_Plot <- recordPlot ( ) # store data graphic # -- Figure 3: last line
# -- The Bootstrap :: Figure 4 ------------------------------------------------- # -- Figure 4: first line
#
Sample_1000_Data <- c ( 0.560, -1.138, 0.485, -0.864, -0.277, 2.198, 0.050, 0.500, 0.587 )
Sample_1000_Data
## [1] 0.560 -1.138 0.485 -0.864 -0.277 2.198 0.050 0.500 0.587
Sample_1000_Bootstrap <- boot( Sample_1000_Data, mean.boot, nBoot )
qqnorm ( Sample_1000_Bootstrap$t,
main = paste ( "Normal quantile plot: sample 1000" ),
xlab = "Theoretical quantiles",
ylab = "Empirical quantiles",
frame.plot = FALSE,
)
qqline ( Sample_1000_Bootstrap$t )
Sample_1000_Normal_Quantile_Plot <- recordPlot ( ) # store data graphic # -- Figure 4: last line
# -- The Bootstrap :: Confidence intervals: sample 1 ---------------------------
#
boot.ci ( Sample_1_Bootstrap,
conf = Conf_Level,
type = c ( "norm", "perc", "bca" )
)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 10000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = Sample_1_Bootstrap, conf = Conf_Level, type = c("norm",
## "perc", "bca"))
##
## Intervals :
## Level Normal Percentile BCa
## 90% ( 0.4291, 1.1623 ) ( 0.4066, 1.1451 ) ( 0.3444, 1.1032 )
## Calculations and Intervals on Original Scale
# -- The Bootstrap in Data Transformation --------------------------------------
#
par ( mfrow = c( 2, 2 ) ) # 4 graphics on 1 screen
# -- Actual CRP data and bootstrap of sample mean ------------------------------
#
Temp_CRP_Data <- c ( 0, 3.9, 5.64, 8.22, 0, 5.62, 3.92, 6.81, 30.61, 0,
73.2, 0, 46.7, 0, 0, 26.41, 22.82, 0, 0, 3.49,
0, 0, 4.81, 9.57, 5.36, 0, 5.66, 0, 59.76, 12.38,
15.74, 0, 0, 0, 0, 9.37, 20.78, 7.1, 7.89, 5.53
)
CRP_Obs <- length ( Temp_CRP_Data )
CRP_Data <- matrix ( nrow = CRP_Obs, ncol = 2 )
CRP_Data [ , 1 ] <- Temp_CRP_Data
#
CRP_Bootstrap_Data <- boot( CRP_Data [ , 1 ], mean.boot, nBoot )
# -- Log-transformed CRP data and bootstrap of sample mean ---------------------
#
CRP_Data [ , 2 ] <- log ( CRP_Data [ , 1 ] + 1 ) # ln ( CRP + 1 )
CRP_Data
## [,1] [,2]
## [1,] 0.00 0.000
## [2,] 3.90 1.589
## [3,] 5.64 1.893
## [4,] 8.22 2.221
## [5,] 0.00 0.000
## [6,] 5.62 1.890
## [7,] 3.92 1.593
## [8,] 6.81 2.055
## [9,] 30.61 3.453
## [10,] 0.00 0.000
## [11,] 73.20 4.307
## [12,] 0.00 0.000
## [13,] 46.70 3.865
## [14,] 0.00 0.000
## [15,] 0.00 0.000
## [16,] 26.41 3.311
## [17,] 22.82 3.171
## [18,] 0.00 0.000
## [19,] 0.00 0.000
## [20,] 3.49 1.502
## [21,] 0.00 0.000
## [22,] 0.00 0.000
## [23,] 4.81 1.760
## [24,] 9.57 2.358
## [25,] 5.36 1.850
## [26,] 0.00 0.000
## [27,] 5.66 1.896
## [28,] 0.00 0.000
## [29,] 59.76 4.107
## [30,] 12.38 2.594
## [31,] 15.74 2.818
## [32,] 0.00 0.000
## [33,] 0.00 0.000
## [34,] 0.00 0.000
## [35,] 0.00 0.000
## [36,] 9.37 2.339
## [37,] 20.78 3.081
## [38,] 7.10 2.092
## [39,] 7.89 2.185
## [40,] 5.53 1.876
#
log_CRP_Bootstrap_Data <- boot( CRP_Data [ , 2 ], mean.boot, nBoot )
# -- The Bootstrap in Data Transformation :: Figure 6 --- # -- Figure 6: first line
#
hist ( CRP_Data [ , 1 ], # Actual CRP data
axes = FALSE,
xlab = " ",
ylab = " ",
main = paste ( "Actual: n = ", CRP_Obs )
)
axis ( 1 )
#
qqnorm ( CRP_Data [ , 1 ], main = "Normal quantile plot", pch = 19, frame.plot = FALSE )
qqline ( CRP_Data [ , 1 ] )
hist ( CRP_Data [ , 2 ], # Transformed CRP data
axes = FALSE,
xlab = " ",
ylab = " ",
main = paste ( "Transformed: n = ", CRP_Obs )
)
axis ( 1 )
#
qqnorm ( CRP_Data [ , 2 ], main = "Normal quantile plot", pch = 19, frame.plot = FALSE )
qqline ( CRP_Data [ , 2 ] )
Actual_Log_Data_Graphic <- recordPlot ( ) # store data graphic # -- Figure 6: last line
# -- The Bootstrap in Data Transformation :: Figure 7 --- # -- Figure 7: first line
#
hist ( CRP_Bootstrap_Data$t, # Actual CRP data
axes = FALSE,
xlab = " ",
ylab = " ",
main = paste ( "Actual:", nBoot, "bootstrap means" )
)
axis ( 1 )
#
qqnorm ( CRP_Bootstrap_Data$t, main = "Normal quantile plot", frame.plot = FALSE )
qqline ( CRP_Bootstrap_Data$t )
hist ( log_CRP_Bootstrap_Data$t, # Transformed CRP data
axes = FALSE,
xlab = " ",
ylab = " ",
main = paste ( "Transformed:", nBoot, "bootstrap means" )
)
axis ( 1 )
#
qqnorm ( log_CRP_Bootstrap_Data$t, main = "Normal quantile plot", frame.plot = FALSE )
qqline ( log_CRP_Bootstrap_Data$t )
Actual_Log_Bootstrap_Graphic <- recordPlot ( ) # store data graphic # Figure 7: last line
# -- Replay each data graphic --------------------------------------------------
#
replayPlot ( Bootstrap_Example ) # Figure 1
replayPlot ( Normal_Quantile_Plots ) # Figure 2
replayPlot ( Sample_1_Normal_Quantile_Plot ) # Figure 3
replayPlot ( Sample_1000_Normal_Quantile_Plot ) # Figure 4
replayPlot ( Actual_Log_Data_Graphic ) # Figure 6
replayPlot ( Actual_Log_Bootstrap_Graphic ) # Figure 7