Douglas Curran-Everett
Explorations in statistics: confidence intervals
Advan Physiol Educ 2009 33:87-90;
doi:10.1152/advan.00006.2009
# -- Advances_Statistics_Code_CIs.R: R code for Explorations in Statistics
# -----
# -- Define population parameters, sample numbers, and plotting ranges
# ---------
PopMean <- 0 # population mean
PopSD <- 1 # population standard deviation
PopVar <- PopSD^2 # population variance
nObs <- 9 # number of observations in each sample
SE <- PopSD/sqrt(nObs) # SD of theoretical distribution of sample mean
nSamples <- 1000 # number of random samples
nCIs <- 100 # number of confidence intervals to plot
nStatistics <- 7 # number of sample statistics: sample, mean, SD, SE, t, lower CI bound, upper CI bound
Alpha <- 0.1 # critical significance level
Conf_Level <- (1 - Alpha) * 100 # confidence level
tCoeff <- qt(Alpha/2, nObs - 1, lower.tail = FALSE) # t coefficient for confidence intervals
nSampleSizes <- 4 # number of sample sizes for CLT example
nObs1 <- 4 # sample size 1
nObs2 <- 2 * nObs1 # sample size 2
nObs3 <- 2 * nObs2 # sample size 3
nObs4 <- 2 * nObs3 # sample size 4
PlotFactor <- 1000 # scaling factor for horizontal axes
ybarPlotMin <- PlotFactor * -6 # sample mean: minimum for plotting
ybarPlotMax <- PlotFactor * 6 # sample mean: maximum for plotting
tPlotMin <- PlotFactor * -10 # t: minimum for plotting
tPlotMax <- PlotFactor * 10 # t: maximum for plotting
# -- The Simulation: Observations and Sample Statistics
# ------------------------
SampleStats <- matrix(nrow = nSamples, ncol = nStatistics)
for (i in 1:nSamples) {
TheData <- round(rnorm(nObs, mean = PopMean, sd = PopSD), 3)
if (i <= 2)
print(TheData) # print observations for samples 1-2
if (i == 1000)
print(TheData) # print observations for sample 1000
Sample_Number <- i # column 1: sample number
SampleStats[i, 1] <- Sample_Number
Sample_Mean <- mean(TheData) # column 2: sample mean
SampleStats[i, 2] <- round(Sample_Mean, 3)
Sample_SD <- sd(TheData) # column 3: standard deviation
SampleStats[i, 3] <- round(Sample_SD, 3)
Sample_SE <- Sample_SD/sqrt(nObs) # column 4: standard error
SampleStats[i, 4] <- round(Sample_SE, 3)
t_Observed <- Sample_Mean/Sample_SE # column 5: observed value of t
SampleStats[i, 5] <- round(t_Observed, 3)
Allowance <- tCoeff * Sample_SE # allowance for conf int
Lower_CI_Bound <- Sample_Mean - Allowance # column 6: lower bound of conf int
SampleStats[i, 6] <- round(Lower_CI_Bound, 3)
Upper_CI_Bound <- Sample_Mean + Allowance # column 7: upper bound of conf int
SampleStats[i, 7] <- round(Upper_CI_Bound, 3)
}
## [1] -0.237 -2.199 0.696 0.381 1.765 -0.723 -1.409 0.563 -0.006
## [1] 0.435 -0.539 0.566 0.427 -1.036 -1.057 0.084 -1.000 -0.765
## [1] 0.706 0.729 -2.261 0.965 0.330 0.419 0.312 -0.339 -1.627
# -- Print statistics from initial 100 and final 5 random samples
# -------------- Sample Mean SD SE t LCI UCI
SampleStats[1:nCIs, ] # print initial 100 samples
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 -0.130 1.189 0.396 -0.328 -0.867 0.607
## [2,] 2 -0.321 0.693 0.231 -1.388 -0.750 0.109
## [3,] 3 0.111 1.064 0.355 0.313 -0.549 0.771
## [4,] 4 0.482 0.926 0.309 1.560 -0.092 1.056
## [5,] 5 -0.207 1.004 0.335 -0.618 -0.829 0.416
## [6,] 6 -0.283 1.082 0.361 -0.784 -0.953 0.388
## [7,] 7 0.334 0.955 0.318 1.050 -0.258 0.926
## [8,] 8 0.118 0.801 0.267 0.443 -0.378 0.614
## [9,] 9 0.181 1.684 0.561 0.322 -0.863 1.224
## [10,] 10 0.277 0.989 0.330 0.841 -0.336 0.890
## [11,] 11 0.356 1.355 0.452 0.788 -0.484 1.196
## [12,] 12 -0.957 0.904 0.301 -3.177 -1.518 -0.397
## [13,] 13 0.226 0.873 0.291 0.778 -0.315 0.767
## [14,] 14 0.042 1.166 0.389 0.107 -0.681 0.764
## [15,] 15 -0.007 0.934 0.311 -0.021 -0.585 0.572
## [16,] 16 0.067 0.650 0.217 0.307 -0.336 0.469
## [17,] 17 -0.196 0.945 0.315 -0.621 -0.782 0.390
## [18,] 18 0.253 1.300 0.433 0.584 -0.553 1.059
## [19,] 19 -0.041 0.767 0.256 -0.160 -0.517 0.435
## [20,] 20 0.263 0.581 0.194 1.358 -0.097 0.623
## [21,] 21 -0.269 1.090 0.363 -0.740 -0.945 0.407
## [22,] 22 -0.434 0.792 0.264 -1.643 -0.925 0.057
## [23,] 23 -0.073 1.353 0.451 -0.162 -0.912 0.765
## [24,] 24 -0.122 0.616 0.205 -0.595 -0.504 0.259
## [25,] 25 0.252 0.776 0.259 0.975 -0.229 0.733
## [26,] 26 0.037 0.678 0.226 0.165 -0.383 0.458
## [27,] 27 0.050 0.633 0.211 0.238 -0.342 0.443
## [28,] 28 0.189 1.001 0.334 0.568 -0.431 0.810
## [29,] 29 -0.169 0.856 0.285 -0.592 -0.700 0.362
## [30,] 30 -0.273 0.802 0.267 -1.020 -0.770 0.224
## [31,] 31 0.275 1.007 0.336 0.818 -0.350 0.899
## [32,] 32 -0.068 0.571 0.190 -0.358 -0.422 0.286
## [33,] 33 -0.062 0.581 0.194 -0.318 -0.422 0.298
## [34,] 34 0.056 1.126 0.375 0.150 -0.642 0.754
## [35,] 35 -0.353 1.062 0.354 -0.997 -1.011 0.306
## [36,] 36 -0.025 1.004 0.335 -0.075 -0.648 0.597
## [37,] 37 0.210 0.901 0.300 0.700 -0.348 0.768
## [38,] 38 0.325 1.160 0.387 0.840 -0.394 1.044
## [39,] 39 0.170 1.388 0.463 0.368 -0.690 1.031
## [40,] 40 -0.213 0.668 0.223 -0.955 -0.626 0.201
## [41,] 41 -0.216 0.940 0.313 -0.691 -0.799 0.366
## [42,] 42 -0.312 0.864 0.288 -1.085 -0.848 0.223
## [43,] 43 0.035 1.144 0.381 0.091 -0.674 0.744
## [44,] 44 0.269 0.885 0.295 0.914 -0.279 0.818
## [45,] 45 -0.211 0.448 0.149 -1.411 -0.488 0.067
## [46,] 46 0.019 1.286 0.429 0.044 -0.778 0.816
## [47,] 47 0.344 0.457 0.152 2.257 0.061 0.627
## [48,] 48 -0.201 0.860 0.287 -0.703 -0.735 0.332
## [49,] 49 -0.031 1.469 0.490 -0.064 -0.942 0.879
## [50,] 50 0.028 0.954 0.318 0.089 -0.563 0.620
## [51,] 51 0.492 0.888 0.296 1.663 -0.058 1.042
## [52,] 52 -0.349 0.737 0.246 -1.421 -0.806 0.108
## [53,] 53 -0.205 1.179 0.393 -0.521 -0.935 0.526
## [54,] 54 0.028 1.531 0.510 0.054 -0.921 0.977
## [55,] 55 0.265 0.888 0.296 0.895 -0.285 0.815
## [56,] 56 0.249 1.402 0.467 0.533 -0.620 1.118
## [57,] 57 0.208 1.254 0.418 0.497 -0.570 0.985
## [58,] 58 0.229 0.893 0.298 0.768 -0.325 0.782
## [59,] 59 -0.046 1.209 0.403 -0.114 -0.795 0.703
## [60,] 60 -0.126 0.708 0.236 -0.535 -0.565 0.312
## [61,] 61 -0.014 1.142 0.381 -0.037 -0.722 0.694
## [62,] 62 0.349 1.114 0.371 0.941 -0.341 1.040
## [63,] 63 0.221 1.040 0.347 0.638 -0.423 0.865
## [64,] 64 -0.160 0.475 0.158 -1.013 -0.455 0.134
## [65,] 65 -0.351 1.581 0.527 -0.667 -1.331 0.628
## [66,] 66 -0.178 0.509 0.170 -1.050 -0.493 0.137
## [67,] 67 0.218 0.800 0.267 0.816 -0.278 0.713
## [68,] 68 0.444 1.164 0.388 1.145 -0.277 1.166
## [69,] 69 0.043 0.896 0.299 0.143 -0.513 0.598
## [70,] 70 0.055 1.029 0.343 0.162 -0.583 0.694
## [71,] 71 0.046 1.021 0.340 0.136 -0.586 0.679
## [72,] 72 -0.410 1.123 0.374 -1.095 -1.106 0.286
## [73,] 73 -0.169 0.805 0.268 -0.629 -0.668 0.330
## [74,] 74 -0.105 1.040 0.347 -0.303 -0.750 0.540
## [75,] 75 -0.100 1.092 0.364 -0.273 -0.777 0.578
## [76,] 76 0.210 0.791 0.264 0.795 -0.281 0.700
## [77,] 77 -0.367 0.755 0.252 -1.457 -0.835 0.101
## [78,] 78 0.380 1.320 0.440 0.864 -0.438 1.199
## [79,] 79 0.172 0.718 0.239 0.718 -0.273 0.617
## [80,] 80 -0.053 0.899 0.300 -0.176 -0.610 0.504
## [81,] 81 0.050 0.765 0.255 0.195 -0.425 0.524
## [82,] 82 0.847 0.868 0.289 2.927 0.309 1.385
## [83,] 83 0.148 1.126 0.375 0.393 -0.550 0.846
## [84,] 84 -0.305 1.461 0.487 -0.626 -1.211 0.601
## [85,] 85 0.625 0.797 0.266 2.354 0.131 1.119
## [86,] 86 -0.657 0.902 0.301 -2.186 -1.217 -0.098
## [87,] 87 0.330 1.012 0.337 0.980 -0.297 0.957
## [88,] 88 -0.304 0.941 0.314 -0.970 -0.887 0.279
## [89,] 89 -0.336 0.769 0.256 -1.312 -0.813 0.140
## [90,] 90 -0.061 1.082 0.361 -0.168 -0.731 0.610
## [91,] 91 -0.224 0.509 0.170 -1.320 -0.540 0.092
## [92,] 92 0.329 0.778 0.259 1.269 -0.153 0.811
## [93,] 93 0.316 1.212 0.404 0.781 -0.435 1.067
## [94,] 94 0.329 1.088 0.363 0.908 -0.345 1.004
## [95,] 95 0.404 0.565 0.188 2.141 0.053 0.754
## [96,] 96 0.029 1.065 0.355 0.082 -0.631 0.689
## [97,] 97 -0.064 1.172 0.391 -0.164 -0.790 0.662
## [98,] 98 0.682 0.962 0.321 2.127 0.086 1.278
## [99,] 99 0.230 0.598 0.199 1.153 -0.141 0.601
## [100,] 100 -0.184 1.108 0.369 -0.497 -0.871 0.503
SampleStats[996:nSamples, ] # print final 5 samples
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 996 0.375 1.060 0.353 1.060 -0.282 1.032
## [2,] 997 0.603 1.053 0.351 1.717 -0.050 1.256
## [3,] 998 0.363 1.165 0.388 0.935 -0.359 1.085
## [4,] 999 -0.208 1.170 0.390 -0.533 -0.934 0.518
## [5,] 1000 -0.085 1.126 0.375 -0.227 -0.783 0.613
# -- Get percentiles of all 1000 sample means
# ----------------------------------
round(quantile(SampleStats[, 2], probs = c(Alpha/2, 0.5, 1 - Alpha/2)), 3)
## 5% 50% 95%
## -0.541 -0.001 0.563
# -- Confidence Intervals :: Figure 2
# ------------------------------------------ # -- Figure 2: first line
# -- Plot the initial 100 confidence intervals
# ---------------------------------
xMin <- trunc(min(SampleStats[1:nCIs, 6])) - 1 # x axis: minimum
xMax <- trunc(max(SampleStats[1:nCIs, 7])) + 1 # x axis: maximum
par(las = 1) # make axis numbers horizontal
#
plot(SampleStats[1:nCIs, 2], SampleStats[1:nCIs, 1], axes = FALSE, frame.plot = FALSE,
main = paste("Initial 100", Conf_Level, "% confidence intervals"), xlab = "Confidence interval bounds",
ylab = " ", xlim = c(xMin, xMax), ylim = c(1, nCIs), type = "n")
axis(1)
for (j in 1:nCIs) {
segments(SampleStats[j, 6], SampleStats[j, 1], SampleStats[j, 7], SampleStats[j,
1], lty = 1, lwd = 1, col = "gray50")
if (SampleStats[j, 6] > PopMean)
segments(SampleStats[j, 6], SampleStats[j, 1], SampleStats[j, 7], SampleStats[j,
1], lwd = 2, col = "black")
if (SampleStats[j, 7] < PopMean)
segments(SampleStats[j, 6], SampleStats[j, 1], SampleStats[j, 7], SampleStats[j,
1], lwd = 2, col = "black")
}
segments(PopMean, 0, PopMean, nCIs, lwd = 2, col = "white")
CI_Graphic <- recordPlot() # store data graphic # -- Figure 2: last line
# -- Replay each data graphic
# --------------------------------------------------
replayPlot(CI_Graphic)