Explorations in statistics: confidence intervals

Douglas Curran-Everett
Explorations in statistics: confidence intervals
Advan Physiol Educ 2009 33:87-90;
doi:10.1152/advan.00006.2009

# -- Advances_Statistics_Code_CIs.R: R code for Explorations in Statistics
# -----


# -- Define population parameters, sample numbers, and plotting ranges
# ---------
PopMean <- 0  #  population mean
PopSD <- 1  #  population standard deviation
PopVar <- PopSD^2  #  population variance
nObs <- 9  #  number of observations in each sample
SE <- PopSD/sqrt(nObs)  #  SD of theoretical distribution of sample mean
nSamples <- 1000  #  number of random samples
nCIs <- 100  #  number of confidence intervals to plot
nStatistics <- 7  #  number of sample statistics: sample, mean, SD, SE, t, lower CI bound, upper CI bound

Alpha <- 0.1  #  critical significance level
Conf_Level <- (1 - Alpha) * 100  #  confidence level
tCoeff <- qt(Alpha/2, nObs - 1, lower.tail = FALSE)  #  t coefficient for confidence intervals

nSampleSizes <- 4  #  number of sample sizes for CLT example
nObs1 <- 4  #  sample size 1
nObs2 <- 2 * nObs1  #  sample size 2
nObs3 <- 2 * nObs2  #  sample size 3
nObs4 <- 2 * nObs3  #  sample size 4

PlotFactor <- 1000  #  scaling factor for horizontal axes
ybarPlotMin <- PlotFactor * -6  #  sample mean: minimum for plotting
ybarPlotMax <- PlotFactor * 6  #  sample mean: maximum for plotting
tPlotMin <- PlotFactor * -10  #  t: minimum for plotting
tPlotMax <- PlotFactor * 10  #  t: maximum for plotting


# -- The Simulation: Observations and Sample Statistics
# ------------------------
SampleStats <- matrix(nrow = nSamples, ncol = nStatistics)
for (i in 1:nSamples) {
    TheData <- round(rnorm(nObs, mean = PopMean, sd = PopSD), 3)
    if (i <= 2) 
        print(TheData)  #  print observations for samples 1-2
    if (i == 1000) 
        print(TheData)  #  print observations for sample 1000

    Sample_Number <- i  #  column 1: sample number
    SampleStats[i, 1] <- Sample_Number

    Sample_Mean <- mean(TheData)  #  column 2: sample mean
    SampleStats[i, 2] <- round(Sample_Mean, 3)

    Sample_SD <- sd(TheData)  #  column 3: standard deviation
    SampleStats[i, 3] <- round(Sample_SD, 3)

    Sample_SE <- Sample_SD/sqrt(nObs)  #  column 4: standard error
    SampleStats[i, 4] <- round(Sample_SE, 3)

    t_Observed <- Sample_Mean/Sample_SE  #  column 5: observed value of t
    SampleStats[i, 5] <- round(t_Observed, 3)

    Allowance <- tCoeff * Sample_SE  #  allowance for conf int

    Lower_CI_Bound <- Sample_Mean - Allowance  #  column 6: lower bound of conf int
    SampleStats[i, 6] <- round(Lower_CI_Bound, 3)

    Upper_CI_Bound <- Sample_Mean + Allowance  #  column 7: upper bound of conf int
    SampleStats[i, 7] <- round(Upper_CI_Bound, 3)
}

## [1] -0.237 -2.199  0.696  0.381  1.765 -0.723 -1.409  0.563 -0.006
## [1]  0.435 -0.539  0.566  0.427 -1.036 -1.057  0.084 -1.000 -0.765
## [1]  0.706  0.729 -2.261  0.965  0.330  0.419  0.312 -0.339 -1.627



# -- Print statistics from initial 100 and final 5 random samples
# -------------- Sample Mean SD SE t LCI UCI
SampleStats[1:nCIs, ]  #  print initial 100 samples

##        [,1]   [,2]  [,3]  [,4]   [,5]   [,6]   [,7]
##   [1,]    1 -0.130 1.189 0.396 -0.328 -0.867  0.607
##   [2,]    2 -0.321 0.693 0.231 -1.388 -0.750  0.109
##   [3,]    3  0.111 1.064 0.355  0.313 -0.549  0.771
##   [4,]    4  0.482 0.926 0.309  1.560 -0.092  1.056
##   [5,]    5 -0.207 1.004 0.335 -0.618 -0.829  0.416
##   [6,]    6 -0.283 1.082 0.361 -0.784 -0.953  0.388
##   [7,]    7  0.334 0.955 0.318  1.050 -0.258  0.926
##   [8,]    8  0.118 0.801 0.267  0.443 -0.378  0.614
##   [9,]    9  0.181 1.684 0.561  0.322 -0.863  1.224
##  [10,]   10  0.277 0.989 0.330  0.841 -0.336  0.890
##  [11,]   11  0.356 1.355 0.452  0.788 -0.484  1.196
##  [12,]   12 -0.957 0.904 0.301 -3.177 -1.518 -0.397
##  [13,]   13  0.226 0.873 0.291  0.778 -0.315  0.767
##  [14,]   14  0.042 1.166 0.389  0.107 -0.681  0.764
##  [15,]   15 -0.007 0.934 0.311 -0.021 -0.585  0.572
##  [16,]   16  0.067 0.650 0.217  0.307 -0.336  0.469
##  [17,]   17 -0.196 0.945 0.315 -0.621 -0.782  0.390
##  [18,]   18  0.253 1.300 0.433  0.584 -0.553  1.059
##  [19,]   19 -0.041 0.767 0.256 -0.160 -0.517  0.435
##  [20,]   20  0.263 0.581 0.194  1.358 -0.097  0.623
##  [21,]   21 -0.269 1.090 0.363 -0.740 -0.945  0.407
##  [22,]   22 -0.434 0.792 0.264 -1.643 -0.925  0.057
##  [23,]   23 -0.073 1.353 0.451 -0.162 -0.912  0.765
##  [24,]   24 -0.122 0.616 0.205 -0.595 -0.504  0.259
##  [25,]   25  0.252 0.776 0.259  0.975 -0.229  0.733
##  [26,]   26  0.037 0.678 0.226  0.165 -0.383  0.458
##  [27,]   27  0.050 0.633 0.211  0.238 -0.342  0.443
##  [28,]   28  0.189 1.001 0.334  0.568 -0.431  0.810
##  [29,]   29 -0.169 0.856 0.285 -0.592 -0.700  0.362
##  [30,]   30 -0.273 0.802 0.267 -1.020 -0.770  0.224
##  [31,]   31  0.275 1.007 0.336  0.818 -0.350  0.899
##  [32,]   32 -0.068 0.571 0.190 -0.358 -0.422  0.286
##  [33,]   33 -0.062 0.581 0.194 -0.318 -0.422  0.298
##  [34,]   34  0.056 1.126 0.375  0.150 -0.642  0.754
##  [35,]   35 -0.353 1.062 0.354 -0.997 -1.011  0.306
##  [36,]   36 -0.025 1.004 0.335 -0.075 -0.648  0.597
##  [37,]   37  0.210 0.901 0.300  0.700 -0.348  0.768
##  [38,]   38  0.325 1.160 0.387  0.840 -0.394  1.044
##  [39,]   39  0.170 1.388 0.463  0.368 -0.690  1.031
##  [40,]   40 -0.213 0.668 0.223 -0.955 -0.626  0.201
##  [41,]   41 -0.216 0.940 0.313 -0.691 -0.799  0.366
##  [42,]   42 -0.312 0.864 0.288 -1.085 -0.848  0.223
##  [43,]   43  0.035 1.144 0.381  0.091 -0.674  0.744
##  [44,]   44  0.269 0.885 0.295  0.914 -0.279  0.818
##  [45,]   45 -0.211 0.448 0.149 -1.411 -0.488  0.067
##  [46,]   46  0.019 1.286 0.429  0.044 -0.778  0.816
##  [47,]   47  0.344 0.457 0.152  2.257  0.061  0.627
##  [48,]   48 -0.201 0.860 0.287 -0.703 -0.735  0.332
##  [49,]   49 -0.031 1.469 0.490 -0.064 -0.942  0.879
##  [50,]   50  0.028 0.954 0.318  0.089 -0.563  0.620
##  [51,]   51  0.492 0.888 0.296  1.663 -0.058  1.042
##  [52,]   52 -0.349 0.737 0.246 -1.421 -0.806  0.108
##  [53,]   53 -0.205 1.179 0.393 -0.521 -0.935  0.526
##  [54,]   54  0.028 1.531 0.510  0.054 -0.921  0.977
##  [55,]   55  0.265 0.888 0.296  0.895 -0.285  0.815
##  [56,]   56  0.249 1.402 0.467  0.533 -0.620  1.118
##  [57,]   57  0.208 1.254 0.418  0.497 -0.570  0.985
##  [58,]   58  0.229 0.893 0.298  0.768 -0.325  0.782
##  [59,]   59 -0.046 1.209 0.403 -0.114 -0.795  0.703
##  [60,]   60 -0.126 0.708 0.236 -0.535 -0.565  0.312
##  [61,]   61 -0.014 1.142 0.381 -0.037 -0.722  0.694
##  [62,]   62  0.349 1.114 0.371  0.941 -0.341  1.040
##  [63,]   63  0.221 1.040 0.347  0.638 -0.423  0.865
##  [64,]   64 -0.160 0.475 0.158 -1.013 -0.455  0.134
##  [65,]   65 -0.351 1.581 0.527 -0.667 -1.331  0.628
##  [66,]   66 -0.178 0.509 0.170 -1.050 -0.493  0.137
##  [67,]   67  0.218 0.800 0.267  0.816 -0.278  0.713
##  [68,]   68  0.444 1.164 0.388  1.145 -0.277  1.166
##  [69,]   69  0.043 0.896 0.299  0.143 -0.513  0.598
##  [70,]   70  0.055 1.029 0.343  0.162 -0.583  0.694
##  [71,]   71  0.046 1.021 0.340  0.136 -0.586  0.679
##  [72,]   72 -0.410 1.123 0.374 -1.095 -1.106  0.286
##  [73,]   73 -0.169 0.805 0.268 -0.629 -0.668  0.330
##  [74,]   74 -0.105 1.040 0.347 -0.303 -0.750  0.540
##  [75,]   75 -0.100 1.092 0.364 -0.273 -0.777  0.578
##  [76,]   76  0.210 0.791 0.264  0.795 -0.281  0.700
##  [77,]   77 -0.367 0.755 0.252 -1.457 -0.835  0.101
##  [78,]   78  0.380 1.320 0.440  0.864 -0.438  1.199
##  [79,]   79  0.172 0.718 0.239  0.718 -0.273  0.617
##  [80,]   80 -0.053 0.899 0.300 -0.176 -0.610  0.504
##  [81,]   81  0.050 0.765 0.255  0.195 -0.425  0.524
##  [82,]   82  0.847 0.868 0.289  2.927  0.309  1.385
##  [83,]   83  0.148 1.126 0.375  0.393 -0.550  0.846
##  [84,]   84 -0.305 1.461 0.487 -0.626 -1.211  0.601
##  [85,]   85  0.625 0.797 0.266  2.354  0.131  1.119
##  [86,]   86 -0.657 0.902 0.301 -2.186 -1.217 -0.098
##  [87,]   87  0.330 1.012 0.337  0.980 -0.297  0.957
##  [88,]   88 -0.304 0.941 0.314 -0.970 -0.887  0.279
##  [89,]   89 -0.336 0.769 0.256 -1.312 -0.813  0.140
##  [90,]   90 -0.061 1.082 0.361 -0.168 -0.731  0.610
##  [91,]   91 -0.224 0.509 0.170 -1.320 -0.540  0.092
##  [92,]   92  0.329 0.778 0.259  1.269 -0.153  0.811
##  [93,]   93  0.316 1.212 0.404  0.781 -0.435  1.067
##  [94,]   94  0.329 1.088 0.363  0.908 -0.345  1.004
##  [95,]   95  0.404 0.565 0.188  2.141  0.053  0.754
##  [96,]   96  0.029 1.065 0.355  0.082 -0.631  0.689
##  [97,]   97 -0.064 1.172 0.391 -0.164 -0.790  0.662
##  [98,]   98  0.682 0.962 0.321  2.127  0.086  1.278
##  [99,]   99  0.230 0.598 0.199  1.153 -0.141  0.601
## [100,]  100 -0.184 1.108 0.369 -0.497 -0.871  0.503

SampleStats[996:nSamples, ]  #  print final 5 samples

##      [,1]   [,2]  [,3]  [,4]   [,5]   [,6]  [,7]
## [1,]  996  0.375 1.060 0.353  1.060 -0.282 1.032
## [2,]  997  0.603 1.053 0.351  1.717 -0.050 1.256
## [3,]  998  0.363 1.165 0.388  0.935 -0.359 1.085
## [4,]  999 -0.208 1.170 0.390 -0.533 -0.934 0.518
## [5,] 1000 -0.085 1.126 0.375 -0.227 -0.783 0.613



# -- Get percentiles of all 1000 sample means
# ----------------------------------
round(quantile(SampleStats[, 2], probs = c(Alpha/2, 0.5, 1 - Alpha/2)), 3)

##     5%    50%    95% 
## -0.541 -0.001  0.563



# -- Confidence Intervals :: Figure 2
# ------------------------------------------ # -- Figure 2: first line

# -- Plot the initial 100 confidence intervals
# ---------------------------------
xMin <- trunc(min(SampleStats[1:nCIs, 6])) - 1  #  x axis: minimum
xMax <- trunc(max(SampleStats[1:nCIs, 7])) + 1  #  x axis: maximum
par(las = 1)  #  make axis numbers horizontal
# 
plot(SampleStats[1:nCIs, 2], SampleStats[1:nCIs, 1], axes = FALSE, frame.plot = FALSE, 
    main = paste("Initial 100", Conf_Level, "% confidence intervals"), xlab = "Confidence interval bounds", 
    ylab = " ", xlim = c(xMin, xMax), ylim = c(1, nCIs), type = "n")
axis(1)

for (j in 1:nCIs) {
    segments(SampleStats[j, 6], SampleStats[j, 1], SampleStats[j, 7], SampleStats[j, 
        1], lty = 1, lwd = 1, col = "gray50")
    if (SampleStats[j, 6] > PopMean) 
        segments(SampleStats[j, 6], SampleStats[j, 1], SampleStats[j, 7], SampleStats[j, 
            1], lwd = 2, col = "black")
    if (SampleStats[j, 7] < PopMean) 
        segments(SampleStats[j, 6], SampleStats[j, 1], SampleStats[j, 7], SampleStats[j, 
            1], lwd = 2, col = "black")
}
segments(PopMean, 0, PopMean, nCIs, lwd = 2, col = "white")

plot of chunk unnamed-chunk-1

CI_Graphic <- recordPlot()  #  store data graphic                  # -- Figure 2: last line


# -- Replay each data graphic
# --------------------------------------------------
replayPlot(CI_Graphic)