This document demonstrates the Law of Large Numbers using simulations and plots programmed in R.
library(knitr)
knitr::opts_chunk$set(tidy=T,
fig.width=10,
fig.height=5,
fig.align='left',
warning=FALSE,
message=FALSE,
echo=TRUE)
options(width = 120)
library(ggplot2)
library(scales)
Create a function to generate data simulating tosses of a coin.
tossCoin = function(n=30, p=0.5) {
# create a probability distribution, a vector of outcomes (H/T are coded using 0/1)
# and their associated probabilities
outcomes = c(0,1) # sample space
probabilities = c(1-p,p)
# create a random sample of n flips; this could also be done with
# the rbinom() function, but sample() is perhaps more useful
flips = sample(outcomes,n,replace=T,prob=probabilities)
# now create a cumulative mean vector
cum_sum = cumsum(flips)
index = c(1:n)
cum_mean = cum_sum / index
# now combine the index, flips and cum_mean vectors
# into a data frame and return it
# return(data.frame(index,flips,cum_mean))
return(data.frame(index,cum_mean))
}
Plot the data to demonstrate how the cumulative sample mean converges on the expected value when the sample gets large.
ggplotCoinTosses = function(n=30, p=.50) {
# visualize how cumulative average converges on p
# roll the dice n times and calculate means
trial1 = tossCoin(n,p)
max_y = ceiling(max(trial1$cum_mean))
if (max_y < .75) max_y = .75
min_y = floor(min(trial1$cum_mean))
if (min_y > .4) min_y = .4
# calculate last mean and standard error
last_mean = round(trial1$cum_mean[n],9)
# plot the results together
plot1 = ggplot(trial1, aes(x=index,y=cum_mean)) +
geom_line(colour = "blue") +
geom_abline(intercept=0.5,slope=0, color = 'red', size=.5) +
theme(plot.title = element_text(size=rel(1.5)),
panel.background = element_rect()) +
labs(x = "n (number of tosses)",
y = "Cumulative Average") +
scale_y_continuous(limits = c(min_y, max_y)) +
scale_x_continuous(trans = "log10",
breaks = trans_breaks("log10",function(x) 10^x),
labels = trans_format("log10",math_format(10^.x))) +
annotate("text",
label=paste("Cumulative mean =", last_mean,
"\nEV =", p,
"\nSample size =", n),
y=(max_y - .20),
x=10^(log10(n)/2), colour="darkgreen") +
annotate("text",
label=paste("P(Heads) with Fair Coin = 0.50"),
y=(max_y - .80),
x=10^(log10(n)/2), colour="red")
return(plot1)
}
# call the function; let's use a fair coin
ggplotCoinTosses(100000, .50)
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scales_0.4.1 ggplot2_2.2.1 knitr_1.17
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.12 digest_0.6.12 rprojroot_1.2 plyr_1.8.4 grid_3.4.1 gtable_0.2.0
## [7] backports_1.1.0 formatR_1.5 magrittr_1.5 evaluate_0.10.1 rlang_0.1.2 stringi_1.1.5
## [13] lazyeval_0.2.0 rmarkdown_1.6 labeling_0.3 tools_3.4.1 stringr_1.2.0 munsell_0.4.3
## [19] yaml_2.1.14 compiler_3.4.1 colorspace_1.3-2 htmltools_0.3.6 tibble_1.3.3