Introduction

This document demonstrates the Law of Large Numbers using simulations and plots programmed in R.

library(knitr)
knitr::opts_chunk$set(tidy=T, 
               fig.width=10,
               fig.height=5,
               fig.align='left',
               warning=FALSE,
               message=FALSE,
               echo=TRUE)
options(width = 120)
library(ggplot2)
library(scales)

Coin Toss

Generate Data

Create a function to generate data simulating tosses of a coin.

tossCoin = function(n=30, p=0.5) {

  # create a probability distribution, a vector of outcomes (H/T are coded using 0/1)
  # and their associated probabilities
  outcomes = c(0,1) # sample space
  probabilities = c(1-p,p)
  
  # create a random sample of n flips; this could also be done with
  # the rbinom() function, but sample() is perhaps more useful
  flips = sample(outcomes,n,replace=T,prob=probabilities)
  
  # now create a cumulative mean vector
  cum_sum = cumsum(flips)
  index = c(1:n)
  cum_mean = cum_sum / index
  
  # now combine the index, flips and cum_mean vectors
  # into a data frame and return it
  # return(data.frame(index,flips,cum_mean))
  return(data.frame(index,cum_mean))
}

Plot the Data

Plot the data to demonstrate how the cumulative sample mean converges on the expected value when the sample gets large.

ggplotCoinTosses = function(n=30, p=.50) {
  # visualize how cumulative average converges on p  
  # roll the dice n times and calculate means
  trial1 = tossCoin(n,p)
  max_y = ceiling(max(trial1$cum_mean))
  if (max_y < .75) max_y = .75 
  min_y = floor(min(trial1$cum_mean))
  if (min_y > .4) min_y = .4
  
  # calculate last mean and standard error
  last_mean = round(trial1$cum_mean[n],9)
  
  # plot the results together
  plot1 = ggplot(trial1, aes(x=index,y=cum_mean)) +
    geom_line(colour = "blue") +
    geom_abline(intercept=0.5,slope=0, color = 'red', size=.5) +      
    theme(plot.title = element_text(size=rel(1.5)),
          panel.background = element_rect()) +
    labs(x = "n (number of tosses)", 
         y = "Cumulative Average") +
    scale_y_continuous(limits = c(min_y, max_y)) +
    scale_x_continuous(trans = "log10",
                       breaks = trans_breaks("log10",function(x) 10^x),
                       labels = trans_format("log10",math_format(10^.x))) +
    annotate("text",
             label=paste("Cumulative mean =", last_mean,
                         "\nEV =",  p,
                         "\nSample size =", n), 
             y=(max_y - .20), 
             x=10^(log10(n)/2), colour="darkgreen") +
    annotate("text",
           label=paste("P(Heads) with Fair Coin = 0.50"), 
           y=(max_y - .80), 
           x=10^(log10(n)/2), colour="red")

  return(plot1)
}  
# call the function; let's use a fair coin
ggplotCoinTosses(100000, .50)

Programming Environment

sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] scales_0.4.1  ggplot2_2.2.1 knitr_1.17   
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.12     digest_0.6.12    rprojroot_1.2    plyr_1.8.4       grid_3.4.1       gtable_0.2.0    
##  [7] backports_1.1.0  formatR_1.5      magrittr_1.5     evaluate_0.10.1  rlang_0.1.2      stringi_1.1.5   
## [13] lazyeval_0.2.0   rmarkdown_1.6    labeling_0.3     tools_3.4.1      stringr_1.2.0    munsell_0.4.3   
## [19] yaml_2.1.14      compiler_3.4.1   colorspace_1.3-2 htmltools_0.3.6  tibble_1.3.3