oi_simulatie_markdown.utf8

Is pi random

Quick analysis on whether or not the first 1000 digits of pi are random

# Load pi as a character string, since R won't store that many digits in
# numeric form. The only non-digit character is the decimal point; the
# leading "3" is part of the string and is kept as data downstream.
a <- "3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198"

# Build the digit table: strip the decimal point from `a`, break what is left
# into single characters, and unnest them so `d` has one digit per row in the
# character column `col`.
d <- a %>%
  str_replace("\\.", "") %>%
  str_split("") %>%
  tibble("col" = .) %>%
  unnest(col)

Double-check that we have the first 1000 digits of pi (the leading 3 plus the 999 digits following the decimal point)

# d holds one row per digit character, so the row count is the number of
# digits being analysed.
nrow(d)

## [1] 1000

What’s our observed distribution of digits?

# Bar chart of the observed share of each digit. The horizontal reference
# line sits at 0.1, the share every digit would have if all ten digits were
# equally frequent.
d %>%
  count(col) %>%
  mutate(prop = n / sum(n)) %>%
  ggplot(mapping = aes(x = col, y = prop)) +
  geom_col() +
  geom_hline(yintercept = 0.1)

Not quite uniform, but is this just due to noise?

Let’s do our test - we’re going to find the chi-square stat under the assumption that each digit from 0-9 should make up 10% of the sample (i.e. the digits are uniformly distributed, as we would expect if they were random)

# Observed chi-square statistic for the digit counts in `d`, measured against
# a point null in which each of the ten digits has probability 0.1.
obs <- d %>%
  specify(response = col) %>%
  hypothesize(
    null = "point",
    p = c(
      "1" = 0.1, "2" = 0.1, "3" = 0.1, "4" = 0.1, "5" = 0.1,
      "6" = 0.1, "7" = 0.1, "8" = 0.1, "9" = 0.1, "0" = 0.1
    )
  ) %>%
  calculate(stat = "Chisq")

# Show the observed chi-square statistic (a one-row tibble with a `stat`
# column) so it appears in the rendered output.
obs

## # A tibble: 1 x 1
##    stat
##   <dbl>
## 1  4.68

Generate 10,000 sample universes in which our hypothesis is true (i.e. the sample is drawn from a world in which 0-9 each are 10%)

# Null distribution of the chi-square statistic: simulate 10,000 samples from
# the point null (each digit with probability 0.1) and compute the statistic
# for every simulated sample.
# NOTE(review): no set.seed() call is visible in this section, so the
# simulated draws (and the resulting p-value) may vary between runs - confirm
# whether a seed is set elsewhere.
test <- d %>%
  specify(response = col) %>%
  hypothesize(
    null = "point",
    p = c(
      "1" = 0.1, "2" = 0.1, "3" = 0.1, "4" = 0.1, "5" = 0.1,
      "6" = 0.1, "7" = 0.1, "8" = 0.1, "9" = 0.1, "0" = 0.1
    )
  ) %>%
  generate(reps = 10000, type = "simulate") %>%
  calculate(stat = "Chisq")

# get p value: the share of simulated chi-square statistics in `test` that
# are at least as large as the observed statistic in `obs`.
# The observed statistic is passed with the full argument name `obs_stat`;
# the abbreviated `obs =` only worked through R's partial argument matching.
test %>%
  get_p_value(obs_stat = obs, direction = "greater")

## # A tibble: 1 x 1
##   p_value
##     <dbl>
## 1   0.859

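About 86% of our simulations had chi-squared stats at least as large as what we observed, so we cannot reject the hypothesis that each digit makes up 10% of the sample. In that sense we conclude that the first 1000 digits look “random”, even if they really aren’t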