# February 21, 2017
# Run one simulation of encountering Unown until all 26 forms have been seen,
# assuming each form is equally likely to appear.
#' Simulate encounters until every Unown form has been seen
#'
#' Draws forms uniformly at random, with replacement, until all `n_types`
#' distinct forms have appeared at least once (a coupon-collector simulation).
#'
#' @param n_types Number of distinct, equally likely forms. Defaults to 26,
#'   one per letter of the alphabet. Must be a single non-negative whole
#'   number.
#' @return An integer: the total number of encounters needed to see every
#'   form at least once (0 when `n_types` is 0).
simulate_unown <- function(n_types = 26L) {
  stopifnot(
    is.numeric(n_types), length(n_types) == 1L, is.finite(n_types),
    n_types >= 0, n_types == floor(n_types)
  )
  n_types <- as.integer(n_types)

  # Track which forms have been seen and how many encounters have happened,
  # instead of growing a vector of every encounter.
  seen <- logical(n_types)
  n_encountered <- 0L
  n_missing <- n_types

  # Draw in batches of `n_missing`. Completing the set needs at least that
  # many more encounters, so a batch can only complete the set on its final
  # draw and the running total never overshoots the true stopping point.
  while (n_missing > 0L) {
    batch <- sample.int(n_types, size = n_missing, replace = TRUE)
    seen[batch] <- TRUE
    n_encountered <- n_encountered + n_missing
    n_missing <- n_types - sum(seen)
  }

  n_encountered
}
simulate_unown()
#> [1] 93
simulate_unown()
#> [1] 148
simulate_unown()
#> [1] 70
simulate_unown()
#> [1] 107

# Run 10,000 simulations.
simulations <- replicate(10000, simulate_unown())

# Expected value:
round(mean(simulations))
#> [1] 100

# The good news is that the median is less than the mean, so more than half
# of the simulated trainers finish sooner than the average suggests.
median(simulations)
#> [1] 95

# Percentile ranges (empirical quantiles of the simulated counts):
quantile(simulations, probs = c(.05, .10, .25, .75, .90, .95))
#>  5% 10% 25% 75% 90% 95%
#>  60  66  78 117 141 158

# A long tail for unlucky trainers.
library(ggplot2)
ggplot(data.frame(x = simulations)) +
  aes(x = x) +
  geom_histogram(binwidth = 5, color = "white", center = 102.5) +
  labs(
    x = "Num. unowns encountered until 26 unique unowns encountered",
    y = "Num. simulations"
  )

# Now assume a 50% catch rate.
#' Simulate encounters until every Unown form has been caught
#'
#' Each encounter is a form drawn uniformly at random (with replacement), and
#' each encounter independently results in a catch with probability
#' `p_catch`. The simulation stops once all `n_types` forms have been caught.
#'
#' @param p_catch Probability that a single encounter ends in a catch. Must be
#'   a single number in (0, 1]; a value of 0 could never finish.
#' @param n_types Number of distinct, equally likely forms. Defaults to 26,
#'   one per letter of the alphabet. Must be a single non-negative whole
#'   number.
#' @return An integer: the total number of encounters (caught or not) needed
#'   to catch every form at least once.
simulate_unown_catches <- function(p_catch, n_types = 26L) {
  stopifnot(
    is.numeric(p_catch), length(p_catch) == 1L, !is.na(p_catch),
    p_catch > 0, p_catch <= 1,
    is.numeric(n_types), length(n_types) == 1L, is.finite(n_types),
    n_types >= 0, n_types == floor(n_types)
  )
  n_types <- as.integer(n_types)
  catch_probs <- c(p_catch, 1 - p_catch)

  # Track which forms have been caught and how many encounters have happened,
  # instead of growing vectors of every encounter and every catch.
  caught <- logical(n_types)
  n_encountered <- 0L
  n_missing <- n_types

  # Draw in batches of `n_missing`. Completing the set needs at least that
  # many more successful catches, so a batch can only complete the set when
  # every draw in it is a new, caught form, i.e. on its final draw.
  while (n_missing > 0L) {
    batch <- sample.int(n_types, size = n_missing, replace = TRUE)
    catches <- sample(c(TRUE, FALSE), size = n_missing,
                      replace = TRUE, prob = catch_probs)
    caught[batch[catches]] <- TRUE
    n_encountered <- n_encountered + n_missing
    n_missing <- n_types - sum(caught)
  }

  n_encountered
}
simulate_unown_catches(.5)
#> [1] 243
simulate_unown_catches(.5)
#> [1] 223
simulate_unown_catches(.5)
#> [1] 221
simulate_unown_catches(.5)
#> [1] 303

# Run 10,000 simulations. Not surprisingly, it takes about twice as many
# encounters with a 50% catch rate.
simulations <- replicate(10000, simulate_unown_catches(.5))
round(mean(simulations))
#> [1] 200
median(simulations)
#> [1] 189
quantile(simulations, probs = c(.05, .10, .25, .75, .90, .95))
#>  5% 10% 25% 75% 90% 95%
#> 118 131 155 233 284 319
ggplot(data.frame(x = simulations)) +
  aes(x = x) +
  geom_histogram(binwidth = 5, color = "white", center = 102.5) +
  labs(
    x = "Num. unowns encountered until 26 unique unowns caught",
    y = "Num. simulations",
    caption = "50% catch rate"
  )