4.7 - Simulations across parameter grids

Motivation

  • Suppose \(Y\sim EXP(\lambda)\), and let \(X = \lfloor{Y}\rfloor\).
  • Can show that \(X \sim GEO(p)\) (“number of failures” version over 0,1,2,…) with \(p = 1-e^{-\lambda}\)
  • You have the tools to simulate this for a single \(\lambda\), but in practice we want to verify results for a grid of parameters to ensure we didn’t get “lucky”.

parameters() function

  • The purrrfect package has a parameters() function we can use to set up a parameter grid over which to simulate data.
  • Process:
    • Use parameters() to set up parameter grid
    • Use add_trials(N) to add N rows per parameter combo
    • Use mutate() to add columns of simulated data; may require use of map_* functions
  • All of this should exist in a single chain of %>% pipes!

Basic set up

library(purrrfect)
library(tidyverse)
(parameters(~lambda, 
            seq(0.25,1,by=0.25)
            )
)
# A tibble: 4 × 1
  lambda
   <dbl>
1   0.25
2   0.5 
3   0.75
4   1   
(parameters(~lambda, 
            seq(0.25,1,by=0.25)
            )
  %>% add_trials(3)
)
# A tibble: 12 × 2
   lambda .trial
    <dbl>  <dbl>
 1   0.25      1
 2   0.25      2
 3   0.25      3
 4   0.5       1
 5   0.5       2
 6   0.5       3
 7   0.75      1
 8   0.75      2
 9   0.75      3
10   1         1
11   1         2
12   1         3

Exponential floor example

N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda, 
                        seq(0.25,1,by=0.25)
                        )
  %>% add_trials(N)
) %>% head
# A tibble: 6 × 2
  lambda .trial
   <dbl>  <dbl>
1   0.25      1
2   0.25      2
3   0.25      3
4   0.25      4
5   0.25      5
6   0.25      6

Exponential floor example

N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda, 
                        seq(0.25,1,by=0.25)
                        )
  %>% add_trials(N)
  %>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
) %>% head
# A tibble: 6 × 3
  lambda .trial     Y
   <dbl>  <dbl> <dbl>
1   0.25      1  1.53
2   0.25      2 13.8 
3   0.25      3  6.57
4   0.25      4  3.17
5   0.25      5  1.87
6   0.25      6  2.86

Exponential floor example

N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda, 
                        seq(0.25,1,by=0.25)
                        )
  %>% add_trials(N)
  %>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
  %>% mutate(X = floor(Y))
) %>% head
# A tibble: 6 × 4
  lambda .trial     Y     X
   <dbl>  <dbl> <dbl> <dbl>
1   0.25      1  1.53     1
2   0.25      2 13.8     13
3   0.25      3  6.57     6
4   0.25      4  3.17     3
5   0.25      5  1.87     1
6   0.25      6  2.86     2

Exponential floor example

N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda, 
                        seq(0.25,1,by=0.25)
                        )
  %>% add_trials(N)
  %>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
  %>% mutate(X = floor(Y))
  %>% mutate(Fhat_X = cume_dist(X), .by = lambda)
) %>% head
1
Important!!! Evaluate empirical CDF by each parameter value in the grid.
# A tibble: 6 × 5
  lambda .trial     Y     X Fhat_X
   <dbl>  <dbl> <dbl> <dbl>  <dbl>
1   0.25      1  1.53     1  0.386
2   0.25      2 13.8     13  0.971
3   0.25      3  6.57     6  0.820
4   0.25      4  3.17     3  0.622
5   0.25      5  1.87     1  0.386
6   0.25      6  2.86     2  0.517

Exponential floor example

N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda, 
                        seq(0.25,1,by=0.25)
                        )
  %>% add_trials(N)
  %>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
  %>% mutate(X = floor(Y))
  %>% mutate(Fhat_X = cume_dist(X), .by = lambda)
  %>% mutate(F_X = pgeom(X, prob = 1-exp(-lambda)))
) %>% head
1
Important!!! Evaluate empirical CDF by each parameter value in the grid.
# A tibble: 6 × 6
  lambda .trial     Y     X Fhat_X   F_X
   <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl>
1   0.25      1  1.53     1  0.386 0.393
2   0.25      2 13.8     13  0.971 0.970
3   0.25      3  6.57     6  0.820 0.826
4   0.25      4  3.17     3  0.622 0.632
5   0.25      5  1.87     1  0.386 0.393
6   0.25      6  2.86     2  0.517 0.528

Plotting across a single grid with facet_wrap()

  • If we just have one parameter (here, \(\lambda\)), we can use facet_wrap() to produce small multiples of our overlaid CDF plots or p-p plots:
(ggplot(aes(x=X),data=simstudy)
  + geom_step(aes(y =Fhat_X, color = 'Simulated CDF'))
  + geom_step(aes(y = F_X, color = 'Analytic CDF'))
  + labs(y=expression(P(X <= x)), x= 'x', color='')
  + theme_classic(base_size = 12)
  + facet_wrap(~lambda, labeller = label_both)
)

(ggplot(aes(x=Fhat_X, y = F_X),data=simstudy)
  + geom_point()
  + labs(y=expression(P(X <= x)), x= expression(hat(P)(X <= x)))
  + geom_abline(intercept=0, slope=1)
  + theme_classic(base_size = 12)
  + facet_wrap(~lambda, labeller = label_both)
)

Thinned binomial

  • Can show that if \(X\sim BIN(m,\alpha)\), and given a realization of \(X=x\), \(Y|X=x \sim BIN(x,p)\), unconditionally \(Y\sim BIN(m, \alpha p)\).
  • We now need to verify over a grid of potentially 3 parameters \((m, \alpha, p)\).
  • For simplicity, fix \(m = 50\) and consider a grid over \((\alpha, p)\).
  • For grids involving multiple parameters, we need the suite of pmap() and pmap_*() functions
    • pmap(.l, .f): input is a list of vectors, output is a list
    • pmap_dbl(), pmap_int(), pmap_chr(), etc.: input is a list of vectors, output is a dbl/int/chr

Thinned binomial simulation

N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
                             c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
                             )
) %>% head
# A tibble: 6 × 2
  alpha     p
  <dbl> <dbl>
1   0.2   0.2
2   0.2   0.5
3   0.2   0.8
4   0.5   0.2
5   0.5   0.5
6   0.5   0.8

Thinned binomial simulation

N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
                             c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
                             )
  %>% add_trials(N)
) %>% head
# A tibble: 6 × 3
  alpha     p .trial
  <dbl> <dbl>  <dbl>
1   0.2   0.2      1
2   0.2   0.2      2
3   0.2   0.2      3
4   0.2   0.2      4
5   0.2   0.2      5
6   0.2   0.2      6

Thinned binomial simulation

N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
                             c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
                             )
  %>% add_trials(N)
  %>% mutate(X = map_int(alpha, \(a) rbinom(1, size = 50, prob = a)))
) %>% head
# A tibble: 6 × 4
  alpha     p .trial     X
  <dbl> <dbl>  <dbl> <int>
1   0.2   0.2      1    11
2   0.2   0.2      2     6
3   0.2   0.2      3     8
4   0.2   0.2      4     9
5   0.2   0.2      5     9
6   0.2   0.2      6     8

Thinned binomial simulation

N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
                             c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
                             )
  %>% add_trials(N)
  %>% mutate(X = map_int(alpha, \(a) rbinom(1, size = 50, prob = a)))
  %>% mutate(Y = pmap_int(list(X,p), \(var1,var2) rbinom(1, size = var1, prob = var2)))
) %>% head
1
Use of pmap to map over two columns.
# A tibble: 6 × 5
  alpha     p .trial     X     Y
  <dbl> <dbl>  <dbl> <int> <int>
1   0.2   0.2      1    11     1
2   0.2   0.2      2     6     2
3   0.2   0.2      3     8     2
4   0.2   0.2      4     9     0
5   0.2   0.2      5     9     1
6   0.2   0.2      6     8     0

Thinned binomial simulation

N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
                             c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
                             )
  %>% add_trials(N)
  %>% mutate(X = map_int(alpha, \(a) rbinom(1, size = 50, prob = a)))
  %>% mutate(Y = pmap_int(list(X,p), \(var1,var2) rbinom(1, size = var1, prob = var2)))
  %>% mutate(Fhat_Y = cume_dist(Y), .by = c(alpha, p))
  %>% mutate(F_Y = pbinom(Y, size=50, prob=p*alpha))
) %>% head
1
Use of pmap to map over two columns.
2
Empirical CDF must be evaluated for each (alpha, p) combo.
# A tibble: 6 × 7
  alpha     p .trial     X     Y Fhat_Y   F_Y
  <dbl> <dbl>  <dbl> <int> <int>  <dbl> <dbl>
1   0.2   0.2      1    11     1  0.407 0.400
2   0.2   0.2      2     6     2  0.677 0.677
3   0.2   0.2      3     8     2  0.677 0.677
4   0.2   0.2      4     9     0  0.136 0.130
5   0.2   0.2      5     9     1  0.407 0.400
6   0.2   0.2      6     8     0  0.136 0.130

Plotting across a grid combo with facet_grid()

  • Here we have two parameters, \(\alpha\) and \(p\)
  • Thus, we use facet_grid() to produce our plots:
(ggplot(aes(x=Y),data=thinned_binom)
  + geom_step(aes(y =Fhat_Y, color = 'Simulated CDF'))
  + geom_step(aes(y = F_Y, color = 'Analytic CDF'))
  + labs(y=expression(P(Y <= y)), x= 'y', color='')
  + theme_classic(base_size = 12)
  + facet_grid(alpha~p, labeller = label_both)
)

(ggplot(aes(x=Fhat_Y, y = F_Y),data=thinned_binom)
  + geom_point()
  + labs(y=expression(P(Y<= y)), x= expression(hat(P)(Y <= y)))
  + geom_abline(intercept=0, slope=1)
  + theme_classic(base_size = 10)
  + facet_grid(alpha~p, labeller = label_both)
)

Simulating exponentials from \(UNIF(0,1)\)

  • Let \(U \sim UNIF(0,1)\)
  • Verify via simulation study across grid of \(\lambda \in \{0.1, 0.2, ..., 0.9\}\) that \(Y\equiv F_Y^{-1}(U) = -\frac{\ln(1-U)}{\lambda}\sim EXP(\lambda)\)

Code

N <- 10000
set.seed(227)
(exp_from_U <- parameters(~ lambda,
                             seq(0.1, 0.9, by = 0.1)
                             )
  %>% add_trials(N)
  %>% mutate(U = map_dbl(.trial, \(t) runif(1, min=0,  max=1))) 
  %>% mutate(Y = -log(1-U)/lambda) 
  %>% mutate(f_Y = lambda*exp(-lambda*Y))
  %>% mutate(Fhat_Y = cume_dist(Y), .by = lambda)
  %>% mutate(F_Y = pexp(Y, lambda))
) %>% head
1
Could have used dexp(), but this is a more general approach
# A tibble: 6 × 7
  lambda .trial      U      Y    f_Y Fhat_Y    F_Y
   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1    0.1      1 0.692  11.8   0.0308 0.691  0.692 
2    0.1      2 0.0527  0.542 0.0947 0.0506 0.0527
3    0.1      3 0.224   2.54  0.0776 0.223  0.224 
4    0.1      4 0.370   4.63  0.0630 0.377  0.370 
5    0.1      5 0.382   4.82  0.0618 0.388  0.382 
6    0.1      6 0.275   3.21  0.0725 0.276  0.275 

Empirical and analytic densities

  • Using facet_wrap() once again
  • Freeing up one or both axes may be optimal:
(ggplot(aes(x=Y),data=exp_from_U)
  + geom_histogram(aes(y =after_stat(density)), 
                   fill = 'goldenrod',
                   binwidth = 0.1, center = 0.05)
  + geom_line(aes(y = f_Y), col = 'cornflowerblue', linewidth=1)
  + labs(y='Exponential density', x= 'y')
  + theme_classic(base_size = 12)
  + facet_wrap(~lambda, labeller = label_both)
)

(ggplot(aes(x=Y),data=exp_from_U)
  + geom_histogram(aes(y =after_stat(density)), 
                   fill = 'goldenrod',
                   binwidth = 0.1, center = 0.05)
  + geom_line(aes(y = f_Y), col = 'cornflowerblue', linewidth=1)
  + labs(y='Exponential density', x= 'y')
  + theme_classic(base_size = 12)
  + facet_wrap(~lambda, labeller = label_both, scales = 'free_x')
)

Empirical and analytic CDF plots

(ggplot(aes(x=Y),data=exp_from_U)
  + geom_step(aes(y =Fhat_Y, color = 'Simulated CDF'))
  + geom_step(aes(y = F_Y, color = 'Analytic CDF'))
  + labs(y=expression(P(Y <= y)), x= 'y', color='')
  + theme_classic(base_size = 12)
  + facet_wrap(~lambda, labeller = label_both)
)

(ggplot(aes(x=Y),data=exp_from_U)
  + geom_step(aes(y =Fhat_Y, color = 'Simulated CDF'))
  + geom_step(aes(y = F_Y, color = 'Analytic CDF'))
  + labs(y=expression(P(Y <= y)), x= 'y', color='')
  + theme_classic(base_size = 12)
  + facet_wrap(~lambda, labeller = label_both, scales = 'free_x')
)