# A tibble: 4 × 1
lambda
<dbl>
1 0.25
2 0.5
3 0.75
4 1
parameters()
functionpurrrfect
package has a parameters()
function we can use to set up a parameter grid over which to simulate data.parameters()
to set up parameter gridadd_trials(N)
to add N
rows per parameter combomutate()
to add columns of simulated data; may require use of map_*
functions%>%
pipes!N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda,
seq(0.25,1,by=0.25)
)
%>% add_trials(N)
%>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
) %>% head
# A tibble: 6 × 3
lambda .trial Y
<dbl> <dbl> <dbl>
1 0.25 1 1.53
2 0.25 2 13.8
3 0.25 3 6.57
4 0.25 4 3.17
5 0.25 5 1.87
6 0.25 6 2.86
N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda,
seq(0.25,1,by=0.25)
)
%>% add_trials(N)
%>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
%>% mutate(X = floor(Y))
) %>% head
# A tibble: 6 × 4
lambda .trial Y X
<dbl> <dbl> <dbl> <dbl>
1 0.25 1 1.53 1
2 0.25 2 13.8 13
3 0.25 3 6.57 6
4 0.25 4 3.17 3
5 0.25 5 1.87 1
6 0.25 6 2.86 2
N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda,
seq(0.25,1,by=0.25)
)
%>% add_trials(N)
%>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
%>% mutate(X = floor(Y))
%>% mutate(Fhat_X = cume_dist(X), .by = lambda)
) %>% head
# A tibble: 6 × 5
lambda .trial Y X Fhat_X
<dbl> <dbl> <dbl> <dbl> <dbl>
1 0.25 1 1.53 1 0.386
2 0.25 2 13.8 13 0.971
3 0.25 3 6.57 6 0.820
4 0.25 4 3.17 3 0.622
5 0.25 5 1.87 1 0.386
6 0.25 6 2.86 2 0.517
N <- 10000
set.seed(227)
(simstudy <- parameters(~lambda,
seq(0.25,1,by=0.25)
)
%>% add_trials(N)
%>% mutate(Y = map_dbl(lambda, \(l) rexp(1, rate = l)))
%>% mutate(X = floor(Y))
%>% mutate(Fhat_X = cume_dist(X), .by = lambda)
%>% mutate(F_X = pgeom(X, prob = 1-exp(-lambda)))
) %>% head
# A tibble: 6 × 6
lambda .trial Y X Fhat_X F_X
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.25 1 1.53 1 0.386 0.393
2 0.25 2 13.8 13 0.971 0.970
3 0.25 3 6.57 6 0.820 0.826
4 0.25 4 3.17 3 0.622 0.632
5 0.25 5 1.87 1 0.386 0.393
6 0.25 6 2.86 2 0.517 0.528
facet_wrap()
facet_wrap()
to produce small multiples of our overlaid CDF plots or p-p plots:pmap()
and pmap_*()
functions
pmap(.l, .f)
: input is a list of vectors, output is a listpmap_dbl()
, pmap_int()
, pmap_chr()
, etc.: input is a list of vectors, output is a dbl/int/chrN <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
)
%>% add_trials(N)
%>% mutate(X = map_int(alpha, \(a) rbinom(1, size = 50, prob = a)))
) %>% head
# A tibble: 6 × 4
alpha p .trial X
<dbl> <dbl> <dbl> <int>
1 0.2 0.2 1 11
2 0.2 0.2 2 6
3 0.2 0.2 3 8
4 0.2 0.2 4 9
5 0.2 0.2 5 9
6 0.2 0.2 6 8
N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
)
%>% add_trials(N)
%>% mutate(X = map_int(alpha, \(a) rbinom(1, size = 50, prob = a)))
%>% mutate(Y = pmap_int(list(X,p), \(var1,var2) rbinom(1, size = var1, prob = var2)))
) %>% head
pmap
to map over two columns.
# A tibble: 6 × 5
alpha p .trial X Y
<dbl> <dbl> <dbl> <int> <int>
1 0.2 0.2 1 11 1
2 0.2 0.2 2 6 2
3 0.2 0.2 3 8 2
4 0.2 0.2 4 9 0
5 0.2 0.2 5 9 1
6 0.2 0.2 6 8 0
N <- 10000
set.seed(227)
(thinned_binom <- parameters(~ alpha, ~ p,
c(0.2, 0.5, 0.8), c(0.2, 0.5, 0.8)
)
%>% add_trials(N)
%>% mutate(X = map_int(alpha, \(a) rbinom(1, size = 50, prob = a)))
%>% mutate(Y = pmap_int(list(X,p), \(var1,var2) rbinom(1, size = var1, prob = var2)))
%>% mutate(Fhat_Y = cume_dist(Y), .by = c(alpha, p))
%>% mutate(F_Y = pbinom(Y, size=50, prob=p*alpha))
) %>% head
pmap
to map over two columns.
(alpha, p)
combo.
# A tibble: 6 × 7
alpha p .trial X Y Fhat_Y F_Y
<dbl> <dbl> <dbl> <int> <int> <dbl> <dbl>
1 0.2 0.2 1 11 1 0.407 0.400
2 0.2 0.2 2 6 2 0.677 0.677
3 0.2 0.2 3 8 2 0.677 0.677
4 0.2 0.2 4 9 0 0.136 0.130
5 0.2 0.2 5 9 1 0.407 0.400
6 0.2 0.2 6 8 0 0.136 0.130
facet_grid()
facet_grid()
to produce our plots:N <- 10000
set.seed(227)
(exp_from_U <- parameters(~ lambda,
seq(0.1, 0.9, by = 0.1)
)
%>% add_trials(N)
%>% mutate(U = map_dbl(.trial, \(t) runif(1, min=0, max=1)))
%>% mutate(Y = -log(1-U)/lambda)
%>% mutate(f_Y = lambda*exp(-lambda*Y))
%>% mutate(Fhat_Y = cume_dist(Y), .by = lambda)
%>% mutate(F_Y = pexp(Y, lambda))
) %>% head
dexp()
, but this is a more general approach
# A tibble: 6 × 7
lambda .trial U Y f_Y Fhat_Y F_Y
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.1 1 0.692 11.8 0.0308 0.691 0.692
2 0.1 2 0.0527 0.542 0.0947 0.0506 0.0527
3 0.1 3 0.224 2.54 0.0776 0.223 0.224
4 0.1 4 0.370 4.63 0.0630 0.377 0.370
5 0.1 5 0.382 4.82 0.0618 0.388 0.382
6 0.1 6 0.275 3.21 0.0725 0.276 0.275
facet_wrap()
once again(ggplot(aes(x=Y),data=exp_from_U)
+ geom_histogram(aes(y =after_stat(density)),
fill = 'goldenrod',
binwidth = 0.1, center = 0.05)
+ geom_line(aes(y = f_Y), col = 'cornflowerblue', linewidth=1)
+ labs(y='Exponential density', x= 'y')
+ theme_classic(base_size = 12)
+ facet_wrap(~lambda, labeller = label_both)
)
(ggplot(aes(x=Y),data=exp_from_U)
+ geom_histogram(aes(y =after_stat(density)),
fill = 'goldenrod',
binwidth = 0.1, center = 0.05)
+ geom_line(aes(y = f_Y), col = 'cornflowerblue', linewidth=1)
+ labs(y='Exponential density', x= 'y')
+ theme_classic(base_size = 12)
+ facet_wrap(~lambda, labeller = label_both, scales = 'free_x')
)