Midterm

Question 1

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

N <- 10000

one_omega <- \(){
  # Choose_dice 1-7 = fair dice
  # Choose_dice 8-10 = all 6s
  choose_dice <- sample(c(1:10), 1)
  roll <- case_when(choose_dice <= 7 ~ sample(1:6, 1),
                    choose_dice > 7 ~ 6)
  outcome <- c(choose_dice, roll)
}
head(one_omega())

[1] 7 6

(many_rolls <- replicate(N, one_omega(), .as = dice_roll)
  %>% mutate(dice = map_int(dice_roll, pluck(1)))
  %>% mutate(roll = map_int(dice_roll, pluck(2)))
  )

# A tibble: 10,000 × 4
   .trial dice_roll  dice  roll
    <dbl> <list>    <int> <int>
 1      1 <dbl [2]>     7     2
 2      2 <dbl [2]>     5     5
 3      3 <dbl [2]>     8     6
 4      4 <dbl [2]>     9     6
 5      5 <dbl [2]>    10     6
 6      6 <dbl [2]>     2     1
 7      7 <dbl [2]>     2     2
 8      8 <dbl [2]>     9     6
 9      9 <dbl [2]>    10     6
10     10 <dbl [2]>     3     6
# ℹ 9,990 more rows

(many_rolls
  %>% filter(roll == 6)
  %>% summarize('P(Fair | Rolled a 6)' = mean(dice <= 7))
)

# A tibble: 1 × 1
  `P(Fair | Rolled a 6)`
                   <dbl>
1                  0.291

Question 2

library(tidyverse)
library(purrrfect)

(simstudy <- parameters(~alpha, ~beta, 
                        c(1,2,4), c(1,2,4))
  %>% add_trials(10000)
  %>% mutate(X = map_dbl(alpha, \(a) rgamma(1, a)))
  %>% mutate(Y = map_dbl(beta, \(b) rgamma(1, b)))
  %>% mutate(U = X/(X+Y))
  %>% mutate(f_U = dbeta(U, alpha, beta))
  %>% mutate(Fhat_U = cume_dist(U), .by = c(alpha, beta))
  %>% mutate(F_U = pbeta(U, alpha, beta))
 )

# A tibble: 90,000 × 9
   alpha  beta .trial     X     Y     U   f_U Fhat_U   F_U
   <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
 1     1     1      1 2.15  1.71  0.557     1  0.569 0.557
 2     1     1      2 1.37  1.13  0.549     1  0.558 0.549
 3     1     1      3 0.553 0.390 0.587     1  0.595 0.587
 4     1     1      4 0.184 0.598 0.235     1  0.233 0.235
 5     1     1      5 0.384 2.04  0.158     1  0.156 0.158
 6     1     1      6 0.118 0.803 0.128     1  0.128 0.128
 7     1     1      7 1.62  3.24  0.333     1  0.335 0.333
 8     1     1      8 0.414 1.35  0.235     1  0.233 0.235
 9     1     1      9 0.794 1.28  0.384     1  0.387 0.384
10     1     1     10 0.426 0.386 0.525     1  0.533 0.525
# ℹ 89,990 more rows

(ggplot(aes(x = U), data = simstudy)
 +geom_histogram(aes(y = after_stat(density)),
                 fill = 'goldenrod',
                 binwidth = 0.01, center = 0.005)
 +geom_line(aes(y = f_U), col = 'cornflowerblue', linewidth =1)
 + labs(y = 'Density', x = 'U')
 + theme_classic()
 +facet_grid(alpha~beta, labeller = label_both)
 )

(ggplot(aes(x = U), data = simstudy)
 + geom_step(aes(y = Fhat_U, color = 'Simulated CDF'))
 + geom_step(aes(y = F_U, color = 'Analytic CDF'))
 + labs(y = expression(P(U <= u)), x = 'u', color = '')
 + theme_classic()
 + facet_grid(alpha~beta, labeller = label_both)
 )