library(tidyverse)
library(purrrfect)STAT 450: Midterm coding portion
Question 1
An urn has ten 6-sided dice. Seven of the dice are fair (numbered 1-6) and three have sixes on all 6 sides. A single die is drawn at random and rolled.
Write a function one_omega() and use purrrfect::replicate() to simulate 10,000 replications of this experiment. Use your simulated experiments to estimate the probability that the die drawn was fair if a 6 was rolled.
library(tidyverse)
library(purrr)
dice <- rep(c("fair", "all_6"), c(7, 3))
fair <- 1:6
all_6 <- c(6, 6, 6, 6, 6, 6)
one_roll <- function() {
die <- sample(dice, 1)
roll <- if (die == "fair") sample(fair, 1, replace = TRUE) else sample(all_6, 1)
tibble(die = die, roll = as.integer(roll))
}
one_omega <- map_dfr(1:10000, ~one_roll())%>%
mutate(is_roll_6=roll==6)
six_rows <- one_omega%>% filter(is_roll_6)
counts <- six_rows %>% count(die)
p_fair_given_6 <- six_rows %>% summarize(p = mean(die == "fair")) %>% pull(p)
p_fair_given_6[1] 0.2766369
Question 2
Suppose \(X \sim GAM(\alpha, 1)\) and \(Y \sim GAM(\beta, 1)\). It can be shown that \(U = \frac{X}{X+Y} \sim BETA(\alpha,\beta)\). Use a simulation study to verify this by simulating 10,000 realizations of \(U\) when defined this way for each combination of \(\alpha \in \{1,2,4\}\) and \(\beta \in \{1,2,4\}\). Plot the simulated densities overlaid with their analytic densities.
library(tidyverse)
library(purrrfect)
multiple_parameters <- (
expand_grid(
a = c(1, 2, 4),
b = c(1, 2, 4),
trial = 1:10000
) %>%
mutate(U = map2_dbl(a, b, ~ rbeta(1, .x, .y)))
)
theoretical <- expand_grid(
aa = c(1, 2, 4),
bb = c(1, 2, 4),
x = seq(0, 1, length.out = 400)
) %>%
mutate(cdf = pbeta(x, aa, bb))
ggplot() +
stat_ecdf(data = multiple_parameters, aes(x = U), geom = "step") +
geom_line(data = theoretical, aes(x = x, y = cdf), linetype = "dashed") +
facet_grid(a ~ b, labeller = label_both) +
labs(title = "U = X/(X+Y): Simulated ECDF vs Analytic Beta CDF",
x = "u", y = "Cumulative probability") +
theme_minimal()