FINALCODING

Question 1

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

set.seed(807)

N <- 10000

(exp_scale_sims <- parameters(~theta, ~lambda, 
                              c(0.4, 0.6, 0.8), 
                              c(0.5, 1.0, 1.5))
                   %>% add_trials(N)
                   %>% mutate(Y = map_dbl(lambda, \(lam) rexp(1, rate = lam)))
                   %>% mutate(U = theta * Y)
                   %>% mutate(f_U = dexp(U, rate = lambda/theta))
) %>% head

# A tibble: 6 × 6
  theta lambda .trial     Y      U    f_U
  <dbl>  <dbl>  <dbl> <dbl>  <dbl>  <dbl>
1   0.4    0.5      1 5.58  2.23   0.0770
2   0.4    0.5      2 4.15  1.66   0.157 
3   0.4    0.5      3 0.301 0.120  1.08  
4   0.4    0.5      4 0.168 0.0673 1.15  
5   0.4    0.5      5 2.14  0.856  0.429 
6   0.4    0.5      6 0.355 0.142  1.05

ggplot(data = exp_scale_sims) +
  geom_histogram(aes(x = U, y = after_stat(density)), 
                 fill = 'goldenrod', bins = 40) +
  geom_line(aes(x = U, y = f_U), 
            col = 'cornflowerblue', linewidth = 1) +
  facet_grid(theta ~ lambda, labeller = label_both, scales = 'free') +
  labs(x = 'u', y = expression(f[U](u))) +
  theme_classic(base_size = 14)

Question 2

N <- 10000

(unif_sum_sims <- parameters(~theta, c(2, 4, 6, 8))
                  %>% add_trials(N)
                  %>% mutate(X = map_dbl(theta, \(t) runif(1, min = 0, max = t)),
                             Y = map_dbl(theta, \(t) runif(1, min = 0, max = t)))
                  %>% mutate(U = X + Y)
                  %>% mutate(Fhat = cume_dist(U), .by = theta,
                             F = punif(U, min = 0, max = 2*theta))
) %>% head

# A tibble: 6 × 7
  theta .trial     X      Y     U   Fhat     F
  <dbl>  <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl>
1     2      1 1.89  1.37   3.26  0.935  0.816
2     2      2 0.561 0.335  0.895 0.0987 0.224
3     2      3 1.52  1.90   3.42  0.963  0.855
4     2      4 1.34  1.23   2.57  0.743  0.642
5     2      5 1.05  0.438  1.49  0.276  0.373
6     2      6 0.655 0.0936 0.749 0.0687 0.187

# CDF overlay plot
ggplot(data = unif_sum_sims) +
  geom_step(aes(x = U, y = Fhat, col = 'Empirical')) +
  geom_line(aes(x = U, y = F, col = 'Analytic')) +
  labs(y = 'CDF', x = 'u', color = '') +
  facet_wrap(~theta, labeller = label_both) +
  theme_classic(base_size = 14) +
  ggtitle('CDF Comparison')

Curve on CDF does not mean Uniform

# P-P plot
ggplot(data = unif_sum_sims) +
  geom_point(aes(x = F, y = Fhat), size = 0.5, alpha = 0.5) +
  geom_abline(aes(intercept = 0, slope = 1), color = 'red') +
  labs(x = 'F(u)', y = expression(hat(F)(u))) +
  facet_wrap(~theta, labeller = label_both) +
  theme_classic(base_size = 14) +
  ggtitle('P-P Plot')

s-curve deems that U does not follow Uniform distribution