PS 7.2

7

Verify that Y_((k) ), the k^th order statistic from an i.i.d. sample of UNIF(0,1) random variables, follows a BETA(k,n-k+1) density by simulating Y_((k) ) over a grid of n∈{4,8,12,16}.  Obtain 10,000 replications per n of Y_((1) ),Y_((3) ), and Y_((n) ).  Plot the simulated densities superimposed with their analytic counterparts.  Publish your results to Rpubs and submit a link to your published simulation study.  (Hint: one way to find the general k^th order statistic is to write a helper function that takes a sample x as input, sorts the sample, and returns the k^th value.  Then use this function in map_dbl.)
library(purrrfect)

Attaching package: 'purrrfect'
The following objects are masked from 'package:base':

    replicate, tabulate
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
(uniform_order_stat_sim <-
  parameters(~n,
             c(4, 8, 12, 16)) %>%
  add_trials(10000) %>%
  mutate(y_sample = pmap(list(n, .trial),\(n, t) runif(n, min = 0, max = 1))) %>%
  mutate(
    y1 = map_dbl(y_sample, ~ sort(.x)[1]),
    y3 = map_dbl(y_sample, ~ sort(.x)[3]),
    yn = map2_dbl(y_sample, n, ~ sort(.x)[.y])
  ) %>%
  mutate(
    f_1 = dbeta(y1, 1, n),
    f_3 = dbeta(y3, 3, n - 2),
    f_n = dbeta(yn, n, 1)
  )
)
# A tibble: 40,000 × 9
       n .trial y_sample      y1    y3    yn   f_1   f_3   f_n
   <dbl>  <dbl> <list>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1     4      1 <dbl [4]> 0.437  0.817 0.835 0.715 1.46  2.33 
 2     4      2 <dbl [4]> 0.0743 0.133 0.796 3.17  0.185 2.02 
 3     4      3 <dbl [4]> 0.0986 0.671 0.855 2.93  1.78  2.50 
 4     4      4 <dbl [4]> 0.451  0.880 0.996 0.662 1.12  3.96 
 5     4      5 <dbl [4]> 0.304  0.834 0.890 1.35  1.39  2.82 
 6     4      6 <dbl [4]> 0.0964 0.227 0.472 2.95  0.480 0.420
 7     4      7 <dbl [4]> 0.0676 0.498 0.859 3.24  1.49  2.53 
 8     4      8 <dbl [4]> 0.195  0.786 0.953 2.09  1.58  3.46 
 9     4      9 <dbl [4]> 0.0481 0.485 0.873 3.45  1.45  2.67 
10     4     10 <dbl [4]> 0.170  0.637 0.966 2.28  1.77  3.60 
# ℹ 39,990 more rows
uniform_order_stat_sim %>% head()
# A tibble: 6 × 9
      n .trial y_sample      y1    y3    yn   f_1   f_3   f_n
  <dbl>  <dbl> <list>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     4      1 <dbl [4]> 0.437  0.817 0.835 0.715 1.46  2.33 
2     4      2 <dbl [4]> 0.0743 0.133 0.796 3.17  0.185 2.02 
3     4      3 <dbl [4]> 0.0986 0.671 0.855 2.93  1.78  2.50 
4     4      4 <dbl [4]> 0.451  0.880 0.996 0.662 1.12  3.96 
5     4      5 <dbl [4]> 0.304  0.834 0.890 1.35  1.39  2.82 
6     4      6 <dbl [4]> 0.0964 0.227 0.472 2.95  0.480 0.420
library(patchwork)

p1 <-
  ggplot(data = uniform_order_stat_sim,
         aes(x = y1)) +
    geom_histogram(aes(y = after_stat(density)),
                   fill = "goldenrod",
                   binwidth = 0.02) +
    geom_line(aes(y = f_1),
              color = "cornflowerblue",
              linewidth = 1) +
    facet_grid(n ~ ., labeller = label_both, scales = "free_y") +
    theme_classic(base_size = 16) +
    labs(title = "Y(1)", x = "Value", y = "Density")

p3 <-
  ggplot(data = uniform_order_stat_sim,
         aes(x = y3)) +
    geom_histogram(aes(y = after_stat(density)),
                   fill = "goldenrod",
                   binwidth = 0.02) +
    geom_line(aes(y = f_3),
              color = "cornflowerblue",
              linewidth = 1) +
    facet_grid(n ~ ., labeller = label_both, scales = "free_y") +
    theme_classic(base_size = 16) +
    labs(title = "Y(3)", x = "Value", y = "Density")

pn <-
  ggplot(data = uniform_order_stat_sim,
         aes(x = yn)) +
    geom_histogram(aes(y = after_stat(density)),
                   fill = "goldenrod",
                   binwidth = 0.02) +
    geom_line(aes(y = f_n),
              color = "cornflowerblue",
              linewidth = 1) +
    facet_grid(n ~ ., labeller = label_both, scales = "free_y") +
    theme_classic(base_size = 16) +
    labs(title = "Y(n)", x = "Value", y = "Density")

(p1 | p3 | pn)