7.2

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.4.3

Warning: package 'tidyr' was built under R version 4.4.3

Warning: package 'readr' was built under R version 4.4.3

Warning: package 'purrr' was built under R version 4.4.3

Warning: package 'dplyr' was built under R version 4.4.3

Warning: package 'stringr' was built under R version 4.4.3

Warning: package 'forcats' was built under R version 4.4.3

Warning: package 'lubridate' was built under R version 4.4.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

order_stat <- function(x, k){
  sort(x)[k]
}

(order_stat_sim <- parameters( ~n, 
                              c(4,8,12,16)
        )
%>%  add_trials(10000)
%>% mutate(sample = map(n, ~ runif(.x)))
%>% mutate( y1 = map_dbl(sample, order_stat, k=1))
%>% mutate ( y3 = map_dbl(sample, order_stat, k = 3))
%>% mutate( yn = map_dbl(sample, ~ order_stat(.x, length(.x))))
)

# A tibble: 40,000 × 6
       n .trial sample        y1    y3    yn
   <dbl>  <dbl> <list>     <dbl> <dbl> <dbl>
 1     4      1 <dbl [4]> 0.227  0.343 0.740
 2     4      2 <dbl [4]> 0.0909 0.353 0.874
 3     4      3 <dbl [4]> 0.0450 0.682 0.923
 4     4      4 <dbl [4]> 0.273  0.567 0.609
 5     4      5 <dbl [4]> 0.0822 0.605 0.923
 6     4      6 <dbl [4]> 0.0241 0.657 0.838
 7     4      7 <dbl [4]> 0.139  0.398 0.449
 8     4      8 <dbl [4]> 0.126  0.741 0.780
 9     4      9 <dbl [4]> 0.0294 0.255 0.309
10     4     10 <dbl [4]> 0.391  0.712 0.922
# ℹ 39,990 more rows

(order_stat_plot <-
  order_stat_sim %>%
  pivot_longer(
    cols = c(y1, y3, yn),
    names_to = "stat",
    values_to = "y"
  ) 
 %>% mutate(k = if_else(stat == "y1", 1L, NA_integer_)) 
 %>% mutate(k = if_else(stat == "y3", 3L, k)) 
 %>% mutate(k = if_else(stat == "yn", n, k)) 
 %>% mutate(f_k = dbeta(y, shape1 = k, shape2 = n - k + 1))  
)

# A tibble: 120,000 × 7
       n .trial sample    stat       y     k   f_k
   <dbl>  <dbl> <list>    <chr>  <dbl> <dbl> <dbl>
 1     4      1 <dbl [4]> y1    0.227      1 1.84 
 2     4      1 <dbl [4]> y3    0.343      3 0.929
 3     4      1 <dbl [4]> yn    0.740      4 1.62 
 4     4      2 <dbl [4]> y1    0.0909     1 3.00 
 5     4      2 <dbl [4]> y3    0.353      3 0.967
 6     4      2 <dbl [4]> yn    0.874      4 2.67 
 7     4      3 <dbl [4]> y1    0.0450     1 3.48 
 8     4      3 <dbl [4]> y3    0.682      3 1.78 
 9     4      3 <dbl [4]> yn    0.923      4 3.14 
10     4      4 <dbl [4]> y1    0.273      1 1.54 
# ℹ 119,990 more rows

ggplot(data = order_stat_plot, aes(x = y))+
  geom_histogram(aes(y = after_stat(density)),
                 fill = "goldenrod",
color = 'black', 
bins = 40)+
  geom_line(aes(y = f_k),
            col = "cornflowerblue",
            linewidth = 1)+
  facet_grid(stat ~ n, labeller = label_both, scales = "free_y")+
  theme_classic(base_size = 16)+
  labs(
    title= "simulated and Anlytic Densities",
    x = expression(Y[k]),
    y = "Density"
  )