\(Y_{(1)}\): the sample minimum
\(Y_{(n)}\): the sample maximum
The sample median: \(m = \left\{ \begin{array}{ll} Y_{\left(\frac{n+1}{2}\right)} & \mbox{if n is odd}\\ \frac{Y_{\left(\frac{n}{2}\right)} + Y_{\left(\frac{n+1}{2}\right)}}{2}& \mbox{if n is even} \end{array} \right.\)
The sample range: \(R = Y_{(n)}-Y_{(1)}\)
The \(k^{th}\) percentile: \(Y_{(\lfloor nk/100 \rfloor)}\)
The IQR: \(Y_{(\lfloor 75n/100 \rfloor)} - Y_{(\lfloor 25n/100 \rfloor)}\)
\[f_{Y_1,...,Y_n}(y_1,...,y_n) = \left\{\begin{array}{ll} \prod_{i=1}^n f_Y(y_i)& y_i \in \mbox{Support}\\ 0& otherwise \end{array} \right.\]
Since there are \(n!\) ways to order the \(Y_i\), so the joint distribution of the order statistics becomes:
\[f_{Y_{(1)},...,Y_{(n)}}(y_1,...,y_n) = \left\{\begin{array}{ll} n!\prod_{i=1}^n f_Y(y_i)& y_i \in Support;\ y_1<y_2<...<y_n\\ 0& otherwise \end{array} \right.\]
Suppose \(Y_1,Y_2,Y_3\) are drawn i.i.d. from a population with pdf:
\[f_Y(y) = \left\{\begin{array}{ll} 2y& 0\leq y \leq 1\\ 0& otherwise \end{array} \right.\]
\[f_{Y_1,Y_2, Y_3}(y_1,y_2,y_3) = \prod_{i=1}^3 f_Y(y_i) = \prod_{i=1}^3 2y_i\]
\[=\begin{cases}8y_1y_2y_3& 0<y_1<1, 0<y_2<1, 0<y_3<1 \\ 0 & otherwise \end{cases}\]
\[f_{Y_{(1)},Y_{(2)},Y_{(3)}}(y_1,y_2,y_3) =3! \prod_{i=1}^3 2y_i\]
\[=\begin{cases}48y_1y_2y_3& 0<y_1<y_2 < y_3 < 1 \\ 0 & otherwise \end{cases}\]
\[F_{(n)}(y) = P(Y_{(n)} \le y) = P(\mbox{maximum of sample is} \le y)\]
\[= P(\mbox{everything is } \le y ) = P(Y_1 \le y, Y_2 \le y, ... , Y_n \le y)\]
\[= P(Y_1 \le y)P(Y_2 \le y)...P(Y_n \le y) \mbox{ (by independence)}\]
\[= F_Y(y)^n \mbox{ (because data are identically distributed)}\]
\[\Rightarrow f_{(n)}(y) = \frac{d}{dy} F_{(n)}(y) = \frac{d}{dy} F_Y(y)^n = nF_Y(y)^{n-1} f_Y(y), y \in \mbox{original support}\]
\[F_{(1)}(y) = P(Y_{(1)} \le y) = 1-P(Y_{(1)} > y) = 1-P(\mbox{minimum of sample is} > y)\]
\[= 1-P(\mbox{everything is } > y ) = 1-P(Y_1 > y, Y_2 > y, ... , Y_n > y)\]
\[ 1- P(Y_1 > y)P(Y_2 > y)...P(Y_n > y) \mbox{ (by independence)}\]
\[= 1-(1-F_Y(y))^n \mbox{ (because data are identically distributed)}\]
\[\small \Rightarrow f_{(1)}(y) = \frac{d}{dy} F_{(1)}(y) = \frac{d}{dy} [1-(1-F_Y(y))^n] = n(1-F_Y(y))^{n-1} f_Y(y), y \in \mbox{original support}\]
To heurisically derive the pdf of the general \(j^{th}\) order statistic, recall the multinomial distribution
Context: throw \(n\) balls into one of \(k\) mutually exclusive buckets
Let \(p_i\) represent probability any ball lands in bucket \(i\)
Let \(Y_i\) represent number of balls in \(i^{th}\) bucket
Then:
\[P(Y_1 = y_1, Y_2 = y_2, ... , Y_k = y_k) = \frac{n!}{y_1!y_2!... y_k!}p_1^{y_1}p_2^{y_2}...p_k^{y_k}\]
\[\small f_{(j)}(y) ``=" P(j^{th}\mbox{ order statistic occurs at }y)\]
\[\small ``=" P(j-1\ data\ points\ in\ <1 \ bucket, 1\ data\ point\ in\ =y\ bucket, n-j\ data\ points\ in\ >y\ bucket)\]
\[\small =\frac{n!}{(j-1)!1!(n-j)!}P(in<y\ bucket)^{j-1}P(in=y\ bucket)^{1}P(in>y\ bucket)^{n-j}\]
\[\small =\frac{n!}{(j-1)!(n-j)!}F_Y(y)^{j-1}f_Y(y)(1-F_Y(y))^{n-j}, y \in original\ support\]
\[f_{(j)}(y) = \frac{n!}{(j-1)!(n-j)!}F_Y(y)^{j-1}f_Y(y)(1-F_Y(y))^{n-j}\]
\[f_{(1)}(y) = \frac{n!}{(1-1)!(n-1)!}F_Y(y)^{1-1}f_Y(y)(1-F_Y(y))^{n-1} = n(1-F_Y(y))^{n-1} f_Y(y)\]
\[f_{(n)}(y) = \frac{n!}{(n-1)!(n-n)!}F_Y(y)^{n-1}f_Y(y)(1-F_Y(y))^{n-n} = nF_Y(y)^{n-1} f_Y(y)\]
\[F_Y(y) = 1-e^{-\lambda y}\]
\[f_Y(y) = \lambda e^{-\lambda y }\]
\[f_{(1)}(y) = n(1-F_Y(y))^{n-1}f_Y(y)\]
\[= n (e^{-\lambda y})^{n-1} \lambda e^{-\lambda y} = n\lambda e^{-n\lambda y}, y>0\]
\[\Rightarrow Y_{(1)} \sim EXP(n\lambda)!\]
\[F_Y(y) = 1-e^{-\lambda y}\]
\[f_Y(y) = \lambda e^{-\lambda y }\]
\[f_{(n)}(y) = nF_Y(y)^{n-1}f_Y(y)\]
\[= n (1-e^{-\lambda y})^{n-1} \lambda e^{-\lambda y} , y>0\]
\[F_Y(y) = 1-e^{-\lambda y}\]
\[f_Y(y) = \lambda e^{-\lambda y }\]
\[f_{(j)}(y) = \frac{n!}{\left(\frac{n-1}{2}\right)!\left(\frac{n-1}{2}\right)!}(1-e^{-\lambda y})^{\left(\frac{n-1}{2}\right)}\lambda e^{-\lambda y}(e^{-\lambda y})^{\left(\frac{n-1}{2}\right)}, y > 0\]
library(purrrfect)
library(tidyverse)
(exponential_order_stat_sim <- parameters(~n, ~lambda,
c(2, 5, 10), c(0.5, 1.0, 1.5)
)
%>% add_trials(10000)
%>% mutate(y_sample = pmap(list(n, lambda), .f = \(n, l) rexp(n, rate = l)))
%>% mutate(y1 = map_dbl(y_sample, .f = min),
yn = map_dbl(y_sample, .f = max))
%>% mutate(f_1 = dexp(y1, rate = n*lambda),
f_n = n*(1-exp(-lambda*yn))^(n-1) * lambda*exp(-lambda*yn)
)
) %>% head()# A tibble: 6 × 8
n lambda .trial y_sample y1 yn f_1 f_n
<dbl> <dbl> <dbl> <list> <dbl> <dbl> <dbl> <dbl>
1 2 0.5 1 <dbl [2]> 0.132 2.04 0.876 0.230
2 2 0.5 2 <dbl [2]> 0.458 0.615 0.632 0.195
3 2 0.5 3 <dbl [2]> 5.40 8.82 0.00451 0.0120
4 2 0.5 4 <dbl [2]> 0.227 1.97 0.797 0.234
5 2 0.5 5 <dbl [2]> 0.688 1.03 0.502 0.241
6 2 0.5 6 <dbl [2]> 0.913 2.38 0.401 0.212
ggplot(data = exponential_order_stat_sim, aes(x = y1))+
geom_histogram(aes(y = after_stat(density)),
fill = 'goldenrod',
center = 0.06, binwidth = .12) +
geom_line(aes(y = f_1), col='cornflowerblue') +
facet_grid(n~lambda, labeller = label_both, scales='free_y')+
theme_classic(base_size = 16) +
labs(title='Simulated and analytic densities for min of exponentials')ggplot(data = exponential_order_stat_sim, aes(x = yn))+
geom_histogram(aes(y = after_stat(density)),
fill = 'goldenrod',
center = 0.06, binwidth = .12) +
geom_line(aes(y = f_n), col='cornflowerblue') +
facet_grid(n~lambda, labeller = label_both, scales='free_y')+
theme_classic(base_size = 16) +
labs(title='Simulated and analytic densities for max of exponentials' )