\[P(Y_1 = y_1, Y_2 = y_2,...,Y_n = y_n| U=u) \mbox{ is } \theta\mbox{-free}\]
\[f(y_1,...,y_n| U=u) \mbox{ is } \theta\mbox{-free}\]
\[P(Y_1 = y_1, Y_2 = y_2,...,Y_n = y_n| U=u) = \frac{P(Y_1 = y_1, Y_2 = y_2,...,Y_n = y_n \cap U=u)}{P(U=u)}\] \[= \frac{P(Y_1 = y_1, Y_2 = y_2,...,Y_n = y_n )}{P(U=u)}=\frac{\prod_{i=1}^nP(Y_i=y_i)}{P(U=u)}\]
Proof:
\[\scriptsize \frac{\prod_{i=1}^nP(Y_i=y_i)}{P(U=u)} = \frac{\prod_{i=1}^n p^{y_i}(1-p)^{1-y_i}}{{n\choose u}p^u (1-p)^{n-u}} = \frac{ p^{\sum_{i=1}^n y_i}(1-p)^{n-\sum_{i=1}^n y_i}}{{n\choose u}p^u (1-p)^{n-u}}\]
\[\scriptsize = \frac{ p^{ u}(1-p)^{n-u}}{{n\choose u}p^u (1-p)^{n-u}} = \frac{1}{{n\choose u}} \Leftarrow p\mbox{-free}\]
\[\scriptsize \therefore U = \sum_{i=1}^n Y_i \mbox{ is sufficient for } p.\]
\[\scriptsize \frac{\prod_{i=1}^nf_Y(y_i)}{f_U(u)} = \frac{\prod_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}} e^{-\frac{(y_i-\mu)^2}{2\sigma^2}}}{\frac{1}{\sqrt{2\pi n\sigma^2}} e^{-\frac{(u-n\mu)^2}{2n\sigma^2}}} = \frac{\left(\frac{1}{\sqrt{2\pi\sigma^2}}\right)^n e^{-\sum_{i=1}^n\frac{(y_i-\mu)^2}{2\sigma^2}}}{\frac{1}{\sqrt{2\pi n\sigma^2}} e^{-\frac{(u-n\mu)^2}{2n\sigma^2}}}\]
\[\scriptsize = \frac{\left(\frac{1}{\sqrt{2\pi\sigma^2}}\right)^n e^{-\frac{\sum_i y_i^2-2\mu u +n\mu^2 }{2\sigma^2}}}{\frac{1}{\sqrt{2\pi n\sigma^2}} e^{-\frac{u^2 -2un \mu + n^2\mu^2}{2n\sigma^2}}} =\sqrt{n} \left(\frac{1}{\sqrt{2\pi\sigma^2}}\right)^{n-1} e^{-\frac{\sum_i y_i^2 }{2\sigma^2}+ \frac{u^2}{2n\sigma^2}}\Leftarrow \mu\mbox{-free only!}\]
so \(U=\sum_i Y_i\) is sufficient for \(\mu\), but not for \(\sigma^2\)!
Let \(f(y_1,...,y_n;\vec{\theta})\) represent the joint probability density function of a random sample drawn from a population governed by parameter vector \(\vec{\theta}\). Then a statistic \(\vec{U}\) (also possibly vector valued) is sufficient for \(\vec{\theta}\) if and only if there exist functions \(g(\vec{u};\vec\theta)\) and \(h(y_1,...,y_n)\) such that:
\[f(y_1,...,y_n;\vec{\theta}) = g(\vec{u};\vec{\theta}) h(y_1,...,y_n).\]
\[p(y_1,...,y_n;\vec{\theta}) = g(\vec{u};\vec{\theta}) h(y_1,...,y_n).\]
\[ p(y_1,y_2,...,y_n;p) =\prod_{i=1}^n p^{y_i}(1-p)^{1-y_i} = p^{\sum_{i=1}^n y_i}(1-p)^{n-\sum_{i=1}^n y_i}= \underbrace{p^u(1-p)^{n-u}}_{g(u;p)} \times \underbrace{1}_{h(y_1,...,y_n)} \]
\[\therefore U = \sum_{i=1}^n Y_i \mbox{ is sufficient for }p\]
\[ \prod_{i=1}^nf_Y(y_i;\mu,\sigma^2) = \prod_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}} e^{-\frac{(y_i-\mu)^2}{2\sigma^2}} =\left(\frac{1}{\sqrt{2\pi\sigma^2}}\right)^n e^{-\sum_{i=1}^n\frac{(y_i-\mu)^2}{2\sigma^2}} \]
\[ = \underbrace{(\sigma^2)^{-n/2} e^{-\frac{\sum_i y_i^2-2\mu\sum_i y_i +n\mu^2 }{2\sigma^2}}}_{g\left(u = (\sum_i y_i,\sum_i y_i^2);\theta=(\mu,\sigma^2)\right)} \underbrace{(2\pi)^{-n/2}}_{h(y_1,...,y_n)}\] \[\therefore U = \left(\sum_i Y_i,\sum_i Y_i^2\right) \mbox{ is jointly sufficient for }(\mu,\sigma^2)\]
\[ \prod_{i=1}^nf_Y(y_i;\mu)= \underbrace{e^{-\frac{-2\mu\sum_i y_i +n\mu^2 }{2\sigma^2}}}_{g(u = \sum_i y_i;\mu)} \underbrace{e^{-\frac{\sum_i y_i^2}{2\sigma^2}}(2\pi\sigma^2)^{-n/2}}_{h(y_1,...,y_n)}\]
\[\therefore U = \sum_i Y_i \mbox{ is sufficient for }\mu\]
\[ \prod_{i=1}^nf_Y(y_i;\sigma^2)=e^{-\frac{\sum_i y_i^2}{2\sigma^2}} e^{-\frac{n\mu^2 }{2\sigma^2}}(2\pi\sigma^2)^{-n/2} \underbrace{\times e^{-\frac{-2\mu\sum_i y_i }{2\sigma^2}}}_{\mbox{cannot factor } \sum_i y_i \mbox{ away from }\sigma^2}\]
\[\therefore U = \sum_i Y_i^2 \mbox{ alone is NOT sufficient for }\sigma^2\]
This makes sense, given that the classic sample variance estimator
\[\hat\sigma^2 = \frac{\sum_{i=1}^n (Y_i -\bar Y)^2}{n-1}=\frac{\sum_{i=1}^n Y_i^2 -n\bar Y^2}{n-1}\]
makes use of both \(\sum_{i=1}^n Y_i^2\) and \(\sum_{i=1}^n Y_i\).
When \(\theta\) governs the support, we need to incorporate this with indicator functions.
\[1(\mbox{condition}) = \begin{cases} 1 & \mbox{condition satisfied} \\ 0 & \mbox{otherwise} \end{cases}\]
Suppose \(Y_1,...,Y_n\stackrel{i.i.d.}{\sim}\mbox{UNIF}(0,\theta)\).
Use the factorization theorem to find a sufficient statistic for \(\theta\).
\[ \prod_{i=1}^nf_Y(y_i;\theta) = \prod_{i=1}^n \frac{1}{\theta}\cdot 1(0<y_i<\theta) = \underbrace{\frac{1}{\theta^n}\cdot 1(y_{(n)}< \theta)}_{g(u;\theta)}\cdot \underbrace{1(y_{(1)}>0)}_{h(y_1,...,y_n)} \]
\[\therefore U = Y_{(n)}\mbox{ is sufficient for }\theta\]