Una estadística \(T(X)\) es
suficiente para \(\theta\) si la distribución condicional
\(X \mid T\) no
depende de \(\theta\).
Neyman–Fisher (factorización): \(T\) es suficiente ssi \[
L(x;\theta) \;=\; h(T(x),\theta)\, g(x).
\]
op_old <- par(no.readonly = TRUE)
mu_true <- 4
sigma <- 2
n <- 30
x <- rnorm(n, mu_true, sigma)
Sx <- sum(x)
cat("n =", n, " mu_true =", mu_true, " sigma =", sigma, " sum(x) =", round(Sx,3), "\n")
## n = 30 mu_true = 4 sigma = 2 sum(x) = 117.174
# log L(μ|x) ∝ - (1/(2σ^2)) * ∑ (xi - μ)^2
logL_norm <- function(mu, x, sigma){
-(1/(2*sigma^2))*sum( (x - mu)^2 )
}
# Factorización:
# L(μ|x) ∝ exp(-(1/(2σ^2))*(n μ^2 - 2 μ ∑xi)) * exp(-(1/(2σ^2))∑xi^2)
h_norm <- function(mu, x, sigma){
n <- length(x); S <- sum(x)
exp(-(1/(2*sigma^2))*(n*mu^2 - 2*mu*S))
}
g_norm <- function(x, sigma){
exp(-(1/(2*sigma^2))*sum(x^2))
}
mu_grid <- seq(mu_true - 4*sigma, mu_true + 4*sigma, length.out = 400)
L_log_vals <- sapply(mu_grid, logL_norm, x = x, sigma = sigma)
hg_log_vals <- sapply(mu_grid, function(m) log(h_norm(m,x,sigma)) + log(g_norm(x,sigma)))
ratio_sd <- sd( L_log_vals - hg_log_vals )
mu_hat <- mean(x)
cat("Proporcionalidad (Normal, log-escala): sd(diff) =", ratio_sd, " (≈0)\n")
## Proporcionalidad (Normal, log-escala): sd(diff) = 1.922853e-14 (≈0)
cat("μ_hat (MLE) =", mu_hat, " vs mean(x) =", mean(x), "\n")
## μ_hat (MLE) = 3.905792 vs mean(x) = 3.905792
par(mfrow = c(1,2))
plot(mu_grid, L_log_vals, type="l", lwd=2,
xlab=expression(mu), ylab="log L(mu|x)", main="Normal: log-verosimilitud")
abline(v=mu_hat, col="red", lty=2, lwd=2)
abline(v=mu_true, col="blue", lty=3, lwd=2)
legend("bottomleft", c(expression(hat(mu)), expression(mu[true])),
lty=c(2,3), col=c("red","blue"), lwd=2, bty="n")
plot(mu_grid, L_log_vals - hg_log_vals, type="l", lwd=2,
xlab=expression(mu), ylab=expression(ell(mu) - (log*h + log*g)),
main="Normal: factor. (constante ≈ 0)")
abline(h=0, col="gray40", lty=3)
par(op_old)
set.seed(456)
alpha <- 3
beta_true <- 2
n <- 40
x <- rgamma(n, shape = alpha, scale = beta_true)
Sx <- sum(x)
cat("n =", n, " alpha =", alpha, " beta_true =", beta_true, " sum(x) =", round(Sx,3), "\n")
## n = 40 alpha = 3 beta_true = 2 sum(x) = 254.899
# log L(β|x) ∝ - n α log β - (1/β) ∑ xi
logL_gamma <- function(beta, x, alpha){
- length(x)*alpha*log(beta) - sum(x)/beta
}
# En log-escala, h(β) = logL(β|x) (g(x) no depende de β)
h_gamma_log <- function(beta, x, alpha){
- length(x)*alpha*log(beta) - sum(x)/beta
}
g_gamma_log <- function(x, alpha) 0
beta_grid <- seq(0.2, 6, length.out = 400)
L_log_vals <- sapply(beta_grid, logL_gamma, x = x, alpha = alpha)
hg_log_vals <- sapply(beta_grid, h_gamma_log, x = x, alpha = alpha) + g_gamma_log(x, alpha)
beta_hat <- mean(x)/alpha
cat("Proporcionalidad (Gamma, log-escala): sd(diff) =", sd(L_log_vals - hg_log_vals), " (≈0)\n")
## Proporcionalidad (Gamma, log-escala): sd(diff) = 0 (≈0)
cat("β_hat (MLE con α conocido) =", beta_hat, " vs mean(x)/α =", mean(x)/alpha, "\n")
## β_hat (MLE con α conocido) = 2.124158 vs mean(x)/α = 2.124158
par(mfrow = c(1,2))
plot(beta_grid, L_log_vals, type="l", lwd=2,
xlab=expression(beta), ylab="log L(beta|x)",
main=bquote("Gamma ("*alpha==.(alpha)*"): log-verosimilitud vs "*beta))
abline(v=beta_hat, col="red", lty=2, lwd=2)
abline(v=beta_true, col="blue", lty=3, lwd=2)
legend("topright", c(expression(hat(beta)), expression(beta[true])),
lty=c(2,3), col=c("red","blue"), lwd=2, bty="n")
plot(beta_grid, L_log_vals - hg_log_vals, type="l", lwd=2,
xlab=expression(beta), ylab=expression(ell(beta) - (log*h + log*g)),
main="Gamma: factor. (constante ≈ 0)")
abline(h=0, col="gray40", lty=3)
set.seed(321)
n <- 12
lam_true <- 3
x <- rpois(n, lam_true)
# L(λ|x) = ∏ Pois(xi; λ) ∝ λ^{∑x} e^{-nλ}
L_pois <- function(lambda, x) prod(dpois(x, lambda))
h_pois <- function(lambda, x) lambda^(sum(x)) * exp(-length(x)*lambda)
g_pois <- function(x) 1 / prod(factorial(x))
lambda_grid <- seq(0.1, 8, length.out = 200)
L_vals <- sapply(lambda_grid, L_pois, x = x)
hg_vals <- sapply(lambda_grid, function(l) h_pois(l, x) * g_pois(x))
ratio <- L_vals / hg_vals
cat("Proporcionalidad (Poisson): sd(ratio) =", sd(ratio), " (≈0)\n")
## Proporcionalidad (Poisson): sd(ratio) = 3.072359e-15 (≈0)
par(mfrow=c(1,2))
plot(lambda_grid, log(L_vals), type="l", lwd=2, xlab=expression(lambda),
ylab="log L(λ|x)", main="Poisson: log-verosimilitud")
abline(v=mean(x), col="red", lty=2, lwd=2)
abline(v=lam_true, col="blue", lty=3, lwd=2)
legend("topleft", c("MLE (media muestral)", "λ verdadero"),
col=c("red","blue"), lty=c(2,3), lwd=2, bty="n")
plot(lambda_grid, ratio, type="l", lwd=2, xlab=expression(lambda),
ylab="L / (h·g)", main="Poisson: proporcionalidad (≈cte)")
abline(h=mean(ratio), col="gray40", lty=3)
Dado \(S=\sum X_i=s\), se tiene \((X_1,\dots,X_n)\mid S=s \sim \mathrm{Multinomial}\big(s; 1/n,\dots,1/n\big)\).
n_c <- 10
s_tar <- 20
B <- 20000
set.seed(1)
Xcond <- rmultinom(B, size = s_tar, prob = rep(1/n_c, n_c)) # n_c x B
mean_cond <- mean(Xcond[1, ])
cat("Media teórica E[X1 | S=s] = s/n =", s_tar/n_c, "\n")
## Media teórica E[X1 | S=s] = s/n = 2
cat("Media simulada condicional =", round(mean_cond,4), "\n")
## Media simulada condicional = 1.9823
Para cada modelo, calculamos la log-verosimilitud y marcamos el MLE vs. el valor verdadero.
set.seed(111)
n <- 30
p_true <- 0.6
x <- rbinom(n, 1, p_true)
logL_bern <- function(p, x) sum(x)*log(p) + (length(x)-sum(x))*log(1-p)
p_grid <- seq(0.01, 0.99, length.out=200)
logL_vals <- sapply(p_grid, logL_bern, x=x)
p_hat <- mean(x)
plot(p_grid, logL_vals, type="l", lwd=2,
main="Bernoulli(p): log-verosimilitud",
xlab="p", ylab="log L(p)")
abline(v=p_hat, col="red", lty=2, lwd=2)
abline(v=p_true, col="blue", lty=3, lwd=2)
legend("topright", c("MLE (media muestral)", "Valor verdadero"),
col=c("red","blue"), lty=c(2,3), bty="n")
set.seed(222)
n <- 40
lam_true <- 3
x <- rpois(n, lam_true)
logL_pois <- function(lam, x) sum(x)*log(lam) - length(x)*lam
lam_grid <- seq(0.1, 6, length.out=200)
logL_vals <- sapply(lam_grid, logL_pois, x=x)
lam_hat <- mean(x)
plot(lam_grid, logL_vals, type="l", lwd=2,
main="Poisson(λ): log-verosimilitud",
xlab=expression(lambda), ylab="log L(λ)")
abline(v=lam_hat, col="red", lty=2, lwd=2)
abline(v=lam_true, col="blue", lty=3, lwd=2)
legend("topright", c("MLE (media muestral)", "Valor verdadero"),
col=c("red","blue"), lty=c(2,3), bty="n")
set.seed(333)
n <- 50
mu_true <- 5
sigma <- 2
x <- rnorm(n, mu_true, sigma)
logL_norm <- function(mu, x, sigma) -(1/(2*sigma^2))*sum((x-mu)^2)
mu_grid <- seq(mu_true-4*sigma, mu_true+4*sigma, length.out=200)
logL_vals <- sapply(mu_grid, logL_norm, x=x, sigma=sigma)
mu_hat <- mean(x)
plot(mu_grid, logL_vals, type="l", lwd=2,
main="Normal(μ, σ² conocida): log-verosimilitud",
xlab=expression(mu), ylab="log L(μ)")
abline(v=mu_hat, col="red", lty=2, lwd=2)
abline(v=mu_true, col="blue", lty=3, lwd=2)
legend("bottomleft", c("MLE (media muestral)", "Valor verdadero"),
col=c("red","blue"), lty=c(2,3), bty="n")
set.seed(444)
n <- 40
lam_true <- 2
x <- rexp(n, rate=lam_true)
logL_exp <- function(lam, x) length(x)*log(lam) - lam*sum(x)
lam_grid <- seq(0.1, 5, length.out=200)
logL_vals <- sapply(lam_grid, logL_exp, x=x)
lam_hat <- 1/mean(x)
plot(lam_grid, logL_vals, type="l", lwd=2,
main="Exponencial(λ): log-verosimilitud",
xlab=expression(lambda), ylab="log L(λ)")
abline(v=lam_hat, col="red", lty=2, lwd=2)
abline(v=lam_true, col="blue", lty=3, lwd=2)
legend("topright", c("MLE (1/media)", "Valor verdadero"),
col=c("red","blue"), lty=c(2,3), bty="n")
Copiá/pegá estos bloques en RStudio si querés correr sin explicación.
set.seed(123)
mu_true <- 4; sigma <- 2; n <- 30
x <- rnorm(n, mu_true, sigma)
logL_norm <- function(mu, x, sigma) -(1/(2*sigma^2))*sum((x - mu)^2)
h_norm <- function(mu, x, sigma){ n <- length(x); S <- sum(x); exp(-(1/(2*sigma^2))*(n*mu^2 - 2*mu*S)) }
g_norm <- function(x, sigma){ exp(-(1/(2*sigma^2))*sum(x^2)) }
mu_grid <- seq(mu_true - 4*sigma, mu_true + 4*sigma, length.out = 400)
L_log_vals <- sapply(mu_grid, logL_norm, x = x, sigma = sigma)
hg_log_vals <- sapply(mu_grid, function(m) log(h_norm(m,x,sigma)) + log(g_norm(x,sigma)))
plot(mu_grid, L_log_vals, type="l"); abline(v=mean(x), col="red")
plot(mu_grid, L_log_vals - hg_log_vals, type="l"); abline(h=0, col="gray")
set.seed(456)
alpha <- 3; beta_true <- 2; n <- 40
x <- rgamma(n, shape = alpha, scale = beta_true)
logL_gamma <- function(beta, x, alpha) - length(x)*alpha*log(beta) - sum(x)/beta
h_gamma_log <- function(beta, x, alpha) - length(x)*alpha*log(beta) - sum(x)/beta
g_gamma_log <- function(x, alpha) 0
beta_grid <- seq(0.2, 6, length.out = 400)
L_log_vals <- sapply(beta_grid, logL_gamma, x = x, alpha = alpha)
hg_log_vals <- sapply(beta_grid, h_gamma_log, x = x, alpha = alpha) + g_gamma_log(x, alpha)
plot(beta_grid, L_log_vals, type="l"); abline(v=mean(x)/alpha, col="red")
plot(beta_grid, L_log_vals - hg_log_vals, type="l"); abline(h=0, col="gray")
set.seed(321)
n <- 12; lam_true <- 3
x <- rpois(n, lam_true)
L_pois <- function(lambda, x) prod(dpois(x, lambda))
h_pois <- function(lambda, x) lambda^(sum(x)) * exp(-length(x)*lambda)
g_pois <- function(x) 1 / prod(factorial(x))
lambda_grid <- seq(0.1, 8, length.out = 200)
L_vals <- sapply(lambda_grid, L_pois, x = x)
hg_vals <- sapply(lambda_grid, function(l) h_pois(l, x) * g_pois(x))
plot(lambda_grid, log(L_vals), type="l"); abline(v=mean(x), col="red")
plot(lambda_grid, L_vals/hg_vals, type="l"); abline(h=mean(L_vals/hg_vals), col="gray")
n_c <- 10; s_tar <- 20; B <- 20000
Xcond <- rmultinom(B, size = s_tar, prob = rep(1/n_c, n_c))
mean(Xcond[1, ]); s_tar/n_c
## Bernoulli
set.seed(111); n <- 30; p_true <- 0.6; x <- rbinom(n, 1, p_true)
logL_bern <- function(p, x) sum(x)*log(p) + (length(x)-sum(x))*log(1-p)
p_grid <- seq(0.01,0.99,len=200); p_hat <- mean(x)
plot(p_grid, sapply(p_grid, logL_bern, x=x), type="l"); abline(v=p_hat, col="red")
## Poisson
set.seed(222); n <- 40; lam_true <- 3; x <- rpois(n, lam_true)
logL_pois <- function(lam, x) sum(x)*log(lam) - length(x)*lam
lam_grid <- seq(0.1,6,len=200); lam_hat <- mean(x)
plot(lam_grid, sapply(lam_grid, logL_pois, x=x), type="l"); abline(v=lam_hat, col="red")
## Normal (μ, σ² conocida)
set.seed(333); n <- 50; mu_true <- 5; sigma <- 2; x <- rnorm(n, mu_true, sigma)
logL_norm <- function(mu, x, sigma) -(1/(2*sigma^2))*sum((x-mu)^2)
mu_grid <- seq(mu_true-4*sigma, mu_true+4*sigma, len=200); mu_hat <- mean(x)
plot(mu_grid, sapply(mu_grid, logL_norm, x=x, sigma=sigma), type="l"); abline(v=mu_hat, col="red")
## Exponencial
set.seed(444); n <- 40; lam_true <- 2; x <- rexp(n, rate=lam_true)
logL_exp <- function(lam, x) length(x)*log(lam) - lam*sum(x)
lam_grid <- seq(0.1,5,len=200); lam_hat <- 1/mean(x)
plot(lam_grid, sapply(lam_grid, logL_exp, x=x), type="l"); abline(v=lam_hat, col="red")
## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Spanish_Argentina.utf8 LC_CTYPE=Spanish_Argentina.utf8
## [3] LC_MONETARY=Spanish_Argentina.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Argentina.utf8
##
## time zone: America/Buenos_Aires
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.33 R6_2.5.1 fastmap_1.1.1 xfun_0.52
## [5] cachem_1.0.8 knitr_1.50 htmltools_0.5.7 rmarkdown_2.29
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.8 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.15.0 tools_4.3.2 evaluate_0.23
## [17] bslib_0.6.1 yaml_2.3.8 rlang_1.1.2 jsonlite_1.8.8