1 Cómo usar este material

  • Pensado para subir a RPubs y que los alumnos lo ejecuten.
  • Dos partes: (I) Suficiencia (con explicación + gráficos) y (II) MLE (con explicación + gráficos).
  • Al final hay una sección con Scripts puros (solo R) para copiar/pegar en RStudio.

2 Parte I — SUFICIENCIA (explicación + gráficos)

Una estadística \(T(X)\) es suficiente para \(\theta\) si la distribución condicional \(X \mid T\) no depende de \(\theta\).
Neyman–Fisher (factorización): \(T\) es suficiente ssi \[ L(x;\theta) \;=\; h(T(x),\theta)\, g(x). \]

2.1 Normal \((\mu, \sigma^2\text{ conocida})\): \(T=\sum X_i\) suficiente para \(\mu\)

op_old <- par(no.readonly = TRUE)

mu_true <- 4
sigma   <- 2
n  <- 30
x  <- rnorm(n, mu_true, sigma)
Sx <- sum(x)

cat("n =", n, "  mu_true =", mu_true, "  sigma =", sigma, "  sum(x) =", round(Sx,3), "\n")
## n = 30   mu_true = 4   sigma = 2   sum(x) = 117.174
# log L(μ|x) ∝ - (1/(2σ^2)) * ∑ (xi - μ)^2
logL_norm <- function(mu, x, sigma){
  -(1/(2*sigma^2))*sum( (x - mu)^2 )
}

# Factorización:
# L(μ|x) ∝ exp(-(1/(2σ^2))*(n μ^2 - 2 μ ∑xi)) * exp(-(1/(2σ^2))∑xi^2)
h_norm <- function(mu, x, sigma){
  n <- length(x); S <- sum(x)
  exp(-(1/(2*sigma^2))*(n*mu^2 - 2*mu*S))
}
g_norm <- function(x, sigma){
  exp(-(1/(2*sigma^2))*sum(x^2))
}

mu_grid <- seq(mu_true - 4*sigma, mu_true + 4*sigma, length.out = 400)
L_log_vals  <- sapply(mu_grid, logL_norm, x = x, sigma = sigma)
hg_log_vals <- sapply(mu_grid, function(m) log(h_norm(m,x,sigma)) + log(g_norm(x,sigma)))

ratio_sd <- sd( L_log_vals - hg_log_vals )
mu_hat <- mean(x)

cat("Proporcionalidad (Normal, log-escala): sd(diff) =", ratio_sd, " (≈0)\n")
## Proporcionalidad (Normal, log-escala): sd(diff) = 1.922853e-14  (≈0)
cat("μ_hat (MLE) =", mu_hat, "  vs  mean(x) =", mean(x), "\n")
## μ_hat (MLE) = 3.905792   vs  mean(x) = 3.905792
par(mfrow = c(1,2))
plot(mu_grid, L_log_vals, type="l", lwd=2,
     xlab=expression(mu), ylab="log L(mu|x)", main="Normal: log-verosimilitud")
abline(v=mu_hat, col="red", lty=2, lwd=2)
abline(v=mu_true, col="blue", lty=3, lwd=2)
legend("bottomleft", c(expression(hat(mu)), expression(mu[true])),
       lty=c(2,3), col=c("red","blue"), lwd=2, bty="n")

plot(mu_grid, L_log_vals - hg_log_vals, type="l", lwd=2,
     xlab=expression(mu), ylab=expression(ell(mu) - (log*h + log*g)),
     main="Normal: factor. (constante ≈ 0)")
abline(h=0, col="gray40", lty=3)

par(op_old)

2.2 Gamma \((\alpha\text{ conocido},\,\beta\text{ desconocido})\): \(T=\sum X_i\) suficiente para \(\beta\)

set.seed(456)
alpha <- 3
beta_true <- 2
n <- 40
x <- rgamma(n, shape = alpha, scale = beta_true)
Sx <- sum(x)

cat("n =", n, "  alpha =", alpha, "  beta_true =", beta_true, "  sum(x) =", round(Sx,3), "\n")
## n = 40   alpha = 3   beta_true = 2   sum(x) = 254.899
# log L(β|x) ∝ - n α log β - (1/β) ∑ xi
logL_gamma <- function(beta, x, alpha){
  - length(x)*alpha*log(beta) - sum(x)/beta
}

# En log-escala, h(β) = logL(β|x) (g(x) no depende de β)
h_gamma_log <- function(beta, x, alpha){
  - length(x)*alpha*log(beta) - sum(x)/beta
}
g_gamma_log <- function(x, alpha) 0

beta_grid <- seq(0.2, 6, length.out = 400)
L_log_vals  <- sapply(beta_grid, logL_gamma, x = x, alpha = alpha)
hg_log_vals <- sapply(beta_grid, h_gamma_log, x = x, alpha = alpha) + g_gamma_log(x, alpha)

beta_hat <- mean(x)/alpha
cat("Proporcionalidad (Gamma, log-escala): sd(diff) =", sd(L_log_vals - hg_log_vals), " (≈0)\n")
## Proporcionalidad (Gamma, log-escala): sd(diff) = 0  (≈0)
cat("β_hat (MLE con α conocido) =", beta_hat, "  vs  mean(x)/α =", mean(x)/alpha, "\n")
## β_hat (MLE con α conocido) = 2.124158   vs  mean(x)/α = 2.124158
par(mfrow = c(1,2))
plot(beta_grid, L_log_vals, type="l", lwd=2,
     xlab=expression(beta), ylab="log L(beta|x)",
     main=bquote("Gamma ("*alpha==.(alpha)*"): log-verosimilitud vs "*beta))
abline(v=beta_hat, col="red", lty=2, lwd=2)
abline(v=beta_true, col="blue", lty=3, lwd=2)
legend("topright", c(expression(hat(beta)), expression(beta[true])),
       lty=c(2,3), col=c("red","blue"), lwd=2, bty="n")

plot(beta_grid, L_log_vals - hg_log_vals, type="l", lwd=2,
     xlab=expression(beta), ylab=expression(ell(beta) - (log*h + log*g)),
     main="Gamma: factor. (constante ≈ 0)")
abline(h=0, col="gray40", lty=3)

2.3 Poisson \((\lambda)\): \(T=\sum X_i\) suficiente para \(\lambda\)

2.3.1 (a) Factorización

set.seed(321)
n  <- 12
lam_true <- 3
x  <- rpois(n, lam_true)

# L(λ|x) = ∏ Pois(xi; λ) ∝ λ^{∑x} e^{-nλ}
L_pois  <- function(lambda, x) prod(dpois(x, lambda))
h_pois  <- function(lambda, x) lambda^(sum(x)) * exp(-length(x)*lambda)
g_pois  <- function(x) 1 / prod(factorial(x))

lambda_grid <- seq(0.1, 8, length.out = 200)
L_vals  <- sapply(lambda_grid, L_pois, x = x)
hg_vals <- sapply(lambda_grid, function(l) h_pois(l, x) * g_pois(x))

ratio <- L_vals / hg_vals
cat("Proporcionalidad (Poisson): sd(ratio) =", sd(ratio), "  (≈0)\n")
## Proporcionalidad (Poisson): sd(ratio) = 3.072359e-15   (≈0)
par(mfrow=c(1,2))
plot(lambda_grid, log(L_vals), type="l", lwd=2, xlab=expression(lambda),
     ylab="log L(λ|x)", main="Poisson: log-verosimilitud")
abline(v=mean(x), col="red", lty=2, lwd=2)
abline(v=lam_true, col="blue", lty=3, lwd=2)
legend("topleft", c("MLE (media muestral)", "λ verdadero"),
       col=c("red","blue"), lty=c(2,3), lwd=2, bty="n")

plot(lambda_grid, ratio, type="l", lwd=2, xlab=expression(lambda),
     ylab="L / (h·g)", main="Poisson: proporcionalidad (≈cte)")
abline(h=mean(ratio), col="gray40", lty=3)

2.3.2 (b) Condicional exacto (no depende de \(\lambda\))

Dado \(S=\sum X_i=s\), se tiene \((X_1,\dots,X_n)\mid S=s \sim \mathrm{Multinomial}\big(s; 1/n,\dots,1/n\big)\).

n_c  <- 10
s_tar <- 20
B     <- 20000

set.seed(1)
Xcond <- rmultinom(B, size = s_tar, prob = rep(1/n_c, n_c))  # n_c x B
mean_cond <- mean(Xcond[1, ])
cat("Media teórica E[X1 | S=s] = s/n =", s_tar/n_c, "\n")
## Media teórica E[X1 | S=s] = s/n = 2
cat("Media simulada condicional =", round(mean_cond,4), "\n")
## Media simulada condicional = 1.9823

3 Parte II — MLE (explicación + gráficos)

Para cada modelo, calculamos la log-verosimilitud y marcamos el MLE vs. el valor verdadero.

3.1 Bernoulli \((p)\)

set.seed(111)
n  <- 30
p_true <- 0.6
x  <- rbinom(n, 1, p_true)

logL_bern <- function(p, x) sum(x)*log(p) + (length(x)-sum(x))*log(1-p)

p_grid <- seq(0.01, 0.99, length.out=200)
logL_vals <- sapply(p_grid, logL_bern, x=x)
p_hat <- mean(x)

plot(p_grid, logL_vals, type="l", lwd=2,
     main="Bernoulli(p): log-verosimilitud",
     xlab="p", ylab="log L(p)")
abline(v=p_hat, col="red", lty=2, lwd=2)
abline(v=p_true, col="blue", lty=3, lwd=2)
legend("topright", c("MLE (media muestral)", "Valor verdadero"),
       col=c("red","blue"), lty=c(2,3), bty="n")

3.2 Poisson \((\lambda)\)

set.seed(222)
n <- 40
lam_true <- 3
x <- rpois(n, lam_true)

logL_pois <- function(lam, x) sum(x)*log(lam) - length(x)*lam

lam_grid <- seq(0.1, 6, length.out=200)
logL_vals <- sapply(lam_grid, logL_pois, x=x)
lam_hat <- mean(x)

plot(lam_grid, logL_vals, type="l", lwd=2,
     main="Poisson(λ): log-verosimilitud",
     xlab=expression(lambda), ylab="log L(λ)")
abline(v=lam_hat, col="red", lty=2, lwd=2)
abline(v=lam_true, col="blue", lty=3, lwd=2)
legend("topright", c("MLE (media muestral)", "Valor verdadero"),
       col=c("red","blue"), lty=c(2,3), bty="n")

3.3 Normal \((\mu, \sigma^2\text{ conocida})\)

set.seed(333)
n <- 50
mu_true <- 5
sigma <- 2
x <- rnorm(n, mu_true, sigma)

logL_norm <- function(mu, x, sigma) -(1/(2*sigma^2))*sum((x-mu)^2)

mu_grid <- seq(mu_true-4*sigma, mu_true+4*sigma, length.out=200)
logL_vals <- sapply(mu_grid, logL_norm, x=x, sigma=sigma)
mu_hat <- mean(x)

plot(mu_grid, logL_vals, type="l", lwd=2,
     main="Normal(μ, σ² conocida): log-verosimilitud",
     xlab=expression(mu), ylab="log L(μ)")
abline(v=mu_hat, col="red", lty=2, lwd=2)
abline(v=mu_true, col="blue", lty=3, lwd=2)
legend("bottomleft", c("MLE (media muestral)", "Valor verdadero"),
       col=c("red","blue"), lty=c(2,3), bty="n")

3.4 Exponencial \((\lambda)\)

set.seed(444)
n <- 40
lam_true <- 2
x <- rexp(n, rate=lam_true)

logL_exp <- function(lam, x) length(x)*log(lam) - lam*sum(x)

lam_grid <- seq(0.1, 5, length.out=200)
logL_vals <- sapply(lam_grid, logL_exp, x=x)
lam_hat <- 1/mean(x)

plot(lam_grid, logL_vals, type="l", lwd=2,
     main="Exponencial(λ): log-verosimilitud",
     xlab=expression(lambda), ylab="log L(λ)")
abline(v=lam_hat, col="red", lty=2, lwd=2)
abline(v=lam_true, col="blue", lty=3, lwd=2)
legend("topright", c("MLE (1/media)", "Valor verdadero"),
       col=c("red","blue"), lty=c(2,3), bty="n")


4 Scripts puros (solo R)

Copiá/pegá estos bloques en RStudio si querés correr sin explicación.

4.1 Suficiencia — Normal (μ, σ² conocida)

set.seed(123)
mu_true <- 4; sigma <- 2; n <- 30
x  <- rnorm(n, mu_true, sigma)

logL_norm <- function(mu, x, sigma) -(1/(2*sigma^2))*sum((x - mu)^2)
h_norm <- function(mu, x, sigma){ n <- length(x); S <- sum(x); exp(-(1/(2*sigma^2))*(n*mu^2 - 2*mu*S)) }
g_norm <- function(x, sigma){ exp(-(1/(2*sigma^2))*sum(x^2)) }

mu_grid <- seq(mu_true - 4*sigma, mu_true + 4*sigma, length.out = 400)
L_log_vals  <- sapply(mu_grid, logL_norm, x = x, sigma = sigma)
hg_log_vals <- sapply(mu_grid, function(m) log(h_norm(m,x,sigma)) + log(g_norm(x,sigma)))

plot(mu_grid, L_log_vals, type="l"); abline(v=mean(x), col="red")
plot(mu_grid, L_log_vals - hg_log_vals, type="l"); abline(h=0, col="gray")

4.2 Suficiencia — Gamma (α conocido, β desconocido)

set.seed(456)
alpha <- 3; beta_true <- 2; n <- 40
x <- rgamma(n, shape = alpha, scale = beta_true)

logL_gamma <- function(beta, x, alpha) - length(x)*alpha*log(beta) - sum(x)/beta
h_gamma_log <- function(beta, x, alpha) - length(x)*alpha*log(beta) - sum(x)/beta
g_gamma_log <- function(x, alpha) 0

beta_grid <- seq(0.2, 6, length.out = 400)
L_log_vals  <- sapply(beta_grid, logL_gamma, x = x, alpha = alpha)
hg_log_vals <- sapply(beta_grid, h_gamma_log, x = x, alpha = alpha) + g_gamma_log(x, alpha)

plot(beta_grid, L_log_vals, type="l"); abline(v=mean(x)/alpha, col="red")
plot(beta_grid, L_log_vals - hg_log_vals, type="l"); abline(h=0, col="gray")

4.3 Suficiencia — Poisson (λ)

set.seed(321)
n <- 12; lam_true <- 3
x <- rpois(n, lam_true)

L_pois  <- function(lambda, x) prod(dpois(x, lambda))
h_pois  <- function(lambda, x) lambda^(sum(x)) * exp(-length(x)*lambda)
g_pois  <- function(x) 1 / prod(factorial(x))

lambda_grid <- seq(0.1, 8, length.out = 200)
L_vals  <- sapply(lambda_grid, L_pois, x = x)
hg_vals <- sapply(lambda_grid, function(l) h_pois(l, x) * g_pois(x))

plot(lambda_grid, log(L_vals), type="l"); abline(v=mean(x), col="red")
plot(lambda_grid, L_vals/hg_vals, type="l"); abline(h=mean(L_vals/hg_vals), col="gray")

4.4 Condicional exacto Poisson (no depende de λ)

n_c <- 10; s_tar <- 20; B <- 20000
Xcond <- rmultinom(B, size = s_tar, prob = rep(1/n_c, n_c))
mean(Xcond[1, ]); s_tar/n_c

4.5 MLE — Bernoulli, Poisson, Normal (σ² conocida), Exponencial

## Bernoulli
set.seed(111); n <- 30; p_true <- 0.6; x <- rbinom(n, 1, p_true)
logL_bern <- function(p, x) sum(x)*log(p) + (length(x)-sum(x))*log(1-p)
p_grid <- seq(0.01,0.99,len=200); p_hat <- mean(x)
plot(p_grid, sapply(p_grid, logL_bern, x=x), type="l"); abline(v=p_hat, col="red")

## Poisson
set.seed(222); n <- 40; lam_true <- 3; x <- rpois(n, lam_true)
logL_pois <- function(lam, x) sum(x)*log(lam) - length(x)*lam
lam_grid <- seq(0.1,6,len=200); lam_hat <- mean(x)
plot(lam_grid, sapply(lam_grid, logL_pois, x=x), type="l"); abline(v=lam_hat, col="red")

## Normal (μ, σ² conocida)
set.seed(333); n <- 50; mu_true <- 5; sigma <- 2; x <- rnorm(n, mu_true, sigma)
logL_norm <- function(mu, x, sigma) -(1/(2*sigma^2))*sum((x-mu)^2)
mu_grid <- seq(mu_true-4*sigma, mu_true+4*sigma, len=200); mu_hat <- mean(x)
plot(mu_grid, sapply(mu_grid, logL_norm, x=x, sigma=sigma), type="l"); abline(v=mu_hat, col="red")

## Exponencial
set.seed(444); n <- 40; lam_true <- 2; x <- rexp(n, rate=lam_true)
logL_exp <- function(lam, x) length(x)*log(lam) - lam*sum(x)
lam_grid <- seq(0.1,5,len=200); lam_hat <- 1/mean(x)
plot(lam_grid, sapply(lam_grid, logL_exp, x=x), type="l"); abline(v=lam_hat, col="red")

5 Info de sesión

## R version 4.3.2 (2023-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Spanish_Argentina.utf8  LC_CTYPE=Spanish_Argentina.utf8   
## [3] LC_MONETARY=Spanish_Argentina.utf8 LC_NUMERIC=C                      
## [5] LC_TIME=Spanish_Argentina.utf8    
## 
## time zone: America/Buenos_Aires
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.33     R6_2.5.1          fastmap_1.1.1     xfun_0.52        
##  [5] cachem_1.0.8      knitr_1.50        htmltools_0.5.7   rmarkdown_2.29   
##  [9] lifecycle_1.0.4   cli_3.6.2         sass_0.4.8        jquerylib_0.1.4  
## [13] compiler_4.3.2    rstudioapi_0.15.0 tools_4.3.2       evaluate_0.23    
## [17] bslib_0.6.1       yaml_2.3.8        rlang_1.1.2       jsonlite_1.8.8