9.4

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

The value of beta does affect how quickly the asymptotic coverage approaches to the 95% CI. This shows that having a nonlinear transformation estimator can quickly kick in the asymptotic coverage.

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.4.3
Warning: package 'ggplot2' was built under R version 4.4.3
Warning: package 'tidyr' was built under R version 4.4.3
Warning: package 'readr' was built under R version 4.4.3
Warning: package 'purrr' was built under R version 4.4.3
Warning: package 'dplyr' was built under R version 4.4.3
Warning: package 'stringr' was built under R version 4.4.3
Warning: package 'forcats' was built under R version 4.4.3
Warning: package 'lubridate' was built under R version 4.4.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate
many_mles <- parameters(~n, ~beta, 
                        c(20, 40, 80, 160, 320, 640),
                        c(0.5, 2, 4)) %>%
  add_trials(10000)%>%
  mutate(
    ybar = map2_dbl(n, beta, ~ mean(rexp(.x, rate = 1/.y))),
    theta = exp(-5/beta),
    mle = exp(-5/ybar),
    se = mle * (5/ (ybar * sqrt(n))),
    lcl = mle - 1.96 * se,
    ucl = mle + 1.96 * se, 
    covers = as.numeric(lcl < theta & ucl > theta)
  )
coverage <- many_mles %>%
  summarize(coverage = mean(covers), .by = c(n, beta))

ggplot(coverage, aes(x = n, y = coverage)) + 
  geom_line()+
  geom_point()+
  geom_hline(yintercept = 0.95, linetype = 2)+
  facet_wrap(~beta, labeller = label_both)+
  theme_classic(base_size = 18)+
  labs(y = "Coverage", x = "Sample size n")

Both histograms show that the MLEs become increasingly centerd at the true parameter values confriming the asymptotic unbiasedness.

library(tidyverse)
library(purrrfect)

loglik.alpha <- function(alpha, yvals){
  if(alpha <= 0) return(-Inf)
  n <- length(yvals)
  ybar <- mean(yvals)
  
  ll <- n * (alpha * log(alpha) - lgamma(alpha))+
    (alpha - 1) * sum(log(yvals)) -
    n* alpha * log(ybar)
  return(ll)
}

get.mle <- function(yvals) {
  
  opt <- optimize(
    f = function(a) - loglik.alpha(a, yvals),
    interval = c(-5,5)
  )
  alpha.hat <- opt$minimum
  
  beta.hat <- mean(yvals) / alpha.hat
  
  return(c(alpha.hat, beta.hat))
}

my.y <- c(6.690889, 1.989313, 4.884504, 2.142505, 4.177150)

get.mle(my.y)
Warning in optimize(f = function(a) -loglik.alpha(a, yvals), interval = c(-5, :
NA/Inf replaced by maximum positive value
[1] 4.9999443 0.7953833
true_alpha <- 4
true_beta <- 2

many_mles <- parameters( ~n, 
                         c(5,20,50,100,150,200))%>%
  add_trials(10000)%>%
  mutate(
    sample = map(n, ~rgamma(.x, shape = true_alpha, scale = true_beta)),
    
    mle = map(sample, get.mle),
    
    alpha_hat = map_dbl(mle, 1),
    beta_hat = map_dbl(mle, 2)
  )
Warning: There were 60000 warnings in `mutate()`.
The first warning was:
ℹ In argument: `mle = map(sample, get.mle)`.
Caused by warning in `optimize()`:
! NA/Inf replaced by maximum positive value
ℹ Run `dplyr::last_dplyr_warnings()` to see the 59999 remaining warnings.
ggplot(many_mles, aes(x = alpha_hat)) + 
  geom_histogram(bins = 40, fill = "skyblue", color = "black") + 
  facet_wrap(~n, scales = "free")+
  geom_vline(xintercept = 4, linetype = 2, color = "red")+
  coord_cartesian(xlim = c(0,15)) +
  theme_classic(base_size = 14) +
  labs(title = expression(hat(alpha)), x = "MLE of alpha")

ggplot(many_mles, aes(x = beta_hat)) +
  geom_histogram(bins = 40, fill = "orange", color = "black") +
  facet_wrap(~n, scales = "free") +
  geom_vline(xintercept = 2, linetype = 2, color = "red") +
  coord_cartesian(xlim = c(0, 5)) +
  theme_classic(base_size = 14) +
  labs(title = expression(hat(theta)), x = "MLE of theta")

A.) When the large value for n is introduce, the distribution for that n value becomes symmetric and it being approximately normal. As for the small n value, the distriubution becomes stronger skewed and have a slower convergence. This shows that have a small p will make the asymptotic normality to fail.

B.) When looking at the coverage, as n increase coverage will approach near or at the 95% CI. Poor coverage exist when the p is near 0 or 1 and the n value is small. When the asymptotic normality fails like in part a, coverage will become more inaccurate.

C.) The parameter space for p is 0 and 1. This allows the MLE to hit its boundary making the asymptotic normality to break which allows r(p) to take on.

library(tidyverse)
library(purrrfect)

params <- parameters(~n, ~p,
                     c(10,20,40,80,160,320,640,1280),
                     c(0.1,0.2,0.3,0.4,0.5))%>%
  add_trials(10000)

sim<- params %>%
  mutate( 
    phat = map2(n, p, ~ {
      x <- rbinom(.x,1,.y)
      mean(x)
    }),
ghat = map(phat, ~.x * (1-.x))
) %>%
  unnest(c(phat, ghat))

ggplot(sim, aes(x = ghat))+
  geom_density(fill = "skyblue", alpha = 0.4)+
  facet_grid(p~n, scales = "free")+
  theme_classic(base_size = 12)+
  labs(x = expression(hat(p)(1-hat(p))), y = "Density")

coverage <- sim %>%
  group_by(n, p) %>%
  summarize(
    mean_hat = mean(ghat),
    sd_hat = sd(ghat),
    true = first(p) * (1 - first(p)),
    se = sqrt((1 - 2*first(p))^2 * first(p)*(1-first(p))/first(n)),
    lcl = mean_hat - 1.96 * se,
    ucl = mean_hat + 1.96 * se,
    coverage = mean(lcl < true & ucl > true),
    .groups = "drop"
  )

ggplot(coverage, aes(x = n, y = coverage)) +
  geom_line() +
  geom_point() +
  geom_hline(yintercept = 0.95, linetype = 2) +
  facet_wrap(~p) +
  theme_classic(base_size = 14) +
  labs(y = "Coverage", x = "n")