PS 9.4

S1. Consider problem 3. For n∈{20,40,80,160,320,640} and β∈{0.5,2,4}, investigate the coverage of the asymptotic 95% confidence interval for S(5) when Y_1,…,Y_n are i.i.d. EXP(β). Plot the coverage as a function of n. Does the value of β influence how quickly the asymptotics “kick in”?

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

# S1 simulation: 10,000 EXP(beta) samples for each (n, beta) combination,
# with the sample mean and the MLE of S(5) computed for every sample.

# Seed the RNG so the coverage figures discussed below can be reproduced.
set.seed(9401)

many_sims <- parameters(
  ~n, ~beta,
  c(20, 40, 80, 160, 320, 640), c(0.5, 2, 4)
) %>%
  add_trials(10000) %>%
  # beta is the mean of the exponential here, so rexp() needs rate = 1 / beta
  mutate(Ysample = map2(n, beta, \(n, beta) rexp(n, rate = 1 / beta))) %>%
  mutate(
    ybar = map_dbl(Ysample, mean),
    # MLE of S(5): plug ybar in for beta in exp(-5 / beta)
    mle = exp(-5 / ybar)
  )

head(many_sims)

# A tibble: 6 × 6
      n  beta .trial Ysample     ybar         mle
  <dbl> <dbl>  <dbl> <list>     <dbl>       <dbl>
1    20   0.5      1 <dbl [20]> 0.486 0.0000343  
2    20   0.5      2 <dbl [20]> 0.545 0.000103   
3    20   0.5      3 <dbl [20]> 0.552 0.000117   
4    20   0.5      4 <dbl [20]> 0.360 0.000000942
5    20   0.5      5 <dbl [20]> 0.355 0.000000770
6    20   0.5      6 <dbl [20]> 0.657 0.000498

# Attach the asymptotic 95% interval for S(5) to every trial and flag
# (1 = yes, 0 = no) whether it contains the true value exp(-5 / beta).
many_sims_with_ci <- many_sims %>%
  mutate(
    # 1.96 * estimated standard error; temporary column, removed again below
    half_width = 1.96 * sqrt((25 * exp(-10 / ybar)) / (n * ybar^2)),
    lcl = mle - half_width,
    ucl = mle + half_width,
    half_width = NULL
  ) %>%
  mutate(
    covers = as.numeric(lcl < exp(-5 / beta) & ucl > exp(-5 / beta))
  )

head(many_sims_with_ci)

# A tibble: 6 × 9
      n  beta .trial Ysample     ybar         mle         lcl        ucl covers
  <dbl> <dbl>  <dbl> <list>     <dbl>       <dbl>       <dbl>      <dbl>  <dbl>
1    20   0.5      1 <dbl [20]> 0.486 0.0000343   -0.000120   0.000189        1
2    20   0.5      2 <dbl [20]> 0.545 0.000103    -0.000311   0.000517        1
3    20   0.5      3 <dbl [20]> 0.552 0.000117    -0.000346   0.000579        1
4    20   0.5      4 <dbl [20]> 0.360 0.000000942 -0.00000478 0.00000667      0
5    20   0.5      5 <dbl [20]> 0.355 0.000000770 -0.00000398 0.00000552      0
6    20   0.5      6 <dbl [20]> 0.657 0.000498    -0.00116    0.00216         1

# Empirical coverage: proportion of trials per (n, beta) whose interval
# contains the true S(5).
coverage <- summarise(
  many_sims_with_ci,
  coverage = mean(covers),
  .by = c(n, beta)
)

head(coverage)

# A tibble: 6 × 3
      n  beta coverage
  <dbl> <dbl>    <dbl>
1    20   0.5    0.737
2    20   2      0.886
3    20   4      0.922
4    40   0.5    0.800
5    40   2      0.917
6    40   4      0.937

# Coverage against n, one panel per beta; the dashed line marks the nominal
# 95% level.
ggplot(coverage, aes(x = n, y = coverage)) +
  geom_line() +
  geom_hline(aes(yintercept = 0.95), linetype = 2) +
  facet_wrap(~beta, labeller = label_both) +
  ylab("Coverage") +
  theme_classic(base_size = 18)

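The plot shows that the asymptotics kick in faster for larger β: at n = 20 the coverage is about 0.74 for β = 0.5, compared with about 0.89 for β = 2 and 0.92 for β = 4. We can also see that, for every β, coverage grows toward the nominal 95% level as n grows.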

S2. Consider problem 4. Given a sample of observed data values y_1,…,y_n from a GAM(α,β) distribution we want to find the MLEs of (α,β). We will turn this into a one-parameter optimization problem, as outlined above, as single parameter optimization problems are usually easier numerically. Write a function loglik.alpha(alpha,yvals) that takes as input alpha and a vector of data values yvals and returns the log-likelihood as a function of α given the observed y_i’s. Write a function called get.mle(yvals) that takes a sample as input and uses loglik.alpha() and optimize() to return the joint MLEs of (α,β). Verify that your function works using these particular observations: For α=4 and β=2, and each of n∈{5,20,50,100,150,200}, simulate 10,000 realizations of $\hat{\alpha}_{MLE}$ and $\hat{\beta}_{MLE}$. Create histograms of the MLEs faceted by sample size with vertical lines indicating the true values of the parameters. Verify that the MLEs are indeed asymptotically unbiased and normally distributed. (hint: restrict the x-axis to (0,15) for $\hat{\alpha}$ and (0,5) for $\hat{\beta}$).

# Gamma log-likelihood as a function of alpha alone.
#
# beta is replaced by mean(yvals) / alpha, so the result depends only on
# alpha and the data.
#
# alpha: shape value at which to evaluate the log-likelihood.
# yvals: numeric vector of observed data values.
# Returns the log-likelihood value.
loglik.alpha <- function(alpha, yvals) {
  n_obs <- length(yvals)
  scale_hat <- mean(yvals) / alpha

  # The four pieces of the gamma log-likelihood, combined left to right.
  lgamma_term <- -n_obs * lgamma(alpha)
  log_scale_term <- n_obs * alpha * log(scale_hat)
  log_data_term <- (alpha - 1) * sum(log(yvals))
  data_term <- sum(yvals) / scale_hat

  lgamma_term - log_scale_term + log_data_term - data_term
}

# Joint MLEs of (alpha, beta) for a sample, found by maximising
# loglik.alpha() over alpha with optimize().
#
# yvals:    numeric vector of observed data values.
# interval: length-2 vector giving the range of alpha values searched.
#           optimize() only looks inside this range, so a sample whose
#           likelihood peaks above the upper limit yields an estimate close to
#           that limit; pass a wider interval when that is a concern
#           (e.g. very small samples).
# Returns c(alpha_hat, beta_hat).
get.mle <- function(yvals, interval = c(0.01, 20)) {
  opt <- optimize(
    f = loglik.alpha,
    interval = interval,
    yvals = yvals,
    maximum = TRUE
  )

  alpha_hat <- opt$maximum
  # Same substitution loglik.alpha() uses: beta-hat = ybar / alpha-hat.
  beta_hat <- mean(yvals) / alpha_hat

  c(alpha_hat, beta_hat)
}

# Check get.mle() on five fixed observations.
my.y <- c(
  6.690889, 1.989313, 4.884504,
  2.142505, 4.177150
)

print(get.mle(my.y))

[1] 4.8222762 0.8246878

library(tidyverse)
library(purrrfect)

# S2 simulation: 10,000 GAM(alpha = 4, beta = 2) samples per sample size,
# with the joint MLEs computed for each sample.

# Seed the RNG so the histograms and bias summary can be reproduced.
set.seed(9402)

many_mles <- parameters(~n, c(5, 20, 50, 100, 150, 200)) %>%
  add_trials(10000) %>%
  mutate(Ysample = map(n, \(n) rgamma(n, shape = 4, scale = 2))) %>%
  mutate(
    # get.mle() returns c(alpha_hat, beta_hat); split it into two columns
    mles = map(Ysample, get.mle),
    alpha_hat = map_dbl(mles, 1),
    beta_hat = map_dbl(mles, 2)
  )

head(many_mles)

# A tibble: 6 × 6
      n .trial Ysample   mles      alpha_hat beta_hat
  <dbl>  <dbl> <list>    <list>        <dbl>    <dbl>
1     5      1 <dbl [5]> <dbl [2]>      2.88    2.76 
2     5      2 <dbl [5]> <dbl [2]>      3.37    2.49 
3     5      3 <dbl [5]> <dbl [2]>     10.0     0.862
4     5      4 <dbl [5]> <dbl [2]>      4.13    1.29 
5     5      5 <dbl [5]> <dbl [2]>      6.47    1.54 
6     5      6 <dbl [5]> <dbl [2]>      4.50    1.31

# Histogram of alpha-hat by sample size; the vertical line marks the true
# alpha = 4. The view is zoomed to (0, 15) without dropping any data.
ggplot(many_mles, aes(x = alpha_hat)) +
  geom_histogram(bins = 100) +
  geom_vline(xintercept = 4) +
  facet_wrap(~n, labeller = label_both) +
  coord_cartesian(xlim = c(0, 15)) +
  labs(x = expression(hat(alpha))) +
  theme_classic()

# Histogram of beta-hat by sample size; the vertical line marks the true
# beta = 2. The view is zoomed to (0, 5) without dropping any data.
ggplot(many_mles, aes(x = beta_hat)) +
  geom_histogram(bins = 100) +
  geom_vline(xintercept = 2) +
  facet_wrap(~n, labeller = label_both) +
  coord_cartesian(xlim = c(0, 5)) +
  labs(x = expression(hat(beta))) +
  theme_classic()

# Simulated bias of each MLE per sample size (true values: alpha = 4, beta = 2).
mle_summary <- summarise(
  many_mles,
  alpha_bias = mean(alpha_hat - 4),
  beta_bias = mean(beta_hat - 2),
  .by = n
)

# Simulated bias of alpha-hat and beta-hat against n; the dashed line marks
# zero bias. Both lines map a colour so that each one gets a legend entry.
ggplot(mle_summary, aes(x = n)) +
  geom_line(aes(y = alpha_bias, colour = "alpha")) +
  geom_line(aes(y = beta_bias, colour = "beta")) +
  geom_hline(yintercept = 0, linetype = 2) +
  labs(y = "Bias", colour = "Parameter") +
  theme_classic()

S3. In this problem we will explore some of the regularity conditions necessary for asymptotic efficiency and normality of MLEs. Consider Warmup Problem 1, where we have a sample of n i.i.d. BERN(p) data and we want to estimate τ(p)=p(1-p) using the MLE $\hat{\tau}(p)=\hat{p}(1-\hat{p})$ (where $\hat{p}$ is the sample proportion of 1’s observed). The CRLB for any unbiased estimator of τ(p) we derived as $p(1-p)(1-2p)^2/n$. Using asymptotic normality and efficiency of MLEs, we derived the following asymptotic 95% confidence interval: $\hat{\tau}(p) \pm 1.96\sqrt{\widehat{\mathrm{CRLB}}}$, with plug-in estimators to estimate the CRLB.

For each n∈{10,20,40,80,160,320,640,1280} and p∈{0.1,0.2,0.3,0.4,0.5}, simulate 10,000 realizations of $\hat{\tau}(p)$ and 95% CIs for τ(p). Then: a) Plot the simulated densities of $\hat{\tau}(p)$ faceted by each {n,p} combination (hint: free up the scales). How does the size of p impact how quickly asymptotic normality “kicks in” for $\hat{\tau}$? Are there any values of p for which normality never kicks in? b) Plot the 95% CI coverage of τ(p) as a function of n for each p. Make sure to add a horizontal reference line at 95%. How can your results of a) be used to explain your results? Note that this study illustrates one of the regularity conditions necessary to ensure asymptotic normality of the MLE, essentially, that the MLE is not likely to occur on the boundary of the parameter space. What is the parameter space of τ(p) (that is, what are the possible values τ(p) can take on?)

library(tidyverse)
library(purrrfect)

# S3 simulation: 10,000 BERN(p) samples for each (n, p) combination, with
# tau-hat = phat * (1 - phat), its plug-in 95% interval and a coverage flag.

# Seed the RNG so the densities and coverage figures can be reproduced.
set.seed(9403)

many_sims <- parameters(
  ~n, ~p,
  c(10, 20, 40, 80, 160, 320, 640, 1280),
  c(0.1, 0.2, 0.3, 0.4, 0.5)
) %>%
  add_trials(10000) %>%
  mutate(Ysample = map2(n, p, \(n, p) rbinom(n, size = 1, prob = p))) %>%
  mutate(
    phat = map_dbl(Ysample, mean),
    tau_hat = phat * (1 - phat),
    # Plug-in CRLB; it is exactly 0 when phat is 0, 0.5 or 1, which collapses
    # the interval to the single point tau_hat.
    var_hat = (phat * (1 - phat) * (1 - 2 * phat)^2) / n,
    lcl = tau_hat - 1.96 * sqrt(var_hat),
    ucl = tau_hat + 1.96 * sqrt(var_hat),
    tau_true = p * (1 - p),
    # Strict inequalities: a zero-width interval always counts as a miss.
    covers = ifelse(lcl < tau_true & ucl > tau_true, 1, 0)
  )

head(many_sims)

# A tibble: 6 × 11
      n     p .trial Ysample  phat tau_hat var_hat    lcl   ucl tau_true covers
  <dbl> <dbl>  <dbl> <list>  <dbl>   <dbl>   <dbl>  <dbl> <dbl>    <dbl>  <dbl>
1    10   0.1      1 <int>     0      0    0       0      0         0.09      0
2    10   0.1      2 <int>     0      0    0       0      0         0.09      0
3    10   0.1      3 <int>     0      0    0       0      0         0.09      0
4    10   0.1      4 <int>     0.2    0.16 0.00576 0.0112 0.309     0.09      1
5    10   0.1      5 <int>     0.2    0.16 0.00576 0.0112 0.309     0.09      1
6    10   0.1      6 <int>     0      0    0       0      0         0.09      0

# Simulated density of tau-hat for every (n, p) combination: rows are n,
# columns are p, and each panel has its own axis scales.
ggplot(many_sims, aes(x = tau_hat)) +
  geom_density() +
  facet_grid(n ~ p, scales = "free", labeller = label_both) +
  xlab(expression(hat(tau))) +
  theme_classic(base_size = 10) +
  theme(
    strip.text = element_text(size = 8),
    axis.text = element_text(size = 7)
  )

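The size of p impacts how quickly asymptotic normality kicks in: at p = 0.1 it takes longer, while for p = 0.2–0.4 we converge more quickly. At p = 0.5, τ(p) is at its maximum value of 0.25, so the estimate can never exceed the true value and its distribution never becomes normal.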

# Empirical coverage: proportion of trials per (n, p) whose interval
# contains the true tau(p).
coverage <- summarise(
  many_sims,
  coverage = mean(covers),
  .by = c(n, p)
)

print(coverage)

# A tibble: 40 × 3
       n     p coverage
   <dbl> <dbl>    <dbl>
 1    10   0.1    0.576
 2    10   0.2    0.772
 3    10   0.3    0.869
 4    10   0.4    0.763
 5    10   0.5    0.734
 6    20   0.1    0.831
 7    20   0.2    0.847
 8    20   0.3    0.858
 9    20   0.4    0.865
10    20   0.5    0.816
# ℹ 30 more rows

# Coverage against n, one panel per p; the dashed line marks the nominal
# 95% level.
ggplot(coverage, aes(x = n, y = coverage)) +
  geom_line() +
  geom_hline(aes(yintercept = 0.95), linetype = 2) +
  facet_wrap(~p, labeller = label_both) +
  ylab("Coverage") +
  theme_classic(base_size = 18)

Our results from part a) explain what we see here: coverage for p = 0.5 does not converge to 95%, and coverage for p = 0.2–0.4 converges more quickly than for p = 0.1, which is what we would expect based on how quickly normality kicked in for each p in part a).

The possible values τ(p) can take on are 0 to 0.25, and that is why we have issues at p = 0.5, where τ(p) sits on the boundary of its parameter space.