Practice_Set_9.4

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.3.3
Warning: package 'tibble' was built under R version 4.3.3
Warning: package 'tidyr' was built under R version 4.3.3
Warning: package 'readr' was built under R version 4.3.3
Warning: package 'purrr' was built under R version 4.3.3
Warning: package 'dplyr' was built under R version 4.3.3
Warning: package 'stringr' was built under R version 4.3.3
Warning: package 'forcats' was built under R version 4.3.3
Warning: package 'lubridate' was built under R version 4.3.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

Practice Set 9.4

Problem S1

p9df <- (parameters(~n,~beta,c(20,40,80,160,320,640),c(.5,2,4)) %>%
           add_trials(10000) %>%
          mutate(Yn = pmap(list(n,beta),\(n,b) rexp(n,1/b)),St = map_dbl(beta,\(b) exp(-5/b)),Yhat = map_dbl(Yn,mean),LCL = exp(-5/Yhat)-1.96*5*(exp(-5/Yhat)/(Yhat*sqrt(n))), UCL = exp(-5/Yhat)+1.96*5*(exp(-5/Yhat)/(Yhat*sqrt(n))))
        %>% mutate(coverage = ifelse(LCL < St & St < UCL,1,0))
)
p9df
# A tibble: 180,000 × 9
       n  beta .trial Yn                St  Yhat        LCL       UCL coverage
   <dbl> <dbl>  <dbl> <list>         <dbl> <dbl>      <dbl>     <dbl>    <dbl>
 1    20   0.5      1 <dbl [20]> 0.0000454 0.518 -0.000207  0.000336         1
 2    20   0.5      2 <dbl [20]> 0.0000454 0.666 -0.00126   0.00235          1
 3    20   0.5      3 <dbl [20]> 0.0000454 0.395 -0.0000144 0.0000208        0
 4    20   0.5      4 <dbl [20]> 0.0000454 0.386 -0.0000110 0.0000156        0
 5    20   0.5      5 <dbl [20]> 0.0000454 0.665 -0.00125   0.00234          1
 6    20   0.5      6 <dbl [20]> 0.0000454 0.538 -0.000281  0.000464         1
 7    20   0.5      7 <dbl [20]> 0.0000454 0.473 -0.0000924 0.000143         1
 8    20   0.5      8 <dbl [20]> 0.0000454 0.685 -0.00148   0.00283          1
 9    20   0.5      9 <dbl [20]> 0.0000454 0.718 -0.00193   0.00382          1
10    20   0.5     10 <dbl [20]> 0.0000454 0.632 -0.000904  0.00164          1
# ℹ 179,990 more rows
p9dffin <- (p9df %>%
  summarize("Coverage" = mean(coverage), .by = c(n,beta)) 
)
ggplot(data = p9dffin) +
  geom_line(aes(x = n,y = Coverage)) +
  geom_hline(aes(yintercept = .95)) +
  facet_grid(~beta,labeller =label_both) +
  theme_classic()

As you can see in the plots above, the value of Beta does influence how fast the coverage kicks in, with higher beta values kicking in the 95% interval quicker.

Problem S2

loglik.alpha <- function(a,y) {
  n <- length(y)
  likel <- -n*log(gamma(a))-n*a*log(mean(y)/a) + (a-1)*sum(log(y))-a*n
  return(likel)
  
}
get.mle <- function(y) {
  a <- optimize(loglik.alpha,c(0,30),y = y,maximum = TRUE)$maximum
  b <- mean(y)/a
  return(c(a,b))
}
ps2df <- (parameters(~n,c(5,20,50,100,150,200))
          %>%add_trials(10000)
          %>%mutate(Yn = map(n,\(n) rgamma(n,4,1/2)),,AlphaMLE = map_dbl(Yn,\(y) get.mle(y)[1]),BetaMLE = map_dbl(Yn,\(y) get.mle(y)[2]))
  
  
)
ggplot(ps2df) +
  geom_histogram(aes(x= AlphaMLE),bins = 100) +
  geom_vline(aes(xintercept=4)) +
  facet_grid(~n,labeller = label_both) +
  theme_classic() +
  lims(x = c(0,15))
Warning: Removed 1401 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 12 rows containing missing values or values outside the scale range
(`geom_bar()`).

ggplot(ps2df) +
  geom_histogram(aes(x= BetaMLE),bins = 100) +
  geom_vline(aes(xintercept=2)) +
  facet_grid(~n,labeller = label_both) +
  theme_classic() +
  lims(x = c(0,5))
Warning: Removed 154 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 12 rows containing missing values or values outside the scale range
(`geom_bar()`).

Problem S3

pS3df <- (parameters(~n,~p,c(10,20,40,80,160,320,640,1280),c(.1,.2,.3,.4,.5)) %>%
           add_trials(1000) %>%
          mutate(Yn = pmap(list(n,p),\(n,p) rbernoulli(n,p)),Yhat = map_dbl(Yn,mean),staup = map_dbl(Yhat,\(x) x*(1-x)),taup = map_dbl(p,\(x) x*(1-x)),LCL = pmap_dbl(list(taup,Yhat,n), \(t,y,n) t - 1.96*sqrt((y*(1-y)*(1-2*y)^2)/n)), UCL = pmap_dbl(list(taup,Yhat,n), \(t,y,n) t + 1.96*sqrt((y*(1-y)*(1-2*y)^2)/n)))
        %>% mutate(coverage = ifelse(LCL < taup & taup < UCL,1,0))
)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Yn = pmap(list(n, p), function(n, p) rbernoulli(n, p))`.
Caused by warning:
! `rbernoulli()` was deprecated in purrr 1.0.0.
pS3df
# A tibble: 40,000 × 10
       n     p .trial Yn          Yhat staup  taup     LCL   UCL coverage
   <dbl> <dbl>  <dbl> <list>     <dbl> <dbl> <dbl>   <dbl> <dbl>    <dbl>
 1    10   0.1      1 <lgl [10]>   0.2  0.16  0.09 -0.0588 0.239        1
 2    10   0.1      2 <lgl [10]>   0.2  0.16  0.09 -0.0588 0.239        1
 3    10   0.1      3 <lgl [10]>   0.1  0.09  0.09 -0.0588 0.239        1
 4    10   0.1      4 <lgl [10]>   0.1  0.09  0.09 -0.0588 0.239        1
 5    10   0.1      5 <lgl [10]>   0    0     0.09  0.09   0.09         0
 6    10   0.1      6 <lgl [10]>   0.3  0.21  0.09 -0.0236 0.204        1
 7    10   0.1      7 <lgl [10]>   0.2  0.16  0.09 -0.0588 0.239        1
 8    10   0.1      8 <lgl [10]>   0.2  0.16  0.09 -0.0588 0.239        1
 9    10   0.1      9 <lgl [10]>   0.1  0.09  0.09 -0.0588 0.239        1
10    10   0.1     10 <lgl [10]>   0    0     0.09  0.09   0.09         0
# ℹ 39,990 more rows

A

(ggplot(pS3df) +
  geom_histogram(aes(x= staup),bins = 100,binwidth = .005) +
  facet_grid(~n~p,labeller = label_both,scales = "free") +
  theme_classic()
)

It looks like the higher the \(p\) the longer it takes for normality to kick in, with \(p= .5\) seeming to take the longest to become normal.

B

ps3dffin <- (pS3df %>%
  summarize("Coverage" = mean(coverage),.by = c(n,p))
)
ggplot(data = ps3dffin) +
  geom_line(aes(x = n,y = Coverage)) +
  geom_hline(aes(yintercept = .95)) +
  facet_grid(~p,labeller =label_both) +
  theme_classic()

As shown above by the plots, asymptotic coverage kicks in very quickly when \(p\) is \(1\) through \(4\) , but at \(p=5\) it takes a little longer for the coverage to kick in. This supports what the above charts show as \(p=5\) was the only one that had issues with normality, and the same thing is true here.

C

The parameter space of \(\tau(p) = p(1-p)\) is \([0,.25]\) as setting \(\tau'(p) = 1- 2p\) equal to zero, we get a maximum at \(p=.5\), and \(\tau(.5) = .5(1-.5)=.25\), and \(\tau(p)\) cannot be negative as \(p \in [0,1]\) .