Practice Set 7.7

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.3.3

Warning: package 'tibble' was built under R version 4.3.3

Warning: package 'tidyr' was built under R version 4.3.3

Warning: package 'readr' was built under R version 4.3.3

Warning: package 'purrr' was built under R version 4.3.3

Warning: package 'dplyr' was built under R version 4.3.3

Warning: package 'stringr' was built under R version 4.3.3

Warning: package 'forcats' was built under R version 4.3.3

Warning: package 'lubridate' was built under R version 4.3.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

Practice Set 7.7

# Simulation study of the sampling distribution of the sample mean.
#
# For each sample size n and each of 10,000 trials, draw one sample of size n
# from three populations that all have mean 4:
#   Y1 ~ Uniform(0, 8)                 variance 64 / 12
#   Y2 ~ Gamma(shape = 2, rate = 1/2)  variance 8
#   Y3 ~ Poisson(4)                    variance 4
# and store the sample mean (Y*_hat). The remaining columns hold, per row:
#   AY*_hat  - normal density with mean 4 and sd sqrt(variance / n) at Y*_hat
#   FY*_hat  - empirical CDF of the simulated means within each n
#   FAY*_hat - normal CDF with the same mean and sd at Y*_hat
#
# dnorm() and pnorm() are vectorised over their arguments, so they are applied
# to whole columns instead of being called once per row through pmap_dbl().
q5df <- parameters(~n, c(5, 10, 20, 40, 80, 160)) %>%
  add_trials(10000) %>%
  # Samples are drawn population by population, so the order of the random
  # draws matches drawing all Y1 samples, then all Y2, then all Y3.
  mutate(
    Y1_n = map(n, \(x) runif(x, 0, 8)),
    Y1_hat = map_dbl(Y1_n, mean)
  ) %>%
  mutate(
    Y2_n = map(n, \(x) rgamma(x, 2, 1 / 2)),
    Y2_hat = map_dbl(Y2_n, mean)
  ) %>%
  mutate(
    Y3_n = map(n, \(x) rpois(x, 4)),
    Y3_hat = map_dbl(Y3_n, mean)
  ) %>%
  # Normal approximation: density evaluated at each simulated mean.
  mutate(
    AY1_hat = dnorm(Y1_hat, mean = 4, sd = sqrt(64 / (12 * n))),
    AY2_hat = dnorm(Y2_hat, mean = 4, sd = sqrt(8 / n)),
    AY3_hat = dnorm(Y3_hat, mean = 4, sd = sqrt(4 / n))
  ) %>%
  # Empirical CDFs must be ranked separately within each sample size.
  mutate(
    FY1_hat = cume_dist(Y1_hat),
    FY2_hat = cume_dist(Y2_hat),
    FY3_hat = cume_dist(Y3_hat),
    .by = n
  ) %>%
  # Normal approximation: CDF evaluated at each simulated mean.
  mutate(
    FAY1_hat = pnorm(Y1_hat, mean = 4, sd = sqrt(64 / (12 * n))),
    FAY2_hat = pnorm(Y2_hat, mean = 4, sd = sqrt(8 / n)),
    FAY3_hat = pnorm(Y3_hat, mean = 4, sd = sqrt(4 / n))
  )
q5df

# A tibble: 60,000 × 17
       n .trial Y1_n   Y1_hat Y2_n   Y2_hat Y3_n  Y3_hat AY1_hat AY2_hat AY3_hat
   <dbl>  <dbl> <list>  <dbl> <list>  <dbl> <lis>  <dbl>   <dbl>   <dbl>   <dbl>
 1     5      1 <dbl>    5.68 <dbl>    5.13 <int>    3.2  0.103   0.211   0.299 
 2     5      2 <dbl>    3.29 <dbl>    2.01 <int>    4    0.304   0.0910  0.446 
 3     5      3 <dbl>    3.47 <dbl>    3.01 <int>    5.4  0.338   0.232   0.131 
 4     5      4 <dbl>    3.15 <dbl>    6.07 <int>    5.8  0.275   0.0831  0.0589
 5     5      5 <dbl>    2.97 <dbl>    1.78 <int>    3.6  0.235   0.0675  0.404 
 6     5      6 <dbl>    1.81 <dbl>    3.26 <int>    4    0.0404  0.266   0.446 
 7     5      7 <dbl>    4.17 <dbl>    3.64 <int>    4.2  0.381   0.303   0.435 
 8     5      8 <dbl>    5.77 <dbl>    6.02 <int>    3.4  0.0890  0.0880  0.356 
 9     5      9 <dbl>    4.69 <dbl>    5.15 <int>    4.6  0.309   0.209   0.356 
10     5     10 <dbl>    4.60 <dbl>    4.12 <int>    3.4  0.326   0.314   0.356 
# ℹ 59,990 more rows
# ℹ 6 more variables: FY1_hat <dbl>, FY2_hat <dbl>, FY3_hat <dbl>,
#   FAY1_hat <dbl>, FAY2_hat <dbl>, FAY3_hat <dbl>

# Uniform population: density histogram of the simulated sample means with the
# normal density column (AY1_hat) drawn on top, one panel per sample size n.
ggplot(q5df, aes(x = Y1_hat)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = .05,
    center = .01,
    fill = "gold"
  ) +
  geom_line(aes(y = AY1_hat), color = "blue") +
  facet_grid(cols = vars(n), scales = "free", labeller = label_both) +
  theme_classic() +
  labs(title = "Uniform PDF")

# Gamma population: density histogram of the simulated sample means with the
# normal density column (AY2_hat) drawn on top, one panel per sample size n.
ggplot(q5df, aes(x = Y2_hat)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = .05,
    center = .01,
    fill = "gold"
  ) +
  geom_line(aes(y = AY2_hat), color = "blue") +
  facet_grid(cols = vars(n), scales = "free", labeller = label_both) +
  theme_classic() +
  labs(title = "Gamma PDF")

# Poisson population: density histogram of the simulated sample means with the
# normal density column (AY3_hat) drawn on top, one panel per sample size n.
#
# The mean of n Poisson draws can only take values k / n. A density histogram
# is comparable to the normal curve only if every bin spans at least one such
# lattice step. With a fixed bin width of 0.05 the bars are 4x (n = 5) and
# 2x (n = 10) taller than the curve, because the step (0.2, 0.1) is wider than
# the bin. The helper below widens the bins to one lattice step in those
# panels and keeps 0.05 wherever the step is already 0.05 or smaller.

# Bin width for one panel: the smallest gap between distinct observed values,
# but never narrower than `floor`.
lattice_binwidth <- function(x, floor = 0.05) {
  gaps <- diff(sort(unique(x)))
  if (length(gaps) == 0) {
    return(floor)
  }
  # Rounding removes floating-point noise such as 0.19999999999999996.
  max(floor, round(min(gaps), 8))
}

ggplot(q5df, aes(x = Y3_hat)) +
  geom_histogram(
    aes(y = after_stat(density)),
    fill = "gold",
    # A function-valued binwidth is evaluated per group, i.e. once per facet.
    binwidth = lattice_binwidth,
    center = .01
  ) +
  geom_line(aes(y = AY3_hat), color = "blue") +
  facet_grid(cols = vars(n), scales = "free", labeller = label_both) +
  theme_classic() +
  labs(title = "Poisson PMF")

# Uniform population: empirical CDF of the simulated sample means (FY1_hat)
# against the normal CDF column (FAY1_hat), one panel per sample size n.
ggplot(q5df, aes(x = Y1_hat)) +
  geom_line(aes(y = FY1_hat, color = "Simulated")) +
  geom_line(aes(y = FAY1_hat, color = "Analytic")) +
  facet_grid(cols = vars(n), scales = "free", labeller = label_both) +
  theme_classic() +
  labs(title = "Uniform CDF")

# Gamma population: empirical CDF of the simulated sample means (FY2_hat)
# against the normal CDF column (FAY2_hat), one panel per sample size n.
ggplot(q5df, aes(x = Y2_hat)) +
  geom_line(aes(y = FY2_hat, color = "Simulated")) +
  geom_line(aes(y = FAY2_hat, color = "Analytic")) +
  facet_grid(cols = vars(n), scales = "free", labeller = label_both) +
  theme_classic() +
  labs(title = "Gamma CDF")

# Poisson population: empirical CDF of the simulated sample means (FY3_hat)
# against the normal CDF column (FAY3_hat), drawn as step functions, one panel
# per sample size n.
ggplot(q5df, aes(x = Y3_hat)) +
  geom_step(aes(y = FY3_hat, color = "Simulated")) +
  geom_step(aes(y = FAY3_hat, color = "Analytic")) +
  facet_grid(cols = vars(n), scales = "free", labeller = label_both) +
  theme_classic() +
  labs(title = "Poisson CMF")

As shown in the plots above, the sampling distributions of the sample mean for all three populations approach a normal distribution, but some converge faster than others. The Uniform population converges the quickest: its sample mean is already approximately normal at \(n=5\). The Gamma population behaves similarly; it still shows a couple of peaks at \(n=5\) and \(n=10\), but is otherwise close to normal. The Poisson population, on the other hand, appears to have issues at \(n=5\) and \(n=10\) and only settles down around \(n=20\). This may be due in part to the fact that the Poisson is the only discrete distribution of the three (its sample mean can only take values that are multiples of \(1/n\)). Regardless of the population, by \(n=20\) all three sampling distributions are very close to the normal distribution.