PS 7.1

Question 3

pnorm(20000, mean= 19400, sd= 1000, lower.tail = FALSE)
[1] 0.2742531

Question 5

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate
(sum_of_rolls <- parameters(~n, c(20, 40, 80, 160)) %>% 
  add_trials(10000) %>% 
  mutate(y_sample = map(n, .f = \(n) sample(x = 1:6, size = n, replace = TRUE))) %>% 
  mutate(Sn = map_dbl(y_sample, sum))
)
# A tibble: 40,000 × 4
       n .trial y_sample      Sn
   <dbl>  <dbl> <list>     <dbl>
 1    20      1 <int [20]>    53
 2    20      2 <int [20]>    78
 3    20      3 <int [20]>    78
 4    20      4 <int [20]>    81
 5    20      5 <int [20]>    72
 6    20      6 <int [20]>    64
 7    20      7 <int [20]>    75
 8    20      8 <int [20]>    70
 9    20      9 <int [20]>    88
10    20     10 <int [20]>    71
# ℹ 39,990 more rows
# Graph Ybar for each n
ggplot(data = sum_of_rolls) +
  geom_histogram(aes(x = Sn, y = after_stat(density)),
                 fill = 'goldenrod', color = 'black', bins = 30) +
  facet_wrap(~n, labeller = label_both, scales = 'free') +
  theme_classic(base_size = 16) +
  labs(title = "Sampling Distributions of Sn",
       x = "Sn",
       y = "Density")

# Summarize over each n
(stats <- sum_of_rolls %>% 
  group_by(n) %>% 
  summarize(
    E_Ybar_Sim = mean(Sn / n),
    Var_Ybar_Sim = var(Sn / n),       
    Var_Ybar_Analytic = 35 / (12 * n) 
  ) %>%
  distinct(n, .keep_all = TRUE)
  )
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
dplyr 1.1.0.
ℹ Please use `reframe()` instead.
ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
  always returns an ungrouped data frame and adjust accordingly.
`summarise()` has grouped output by 'n'. You can override using the `.groups`
argument.
# A tibble: 4 × 4
# Groups:   n [4]
      n E_Ybar_Sim Var_Ybar_Sim Var_Ybar_Analytic
  <dbl>      <dbl>        <dbl>             <dbl>
1    20       3.50       0.147             0.146 
2    40       3.50       0.0725            0.0729
3    80       3.50       0.0372            0.0365
4   160       3.50       0.0178            0.0182

We can use map() because only n is varying. pmap is for multiple lists, map is for a single list.

Plots: The plots show that the higher the n, the lower the variance gets. Which makes sense since Var is calculated with n in the denominator, so n increases Var decreases.

E(Ybar) and Var(Ybar) well approximate the theoretical values.

Question 6

(sum_poi <- parameters(~n, ~lambda,
                           c(2,5,10), c(0.5,1.0,1.5)
                           ) %>% 
  add_trials(10000) %>% 
  mutate(y_sample = pmap(list(n,lambda), .f = \(n, l)rpois(n,lambda = l))
  ) %>% 
  mutate(u = map_dbl(y_sample, .f = sum)) %>% 
  mutate(f_U = ppois(u, lambda = n*lambda))
)
# A tibble: 90,000 × 6
       n lambda .trial y_sample      u   f_U
   <dbl>  <dbl>  <dbl> <list>    <dbl> <dbl>
 1     2    0.5      1 <int [2]>     1 0.736
 2     2    0.5      2 <int [2]>     1 0.736
 3     2    0.5      3 <int [2]>     1 0.736
 4     2    0.5      4 <int [2]>     1 0.736
 5     2    0.5      5 <int [2]>     0 0.368
 6     2    0.5      6 <int [2]>     2 0.920
 7     2    0.5      7 <int [2]>     1 0.736
 8     2    0.5      8 <int [2]>     0 0.368
 9     2    0.5      9 <int [2]>     2 0.920
10     2    0.5     10 <int [2]>     2 0.920
# ℹ 89,990 more rows
# Sim vs analytic CDF for each
ggplot(data = sum_poi) +
  stat_ecdf(aes(x = u), col = 'blue', linewidth = 1) +
  geom_step(aes(x = u, y = f_U), col = 'brown', linetype = "dashed") +
  facet_grid(n ~ lambda, labeller = label_both) +
  theme_classic(base_size = 16) +
  labs(y = "Cumulative Probability", x = "u")