7.1 Practice Set

library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.3.3
Warning: package 'tibble' was built under R version 4.3.3
Warning: package 'tidyr' was built under R version 4.3.3
Warning: package 'readr' was built under R version 4.3.3
Warning: package 'purrr' was built under R version 4.3.3
Warning: package 'dplyr' was built under R version 4.3.3
Warning: package 'stringr' was built under R version 4.3.3
Warning: package 'forcats' was built under R version 4.3.3
Warning: package 'lubridate' was built under R version 4.3.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

Practice Set 7.1

Problem 5

A

Creating Function and DataFrame

# Simulate `n` independent rolls of a fair six-sided die.
# Returns an integer vector of length `n` whose entries are drawn
# (with replacement) from the faces 1 through 6.
n_rolls <- function(n) {
  die_faces <- seq_len(6)
  sample(die_faces, size = n, replace = TRUE)
}

# Simulation table for Problem 5: 10,000 trials for each sample size n.
#   Y     - list-column holding the n simulated die rolls for that trial
#   Y_hat - sample mean of those rolls (sum divided by n)
p5df <- parameters(~n, c(20, 40, 80, 160)) %>%
  add_trials(10000) %>%
  mutate(Y = map(n, n_rolls)) %>%
  mutate(Y_hat = map2_dbl(Y, n, \(rolls, size) sum(rolls) / size))
p5df
# A tibble: 40,000 × 4
       n .trial Y          Y_hat
   <dbl>  <dbl> <list>     <dbl>
 1    20      1 <int [20]>  3.75
 2    20      2 <int [20]>  3.7 
 3    20      3 <int [20]>  3.25
 4    20      4 <int [20]>  3.9 
 5    20      5 <int [20]>  3.6 
 6    20      6 <int [20]>  2.95
 7    20      7 <int [20]>  3.85
 8    20      8 <int [20]>  3.7 
 9    20      9 <int [20]>  3.95
10    20     10 <int [20]>  2.8 
# ℹ 39,990 more rows

Plotting the Functions

# Density-scaled histograms of the simulated sample means, one panel per n.
p5df %>%
  ggplot(aes(x = Y_hat, y = after_stat(density))) +
  geom_histogram(fill = "blue", binwidth = .05, center = .05) +
  facet_grid(cols = vars(n), labeller = label_both, scales = "free") +
  theme_classic()

The center (mean) of each distribution appears to be around \(3.5\) for all \(n\), but the variance is much higher for smaller values of \(n\) and decreases as \(n\) increases.

B

# Simulated mean and variance of the sample mean, computed separately
# for each sample size n.
p5df %>%
  summarize(
    "E(Y_hat)" = mean(Y_hat),
    "Var(Y_hat)" = var(Y_hat),
    .by = n
  )
# A tibble: 4 × 3
      n `E(Y_hat)` `Var(Y_hat)`
  <dbl>      <dbl>        <dbl>
1    20       3.50       0.145 
2    40       3.49       0.0739
3    80       3.50       0.0359
4   160       3.50       0.0185

Analytic mean \(E(\overline{Y}) = E(Y)=\frac{1}{6}(1+2+3+4+5+6) =\frac{7}{2} = 3.5\),

Analytic variance \(Var(\overline{Y}) = \frac{1}{n}\left(\frac{1}{6}(1+4+9+16+25+36)-\frac{49}{4}\right)=\frac{35}{12n}\)

We can see that all of the simulated expectations are around \(3.5\), and for the variances we can compare against the analytic value at each \(n\).

\(\frac{35}{12(20)} = .14583\), \(\frac{35}{12(40)} = .072916\), \(\frac{35}{12(80)} = .0364583\), \(\frac{35}{12(160)} = .018229\)

And we can see that the simulated variances are all very close to their analytic counterparts.

Problem 6

Creating the dataset

# Simulation table for Problem 6: 10,000 trials for every combination of
# n in {2, 5, 10} and lambda in {0.5, 1, 1.5}.
#   samp - list-column with n Poisson(lambda) draws
#   sn   - sum of the draws in samp
#   Fhat - empirical CDF of sn, computed within each (n, lambda) group
#   F    - Poisson CDF with rate n * lambda, evaluated at sn
poisum <- parameters(~n, ~lambda, c(2, 5, 10), c(.5, 1, 1.5)) %>%
  add_trials(10000) %>%
  mutate(
    samp = map2(n, lambda, \(size, rate) rpois(size, rate)),
    sn = map_dbl(samp, sum)
  ) %>%
  mutate(Fhat = cume_dist(sn), .by = c(n, lambda)) %>%
  mutate(F = ppois(sn, n * lambda))
poisum
# A tibble: 90,000 × 7
       n lambda .trial samp         sn  Fhat     F
   <dbl>  <dbl>  <dbl> <list>    <dbl> <dbl> <dbl>
 1     2    0.5      1 <int [2]>     0 0.365 0.368
 2     2    0.5      2 <int [2]>     1 0.737 0.736
 3     2    0.5      3 <int [2]>     0 0.365 0.368
 4     2    0.5      4 <int [2]>     1 0.737 0.736
 5     2    0.5      5 <int [2]>     0 0.365 0.368
 6     2    0.5      6 <int [2]>     2 0.922 0.920
 7     2    0.5      7 <int [2]>     1 0.737 0.736
 8     2    0.5      8 <int [2]>     1 0.737 0.736
 9     2    0.5      9 <int [2]>     1 0.737 0.736
10     2    0.5     10 <int [2]>     2 0.922 0.920
# ℹ 89,990 more rows

Plotting Simulated versus Analytic CDFs

# Overlay the analytic CDF (column F) and the empirical CDF (column Fhat)
# of the simulated sums, with one panel per (n, lambda) combination.
ggplot(data = poisum, aes(x = sn)) +
  geom_step(aes(y = F, colour = "Analytic CDF")) +
  geom_step(aes(y = Fhat, colour = "Empirical CDF")) +
  # `scales` is spelled out in full; the abbreviated `scale` relied on
  # R's partial argument matching.
  facet_grid(n ~ lambda, labeller = label_both, scales = "free_y") +
  theme_classic()