7.1

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.4.3

Warning: package 'tidyr' was built under R version 4.4.3

Warning: package 'readr' was built under R version 4.4.3

Warning: package 'purrr' was built under R version 4.4.3

Warning: package 'dplyr' was built under R version 4.4.3

Warning: package 'stringr' was built under R version 4.4.3

Warning: package 'forcats' was built under R version 4.4.3

Warning: package 'lubridate' was built under R version 4.4.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

roll_dice_mean <- function(n){
  mean(sample(1:6, size = n, replace = TRUE))
}

(dice_means <- parameters(~n, 
                          c(20,40,80,160)
                          )
  %>% add_trials(10000) 
   %>% mutate(y_sample = map(n, ~ sample(1:6, size = .x, replace = TRUE)))
  %>% mutate(f_ybar = map_dbl(y_sample, mean))
  )

# A tibble: 40,000 × 4
       n .trial y_sample   f_ybar
   <dbl>  <dbl> <list>      <dbl>
 1    20      1 <int [20]>   3.65
 2    20      2 <int [20]>   3.15
 3    20      3 <int [20]>   3.05
 4    20      4 <int [20]>   2.8 
 5    20      5 <int [20]>   3.4 
 6    20      6 <int [20]>   3.5 
 7    20      7 <int [20]>   2.9 
 8    20      8 <int [20]>   3   
 9    20      9 <int [20]>   3.6 
10    20     10 <int [20]>   3.4 
# ℹ 39,990 more rows

ggplot(data = dice_means)+
  geom_histogram(aes(x = y_sample, y = after_stat(density)),
            fill = "goldenrod",
            center = 0.1, binwidth = 0.05)+
  geom_line(
  stat = "function",
  aes(y = after_stat(dnorm(x, mean = 3.5, sd = sqrt(35 / (12 * n))))),
  color = "cornflowerblue",
  linewidth = 1
)+
  facet_grid(n ~ ., labeller = label_both)+
  theme_classic(base_size = 16)+
  labs(
    title = " Y bar and n",
    x = expression(bar(Y)),
    y = "Density"
  )

Warning: Computation failed in `stat_bin()`.
Caused by error in `scales[[x]]$dimension()`:
! attempt to apply non-function

Warning: Computation failed in `stat_bin()`.
Computation failed in `stat_bin()`.
Computation failed in `stat_bin()`.
Caused by error in `scales[[x]]$dimension()`:
! attempt to apply non-function

Warning: Computation failed in `stat_function()`.
Computation failed in `stat_function()`.
Computation failed in `stat_function()`.
Computation failed in `stat_function()`.
Caused by error in `compute_group()`:
! argument "fun" is missing, with no default

# Each y hat and n have different centers and variabilty based on the differnt points from the n and y hat vaules. 
  dice_means %>%
  group_by(n) %>%
  summarise(
    E_Ybar_hat = mean(f_ybar),
    Var_Ybar_hat = var(f_ybar),
    E_Ybar_theory = 3.5,
    Var_Ybar_theory = 35 / (12 * first(n))
  )

# A tibble: 4 × 5
      n E_Ybar_hat Var_Ybar_hat E_Ybar_theory Var_Ybar_theory
  <dbl>      <dbl>        <dbl>         <dbl>           <dbl>
1    20       3.49       0.146            3.5          0.146 
2    40       3.50       0.0728           3.5          0.0729
3    80       3.50       0.0367           3.5          0.0365
4   160       3.50       0.0178           3.5          0.0182

You can add options to executable code like this

library(tidyverse)
library(purrrfect)

(sum_of_POI <- parameters(~n, ~lambda,
                          c(2,5,10), c(0.5, 1.0, 1.5)                      )
  %>% add_trials(10000)
  %>% mutate(y_sample = pmap(list(n, lambda), .f= \(n, l)rpois(n, lambda)))
  %>% mutate(u = map_dbl(y_sample, .f = sum))
  %>% mutate(f_U = dpois(n, lambda, log = FALSE))
  )

# A tibble: 90,000 × 6
       n lambda .trial y_sample      u    f_U
   <dbl>  <dbl>  <dbl> <list>    <dbl>  <dbl>
 1     2    0.5      1 <int [2]>     3 0.0758
 2     2    0.5      2 <int [2]>     2 0.0758
 3     2    0.5      3 <int [2]>     2 0.0758
 4     2    0.5      4 <int [2]>     2 0.0758
 5     2    0.5      5 <int [2]>     1 0.0758
 6     2    0.5      6 <int [2]>     0 0.0758
 7     2    0.5      7 <int [2]>     0 0.0758
 8     2    0.5      8 <int [2]>     1 0.0758
 9     2    0.5      9 <int [2]>     0 0.0758
10     2    0.5     10 <int [2]>     2 0.0758
# ℹ 89,990 more rows

ggplot(data = sum_of_POI)+
  geom_histogram(aes(x = u, y = after_stat(density)),
fill = 'goldenrod',
center = 0.1, binwidh = .2)+
    geom_line(aes(x = u, y = f_U), col='cornflowerblue')+
    facet_grid(n~lambda, labeller = label_both)+
    theme_classic(base_size = 16)+
    labs(title='Simulated and analytic densities for sum of Poision')

Warning in geom_histogram(aes(x = u, y = after_stat(density)), fill =
"goldenrod", : Ignoring unknown parameters: `binwidh`

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.