Question 6

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

(Question6 <- parameters(~x, ~y,
                         c(0:4), c(0:2))
  %>% mutate('P(x,y)' = dpois(x,2) * dbinom(y,x,0.5))
)

# A tibble: 15 × 3
       x     y `P(x,y)`
   <int> <int>    <dbl>
 1     0     0  0.135  
 2     0     1  0      
 3     0     2  0      
 4     1     0  0.135  
 5     1     1  0.135  
 6     1     2  0      
 7     2     0  0.0677 
 8     2     1  0.135  
 9     2     2  0.0677 
10     3     0  0.0226 
11     3     1  0.0677 
12     3     2  0.0677 
13     4     0  0.00564
14     4     1  0.0226 
15     4     2  0.0338

sum(Question6$'P(x,y)')

[1] 0.8965963

Question 7

library(tidyverse)
library(purrrfect)

N <- 10000

coin <- c('H', 'T')
one_omega <- \() sample(coin, size = 3, replace = TRUE)
(many_omegas <- replicate(N, one_omega(), .as = flips)
  %>% mutate(X = map_int(flips, \(x) sum(x=='H')))
  %>% mutate(Y = map_int(flips, \(x) {
    if (!"H" %in% x) {
      -1
    } else if (x[1] == 'H') {
      1
    } else if (x[2] == 'H') {
      2
    } else {
      3
    }
    }))
)

# A tibble: 10,000 × 4
   .trial flips         X     Y
    <dbl> <list>    <int> <int>
 1      1 <chr [3]>     1     1
 2      2 <chr [3]>     2     1
 3      3 <chr [3]>     3     1
 4      4 <chr [3]>     2     1
 5      5 <chr [3]>     2     1
 6      6 <chr [3]>     0    -1
 7      7 <chr [3]>     1     2
 8      8 <chr [3]>     0    -1
 9      9 <chr [3]>     2     1
10     10 <chr [3]>     1     3
# ℹ 9,990 more rows

  (many_omegas 
    %>% summarize(cnt = n(), 
               .by = c(X,Y))
    %>% mutate(joint_prob = cnt/N,
            prob_X_given_Y = cnt/sum(cnt), .by = Y)
    %>% pivot_wider(names_from = X,
                 values_from = prob_X_given_Y,
                 names_prefix = 'x=',
                 id_cols = Y)
  )

# A tibble: 4 × 5
      Y  `x=1`  `x=2`  `x=3` `x=0`
  <int>  <dbl>  <dbl>  <dbl> <dbl>
1     1  0.263  0.493  0.244    NA
2    -1 NA     NA     NA         1
3     2  0.503  0.497 NA        NA
4     3  1     NA     NA        NA

(ggplot(data = many_omegas)
+ geom_jitter(aes(x = X, y= Y), width = 0.1, height = 0.3, size = 0.4, alpha = 0.8)
+ scale_x_continuous(breaks = 0:3)
+ scale_y_continuous(breaks = -1:3)
+ theme_classic(base_size = 20)
)

(many_omegas_update <- many_omegas
 %>% summarize(cnt = n(),
               .by = c(X,Y))
 %>% mutate(joint_prob = cnt/N,
            prob_X_given_Y = cnt/sum(cnt), .by = Y)
 %>% filter(Y == 1)
 )

# A tibble: 3 × 5
      X     Y   cnt joint_prob prob_X_given_Y
  <int> <int> <int>      <dbl>          <dbl>
1     1     1  1301      0.130          0.263
2     2     1  2445      0.244          0.493
3     3     1  1210      0.121          0.244

#Question 8

library(tidyverse)
library(purrrfect)

N <- 10000

executives <- rep(c('Married', 'Never Married', 'Divorced'), times = c(4,3,2))
one_sample <- \() sample(executives, size = 3, replace = FALSE)
(many_samples <- replicate(N, one_sample(), .as = employees)
  %>% mutate(X = map_int(employees, \(x) sum(x == 'Married')))
  %>% mutate(Y = map_int(employees, \(x) sum(x == 'Never Married')))
)

# A tibble: 10,000 × 4
   .trial employees     X     Y
    <dbl> <list>    <int> <int>
 1      1 <chr [3]>     2     1
 2      2 <chr [3]>     2     1
 3      3 <chr [3]>     0     1
 4      4 <chr [3]>     1     1
 5      5 <chr [3]>     2     1
 6      6 <chr [3]>     1     1
 7      7 <chr [3]>     3     0
 8      8 <chr [3]>     1     2
 9      9 <chr [3]>     1     2
10     10 <chr [3]>     2     1
# ℹ 9,990 more rows

(many_sample_updated <- many_samples
  %>% summarize(cnt = n(), 
               .by = c(X,Y))
  %>% mutate(joint_prob = cnt/N,
            prob_X_given_Y = cnt/sum(cnt), .by = Y)
  %>% pivot_wider(names_from = X,
                 values_from = prob_X_given_Y,
                 names_prefix = 'x=',
                 id_cols = Y)
  )

# A tibble: 4 × 5
      Y  `x=2`   `x=0`  `x=1`  `x=3`
  <int>  <dbl>   <dbl>  <dbl>  <dbl>
1     1  0.399  0.0684  0.533 NA    
2     0  0.603 NA       0.194  0.202
3     2 NA      0.350   0.650 NA    
4     3 NA      1      NA     NA

(ggplot(data = many_samples)
+ geom_jitter(aes(x = X, y= Y), width = 0.1, height = 0.3, size = 0.4, alpha = 0.8)
+ scale_x_continuous(breaks = 0:3)
+ scale_y_continuous(breaks = 0:3)
+ theme_classic(base_size = 20)
)

(many_samples
 %>% summarize(cnt = n(),
               .by = c(X,Y))
 %>% mutate(joint_prob = cnt/N,
            prob_X_given_Y = cnt/sum(cnt), .by = Y)
   %>% filter(Y == 2)
)

# A tibble: 2 × 5
      X     Y   cnt joint_prob prob_X_given_Y
  <int> <int> <int>      <dbl>          <dbl>
1     1     2  1358      0.136          0.650
2     0     2   730      0.073          0.350