Problem Set 5.1 - Questions 6C, 7 & 8

Question 6C:

# F(3,4)
ppois(3,4)
[1] 0.4334701
# F(4,2)
sum(dpois(0:4, lambda = 2) * pbinom(2, size = 0:4, prob = 0.5))
[1] 0.8965963

Question 7:

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate
#Q7 - setup
set.seed(123)
coin <- c('H','T')

one_game <- \() sample(coin, size = 3, replace = TRUE)
one_game()
[1] "H" "H" "H"
N <- 10000
(many_games <- replicate(N, one_game(), .as = flips)
  %>% mutate(
    X = map_int(flips, \(f) sum(f == 'H')),
    Y = map_int(flips, \(f) case_when(
      f[1] == 'H' ~ 1L,
      f[2] == 'H' ~ 2L,
      f[3] == 'H' ~ 3L,
      TRUE ~ -1L
    ))
  )
) %>% head
# A tibble: 6 × 4
  .trial flips         X     Y
   <dbl> <list>    <int> <int>
1      1 <chr [3]>     1     2
2      2 <chr [3]>     1     3
3      3 <chr [3]>     1     1
4      4 <chr [3]>     1     2
5      5 <chr [3]>     2     1
6      6 <chr [3]>     3     1

A) Joint PMF Table

(many_games
  %>% summarize(cnt = n(), .by = c(X, Y))
  %>% mutate(prob = cnt / N)
  %>% arrange(X, Y)
  %>% select(X, Y, prob)
  %>% pivot_wider(names_from = Y, values_from = prob, values_fill = 0)
)
# A tibble: 4 × 5
      X  `-1`   `1`   `2`   `3`
  <int> <dbl> <dbl> <dbl> <dbl>
1     0 0.124 0     0     0    
2     1 0     0.124 0.125 0.120
3     2 0     0.254 0.128 0    
4     3 0     0.125 0     0    

B) Plot of Jittered (X,Y) pairs

(many_games
  %>% summarize(cnt = n(),
                .by = c(X,Y))
  %>% mutate(joint_prob = cnt/N,
             prob_X_given_Y = cnt/sum(cnt), .by = Y)
)
# A tibble: 7 × 5
      X     Y   cnt joint_prob prob_X_given_Y
  <int> <int> <int>      <dbl>          <dbl>
1     1     2  1248      0.125          0.494
2     1     3  1197      0.120          1    
3     1     1  1244      0.124          0.247
4     2     1  2544      0.254          0.505
5     3     1  1247      0.125          0.248
6     2     2  1277      0.128          0.506
7     0    -1  1243      0.124          1    
ggplot(data = many_games) +
  geom_jitter(aes(x = X, y = Y), width=.1, height=.3, size = .2, alpha = .8) +
  scale_x_continuous(breaks = 0:3) +
  scale_y_continuous(breaks = c(-1,1,2,3)) +
  theme_classic(base_size = 20)

C) Find sim Px|y (X | 1)

(many_games
  %>% filter(Y == 1)
  %>% summarize(cnt = n(), .by = X)
  %>% mutate(prob_X_given_Y1 = cnt / sum(cnt))
)
# A tibble: 3 × 3
      X   cnt prob_X_given_Y1
  <int> <int>           <dbl>
1     1  1244           0.247
2     2  2544           0.505
3     3  1247           0.248
#matches my answer

Question 8:

executives <- c(rep('M', 4), rep('N', 3), rep('D', 2))

one_selection <- \() sample(executives, size = 3, replace = FALSE)
one_selection()
[1] "N" "M" "D"
N <- 10000
(many_selections <- replicate(N, one_selection(), .as = selected)
  %>% mutate(
    X = map_int(selected, \(s) sum(s == 'M')),
    Y = map_int(selected, \(s) sum(s == 'N'))
  )
) %>% head
# A tibble: 6 × 4
  .trial selected      X     Y
   <dbl> <list>    <int> <int>
1      1 <chr [3]>     1     2
2      2 <chr [3]>     0     2
3      3 <chr [3]>     0     2
4      4 <chr [3]>     2     1
5      5 <chr [3]>     1     1
6      6 <chr [3]>     2     0

A) Joint pmf Table

(many_selections
  %>% summarize(cnt = n(), .by = c(X, Y))
  %>% mutate(prob = cnt / N)
  %>% arrange(X, Y)
  %>% select(X, Y, prob)
  %>% pivot_wider(names_from = Y, values_from = prob, values_fill = 0)
)
# A tibble: 4 × 5
      X    `1`    `2`    `3`    `0`
  <int>  <dbl>  <dbl>  <dbl>  <dbl>
1     0 0.0326 0.0757 0.0122 0     
2     1 0.279  0.142  0      0.0472
3     2 0.223  0      0      0.139 
4     3 0      0      0      0.05  

B) Plot of Jittered (X,Y) pairs

(many_selections
  %>% summarize(cnt = n(),
                .by = c(X, Y))
  %>% mutate(joint_prob = cnt/N,
             prob_X_given_Y = cnt/sum(cnt), .by = Y)
)
# A tibble: 9 × 5
      X     Y   cnt joint_prob prob_X_given_Y
  <int> <int> <int>      <dbl>          <dbl>
1     1     2  1418     0.142          0.652 
2     0     2   757     0.0757         0.348 
3     2     1  2228     0.223          0.417 
4     1     1  2787     0.279          0.522 
5     2     0  1390     0.139          0.588 
6     1     0   472     0.0472         0.200 
7     3     0   500     0.05           0.212 
8     0     1   326     0.0326         0.0610
9     0     3   122     0.0122         1     
ggplot(data = many_selections) +
  geom_jitter(aes(x = X, y = Y), width=.1, height=.3, size = .2, alpha = .8) +
  scale_x_continuous(breaks = 0:3) +
  scale_y_continuous(breaks = 0:3) +
  theme_classic(base_size = 20)

C) Find sim Px|y (X | 2)

(many_selections
  %>% filter(Y == 2)
  %>% summarize(cnt = n(), .by = X)
  %>% mutate(prob_X_given_Y2 = cnt / sum(cnt))
)
# A tibble: 2 × 3
      X   cnt prob_X_given_Y2
  <int> <int>           <dbl>
1     1  1418           0.652
2     0   757           0.348