5.1 bivariate discrete

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

##Question 6b

Question 7

using question 1 ..

library(tidyverse)
library(purrrfect)

set.seed(123)
coin <- c("H", "T")

one_round <- \() sample(coin, size = 3, replace = TRUE)
one_round()

[1] "H" "H" "H"

N <- 10000
(many_rounds <- replicate(N, one_round(), .as = flips)
  %>% mutate(X = map_int(flips, \(f) sum(f == 'H')),
             Y = map_int(flips, \(f) case_when(
               f[1] == 'H'~ 1L,
               f[2] == 'H'~ 2L,
               f[3] == 'H'~ 3L,
               TRUE ~ -1L
             )))) %>% head

# A tibble: 6 × 4
  .trial flips         X     Y
   <dbl> <list>    <int> <int>
1      1 <chr [3]>     1     2
2      2 <chr [3]>     1     3
3      3 <chr [3]>     1     1
4      4 <chr [3]>     1     2
5      5 <chr [3]>     2     1
6      6 <chr [3]>     3     1

#7A
(many_rounds
  %>% summarize(cnt = n(), .by = c(X,Y))
  %>% mutate(prob = cnt / N)
  %>% select(X, Y, prob)
  %>% pivot_wider(names_from = Y, values_from = prob, values_fill = 0)) %>% head

# A tibble: 4 × 5
      X   `2`   `3`   `1`  `-1`
  <int> <dbl> <dbl> <dbl> <dbl>
1     1 0.125 0.120 0.124 0    
2     2 0.128 0     0.254 0    
3     3 0     0     0.125 0    
4     0 0     0     0     0.124

#7B
(many_rounds %>% summarize(cnt = n(),
                           .by = c(X, Y))
  %>% mutate(joint_prob = cnt/N,
             probX_givenY = cnt/sum(cnt), .by = Y)) %>% head

# A tibble: 6 × 5
      X     Y   cnt joint_prob probX_givenY
  <int> <int> <int>      <dbl>        <dbl>
1     1     2  1248      0.125        0.494
2     1     3  1197      0.120        1    
3     1     1  1244      0.124        0.247
4     2     1  2544      0.254        0.505
5     3     1  1247      0.125        0.248
6     2     2  1277      0.128        0.506

ggplot(data = many_rounds) + 
  geom_jitter(aes(x=X, y=Y), width=.1, height=.3, size =.2, alpha = .8) +
  scale_x_continuous(breaks = 0:3) +
  scale_y_continuous(breaks = c(-1, 1, 2, 3)) +
  theme_classic(base_size = 20)

#7C
(many_rounds 
%>% filter(Y==1)
%>% summarize(cnt=n(), .by = X)
%>% mutate(probX_given_Y1 = cnt / sum(cnt)))

# A tibble: 3 × 3
      X   cnt probX_given_Y1
  <int> <int>          <dbl>
1     1  1244          0.247
2     2  2544          0.505
3     3  1247          0.248

Question 8

execs <- c(rep('M', 4), rep('N', 3), rep('D', 2))

one_group <- \() sample(execs, size = 3, replace= FALSE)
one_group()

[1] "N" "M" "D"

N <- 10000
(many_groups <- replicate(N, one_group(), .as = selected)
  %>% mutate(X = map_int(selected, \(s) sum(s == 'M')),
             Y = map_int(selected, \(s) sum(s == 'N')))
)

# A tibble: 10,000 × 4
   .trial selected      X     Y
    <dbl> <list>    <int> <int>
 1      1 <chr [3]>     1     2
 2      2 <chr [3]>     0     2
 3      3 <chr [3]>     0     2
 4      4 <chr [3]>     2     1
 5      5 <chr [3]>     1     1
 6      6 <chr [3]>     2     0
 7      7 <chr [3]>     2     0
 8      8 <chr [3]>     1     2
 9      9 <chr [3]>     1     0
10     10 <chr [3]>     1     1
# ℹ 9,990 more rows

#8A
(many_groups
  %>% summarize(cnt = n(), .by = c(X,Y))
  %>% mutate(prob = cnt / N)
  %>% select(X, Y, prob)
  %>% pivot_wider(names_from = Y, values_from = prob, values_fill = 0))

# A tibble: 4 × 5
      X    `2`    `1`    `0`    `3`
  <int>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 0.142  0.279  0.0472 0     
2     0 0.0757 0.0326 0      0.0122
3     2 0      0.223  0.139  0     
4     3 0      0      0.05   0

#8B
N <- 10000

many_groups %>% 
  summarize(cnt = n(), .by = c("X", "Y")) %>% 
  mutate(jointprob = cnt / N,
         probX_givenY = cnt / sum(cnt), .by = "Y")

# A tibble: 9 × 5
      X     Y   cnt jointprob probX_givenY
  <int> <int> <int>     <dbl>        <dbl>
1     1     2  1418    0.142        0.652 
2     0     2   757    0.0757       0.348 
3     2     1  2228    0.223        0.417 
4     1     1  2787    0.279        0.522 
5     2     0  1390    0.139        0.588 
6     1     0   472    0.0472       0.200 
7     3     0   500    0.05         0.212 
8     0     1   326    0.0326       0.0610
9     0     3   122    0.0122       1

ggplot(data = many_groups) + 
  geom_jitter(aes(x=X, y=Y), width=.1, height=.3, size =.2, alpha = .8) +
  scale_x_continuous(breaks = 0:3) +
  scale_y_continuous(breaks = c(-1, 1, 2, 3)) +
  theme_classic(base_size = 20)

#8C
(many_groups
  %>% filter(Y==2)
%>% summarize(cnt=n(), .by = X)
%>% mutate(probX_given_Y2 = cnt / sum(cnt)))

# A tibble: 2 × 3
      X   cnt probX_given_Y2
  <int> <int>          <dbl>
1     1  1418          0.652
2     0   757          0.348

#6
ppois(3, lambda=2)

[1] 0.8571235

x <- 0:4
y <- 0:4
sum(dpois(x, 2) * dbinom(y, x, prob = 0.5))

[1] 0.3665331