here’s the groundwork for 7 (simulating a trial and replicating 10000 times)!
Attaching package: 'purrrfect'
The following objects are masked from 'package:base':
replicate, tabulate
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
N <- 10000
(coin_sim <- replicate_int (N, 0 , .as = tmp) %>%
mutate (t1 = rbinom (n (), 1 , .5 ),
t2 = rbinom (n (), 1 , .5 ),
t3 = rbinom (n (), 1 , .5 ),
X = t1 + t2 + t3,
Y = if_else (t1== 1 , 1 L,
if_else (t2== 1 , 2 L,
if_else (t3== 1 , 3 L, - 1 L)))) %>%
select (.trial, X, Y))
# A tibble: 10,000 × 3
.trial X Y
<dbl> <int> <int>
1 1 1 3
2 2 2 1
3 3 2 1
4 4 1 1
5 5 1 2
6 6 0 -1
7 7 1 2
8 8 1 2
9 9 3 1
10 10 1 2
# ℹ 9,990 more rows
7a.
(coin_joint <- coin_sim %>%
summarize (cnt = n (), .by = c (X, Y)) %>%
mutate (p_XY = cnt / N) %>%
arrange (X, Y) %>%
pivot_wider (names_from = Y, values_from = p_XY,
names_prefix = "Y=" , id_cols = X))
# A tibble: 4 × 5
X `Y=-1` `Y=1` `Y=2` `Y=3`
<int> <dbl> <dbl> <dbl> <dbl>
1 0 0.127 NA NA NA
2 1 NA 0.126 0.122 0.122
3 2 NA 0.251 0.129 NA
4 3 NA 0.123 NA NA
7b.
ggplot (coin_sim) +
geom_jitter (aes (x = X, y = Y), width = .1 , height = .2 , size = .4 , alpha = .8 ) +
scale_x_continuous (breaks = 0 : 3 ) +
scale_y_continuous (breaks = c (- 1 ,1 ,2 ,3 )) +
theme_classic (base_size = 20 )
7c.
(coin_pX_given_Y1 <- coin_sim %>%
filter (Y == 1 ) %>%
summarize (cnt = n (), .by = X) %>%
mutate (p = cnt / sum (cnt)) %>%
arrange (X))
# A tibble: 3 × 3
X cnt p
<int> <int> <dbl>
1 1 1264 0.253
2 2 2506 0.501
3 3 1232 0.246
and the same basework for 8
library (purrrfect)
library (tidyverse)
N <- 10000
pop <- c (rep ("M" , 4 ), rep ("N" , 3 ), rep ("D" , 2 ))
one_pick <- \() sample (pop, 3 , replace = FALSE )
(execs_sim <- replicate_int (N, 0 , .as = tmp) %>%
mutate (sel = map (seq_len (n ()), \(i) one_pick ()),
X = map_int (sel, \(s) sum (s == "M" )),
Y = map_int (sel, \(s) sum (s == "N" ))) %>%
select (.trial, X, Y))
# A tibble: 10,000 × 3
.trial X Y
<dbl> <int> <int>
1 1 1 2
2 2 2 1
3 3 2 0
4 4 2 1
5 5 0 1
6 6 2 1
7 7 0 3
8 8 0 2
9 9 2 0
10 10 1 1
# ℹ 9,990 more rows
8a.
(execs_joint <- execs_sim %>%
summarize (cnt = n (), .by = c (X, Y)) %>%
mutate (p_XY = cnt / N) %>%
arrange (X, Y) %>%
pivot_wider (names_from = Y, values_from = p_XY,
names_prefix = "Y=" , id_cols = X))
# A tibble: 4 × 5
X `Y=1` `Y=2` `Y=3` `Y=0`
<int> <dbl> <dbl> <dbl> <dbl>
1 0 0.0353 0.074 0.0114 NA
2 1 0.286 0.140 NA 0.0493
3 2 0.214 NA NA 0.145
4 3 NA NA NA 0.0452
8b.
ggplot (execs_sim) +
geom_jitter (aes (x = X, y = Y), width = .1 , height = .2 , size = .4 , alpha = .8 ) +
scale_x_continuous (breaks = 0 : 3 ) +
scale_y_continuous (breaks = 0 : 3 ) +
theme_classic (base_size = 20 )
8c.
(execs_pX_given_Y2 <- execs_sim %>%
filter (Y == 2 ) %>%
summarize (cnt = n (), .by = X) %>%
mutate (p = cnt / sum (cnt)) %>%
arrange (X))
# A tibble: 2 × 3
X cnt p
<int> <int> <dbl>
1 0 740 0.345
2 1 1405 0.655