Problem 5
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.6
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.1 ✔ tibble 3.3.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'purrrfect'
The following objects are masked from 'package:base':
replicate, tabulate
# Number of simulated samples drawn for each (n, sigma) combination.
N <- 10000

# For every trial, draw n observations from a normal distribution with mean 0
# and standard deviation sigma, then record the sample mean (ybar), the sample
# variance (S2), and the scaled variance X = (n - 1) * S2 / sigma^2.
many_normal_samples <- parameters(
  ~n, ~sigma,
  c(4, 8, 15),
  c(1, 2, 3)
) |>
  add_trials(N) |>
  mutate(
    ysample = map2(n, sigma, \(size, spread) rnorm(size, mean = 0, sd = spread)),
    ybar = map_dbl(ysample, mean),
    S2 = map_dbl(ysample, var),
    X = (n - 1) * S2 / sigma^2
  )

# Show the simulated tibble.
many_normal_samples
# A tibble: 90,000 × 7
n sigma .trial ysample ybar S2 X
<dbl> <dbl> <dbl> <list> <dbl> <dbl> <dbl>
1 4 1 1 <dbl [4]> -0.346 0.542 1.63
2 4 1 2 <dbl [4]> -1.17 2.42 7.25
3 4 1 3 <dbl [4]> 0.0908 1.14 3.43
4 4 1 4 <dbl [4]> -0.948 0.279 0.837
5 4 1 5 <dbl [4]> 0.492 1.24 3.73
6 4 1 6 <dbl [4]> -0.254 3.38 10.1
7 4 1 7 <dbl [4]> 0.0103 0.571 1.71
8 4 1 8 <dbl [4]> -0.0321 0.531 1.59
9 4 1 9 <dbl [4]> 0.674 4.19 12.6
10 4 1 10 <dbl [4]> 0.150 0.246 0.737
# ℹ 89,990 more rows
library(ggh4x)

# Scatter the sample variance (S2) against the sample mean (ybar) for the
# simulated normal samples, with one panel per sigma (rows) and n (columns).
ggplot(data = many_normal_samples) +
  geom_point(
    aes(x = ybar, y = S2),
    shape = "."
  ) +
  labs(
    x = expression(bar(Y)),
    y = expression(S^2),
    title = "Plots of sample mean vs sample variance"
  ) +
  # `scales` is written out in full: the abbreviated `scale` only reached this
  # argument through partial argument matching.
  facet_nested(sigma ~ n, labeller = label_both, scales = "free_y") +
  theme_classic()
# Number of simulated samples drawn for each (n, p) combination.
g <- 10000

# For every trial, draw n Bernoulli(p) observations (binomial with size = 1),
# then record the sample mean (ybar) and the sample variance (S2).
# The surrounding parentheses print the resulting tibble.
(bernie <- parameters(
  ~n, ~p,
  c(10, 20, 30),
  c(.25, .5, .75)
) %>%
  add_trials(g) %>%
  mutate(
    ysample = pmap(list(n, p), \(nn, pp) rbinom(nn, size = 1, prob = pp)),
    ybar = map_dbl(ysample, mean),
    S2 = map_dbl(ysample, var)
  ))
# A tibble: 90,000 × 6
n p .trial ysample ybar S2
<dbl> <dbl> <dbl> <list> <dbl> <dbl>
1 10 0.25 1 <int [10]> 0.1 0.1
2 10 0.25 2 <int [10]> 0.2 0.178
3 10 0.25 3 <int [10]> 0.4 0.267
4 10 0.25 4 <int [10]> 0.3 0.233
5 10 0.25 5 <int [10]> 0.3 0.233
6 10 0.25 6 <int [10]> 0.2 0.178
7 10 0.25 7 <int [10]> 0.2 0.178
8 10 0.25 8 <int [10]> 0.4 0.267
9 10 0.25 9 <int [10]> 0.2 0.178
10 10 0.25 10 <int [10]> 0.2 0.178
# ℹ 89,990 more rows
Note: since no explicit values of p were given, I chose p = 0.25, 0.5, and 0.75; I hope that is acceptable.
library(ggh4x)

# Scatter the sample variance (S2) against the sample mean (ybar) for the
# simulated Bernoulli samples, with one panel per p (rows) and n (columns).
ggplot(data = bernie) +
  geom_point(
    aes(x = ybar, y = S2),
    shape = "."
  ) +
  labs(
    x = expression(bar(Y)),
    y = expression(S^2),
    title = "bernoulli sample means and variances"
  ) +
  # `scales` is written out in full: the abbreviated `scale` only reached this
  # argument through partial argument matching.
  facet_nested(p ~ n, labeller = label_both, scales = "free_y") +
  theme_classic()
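These plots show that the sample mean and sample variance are dependent for Bernoulli samples: in every panel the points lie exactly on a parabola instead of forming a filled-in, scattered cloud like the normal-sample plots above, where the two statistics are independent. This happens because each observation is 0 or 1, so y_i^2 = y_i and therefore S^2 = n * Ybar * (1 - Ybar) / (n - 1); the sample variance is completely determined by the sample mean.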