7.4 code (problems 5 + 6)

Author

courtney casey

problem 5

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

N <- 10000

(many_normal_samples <- parameters(~n, ~sigma,
                                  c(4, 8, 15),
                                  c(1, 2, 3)) %>%
  add_trials(N) %>%
  mutate(ysample = pmap(list(n, sigma), \(nn, sig) rnorm(nn, mean = 0, sd = sig))) %>%
  mutate(ybar = map_dbl(ysample, mean),
         S2   = map_dbl(ysample, var),
         X    = (n - 1) * S2 / (sigma^2)))

# A tibble: 90,000 × 7
       n sigma .trial ysample      ybar    S2      X
   <dbl> <dbl>  <dbl> <list>      <dbl> <dbl>  <dbl>
 1     4     1      1 <dbl [4]> -0.346  0.542  1.63 
 2     4     1      2 <dbl [4]> -1.17   2.42   7.25 
 3     4     1      3 <dbl [4]>  0.0908 1.14   3.43 
 4     4     1      4 <dbl [4]> -0.948  0.279  0.837
 5     4     1      5 <dbl [4]>  0.492  1.24   3.73 
 6     4     1      6 <dbl [4]> -0.254  3.38  10.1  
 7     4     1      7 <dbl [4]>  0.0103 0.571  1.71 
 8     4     1      8 <dbl [4]> -0.0321 0.531  1.59 
 9     4     1      9 <dbl [4]>  0.674  4.19  12.6  
10     4     1     10 <dbl [4]>  0.150  0.246  0.737
# ℹ 89,990 more rows

library(ggh4x)
ggplot(data = many_normal_samples) +
geom_point(aes(x = ybar, y = S2),
shape='.')+
labs(x = expression(bar(Y)),
y = expression(S^2),
title='Plots of sample mean vs sample variance')+
facet_nested(sigma ~ n, labeller = label_both, scale = 'free_y') +
theme_classic()

g=10000

(bernie <- parameters(~n, ~p,
                                  c(10, 20, 30),
                                  c(.25, .5, .75)) %>%
  add_trials(g) %>%
  mutate(ysample = pmap(list(n, p), \(nn, pp) rbinom(nn, size = 1, prob = pp))) %>%
  mutate(ybar = map_dbl(ysample, mean),
         S2   = map_dbl(ysample, var)))

# A tibble: 90,000 × 6
       n     p .trial ysample     ybar    S2
   <dbl> <dbl>  <dbl> <list>     <dbl> <dbl>
 1    10  0.25      1 <int [10]>   0.1 0.1  
 2    10  0.25      2 <int [10]>   0.2 0.178
 3    10  0.25      3 <int [10]>   0.4 0.267
 4    10  0.25      4 <int [10]>   0.3 0.233
 5    10  0.25      5 <int [10]>   0.3 0.233
 6    10  0.25      6 <int [10]>   0.2 0.178
 7    10  0.25      7 <int [10]>   0.2 0.178
 8    10  0.25      8 <int [10]>   0.4 0.267
 9    10  0.25      9 <int [10]>   0.2 0.178
10    10  0.25     10 <int [10]>   0.2 0.178
# ℹ 89,990 more rows

hope it’s ok i chose .25, .5, and .75 for p since we weren’t given explicit ones !

library(ggh4x)
ggplot(data = bernie) +
geom_point(aes(x = ybar, y = S2),
shape='.')+
labs(x = expression(bar(Y)),
y = expression(S^2),
title='bernoulli sample means and variances')+
facet_nested(p ~ n, labeller = label_both, scale = 'free_y') +
theme_classic()

proves dependence as they’re all parabolic shapes (not filled in and scattered as independent ones !!