PS 7.4

Question 1

qchisq(0.95,df=6)

[1] 12.59159

Question 2

pchisq(8.25,11, lower.tail=FALSE)

[1] 0.6907496

Question 3

pchisq(14.625, df= 9, lower.tail= FALSE)

[1] 0.1017651

Question 5

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.5.2

Warning: package 'ggplot2' was built under R version 4.5.2

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrrfect)


Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate

library(ggh4x)

Warning: package 'ggh4x' was built under R version 4.5.2

(sim_results <- parameters(
  ~n, ~sigma, ~mu,
  c(4, 8, 15),
  c(1, 2, 3),
  0
) %>%
  add_trials(10000) %>%
  mutate(
    ysample = pmap(list(n, mu, sigma), \(nn, m, s) rnorm(nn, m, s)),
    s2 = map_dbl(ysample, var),
    stat = (n - 1) * s2 / sigma^2
  ))

# A tibble: 90,000 × 7
       n sigma    mu .trial ysample      s2   stat
   <dbl> <dbl> <dbl>  <dbl> <list>    <dbl>  <dbl>
 1     4     1     0      1 <dbl [4]> 0.876  2.63 
 2     4     1     0      2 <dbl [4]> 1.85   5.56 
 3     4     1     0      3 <dbl [4]> 1.40   4.20 
 4     4     1     0      4 <dbl [4]> 3.53  10.6  
 5     4     1     0      5 <dbl [4]> 1.81   5.42 
 6     4     1     0      6 <dbl [4]> 0.209  0.626
 7     4     1     0      7 <dbl [4]> 0.482  1.45 
 8     4     1     0      8 <dbl [4]> 0.439  1.32 
 9     4     1     0      9 <dbl [4]> 0.766  2.30 
10     4     1     0     10 <dbl [4]> 0.883  2.65 
# ℹ 89,990 more rows

# overlay
ggplot(sim_results, aes(x = stat)) +
  geom_density(aes(y = after_stat(density)), fill = "blue", alpha = 0.3) +
  geom_line(aes(y = dchisq(stat, df = n - 1)), color = "black", linewidth = 1) +
  facet_nested(sigma ~ n, labeller = label_both, scales = "free_y") +
  theme_classic() +
  labs(x = expression((n-1)*S^2 / sigma^2), y = "Density")

# 4B verification
sim_results %>%
  reframe(
    mean_sim  = mean(s2),
    var_sim   = var(s2),
    mean_true = sigma^2,
    var_true  = 2 * sigma^4 / (n - 1),
    .by = c(n, sigma)
  )

# A tibble: 90,000 × 6
       n sigma mean_sim var_sim mean_true var_true
   <dbl> <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
 1     4     1     1.00   0.677         1    0.667
 2     4     1     1.00   0.677         1    0.667
 3     4     1     1.00   0.677         1    0.667
 4     4     1     1.00   0.677         1    0.667
 5     4     1     1.00   0.677         1    0.667
 6     4     1     1.00   0.677         1    0.667
 7     4     1     1.00   0.677         1    0.667
 8     4     1     1.00   0.677         1    0.667
 9     4     1     1.00   0.677         1    0.667
10     4     1     1.00   0.677         1    0.667
# ℹ 89,990 more rows

Question 6 - Parent Population is POI

# sim
(sim_results_poisson <- parameters(
  ~n, ~lambda, 
  c(10, 20, 30),  
  c(1, 5, 10)     
) %>%
  add_trials(1000) %>%
  mutate(
    ysample = pmap(list(n, lambda), \(n, l) rpois(n, lambda = l)),
    ybar = map_dbl(ysample, mean),
    s2 = map_dbl(ysample, var)
  ))

# A tibble: 9,000 × 6
       n lambda .trial ysample     ybar    s2
   <dbl>  <dbl>  <dbl> <list>     <dbl> <dbl>
 1    10      1      1 <int [10]>   0.8 0.4  
 2    10      1      2 <int [10]>   1.1 1.43 
 3    10      1      3 <int [10]>   0.7 1.12 
 4    10      1      4 <int [10]>   0.5 0.5  
 5    10      1      5 <int [10]>   1.3 0.456
 6    10      1      6 <int [10]>   1.4 0.711
 7    10      1      7 <int [10]>   0.6 0.711
 8    10      1      8 <int [10]>   0.6 0.489
 9    10      1      9 <int [10]>   0.9 0.544
10    10      1     10 <int [10]>   0.8 0.4  
# ℹ 8,990 more rows

# scatterplots
ggplot(sim_results_poisson, aes(x = ybar, y = s2)) +
  geom_point(alpha = 0.2, color = "darkgreen") +
  geom_smooth(method = "lm", color = "black", se = FALSE) + 
  facet_nested(lambda ~ n, labeller = label_both, scales = "free_y") +
  theme_classic() +
  labs(
    x = expression(bar(Y)),
    y = expression(S^2)
  )

`geom_smooth()` using formula = 'y ~ x'

Based on the scatterplot we can see Ybar and the sample variance are not independent of each other. We can see the fit lines are not horizontal, like they would be if they were independent.