PS_5.8

Question 1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrrfect)

Attaching package: 'purrrfect'

The following objects are masked from 'package:base':

    replicate, tabulate
library(ggplot2)
# Q - 1
dmultinom(x = c(2,1,1), prob = c(0.73,0.2,0.07))
[1] 0.0895272

Question 5

Simulated Data

# Q - 5
N <- 10000
n_fires <- 50
fire_probs <- c(family_home = 0.73, apartment = 0.20, other = 0.07)

simstudy <- rmultinom(n = N, size = n_fires, prob = fire_probs) %>%
  t() %>%
  as_tibble() %>%
  rename(Y1 = family_home, Y2 = apartment, Y3 = other)
simstudy
# A tibble: 10,000 × 3
      Y1    Y2    Y3
   <int> <int> <int>
 1    37    10     3
 2    33    11     6
 3    34     9     7
 4    38    10     2
 5    39     9     2
 6    38     8     4
 7    31    17     2
 8    38     9     3
 9    33    12     5
10    30    15     5
# ℹ 9,990 more rows

Joint Distribution Plot

joint_dist_plot <- ggplot(data = simstudy, aes(x = Y1, y = Y2)) +
  geom_jitter(size = .4, alpha = .5) +
  theme_classic()
joint_dist_plot

A) Y1 = 37, Y2= 10 is most common

most_common_combos <- simstudy %>%
  count(Y1, Y2, sort = TRUE) 
most_common_combos
# A tibble: 190 × 3
      Y1    Y2     n
   <int> <int> <int>
 1    37    10   304
 2    36    11   299
 3    37     9   282
 4    38     9   279
 5    36    10   274
 6    38    10   252
 7    38     8   250
 8    37    11   249
 9    39     9   248
10    35    12   243
# ℹ 180 more rows

B)

# B
simulated_covariance <- cov(simstudy$Y1, simstudy$Y2)
simulated_covariance
[1] -7.400049
analytic_covariance <- -n_fires * fire_probs["family_home"] * fire_probs["apartment"]
analytic_covariance
family_home 
       -7.3 

Question 6

mux <- 5
muy <- 5
n_sim <- 10000

# sim
bvnorm_sim <- expand_grid(
  sigx = c(0.5, 1),
  sigy = c(0.5, 1),
  rho  = c(-0.8, -0.4, 0, 0.4, 0.8)
) %>%
  add_trials(n_sim) %>%
  mutate(
    z1 = rnorm(n()),
    z2 = rnorm(n()),
    x_std = z1,
    y_std = rho * z1 + sqrt(1 - rho^2) * z2,
    x = sigx * x_std + mux,
    y = sigy * y_std + muy
  )
bvnorm_sim
# A tibble: 200,000 × 10
    sigx  sigy   rho .trial      z1      z2   x_std  y_std     x     y
   <dbl> <dbl> <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl> <dbl> <dbl>
 1   0.5   0.5  -0.8      1 -0.473  -0.298  -0.473   0.200  4.76  5.10
 2   0.5   0.5  -0.8      2 -0.820   0.0922 -0.820   0.712  4.59  5.36
 3   0.5   0.5  -0.8      3  2.09    0.167   2.09   -1.57   6.05  4.21
 4   0.5   0.5  -0.8      4  0.732  -0.433   0.732  -0.845  5.37  4.58
 5   0.5   0.5  -0.8      5  0.701   0.241   0.701  -0.416  5.35  4.79
 6   0.5   0.5  -0.8      6  0.0452 -0.977   0.0452 -0.623  5.02  4.69
 7   0.5   0.5  -0.8      7  1.21   -0.268   1.21   -1.13   5.61  4.43
 8   0.5   0.5  -0.8      8  1.11   -0.853   1.11   -1.40   5.55  4.30
 9   0.5   0.5  -0.8      9 -1.06   -1.06   -1.06    0.209  4.47  5.10
10   0.5   0.5  -0.8     10 -0.373  -1.35   -0.373  -0.512  4.81  4.74
# ℹ 199,990 more rows

Facet Plot

library(ggh4x)
Warning: package 'ggh4x' was built under R version 4.5.2
# Create Facet Plot
bvn_plot <- ggplot(data = bvnorm_sim, aes(x = x, y = y)) +
  geom_point(alpha = 0.5, size = .5) +
  annotate("point", x = 5, y = 5, colour = "red", size = .8) +
  facet_nested(rho ~ sigx + sigy, labeller = label_both) +
  theme_bw()
bvn_plot

Question 7

sim_study_7 <- data.frame(
  x = rexp(N, rate = 1)
) %>%
  mutate(
    y = map_dbl(x, \(x_val) x_val + rexp(1, rate = 1))
  )
head(sim_study_7,10)
            x         y
1  1.24066774 6.3013264
2  0.93422348 1.0640549
3  2.14256003 2.5569755
4  0.00868401 2.4117412
5  0.06957087 2.3691001
6  0.01592606 0.1392922
7  0.47349211 1.0442847
8  0.36025754 1.6901445
9  1.34980334 1.5997109
10 1.20284430 1.5119690

Overlay Plot

q7_plot <- ggplot(data = sim_study_7, aes(x = x, y = y)) +
  geom_point(size = .2, alpha = .5) +
  geom_smooth(
    method = 'loess', 
    se = FALSE,
    aes(color = 'Simulated E(Y|X)'),
    size = .7
  ) +
  geom_function(
    fun = \(x) x + 1,
    aes(color = 'Analytic E(Y|X)'),
    linewidth = .7
  ) +
  labs(color = '') + 
  theme_classic()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
q7_plot
`geom_smooth()` using formula = 'y ~ x'

LM

lm_model <- lm(y ~ x, data = sim_study_7)
lm_model

Call:
lm(formula = y ~ x, data = sim_study_7)

Coefficients:
(Intercept)            x  
     0.9917       1.0050