Question 1
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.1.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'purrrfect'
The following objects are masked from 'package:base':
replicate, tabulate
library(ggplot2)
# Q - 1
dmultinom(x = c(2,1,1), prob = c(0.73,0.2,0.07))
Question 5
Simulated Data
# Q - 5
N <- 10000
n_fires <- 50
fire_probs <- c(family_home = 0.73, apartment = 0.20, other = 0.07)
simstudy <- rmultinom(n = N, size = n_fires, prob = fire_probs) %>%
t() %>%
as_tibble() %>%
rename(Y1 = family_home, Y2 = apartment, Y3 = other)
simstudy
# A tibble: 10,000 × 3
Y1 Y2 Y3
<int> <int> <int>
1 37 10 3
2 33 11 6
3 34 9 7
4 38 10 2
5 39 9 2
6 38 8 4
7 31 17 2
8 38 9 3
9 33 12 5
10 30 15 5
# ℹ 9,990 more rows
Joint Distribution Plot
joint_dist_plot <- ggplot(data = simstudy, aes(x = Y1, y = Y2)) +
geom_jitter(size = .4, alpha = .5) +
theme_classic()
joint_dist_plot
A) Y1 = 37, Y2= 10 is most common
most_common_combos <- simstudy %>%
count(Y1, Y2, sort = TRUE)
most_common_combos
# A tibble: 190 × 3
Y1 Y2 n
<int> <int> <int>
1 37 10 304
2 36 11 299
3 37 9 282
4 38 9 279
5 36 10 274
6 38 10 252
7 38 8 250
8 37 11 249
9 39 9 248
10 35 12 243
# ℹ 180 more rows
B)
# B
simulated_covariance <- cov(simstudy$Y1, simstudy$Y2)
simulated_covariance
analytic_covariance <- -n_fires * fire_probs["family_home"] * fire_probs["apartment"]
analytic_covariance
Question 6
mux <- 5
muy <- 5
n_sim <- 10000
# sim
bvnorm_sim <- expand_grid(
sigx = c(0.5, 1),
sigy = c(0.5, 1),
rho = c(-0.8, -0.4, 0, 0.4, 0.8)
) %>%
add_trials(n_sim) %>%
mutate(
z1 = rnorm(n()),
z2 = rnorm(n()),
x_std = z1,
y_std = rho * z1 + sqrt(1 - rho^2) * z2,
x = sigx * x_std + mux,
y = sigy * y_std + muy
)
bvnorm_sim
# A tibble: 200,000 × 10
sigx sigy rho .trial z1 z2 x_std y_std x y
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.5 0.5 -0.8 1 -0.473 -0.298 -0.473 0.200 4.76 5.10
2 0.5 0.5 -0.8 2 -0.820 0.0922 -0.820 0.712 4.59 5.36
3 0.5 0.5 -0.8 3 2.09 0.167 2.09 -1.57 6.05 4.21
4 0.5 0.5 -0.8 4 0.732 -0.433 0.732 -0.845 5.37 4.58
5 0.5 0.5 -0.8 5 0.701 0.241 0.701 -0.416 5.35 4.79
6 0.5 0.5 -0.8 6 0.0452 -0.977 0.0452 -0.623 5.02 4.69
7 0.5 0.5 -0.8 7 1.21 -0.268 1.21 -1.13 5.61 4.43
8 0.5 0.5 -0.8 8 1.11 -0.853 1.11 -1.40 5.55 4.30
9 0.5 0.5 -0.8 9 -1.06 -1.06 -1.06 0.209 4.47 5.10
10 0.5 0.5 -0.8 10 -0.373 -1.35 -0.373 -0.512 4.81 4.74
# ℹ 199,990 more rows
Facet Plot
Warning: package 'ggh4x' was built under R version 4.5.2
# Create Facet Plot
bvn_plot <- ggplot(data = bvnorm_sim, aes(x = x, y = y)) +
geom_point(alpha = 0.5, size = .5) +
annotate("point", x = 5, y = 5, colour = "red", size = .8) +
facet_nested(rho ~ sigx + sigy, labeller = label_both) +
theme_bw()
bvn_plot
Question 7
sim_study_7 <- data.frame(
x = rexp(N, rate = 1)
) %>%
mutate(
y = map_dbl(x, \(x_val) x_val + rexp(1, rate = 1))
)
head(sim_study_7,10)
x y
1 1.24066774 6.3013264
2 0.93422348 1.0640549
3 2.14256003 2.5569755
4 0.00868401 2.4117412
5 0.06957087 2.3691001
6 0.01592606 0.1392922
7 0.47349211 1.0442847
8 0.36025754 1.6901445
9 1.34980334 1.5997109
10 1.20284430 1.5119690
Overlay Plot
q7_plot <- ggplot(data = sim_study_7, aes(x = x, y = y)) +
geom_point(size = .2, alpha = .5) +
geom_smooth(
method = 'loess',
se = FALSE,
aes(color = 'Simulated E(Y|X)'),
size = .7
) +
geom_function(
fun = \(x) x + 1,
aes(color = 'Analytic E(Y|X)'),
linewidth = .7
) +
labs(color = '') +
theme_classic()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
`geom_smooth()` using formula = 'y ~ x'
LM
lm_model <- lm(y ~ x, data = sim_study_7)
lm_model
Call:
lm(formula = y ~ x, data = sim_study_7)
Coefficients:
(Intercept) x
0.9917 1.0050