PS 6.2

#6
Verify the results of Question 1 using a simulation study with 10,000 replications of Y. Superimpose the analytic densities of U,V, and W over histograms of the realizations. Submit a published Rpubs link of your simulation study.

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ readr     2.1.5
✔ ggplot2   3.5.2     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.3.0
✔ purrr     1.1.0     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)


#1
(sim_dfU <- data.frame(Y = rbeta(10000, 1, 2))
          %>% mutate(U = 1-2*Y)
          %>% mutate(fY = dbeta(Y, 1, 2),
                     fU = (1+U)/2)
) %>% head
           Y           U        fY        fU
1 0.14871362  0.70257276 1.7025728 0.8512864
2 0.25599690  0.48800619 1.4880062 0.7440031
3 0.91536120 -0.83072239 0.1692776 0.0846388
4 0.52805013 -0.05610027 0.9438997 0.4719499
5 0.05654043  0.88691915 1.8869191 0.9434596
6 0.79465346 -0.58930693 0.4106931 0.2053465
ggplot(sim_dfU, aes(x = U)) +
  geom_histogram(aes(y = after_stat(density)),
                 fill = "cornflowerblue", color = "black",
                 binwidth = 0.04, center = -0.98) +
  geom_line(aes(y = fU), linewidth = 1) +
  xlim(-1, 1) +
  labs(x = "u", y = expression(f[U](u))) +
  theme_classic(base_size = 14)

(sim_dfV <- data.frame(Y = rbeta(10000, 1, 2))
          %>% mutate(V = 1/Y)
          %>% mutate(fY = dbeta(Y, 1, 2),
                     fV = (2*(V-1))/V^3)
) %>% head
           Y         V       fY          fV
1 0.36298206  2.754957 1.274036 0.167861837
2 0.22256471  4.493075 1.554871 0.077020591
3 0.48898935  2.045034 1.022021 0.244376110
4 0.39375419  2.539656 1.212492 0.187987564
5 0.13842140  7.224317 1.723157 0.033016526
6 0.04300437 23.253453 1.913991 0.003539689
ggplot(sim_dfV, aes(x = V)) +
  geom_histogram(aes(y = after_stat(density)),
                 fill = "cornflowerblue", color = "black",
                 binwidth = 0.04, center = 0.02) +
  geom_line(aes(y = fV), linewidth = 1) +
  xlim(1,6) +         
  labs(x = "v", y = expression(f[V](v))) +
  theme_classic(base_size = 14)
Warning: Removed 3033 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 3033 rows containing missing values or values outside the scale range
(`geom_line()`).

library(tidyverse)

set.seed(1)

sim_dfV <- data.frame(Y = rbeta(10000, 1, 2)) %>%
  mutate(V = 1 / Y)
v_seq <- seq(1, 3, length.out = 500)          
fV_seq <- 2 * (v_seq - 1) / (v_seq^3)       
analytic_df <- data.frame(V = v_seq, fV = fV_seq)
ggplot(sim_dfV, aes(x = V)) +
  geom_histogram(aes(y = after_stat(density)),
                 fill = "cornflowerblue", color = "black",
                 binwidth = 0.04) +
  geom_line(data = analytic_df, aes(x = V, y = fV),
            color = "black", size = 1) +
  coord_cartesian(xlim = c(1, 3)) +
  labs(x = "v", y = expression(f[V](v))) +
  theme_classic(base_size = 14)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Follow a similiar shape but simulated overfits analytic due to infinite bound on data points dont properly fall

(sim_dfW <- data.frame(Y = rbeta(10000, 1, 2))
          %>% mutate(W = sqrt(Y))
          %>% mutate(fY = dbeta(Y, 1, 2),
                     fW = (4*W - 4*W^3))
) %>% head
           Y         W        fY        fW
1 0.23378562 0.4835138 1.5324288 1.4819010
2 0.22587080 0.4752587 1.5482584 1.4716464
3 0.03850728 0.1962327 1.9229854 0.7547053
4 0.34771118 0.5896704 1.3045776 1.5385416
5 0.62209923 0.7887327 0.7558015 1.1922507
6 0.72451571 0.8511849 0.5509686 0.9379523
ggplot(sim_dfW, aes(x = W)) +
  geom_histogram(aes(y = after_stat(density)),
                 fill = "cornflowerblue", color = "black",
                 binwidth = 0.04, center = 0.02) +
  geom_line(aes(y = fW), linewidth = 1) +
  xlim(0, 1) +         
  labs(x = "w", y = expression(f[W](w))) +
  theme_classic(base_size = 14)