#6
Verify the results of Question 1 using a simulation study with 10,000 replications of Y. Superimpose the analytic densities of U,V, and W over histograms of the realizations. Submit a published Rpubs link of your simulation study.
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.2 ✔ stringr 1.5.1
✔ lubridate 1.9.4 ✔ tibble 3.3.0
✔ purrr 1.1.0 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (ggplot2)
#1
(sim_dfU <- data.frame (Y = rbeta (10000 , 1 , 2 ))
%>% mutate (U = 1-2 * Y)
%>% mutate (fY = dbeta (Y, 1 , 2 ),
fU = (1 + U)/ 2 )
) %>% head
Y U fY fU
1 0.14871362 0.70257276 1.7025728 0.8512864
2 0.25599690 0.48800619 1.4880062 0.7440031
3 0.91536120 -0.83072239 0.1692776 0.0846388
4 0.52805013 -0.05610027 0.9438997 0.4719499
5 0.05654043 0.88691915 1.8869191 0.9434596
6 0.79465346 -0.58930693 0.4106931 0.2053465
ggplot (sim_dfU, aes (x = U)) +
geom_histogram (aes (y = after_stat (density)),
fill = "cornflowerblue" , color = "black" ,
binwidth = 0.04 , center = - 0.98 ) +
geom_line (aes (y = fU), linewidth = 1 ) +
xlim (- 1 , 1 ) +
labs (x = "u" , y = expression (f[U](u))) +
theme_classic (base_size = 14 )
(sim_dfV <- data.frame (Y = rbeta (10000 , 1 , 2 ))
%>% mutate (V = 1 / Y)
%>% mutate (fY = dbeta (Y, 1 , 2 ),
fV = (2 * (V-1 ))/ V^ 3 )
) %>% head
Y V fY fV
1 0.36298206 2.754957 1.274036 0.167861837
2 0.22256471 4.493075 1.554871 0.077020591
3 0.48898935 2.045034 1.022021 0.244376110
4 0.39375419 2.539656 1.212492 0.187987564
5 0.13842140 7.224317 1.723157 0.033016526
6 0.04300437 23.253453 1.913991 0.003539689
ggplot (sim_dfV, aes (x = V)) +
geom_histogram (aes (y = after_stat (density)),
fill = "cornflowerblue" , color = "black" ,
binwidth = 0.04 , center = 0.02 ) +
geom_line (aes (y = fV), linewidth = 1 ) +
xlim (1 ,6 ) +
labs (x = "v" , y = expression (f[V](v))) +
theme_classic (base_size = 14 )
Warning: Removed 3033 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 3033 rows containing missing values or values outside the scale range
(`geom_line()`).
library (tidyverse)
set.seed (1 )
sim_dfV <- data.frame (Y = rbeta (10000 , 1 , 2 )) %>%
mutate (V = 1 / Y)
v_seq <- seq (1 , 3 , length.out = 500 )
fV_seq <- 2 * (v_seq - 1 ) / (v_seq^ 3 )
analytic_df <- data.frame (V = v_seq, fV = fV_seq)
ggplot (sim_dfV, aes (x = V)) +
geom_histogram (aes (y = after_stat (density)),
fill = "cornflowerblue" , color = "black" ,
binwidth = 0.04 ) +
geom_line (data = analytic_df, aes (x = V, y = fV),
color = "black" , size = 1 ) +
coord_cartesian (xlim = c (1 , 3 )) +
labs (x = "v" , y = expression (f[V](v))) +
theme_classic (base_size = 14 )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Follow a similiar shape but simulated overfits analytic due to infinite bound on data points dont properly fall
(sim_dfW <- data.frame (Y = rbeta (10000 , 1 , 2 ))
%>% mutate (W = sqrt (Y))
%>% mutate (fY = dbeta (Y, 1 , 2 ),
fW = (4 * W - 4 * W^ 3 ))
) %>% head
Y W fY fW
1 0.23378562 0.4835138 1.5324288 1.4819010
2 0.22587080 0.4752587 1.5482584 1.4716464
3 0.03850728 0.1962327 1.9229854 0.7547053
4 0.34771118 0.5896704 1.3045776 1.5385416
5 0.62209923 0.7887327 0.7558015 1.1922507
6 0.72451571 0.8511849 0.5509686 0.9379523
ggplot (sim_dfW, aes (x = W)) +
geom_histogram (aes (y = after_stat (density)),
fill = "cornflowerblue" , color = "black" ,
binwidth = 0.04 , center = 0.02 ) +
geom_line (aes (y = fW), linewidth = 1 ) +
xlim (0 , 1 ) +
labs (x = "w" , y = expression (f[W](w))) +
theme_classic (base_size = 14 )