library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
#Initialize variables for x and y
n_x <- 25
n_y<- 36
initial_mean_x <- 20
initial_mean_y <- 30
sd_x <- 5
sd_y <- (108^.5)
#sample random normal distribution of x and y 10000 times
#rnorm produces a vector, but the index only holds a single value for each iteration
#so the mean and var of each sample is calculated inside the for loop, and stored in each iteration of the index
samples_mean_x<-numeric(10000)
samples_mean_y<-numeric(10000)
samples_var_x<-numeric(10000)
samples_var_y<-numeric(10000)
for(i in 1:10000)
{
samples_mean_x[i]<-mean(rnorm(n_x, initial_mean_x, sd_x))
samples_mean_y[i]<-mean(rnorm(n_y, initial_mean_y, sd_y))
samples_var_x[i]<-var(rnorm(n_x, initial_mean_x, sd_x))
samples_var_y[i]<-var(rnorm(n_y, initial_mean_y, sd_y))
}
##caluclate the average mean and var of the 10000 samples
mean_x<- mean(samples_mean_x)
mean_y<- mean(samples_mean_y)
var_x <- mean(samples_var_x)
var_y <- mean(samples_var_y)
##initialize xbar and ybar values by calculating from previously known or found values
n_x_bar<-n_x
n_y_bar<-n_y
var_x_bar <- var_x / n_x
sd_x_bar <- (var_x_bar^.5)
var_y_bar <- var_y / n_y
sd_y_bar <- (var_y_bar^.5)
mean_x_bar <- mean_x
mean_y_bar <- mean_y
##check that my values match arnholt's values on the board
var_x
## [1] 25.09613
var_y
## [1] 108.4897
var_x_bar
## [1] 1.003845
sd_x_bar
## [1] 1.001921
var_y_bar
## [1] 3.013604
sd_y_bar
## [1] 1.735973
#sample random normal distribution of xbar and ybar 10000 times
#rnorm produces a vector, but the index only holds a single value for each iteration
#so the mean and var of each sample is calculated inside the for loop, and stored in each iteration of the index
samples_mean_x_bar<-numeric(10000)
samples_mean_y_bar<-numeric(10000)
samples_var_x_bar<-numeric(10000)
samples_var_y_bar<-numeric(10000)
for(i in 1:10000)
{
samples_mean_x_bar[i] <- mean(rnorm(n_x_bar, mean_x_bar, sd_x_bar))
samples_mean_y_bar[i] <- mean(rnorm(n_y_bar, mean_y_bar, sd_y_bar))
samples_var_x_bar[i] <- var(rnorm(n_x_bar, mean_x_bar, sd_x_bar))
samples_var_y_bar[i] <- var(rnorm(n_y_bar, mean_y_bar, sd_y_bar))
}
# 10000 samples of the mean and var of w are created by adding the values stored in each iteration of the indexes
samples_mean_w <- samples_mean_x_bar + samples_mean_y_bar
samples_var_w <- samples_var_x_bar + samples_var_y_bar
#distribution is plotted
ggplot(data.frame(samples_mean_w),aes(samples_mean_w)) +
geom_density(fill="peachpuff") +
theme_bw()

#final values of w are calulated
mean_w <- mean(samples_mean_w)
var_w <- mean(samples_var_w)
sd_w <- var_w^.5
##check that my values match arnholt's values on the board
mean_w
## [1] 49.97901
sd_w
## [1] 2.005254