library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
#Initialize variables for x and y

n_x <- 25
n_y<- 36

initial_mean_x <- 20
initial_mean_y <- 30

sd_x <- 5
sd_y <- (108^.5)

#sample random normal distribution of x and y 10000 times
#rnorm produces a vector, but the index only holds a single value for each iteration
#so the mean and var of each sample is calculated inside the for loop, and stored in each iteration of the index

samples_mean_x<-numeric(10000)
samples_mean_y<-numeric(10000)
samples_var_x<-numeric(10000)
samples_var_y<-numeric(10000)
for(i in 1:10000)
  {
    samples_mean_x[i]<-mean(rnorm(n_x, initial_mean_x, sd_x))  
    samples_mean_y[i]<-mean(rnorm(n_y, initial_mean_y, sd_y))
    samples_var_x[i]<-var(rnorm(n_x, initial_mean_x, sd_x))
    samples_var_y[i]<-var(rnorm(n_y, initial_mean_y, sd_y))
  }


##caluclate the average mean and var of the 10000 samples

mean_x<- mean(samples_mean_x)
mean_y<- mean(samples_mean_y)

var_x <- mean(samples_var_x)
var_y <- mean(samples_var_y)


##initialize xbar and ybar values by calculating from previously known or found values

n_x_bar<-n_x
n_y_bar<-n_y

var_x_bar <- var_x / n_x
sd_x_bar <- (var_x_bar^.5)

var_y_bar <- var_y / n_y
sd_y_bar <- (var_y_bar^.5)

mean_x_bar <- mean_x
mean_y_bar <- mean_y

##check that my values match arnholt's values on the board

var_x
## [1] 25.09613
var_y
## [1] 108.4897
var_x_bar
## [1] 1.003845
sd_x_bar
## [1] 1.001921
var_y_bar
## [1] 3.013604
sd_y_bar
## [1] 1.735973
#sample random normal distribution of xbar and ybar 10000 times
#rnorm produces a vector, but the index only holds a single value for each iteration
#so the mean and var of each sample is calculated inside the for loop, and stored in each iteration of the index

samples_mean_x_bar<-numeric(10000)
samples_mean_y_bar<-numeric(10000)
samples_var_x_bar<-numeric(10000)
samples_var_y_bar<-numeric(10000)
for(i in 1:10000)
  {
    samples_mean_x_bar[i] <- mean(rnorm(n_x_bar, mean_x_bar, sd_x_bar))
    samples_mean_y_bar[i] <- mean(rnorm(n_y_bar, mean_y_bar, sd_y_bar))
    samples_var_x_bar[i] <- var(rnorm(n_x_bar, mean_x_bar, sd_x_bar))
    samples_var_y_bar[i] <- var(rnorm(n_y_bar, mean_y_bar, sd_y_bar))
  }

# 10000 samples of the mean and var of w are created by adding the values stored in each iteration of the indexes

samples_mean_w <- samples_mean_x_bar + samples_mean_y_bar
samples_var_w <- samples_var_x_bar + samples_var_y_bar

#distribution is plotted
ggplot(data.frame(samples_mean_w),aes(samples_mean_w)) +
  geom_density(fill="peachpuff") +
  theme_bw()

#final values of w are calulated
mean_w <- mean(samples_mean_w)
var_w <- mean(samples_var_w)
sd_w <- var_w^.5

##check that my values match arnholt's values on the board
mean_w
## [1] 49.97901
sd_w
## [1] 2.005254