## Joanna Wu
## Kirk Lohmueller homework
## EEB 201 R Bootcamp

# set up a basic heights distribution
# define n, the number of draws
n <- 100
# draw n heights from a normal distribution with mean 69 and sd 10;
# n is used here so that changing it above really changes the sample size
height.dist <- rnorm(n, mean = 69, sd = 10)

# quick look at the shape of the simulated sample
hist(height.dist)

# plot of chunk unnamed-chunk-1
# (knitr figure caption left over from the rendered report; kept as a comment
# because as bare text it is not valid R and stops the script from parsing)

# 1. Write a function (called “get_heights”) in R to draw a sample of individuals (either 100 or 1000) from the population. 
# Hint: You will want to use “rnorm” within your function. 
# Store the random heights that you’ve generated in a variable called “heights”.
# Draw a random sample of individual heights from the population.
#
# Arguments:
#   n        number of individuals to draw (e.g. 100 or 1000)
#   pop_mean mean of the normal population sampled from (default 69)
#   pop_sd   standard deviation of that population (default 10)
#
# Returns the numeric vector of n sampled heights, invisibly, so a bare call
# at the console prints nothing while the value can still be assigned or
# passed straight to another function such as hist().
#
# Side effect: the sample is also stored in a variable called `heights`
# outside the function by `<<-` (the global environment when this script is
# run top to bottom), so `heights` is still available after the call.
get_heights <- function(n, pop_mean = 69, pop_sd = 10) {
  sampled <- rnorm(n, mean = pop_mean, sd = pop_sd)
  # `<<-` rather than `<-`: a plain `<-` would create a local `heights` that
  # disappears as soon as the function returns
  heights <<- sampled
  invisible(sampled)
}

# draw heights for 100 individuals; the value comes back invisibly, so
# nothing is printed, but the sample is stored in a vector called heights
get_heights(100)

# plot samples of 100 and 1000 individuals, each from a fresh draw;
# explicit titles replace the default, which would show the call text
hist(get_heights(100), col = 2, lwd = 4,
     main = "Sample of 100 heights", xlab = "Height")

# plot of chunk unnamed-chunk-1
# (knitr figure caption; kept as a comment because bare text is not valid R)

hist(get_heights(1000), col = 4, lwd = 4,
     main = "Sample of 1000 heights", xlab = "Height")

# plot of chunk unnamed-chunk-1
# (knitr figure caption; kept as a comment because bare text is not valid R)

# 2. Within your function, compute the average height from your “heights” vector.
# Draw a random sample of heights and return its average.
#
# Arguments:
#   n        number of individuals to draw (e.g. 100 or 1000)
#   pop_mean mean of the normal population sampled from (default 69)
#   pop_sd   standard deviation of that population (default 10)
#
# Returns a single number: the mean of the n sampled heights.
#
# Side effect: as before, the sample itself is stored in a variable called
# `heights` outside the function by `<<-`.
get_heights_means <- function(n, pop_mean = 69, pop_sd = 10) {
  sampled <- rnorm(n, mean = pop_mean, sd = pop_sd)
  heights <<- sampled
  # End on the value itself rather than on an assignment: an assignment as
  # the last expression returns invisibly, so a top-level call showed nothing.
  # A visible return prints at top level but stays quiet when the result is
  # assigned (e.g. inside a loop), so no print() is needed in here.
  mean(sampled)
}

# 3. Make your function return the average height.
# run the function and show the average height it returns; print() displays
# the value even when the function hands it back invisibly
print(get_heights_means(100))
print(get_heights_means(1000))

# 4. Use a “for” loop to call your “get_heights” function 1000 times, with taking a sample 
# of size 100 from the population. Save the mean height from each replicate in a vector called “mean_heights_100”.
# create for loop to run function 1000 times

# first create a vector to save output: one numeric slot per replicate
# (c(1, rep=1000) did not do this; it built the two-element vector
# c(1, 1000) with the second element named "rep")
mean_heights_100 <- numeric(1000)
# write a loop to save output iteratively, one sample mean per replicate
for (i in seq_along(mean_heights_100)) {
  mean_heights_100[i] <- get_heights_means(100)
}
# print output
# mean_heights_100 # prints 1000 means

# 5. Use a “for” loop to call your “get_heights” function 1000 times, with taking a sample of 
# size 1000 from the population. Save the mean height from each replicate in a vector called “mean_heights_1000”.

# first create a vector to save output: one numeric slot per replicate
# (c(1, rep=1000) did not do this; it built the two-element vector
# c(1, 1000) with the second element named "rep")
mean_heights_1000 <- numeric(1000)
# write a loop to save output iteratively, one sample mean per replicate
for (i in seq_along(mean_heights_1000)) {
  mean_heights_1000[i] <- get_heights_means(1000)
}
# print output
# mean_heights_1000 # prints 1000 means

# 6. Plot a histogram of the distribution of the average heights for your sample size of 100 and 1000 individuals. 
# The two sets of data should be plotted on the same axes. Add a legend. Label the axes. Plot the data from the 100 samples in red and the data 
# from the 1000 samples in blue. Your plot should look something like the one shown on the next page.

# set plot area and margins
par(mfrow = c(1, 1), mar = c(4, 4, 3, 2))
# set bin edges (bin width 1): always cover 65-75, and widen the range when a
# sample mean falls outside it, because hist() stops with an error if the
# breaks do not span the data
all_means <- c(mean_heights_100, mean_heights_1000)
bins <- seq(min(65, floor(min(all_means))),
            max(75, ceiling(max(all_means))),
            by = 1)
# check the breaks; plot = FALSE computes them without drawing anything
hist(mean_heights_100, breaks = bins, plot = FALSE)$breaks # looks ok
##  [1] 65 66 67 68 69 70 71 72 73 74 75
# (output above is from the original run with the default 65-75 range)

# count the means falling in each bin; only the counts are needed, so the
# intermediate histograms are not drawn
avg100 <- hist(mean_heights_100, breaks = bins, plot = FALSE)$counts

# plot of chunk unnamed-chunk-1
# (knitr figure caption; kept as a comment because bare text is not valid R)

avg1000 <- hist(mean_heights_1000, breaks = bins, plot = FALSE)$counts

# plot of chunk unnamed-chunk-1
# (knitr figure caption; kept as a comment because bare text is not valid R)

# create barplot: one pair of bars per bin, first colour for the n = 100
# means and second colour for the n = 1000 means (palette colours 2 and 4)
sample_cols <- c(2, 4)
# Why one label fewer than breaks: k break points delimit k - 1 bins, so the
# 11 edges 65..75 give 10 bins. Each bin is labelled by its lower edge, which
# is every break except the last one.
barplot(rbind(avg100, avg1000), beside = TRUE, col = sample_cols,
        names.arg = bins[-length(bins)],
        xlab = "Average height (inches)", ylab = "Count")
# add legend; a keyword position keeps it inside the plot whatever the
# bar heights turn out to be
legend("topright", legend = c("n = 100", "n = 1000"), col = sample_cols,
       lwd = 4)

# plot of chunk unnamed-chunk-1
# (knitr figure caption; kept as a comment because bare text is not valid R)

# knit into rmd
# install.packages('knitr')
# library(knitr)
# spin('/Users/joannawu/Documents/EEB Orientation/lohmueller_joannawu_homework.R')