## Joanna Wu
## Kirk Lohmueller homework
## EEB 201 R Bootcamp
# Set up a basic distribution of heights.
# n is the number of draws taken from the population.
n <- 100
# Draw n heights from a normal population (mean 69, sd 10). The sample size
# comes from n so it is set in exactly one place.
height.dist <- rnorm(n, mean = 69, sd = 10)
# Quick visual check of the sampled distribution.
hist(height.dist)

# 1. Write a function (called “get_heights”) in R to draw a sample of individuals (either 100 or 1000) from the population.
# Hint: You will want to use “rnorm” within your function.
# Store the random heights that you’ve generated in a variable called “heights”.
# Draw a random sample of heights from a normally distributed population.
#
# Arguments:
#   n        number of individuals to sample (e.g. 100 or 1000).
#   pop_mean mean height of the population; defaults to 69.
#   pop_sd   standard deviation of the population; defaults to 10.
#
# Returns (invisibly) a numeric vector holding the n sampled heights.
#
# Side effect: the sample is also stored in a variable called `heights`
# outside the function via `<<-`, because the exercise asks for the sample to
# be kept in a variable of that name.
get_heights <- function(n, pop_mean = 69, pop_sd = 10) {
  sampled <- rnorm(n, mean = pop_mean, sd = pop_sd)
  # `<<-` assigns in the enclosing (here: global) environment, not locally.
  heights <<- sampled
  # Keep the result invisible so a bare call does not print the whole sample.
  invisible(sampled)
}
# Draw a sample of 100 individuals. The return value is not printed here; the
# sample is available afterwards in the global variable `heights`.
get_heights(100) # get_heights() stores the sample it drew in a vector called heights.
# Plot samples of 100 (col = 2) and 1000 (col = 4) individuals. Each hist()
# call draws a new sample rather than reusing the one stored above.
hist(get_heights(100), col = 2, lwd = 4)

hist(get_heights(1000), col = 4, lwd = 4)

# 2. Within your function, compute the average height from your “heights” vector.
# Draw a random sample of heights and return its average.
#
# Arguments:
#   n        number of individuals to sample (e.g. 100 or 1000).
#   pop_mean mean height of the population; defaults to 69.
#   pop_sd   standard deviation of the population; defaults to 10.
#
# Returns the mean of the n sampled heights as a single number. The value is
# returned visibly, so a bare call at the console shows the average; inside a
# loop the result is assigned and therefore nothing is printed.
#
# Side effect: the sample itself is stored in a variable called `heights`
# outside the function via `<<-`, overwriting any previous sample.
get_heights_means <- function(n, pop_mean = 69, pop_sd = 10) {
  sampled <- rnorm(n, mean = pop_mean, sd = pop_sd)
  # `<<-` assigns in the enclosing (here: global) environment, not locally.
  heights <<- sampled
  # Ending on the expression itself (not an assignment) makes the average the
  # visible return value of the function.
  mean(sampled)
}
# 3. Make your function return the average height.
# Run the function once per sample size and show the average height.
# print() displays the value even if the function hands its result back
# invisibly, so the averages are not silently discarded.
print(get_heights_means(100))
print(get_heights_means(1000))
# 4. Use a “for” loop to call your “get_heights” function 1000 times, each time taking a sample
# of size 100 from the population. Save the mean height from each replicate in a vector called “mean_heights_100”.
# Run the sampling function 1000 times with a sample size of 100.
# Preallocate one numeric slot per replicate. numeric(1000) gives a plain
# length-1000 vector; c(1, rep = 1000) would instead build the two-element
# named vector (1, rep = 1000), which then has to grow inside the loop.
mean_heights_100 <- numeric(1000)
# Fill the vector iteratively: one sample mean per replicate.
for (i in seq_along(mean_heights_100)) {
  mean_heights_100[i] <- get_heights_means(100)
}
# print output
# mean_heights_100 # prints 1000 means
# 5. Use a “for” loop to call your “get_heights” function 1000 times, each time taking a sample of
# size 1000 from the population. Save the mean height from each replicate in a vector called “mean_heights_1000”.
# Preallocate one numeric slot per replicate. numeric(1000) gives a plain
# length-1000 vector; c(1, rep = 1000) would instead build the two-element
# named vector (1, rep = 1000), which then has to grow inside the loop.
mean_heights_1000 <- numeric(1000)
# Fill the vector iteratively: one mean of a 1000-individual sample per
# replicate.
for (i in seq_along(mean_heights_1000)) {
  mean_heights_1000[i] <- get_heights_means(1000)
}
# print output
# mean_heights_1000 # prints 1000 means
# 6. Plot a histogram of the distribution of the average heights for your sample size of 100 and 1000 individuals.
# The two sets of data should be plotted on the same axes. Add a legend. Label the axes. Plot the data from the 100 samples in red and the data
# from the 1000 samples in blue. Your plot should look something like the one shown on the next page.
# Set the plot area (single panel) and margins.
par(mfrow = c(1, 1), mar = c(4, 4, 3, 2))
# Use bins of width 1 spanning at least 65 to 75. hist() stops with an error
# when a value lies outside the breaks, so the range is widened whenever a
# sample mean falls below 65 or above 75.
means_range <- range(mean_heights_100, mean_heights_1000)
bins <- seq(min(65, floor(means_range[1])),
            max(75, ceiling(means_range[2])),
            by = 1)
# Count the means per bin. plot = FALSE returns the counts without drawing
# intermediate histograms that are never used.
avg100 <- hist(mean_heights_100, breaks = bins, plot = FALSE)$counts
avg1000 <- hist(mean_heights_1000, breaks = bins, plot = FALSE)$counts
# Draw both sets of counts side by side on the same axes: n = 100 in colour 2,
# n = 1000 in colour 4.
# hist() returns one count per bin, and k breaks delimit only k - 1 bins, so
# there is one label fewer than there are breaks. Each pair of bars is
# labelled with the lower edge of its bin; the last break merely closes the
# final bin.
barplot(rbind(avg100, avg1000), beside = TRUE, col = c(2, 4),
        names.arg = bins[-length(bins)],
        xlab = "Average height (inches)", ylab = "Count")
# Add the legend; a keyword position keeps it inside the plot region whatever
# the axis ranges turn out to be.
legend("topright", c("n = 100", "n = 1000"), col = c(2, 4),
       lwd = 4)

# knit into rmd
# install.packages('knitr')
# library(knitr)
# spin('/Users/joannawu/Documents/EEB Orientation/lohmueller_joannawu_homework.R')