# Load the pirate survey from the course website. The file is tab-separated
# with a header row; character columns are kept as plain strings, not factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#Question 1: Create the following histograms of the number of tattoos pirates have separately for each favorite pirate. Add appropriate labels for each plot. Hint: Use unique(pirates$favorite.pirate) as your index values. Additionally, before creating the loop, set up a 2 x 3 plotting region using par(mfrow = c(2, 3))

# Draw one histogram of tattoo counts per favorite pirate in a 2 x 3 grid.
# par() returns the previous settings, which are restored after the loop so
# that later plots in this script are not squeezed into one cell of the grid.
old.par <- par(mfrow = c(2, 3))

for (favorite.pirate.i in unique(pirates$favorite.pirate)) {
  # Keep only the pirates whose favorite pirate is the current one
  data.temp <- subset(pirates, favorite.pirate == favorite.pirate.i)

  hist(data.temp$tattoos,
       main = favorite.pirate.i,
       xlab = "tattoos")
}

par(old.par)

#Question 2: The law of large numbers says that the larger your sample size, the closer your sample statistic will be to the true population value. Let’s test this by conducting a simulation. For sample sizes of 1 to 100, calculate the average difference between the sample mean and the population mean from a Normal distribution with mean 100 and standard deviation 10.

#Step 1: Create the design matrix
# One row per (sample size, simulation) pair: sample sizes 1..100, each
# repeated 100 times (10,000 rows). `result` starts as NA and is filled below.
design.matrix <- expand.grid(
  "sample.size" = 1:100,
  "simulation" = 1:100,
  "result" = NA)

#View(design.matrix)

#Step 2: Run one simulation per row of the design matrix
# For each row, draw `sample.size` values from a Normal distribution and store
# the signed difference between the sample mean and the population mean.
# vapply() visits the rows in order and returns the whole result column at
# once, which avoids modifying the data frame one element at a time and avoids
# leaving loop variables named `data` and `diff` (which mask base R names) in
# the workspace.
population.mean <- 100
population.sd <- 10

design.matrix$result <- vapply(
  design.matrix$sample.size,
  function(sample.size.i) {
    draws <- rnorm(
      n = sample.size.i,
      mean = population.mean,
      sd = population.sd
    )
    mean(draws) - population.mean
  },
  numeric(1)
)
#design.matrix
#Question 3: Plot your aggregate results from question 2.

# Scatter every simulated difference (sample mean minus population mean)
# against the sample size it was drawn with.
with(
  design.matrix,
  plot(
    x = sample.size,
    y = result,
    xlim = c(0, 100),
    ylim = c(-30, 30),
    xlab = "sample size",
    ylab = "Result",
    pch = 8,
    col = "blue"
  )
)

#Question 4: How many people do you need in a room for the probability to be greater than 0.50 that at least two people in the room have the same birthday? Answer this question using a simulation. For example, if there are 2 people in the room, what is the probability that they have the same birthday? Now what about 3, 4, … 365 people?

#Step 1: Create the design matrix
# One row per (room size, simulation) pair: room sizes 1..365, each simulated
# 1000 times. `result` starts as NA and is filled below.
design.matrix <- expand.grid(
  "people.in.room" = 1:365,
  simulation = 1:1000,
  result = NA
  )
#design.matrix

#Step 2: Run one simulation per row of the design matrix
# For each row, draw one birthday (day 1..365) per person and record whether
# at least two people share a day. The whole column is computed with vapply()
# and assigned once; updating the data frame element by element over 365,000
# rows is very slow.
design.matrix$result <- vapply(
  design.matrix$people.in.room,
  function(people.i) {
    bdays <- sample(x = 1:365, size = people.i, replace = TRUE)
    # anyDuplicated() returns 0 when every birthday is distinct
    anyDuplicated(bdays) > 0
  },
  logical(1)
)

#Step 3: Aggregate the simulations to answer the question
# Share of simulated rooms with a shared birthday, for each room size
p.shared.bday <- tapply(
  design.matrix$result,
  design.matrix$people.in.room,
  mean
)

# Smallest room size whose estimated probability exceeds 0.50. This is a
# simulation estimate, so it can vary slightly from run to run.
min.people <- as.integer(names(p.shared.bday)[which(p.shared.bday > 0.5)[1]])
min.people

# Single illustrative draw: does one room of N people contain a shared birthday?
N <- 10
bdays <- sample(1:365, size = N, replace = TRUE)
# TRUE when at least one birthday occurs more than once
length(bdays) != length(unique(bdays))
## [1] FALSE   (output of one particular run; the draw is random, so it can also be TRUE)