# Load the pirate survey: a tab-separated text file with a header row.
# Strings are kept as character vectors instead of being converted to factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#Question 1: Create the following histograms of the number of tattoos pirates have separately for each favorite pirate. Add appropriate labels for each plot. Hint: Use unique(pirates$favorite.pirate) as your index values. Additionally, before creating the loop, set up a 2 x 3 plotting region using par(mfrow = c(2, 3))
# Draw one histogram of tattoo counts per favorite pirate on a 2 x 3 grid.
# par() returns the settings it replaces; they are restored after the loop so
# that later plots in this script are not squeezed into one cell of the grid.
old.par <- par(mfrow = c(2, 3))
for (favorite.pirate.i in unique(pirates$favorite.pirate)) {
  # Keep only the pirates whose favorite is the current one.
  data.temp <- subset(pirates, favorite.pirate == favorite.pirate.i)
  hist(
    data.temp$tattoos,
    main = favorite.pirate.i,
    xlab = "tattoos"
  )
}
par(old.par)

#Question 2: The law of large numbers says that the larger your sample size, the closer your sample statistic will be to the true population value. Let’s test this by conducting a simulation. For sample sizes of 1 to 100, calculate the average difference between the sample mean and the population mean from a Normal distribution with mean 100 and standard deviation 10.
# Step 1: Create the design matrix -- one row per combination of sample size
# (1 to 100) and simulation replicate (1 to 100), with an empty result column.
design.matrix <- expand.grid(
  "sample.size" = 1:100,
  "simulation" = 1:100,
  "result" = NA
)
# View(design.matrix)

# Step 2: For every row of the design matrix, draw a sample of that row's size
# from a Normal distribution (mean 100, sd 10) and store the signed difference
# between the sample mean and the population mean of 100.
# vapply() visits the rows in order, so the random draws happen in the same
# sequence as a row-by-row loop, but the result column is written once instead
# of element by element, and no globals named `data` or `diff` are created
# (both names mask base R functions).
design.matrix$result <- vapply(
  design.matrix$sample.size,
  function(sample.size.i) {
    mean(rnorm(n = sample.size.i, mean = 100, sd = 10)) - 100
  },
  numeric(1)
)
#design.matrix
# Question 3: Plot your aggregate results from question 2.
# Scatter plot of each simulated difference (y) against its sample size (x),
# drawn as blue asterisks on fixed axis ranges.
with(
  design.matrix,
  plot(
    sample.size, result,
    xlim = c(0, 100),
    ylim = c(-30, 30),
    xlab = "sample size",
    ylab = "Result",
    pch = 8,
    col = "blue"
  )
)

#Question 4: How many people do you need in a room for the probability to be greater than 0.50 that at least two people in the room have the same birthday? Answer this question using a simulation. For example, if there are 2 people in the room, what is the probability that they have the same birthday? Now what about 3, 4, … 365 people?
# Step 1: Create the design matrix -- one row per combination of room size
# (1 to 365 people) and simulation replicate (1 to 1000), with an empty
# result column.
design.matrix <- expand.grid(
  "people.in.room" = 1:365,
  simulation = 1:1000,
  result = NA
)
# design.matrix

# Step 2: For every row, draw one birthday (a day number from 1 to 365) per
# person, with replacement, and record whether any birthday occurs more than
# once. The column is filled in a single vapply() pass: writing one element of
# a data-frame column per iteration can copy the whole 365,000-element column
# each time, which makes a row-by-row loop very slow.
design.matrix$result <- vapply(
  design.matrix$people.in.room,
  function(people.i) {
    bdays <- sample(x = 1:365, size = people.i, replace = TRUE)
    # anyDuplicated() returns 0 when all values are distinct.
    anyDuplicated(bdays) > 0
  },
  logical(1)
)

# Step 3: Aggregate. birthday.prob holds, for each room size, the share of
# simulations with a shared birthday; min.people is the smallest room size
# whose estimated probability exceeds 0.50 (NA if none does).
birthday.prob <- tapply(
  design.matrix$result,
  design.matrix$people.in.room,
  mean
)
min.people <- as.integer(names(birthday.prob)[which(birthday.prob > 0.5)[1]])
# Single illustrative trial: draw N birthdays (day numbers 1 to 365, with
# replacement) and print whether at least two of them coincide.
N <- 10
bdays <- sample(1:365, size = N, replace = TRUE)
# anyDuplicated() returns 0 when every birthday is distinct.
anyDuplicated(bdays) > 0
## [1] FALSE   (output of one run; the draw is random, so TRUE is also possible)