# Load the pirate survey from a remote tab-separated text file that has a
# header row. String columns are kept as character vectors, not factors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#pirates
#Question 1
#Create the following histograms of the number of tattoos pirates have separately for each favorite pirate. Add appropriate labels for each plot. Hint: Use unique(pirates$favorite.pirate) as your index values. Additionally, before creating the loop, set up a 2 x 3 plotting region using par(mfrow = c(2, 3))
# Question 1: draw one histogram of tattoo counts per favorite pirate on a
# 2 x 3 grid of panels, titling each panel with that pirate's name.
par(mfrow = c(2, 3))
pirate.names <- unique(pirates$favorite.pirate)
for (pirate.name in pirate.names) {
  # which() keeps only rows where the comparison is TRUE (NA rows are dropped)
  fan.rows <- which(pirates$favorite.pirate == pirate.name)
  tattoo.counts <- pirates$tattoos[fan.rows]
  hist(tattoo.counts, main = pirate.name, xlab = "tattoos")
}

#Question 2
#The law of large numbers says that the larger your sample size, the closer your sample statistic will be to the true population value. Let’s test this by conducting a simulation. For sample sizes of 1 to 100, calculate the average difference between the sample mean and the population mean from a Normal distribution with mean 100 and standard deviation 10.
#Step 1: Create the design matrix
#design.matrix <- expand.grid("sample" = rnorm(n= 100,mean= 100, sd=10),"population" = sample(x= rnorm(n= 100,mean= 100, sd=10), size= 1, replace= T))
# Question 2: law-of-large-numbers simulation.
# The design matrix has one row per (sample size, simulation) pair: sample
# sizes 1 to 100, each repeated for 100 simulations.
design.matrix <- expand.grid(
  sample.size = seq_len(100),
  simulation = seq_len(100),
  result = NA
)

# For every row, draw `sample.size` values from Normal(mean = 100, sd = 10)
# and record the signed difference between the sample mean and the
# population mean of 100. Rows are processed in order, one draw per row.
design.matrix$result <- vapply(
  design.matrix$sample.size,
  function(n.obs) {
    draws <- rnorm(n = n.obs, mean = 100, sd = 10)
    mean(draws) - 100
  },
  numeric(1)
)
#design.matrix
#Question 3
#Plot your aggregate results from question 2
# Question 3: scatter every simulated difference (y) against the sample size
# that produced it (x), using pink star symbols on fixed axis ranges.
with(
  design.matrix,
  plot(
    x = sample.size,
    y = result,
    xlim = c(0, 100),
    ylim = c(-30, 30),
    xlab = "Sample Size",
    ylab = "Result",
    pch = 8,
    col = "pink"
  )
)

#Question 4
#How many people do you need in a room for the probability to be greater than 0.50 that at least two people in the room have the same birthday? Answer this question using a simulation. For example, if there are 2 people in the room, what is the probability that they have the same birthday? Now what about 3, 4, … 365 people?
#Create the design matrix
# Question 4: birthday problem by simulation.
# For every room size from 1 to 365 people, simulate 1000 rooms, record for
# each room whether at least two people share a birthday, then find the
# smallest room size whose estimated probability of a match exceeds 0.50.

# Simulate one room and report whether any birthday is shared.
#   n.people: number of people in the room (non-negative whole number).
#   n.days:   number of equally likely birthdays (defaults to 365).
# Returns TRUE if at least two simulated people share a birthday, else FALSE.
shared.birthday <- function(n.people, n.days = 365) {
  birthdays <- sample.int(n.days, size = n.people, replace = TRUE)
  # anyDuplicated() returns 0 when every value is distinct
  anyDuplicated(birthdays) > 0
}

# Create the design matrix: one row per (room size, simulation) pair
design.matrix <- expand.grid(
  "people" = 1:365,
  simulation = 1:1000,
  result = NA
)

# Run every simulated room and store its outcome in the matching row, so the
# results can be aggregated afterwards (previously nothing was stored).
design.matrix$result <- vapply(
  design.matrix$people,
  shared.birthday,
  logical(1)
)

# Aggregate: proportion of simulated rooms with a shared birthday, by room
# size. The names of the result are the room sizes.
birthday.prob <- tapply(design.matrix$result, design.matrix$people, mean)

# Smallest room size whose estimated probability is greater than 0.50
# (NA if no room size crosses the threshold)
people.needed <- as.integer(
  names(birthday.prob)[which(birthday.prob > 0.5)[1]]
)
people.needed
## [1] 23   (example output; the estimate can vary slightly between runs)

# Spot checks of a single simulated room at two specific sizes
N <- 3
shared.birthday(N)
## [1] FALSE   (example output; a match among 3 people is rare)
N <- 365
shared.birthday(N)
## [1] TRUE