# WPA #9

# Load the pirate survey from the URL below. The file is tab-separated with a
# header row; text columns are kept as character vectors instead of factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
pirates <- read.table(
  "http://nathanieldphillips.com/wp-content/uploads/2015/05/pirate_survey_noerrors.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)
#pirates

#Question 1 
#Create the following histograms of the number of tattoos pirates have separately for each favorite pirate. Add appropriate labels for each plot. Hint: Use unique(pirates$favorite.pirate) as your index values. Additionally, before creating the loop, set up a 2 x 3 plotting region using par(mfrow = c(2, 3))

# Draw one histogram of tattoo counts for each favorite pirate on a 2 x 3 grid.
# par() returns the settings it replaces; they are restored after the loop so
# the grid layout does not carry over into plots drawn later in the script.
old.par <- par(mfrow = c(2, 3))
for (favorite.pirate.i in unique(pirates$favorite.pirate)) {
  # Rows belonging to the current favorite pirate only
  data.temp <- subset(pirates, favorite.pirate == favorite.pirate.i)
  hist(data.temp$tattoos,
       main = favorite.pirate.i,
       xlab = "tattoos")
}
par(old.par)

#Question 2
#The law of large numbers says that the larger your sample size, the closer your sample statistic will be to the true population value. Let’s test this by conducting a simulation. For sample sizes of 1 to 100, calculate the average difference between the sample mean and the population mean from a Normal distribution with mean 100 and standard deviation 10.

#Step 1: Create the design matrix
#design.matrix <- expand.grid("sample" = rnorm(n= 100,mean= 100, sd=10),"population" = sample(x= rnorm(n= 100,mean= 100, sd=10), size= 1, replace= T))

# Design matrix: one row per (sample size, simulation) combination, i.e.
# 100 sample sizes x 100 repetitions. The "result" column is filled in below.
design.matrix <- expand.grid(
  "sample.size" = 1:100,
  "simulation" = 1:100,
  result = NA
)

# For every row, draw a sample of that row's size from a Normal(100, 10)
# distribution and store the signed difference between the sample mean and the
# population mean. vapply() walks the rows in order (same sequence of random
# draws as a row-by-row loop), guarantees one number per row, and avoids
# creating variables named "data" or "diff" that would mask the base functions.
population.mean <- 100
design.matrix$result <- vapply(
  design.matrix$sample.size,
  function(sample.size.i) {
    mean(rnorm(n = sample.size.i, mean = population.mean, sd = 10)) -
      population.mean
  },
  numeric(1)
)
#design.matrix

#Question 3 
#Plot your aggregate results from question 2
# Use the whole device for this figure, whatever multi-panel layout is active.
par(mfrow = c(1, 1))

# Raw results: one point per simulated sample (sample mean minus 100).
plot(design.matrix$sample.size, design.matrix$result,
     xlim = c(0, 100), ylim = c(-30, 30),
     xlab = "Sample Size", ylab = "Result",
     pch = 8, col = "pink")

# Aggregate results: the mean absolute difference for each sample size, drawn
# on top of the raw points. The absolute value is used because the signed
# differences average out to roughly zero at every sample size.
mean.abs.diff <- tapply(
  abs(design.matrix$result),
  design.matrix$sample.size,
  mean
)
lines(as.numeric(names(mean.abs.diff)), mean.abs.diff, lwd = 2)
legend("topright",
       legend = c("single simulation", "mean absolute difference"),
       col = c("pink", "black"),
       pch = c(8, NA),
       lty = c(NA, 1),
       lwd = c(NA, 2))

#Question 4
#How many people do you need in a room for the probability to be greater than 0.50 that at least two people in the room have the same birthday? Answer this question using a simulation. For example, if there are 2 people in the room, what is the probability that they have the same birthday. Now what about 3, 4, … 365 people?

#Create the design matrix
# Design matrix: 1000 simulated rooms for every group size from 1 to 365.
design.matrix <- expand.grid(
  "people" = 1:365,
  simulation = 1:1000,
  result = NA
)

# For every row, draw one birthday (day 1..365, with replacement) per person
# and record whether at least two people share a birthday. The check has to
# happen once per simulated room, so it lives inside the per-row function;
# anyDuplicated() returns 0 when all birthdays are distinct.
design.matrix$result <- vapply(
  design.matrix$people,
  function(people.i) {
    birthdays <- sample(x = 1:365, size = people.i, replace = TRUE)
    anyDuplicated(birthdays) > 0
  },
  logical(1)
)

# Estimated probability of a shared birthday for each group size: the mean of
# a logical column is the proportion of TRUE values.
birthday.prob <- aggregate(result ~ people, data = design.matrix, FUN = mean)

# Answer: the smallest group size whose estimated probability exceeds 0.50.
min.people <- min(birthday.prob$people[birthday.prob$result > 0.5])
print(min.people)

# WPA #9

# Lea Riegler

# July 2015