Using the binomial distribution, determine if the weather during the week of your birth was more (or less) extreme than expected.

  1. Find a data source that provides weather data where you were born for your birth date.
# Load the daily weather observations for the birth week of each year.
weatherData <- read.csv("birth week weather data.csv")

# Keep only the first nine columns of the file.
weatherData <- weatherData[1:9]

# Year of every observation; used below to split the data by year.
year <- weatherData$Year

# One data frame per year: the birth year (2001) and the five years after it.
# `%in%` never returns NA, so rows with a missing year are dropped, exactly
# as subset() would drop them.
birthYear     <- weatherData[year %in% 2001, ]
yearPlusOne   <- weatherData[year %in% 2002, ]
yearPlusTwo   <- weatherData[year %in% 2003, ]
yearPlusThree <- weatherData[year %in% 2004, ]
yearPlusFour  <- weatherData[year %in% 2005, ]
yearPlusFive  <- weatherData[year %in% 2006, ]
  1. Determine the probability of a weather statistic for that week, e.g., how many days of rain. If it rained 3 days that week you would say 3/7 or 43% rain for the week. This is the probability (p) of rain that we care about.
# Statistic of interest: the probability that a day's average maximum wind
# speed is above 5 MPH.
# NOTE(review): this comment previously said 4 MPH while the code compares
# against 5 -- confirm the intended threshold.

# 2001

# Only the first 7 rows are used because a week has 7 days. The comparison is
# vectorised, and na.rm = TRUE means a missing reading (or a week with fewer
# than 7 rows) is simply not counted as windy instead of aborting the script.
birthWeekWind <- head(birthYear$Avg.Max.Wind..MPH., 7)
countBirthYear <- sum(birthWeekWind > 5, na.rm = TRUE)

# Share of the 7 days above the threshold; this is p for the binomial model.
p0 <- countBirthYear / 7
  1. Using historical data for 5 other years, determine the probabilities of the same weather statistic for the week of your birth in those years as well. Going forward, n = 5.
# Number of comparison years (2002-2006); this is n in the binomial formula.
n <- 5

# Counts the days in one week of data whose average maximum wind speed is
# strictly above `threshold` MPH.
#   weekData  - data frame holding one year's week of observations
#   threshold - wind speed (MPH) a day must exceed to be counted
#   days      - number of rows to consider (a week has 7 days)
# Missing readings are not counted as windy.
countWindyDays <- function(weekData, threshold = 5, days = 7) {
  speeds <- head(weekData$Avg.Max.Wind..MPH., days)
  sum(speeds > threshold, na.rm = TRUE)
}

# 2002
# The comparison for this year used to be `< 5`, which counted the calm days
# instead of the windy ones; it now uses the same `> 5` test as other years.
countYearPlusOne <- countWindyDays(yearPlusOne)
p1 <- countYearPlusOne / 7

# 2003
countYearPlusTwo <- countWindyDays(yearPlusTwo)
p2 <- countYearPlusTwo / 7

# 2004
countYearPlusThree <- countWindyDays(yearPlusThree)
p3 <- countYearPlusThree / 7

# 2005
countYearPlusFour <- countWindyDays(yearPlusFour)
p4 <- countYearPlusFour / 7

# 2006
countYearPlusFive <- countWindyDays(yearPlusFive)
p5 <- countYearPlusFive / 7
  1. Turn these into your data - e.g., for each week, determine if there was more or less rain than in the week of your actual birth.
# Per-year probabilities and windy-day counts for 2002-2006, in year order.
probabilities <- c(p1, p2, p3, p4, p5)

counts <- c(countYearPlusOne, countYearPlusTwo, countYearPlusThree,
            countYearPlusFour, countYearPlusFive)

# Calendar year that each entry of `counts` belongs to (2002, 2003, ...).
comparisonYears <- 2001 + seq_along(counts)

# A year is "windier" when it had strictly more windy days than the birth
# week. The previous test (countBirthYear >= counts[i]) selected the opposite
# set of years, contradicting the message printed below.
windier <- counts > countBirthYear

# Number of comparison years that were windier than the birth year.
more <- sum(windier)

# The windier years themselves. The name `which` is kept for compatibility;
# it does not break calls to base::which(), since R skips non-function
# objects when looking up a function name.
which <- comparisonYears[windier]

# Report every matching year (not just the first two), and avoid printing NA
# when there are fewer than two.
if (length(which) > 0) {
  cat("The years that had higher wind speeds than my birth year were",
      paste(which, collapse = " and "))
} else {
  cat("No years had higher wind speeds than my birth year")
}
## The years that had higher wind speeds than my birth year were 2002 and 2004
  1. We want to know if, in the year of your birth, the weather that week was unusual in comparison to the previous 5 years. Therefore, count in how many of the 5 weeks the weather met or exceeded the probability observed in the year you were born. This becomes k.
# k: the number of comparison weeks (out of 5) whose windy-day count met or
# exceeded the birth week's count. Every week has the same 7-day denominator,
# so comparing counts is equivalent to comparing the probabilities.
# The previous test (counts[i] > 0) only checked whether a week had any windy
# day at all and never compared against the birth week.
k <- sum(counts >= countBirthYear)
  1. Use the binomial formula to determine the likelihood of the weather during the week you were born compared to the 5 year historical data.
# All possible outcomes k = 0, 1, ..., n and their binomial probabilities
# with success probability p0.
# The previous loop header `1:n+1` parsed as `(1:n) + 1`, i.e. 2..(n + 1), so
# index 1 (k = 0) was never filled and stayed NA. dbinom() is vectorised, so
# no loop is needed.
k_values <- 0:n
dbs <- dbinom(k_values, n, p0)

# Probability of observing exactly k qualifying weeks out of n.
bd <- dbinom(k, n, p0)

# The title states 5 mph to match the `> 5` threshold that p0 is computed
# with (it previously said 4mph).
plot(
  k_values, dbs,
  type = "b",
  main = "Binomial distribution for the likelihood of wind speeds more than 5 mph",
  xlab = "k values",
  ylab = "Binomial probabilities"
)

  1. Repeat this for a celebrity, another family member, or a historical figure.
# Suzanne Collins, the author of the Hunger Games series.
# Born in Hartford, CT, on August 10, 1962.
#
# Statistic: days with an average wind speed above 8 MPH.
# NOTE(review): this header previously said 7 MPH, but the comparisons in
# this analysis use 8 -- confirm the intended threshold.

# Load the daily weather observations for Collins' birth week of each year.
collinsWeather <- read.csv("collins weather data.csv")

# Keep only the first nine columns of the file.
collinsWeather <- collinsWeather[1:9]

# Year of every observation; used below to split the data by year.
collinsYears <- collinsWeather$Year

# One data frame per year: the birth year (1962) and the five years after it.
# `%in%` never returns NA, so rows with a missing year are dropped, exactly
# as subset() would drop them.
collinsbirthYear     <- collinsWeather[collinsYears %in% 1962, ]
collinsyearPlusOne   <- collinsWeather[collinsYears %in% 1963, ]
collinsyearPlusTwo   <- collinsWeather[collinsYears %in% 1964, ]
collinsyearPlusThree <- collinsWeather[collinsYears %in% 1965, ]
collinsyearPlusFour  <- collinsWeather[collinsYears %in% 1966, ]
collinsyearPlusFive  <- collinsWeather[collinsYears %in% 1967, ]




# Counts the days in one week of data whose average wind speed is strictly
# above `threshold` MPH.
#   weekData  - data frame holding one year's week of observations
#   threshold - wind speed (MPH) a day must exceed to be counted
#   days      - number of rows to consider (a week has 7 days)
# The comparison is vectorised and missing readings are not counted, so a
# short or incomplete week no longer aborts the script the way the previous
# element-by-element `if` loops did.
countCollinsWindyDays <- function(weekData, threshold = 8, days = 7) {
  speeds <- head(weekData$Avg.Wind.Speed..MPH., days)
  sum(speeds > threshold, na.rm = TRUE)
}

# 1962 (birth year): count and share of the 7 days above the threshold.
collinsBirthCount <- countCollinsWindyDays(collinsbirthYear)
CP0 <- collinsBirthCount / 7

# 1963
collinsCountOne <- countCollinsWindyDays(collinsyearPlusOne)
CP1 <- collinsCountOne / 7

# 1964
collinsCountTwo <- countCollinsWindyDays(collinsyearPlusTwo)
CP2 <- collinsCountTwo / 7

# 1965
collinsCountThree <- countCollinsWindyDays(collinsyearPlusThree)
CP3 <- collinsCountThree / 7

# 1966
collinsCountFour <- countCollinsWindyDays(collinsyearPlusFour)
CP4 <- collinsCountFour / 7

# 1967
collinsCountFive <- countCollinsWindyDays(collinsyearPlusFive)
CP5 <- collinsCountFive / 7




# Per-year probabilities and windy-day counts for 1963-1967, in year order.
collinProbabilities <- c(CP1, CP2, CP3, CP4, CP5)
collinsCounts <- c(collinsCountOne, collinsCountTwo, collinsCountThree,
                   collinsCountFour, collinsCountFive)

# collinsK: the number of comparison weeks (out of 5) whose windy-day count
# met or exceeded the birth week's count. Every week has the same 7-day
# denominator, so comparing counts is equivalent to comparing probabilities.
# The previous test (collinsCounts[i] > 0) only checked whether a week had
# any windy day at all and never compared against the birth week.
collinsK <- sum(collinsCounts >= collinsBirthCount)

# All possible outcomes k = 0, 1, ..., n and their binomial probabilities
# with success probability CP0.
# The previous loop header `1:n+1` parsed as `(1:n) + 1`, i.e. 2..(n + 1), so
# index 1 (k = 0) was never filled and stayed NA. dbinom() is vectorised, so
# no loop is needed.
collinsKValues <- 0:n
collinsDBS <- dbinom(collinsKValues, n, CP0)

# The title states 8 mph to match the `> 8` threshold that CP0 is computed
# with (it previously said 7 mph), and its parenthesis is now closed.
plot(
  collinsKValues, collinsDBS,
  type = "b",
  main = "Binomial distribution for the likelihood of wind speeds more than 8 mph (Suzanne Collins Birth Place)",
  xlab = "k values",
  ylab = "Binomial probabilities"
)

  1. Provide your results in a KNIT file.