Using the binomial distribution, determine if the weather during the week of your birth was more (or less) extreme than expected.
# Load the weather observations for the birth week of each year.
weatherData <- read.csv("birth week weather data.csv")
# Keep only the first nine columns; anything after them is not used.
weatherData <- weatherData[, seq_len(9), drop = FALSE]
# Pull out the Year column once, then carve one data frame per year.
# `%in%` never yields NA, so rows with a missing year are dropped.
year <- weatherData$Year
birthYear <- weatherData[year %in% 2001, , drop = FALSE]
yearPlusOne <- weatherData[year %in% 2002, , drop = FALSE]
yearPlusTwo <- weatherData[year %in% 2003, , drop = FALSE]
yearPlusThree <- weatherData[year %in% 2004, , drop = FALSE]
yearPlusFour <- weatherData[year %in% 2005, , drop = FALSE]
yearPlusFive <- weatherData[year %in% 2006, , drop = FALSE]
# Estimate, for each year, the probability that a day of the birth week has an
# average max wind speed of more than 5 MPH.
# Threshold in MPH; a day counts as windy only when its reading is strictly
# greater than this value.
windThreshold <- 5
# A week has 7 days, so only the first 7 rows of each yearly subset are read.
daysInWeek <- 7

# Number of days in one year's subset whose Avg.Max.Wind..MPH. reading is
# strictly above windThreshold.
#   week: data frame holding one year's rows of the birth week.
# Returns a single count. Missing readings are treated as "not windy" instead
# of aborting the script.
countWindyDays <- function(week) {
  wind <- week$Avg.Max.Wind..MPH.
  if (is.null(wind)) {
    stop("column 'Avg.Max.Wind..MPH.' not found", call. = FALSE)
  }
  sum(head(wind, daysInWeek) > windThreshold, na.rm = TRUE)
}

# 2001 (birth year): p0 is the success probability of the binomial model.
countBirthYear <- countWindyDays(birthYear)
p0 <- countBirthYear / daysInWeek
# Number of binomial trials: one per comparison year (2002-2006).
n <- 5
# 2002 -- uses the same "greater than" test as every other year.
countYearPlusOne <- countWindyDays(yearPlusOne)
p1 <- countYearPlusOne / daysInWeek
# 2003
countYearPlusTwo <- countWindyDays(yearPlusTwo)
p2 <- countYearPlusTwo / daysInWeek
# 2004
countYearPlusThree <- countWindyDays(yearPlusThree)
p3 <- countYearPlusThree / daysInWeek
# 2005
countYearPlusFour <- countWindyDays(yearPlusFour)
p4 <- countYearPlusFour / daysInWeek
# 2006
countYearPlusFive <- countWindyDays(yearPlusFive)
p5 <- countYearPlusFive / daysInWeek
# Per-year proportions and raw counts for the five comparison years, in
# calendar order (element 1 is 2002, element 5 is 2006).
probabilities <- c(p1, p2, p3, p4, p5)
counts <- c(
  countYearPlusOne, countYearPlusTwo, countYearPlusThree,
  countYearPlusFour, countYearPlusFive
)
# Find the comparison years that were windier than the birth year, i.e. whose
# windy-day count is strictly greater than the birth year's count.
# counts[i] belongs to the year 2001 + i (counts[1] is 2002).
windierThanBirth <- counts > countBirthYear
# more: how many comparison years were windier than the birth year.
more <- sum(windierThanBirth)
# which: the calendar years that were windier. The variable name is kept for
# compatibility; note that it shadows base::which as a variable.
which <- 2001 + seq_along(counts)[windierThanBirth]
if (length(which) == 0) {
  cat("No year had higher wind speeds than my birth year")
} else {
  # Join every matching year ("a, b and c") instead of assuming exactly two.
  yearList <- if (length(which) > 1) {
    paste(paste(head(which, -1), collapse = ", "), "and", tail(which, 1))
  } else {
    as.character(which)
  }
  cat("The years that had higher wind speeds than my birth year were", yearList)
}
## Example output: The years that had higher wind speeds than my birth year were 2002 and 2004
# k: number of comparison years that had at least one windy day.
k <- sum(counts > 0)
# Binomial probabilities for every possible number of successes 0..n, using
# the birth-year proportion p0 as the success probability.
# 0:n is used directly: `1:n+1` would parse as (1:n) + 1 and skip k = 0.
k_values <- 0:n
dbs <- dbinom(k_values, n, p0)
# Probability of observing exactly k under the same model.
bd <- dbinom(k, n, p0)
# The title states 5 MPH to match the `> 5` comparison used for the counts.
plot(
  k_values, dbs,
  type = "b",
  main = "Binomial distribution for the likelihood of wind speeds more than 5 MPH",
  xlab = "k values",
  ylab = "Binomial probabilities"
)
# Suzanne Collins, the author of the Hunger Games series
# Born in Hartford, CT, on August 10, 1962
# Wind speeds higher than 8 MPH (the threshold the comparisons in this section use)
# Load the weather observations for Suzanne Collins' birth week.
collinsWeather <- read.csv("collins weather data.csv")
# Keep only the first nine columns; anything after them is not used.
collinsWeather <- collinsWeather[, seq_len(9), drop = FALSE]
# Pull out the Year column once, then carve one data frame per year.
# `%in%` never yields NA, so rows with a missing year are dropped.
collinsYears <- collinsWeather$Year
collinsbirthYear <- collinsWeather[collinsYears %in% 1962, , drop = FALSE]
collinsyearPlusOne <- collinsWeather[collinsYears %in% 1963, , drop = FALSE]
collinsyearPlusTwo <- collinsWeather[collinsYears %in% 1964, , drop = FALSE]
collinsyearPlusThree <- collinsWeather[collinsYears %in% 1965, , drop = FALSE]
collinsyearPlusFour <- collinsWeather[collinsYears %in% 1966, , drop = FALSE]
collinsyearPlusFive <- collinsWeather[collinsYears %in% 1967, , drop = FALSE]
# Estimate, for each year, the probability that a day of the birth week has an
# average wind speed of more than 8 MPH.
# Threshold in MPH; a day counts as windy only when its reading is strictly
# greater than this value.
collinsWindThreshold <- 8
# A week has 7 days, so only the first 7 rows of each yearly subset are read.
collinsDaysInWeek <- 7

# Number of days in one year's subset whose Avg.Wind.Speed..MPH. reading is
# strictly above collinsWindThreshold.
#   week: data frame holding one year's rows of the birth week.
# Returns a single count. Missing readings are treated as "not windy" instead
# of aborting the script.
countCollinsWindyDays <- function(week) {
  wind <- week$Avg.Wind.Speed..MPH.
  if (is.null(wind)) {
    stop("column 'Avg.Wind.Speed..MPH.' not found", call. = FALSE)
  }
  sum(head(wind, collinsDaysInWeek) > collinsWindThreshold, na.rm = TRUE)
}

# 1962 (birth year): CP0 is the success probability of the binomial model.
collinsBirthCount <- countCollinsWindyDays(collinsbirthYear)
CP0 <- collinsBirthCount / collinsDaysInWeek
# 1963
collinsCountOne <- countCollinsWindyDays(collinsyearPlusOne)
CP1 <- collinsCountOne / collinsDaysInWeek
# 1964
collinsCountTwo <- countCollinsWindyDays(collinsyearPlusTwo)
CP2 <- collinsCountTwo / collinsDaysInWeek
# 1965
collinsCountThree <- countCollinsWindyDays(collinsyearPlusThree)
CP3 <- collinsCountThree / collinsDaysInWeek
# 1966
collinsCountFour <- countCollinsWindyDays(collinsyearPlusFour)
CP4 <- collinsCountFour / collinsDaysInWeek
# 1967
collinsCountFive <- countCollinsWindyDays(collinsyearPlusFive)
CP5 <- collinsCountFive / collinsDaysInWeek
# Per-year proportions and raw counts for the five comparison years, in
# calendar order (element 1 is 1963, element 5 is 1967).
collinProbabilities <- c(CP1, CP2, CP3, CP4, CP5)
collinsCounts <- c(
  collinsCountOne, collinsCountTwo, collinsCountThree,
  collinsCountFour, collinsCountFive
)
# collinsK: number of comparison years that had at least one windy day.
collinsK <- sum(collinsCounts > 0)
# Binomial probabilities for every possible number of successes 0..n, using
# the birth-year proportion CP0 as the success probability. `n` (the number of
# trials) is the value assigned earlier in this script.
# 0:n is used directly: `1:n+1` would parse as (1:n) + 1 and skip k = 0.
collinsKValues <- 0:n
collinsDBS <- dbinom(collinsKValues, n, CP0)
# The title states 8 mph to match the `> 8` comparison used for the counts.
plot(
  collinsKValues, collinsDBS,
  type = "b",
  main = "Binomial distribution for the likelihood of wind speeds more than 8 mph (Suzanne Collins Birth Place)",
  xlab = "k values",
  ylab = "Binomial probabilities"
)