What is the chance that this year is in a hurricane ‘drought’? We quantify this chance using a Markov Chain in which the annual counts are coded as 1 or 0 depending on whether there was at least one hurricane or not.
Read in the data.
setwd("~/Dropbox/Hurricanes")
US = read.table("US.txt", header = TRUE)
#US = US[US$Year >= 1900 & US$Year < 2009, ]
US$W = as.integer(US$MUS > 0)
#US$W = as.integer(US$FL > 0)
w = US$W
#w = US$W[US$Year >= 1960]
#mean(US$FL[US$Year < 1960]); mean(US$FL[US$Year >= 1960])
w0 = c(NA, w)
w1 = c(w, NA)
w01 = cbind(w0, w1)[2:length(w), ]
t0 = table(w01[, 1], w01[, 2])
t1 = t0/rowSums(t0)
ww = rep(0, 100000)
tp = t1[, 2]
for(i in 2:100000){
ww[i] <- rbinom(1, prob = tp[ww[i - 1] + 1], size = 1)
}
tmp = rle(ww)
tmp2 = tmp$lengths[tmp$values == 0]
csy = sum(tmp2 >= 9)/length(ww) * 100
cy = sum(tmp2[tmp2 >= 9])/length(ww) * 100
print(csy); print(cy)
## [1] 0.34
## [1] 3.592
The first states that there is a 0.34% that any year is the start of a sequence. The second states that there is a 3.59% chance that any year is in a sequence of at least 9 years with no storms.
cy = numeric()
for(i in 5:16){
cy = c(cy, sum(tmp2[tmp2 >= i])/length(ww) * 100)
}
df = data.frame(nY = as.factor(5:16), cy)
library("ggplot2")
ggplot(df, aes(nY, cy)) +
geom_bar(stat = "identity") +
ylab("Chance (%) Any Year Is In a Major Hurricane 'Drought'\n of At Least N Years in the United States sans Hawaii") +
xlab("String of N Years Without a Major U.S. Hurricane")
# ylab("Chance (%) Any Year Is In a String\nof At Least N Years Without A Florida Hurricane") +
# xlab("String of N Years Without a Florida Hurricane")
Run the above with Year < 1960 and Year >= 1960
df2 = df
df2$Period = rep("Before 1960", dim(df)[1])
df$Period = rep("After 1960", dim(df)[1])
df3 = rbind(df2, df)
df3$PeriodF = factor(df3$Period, levels = c("Before 1960", "After 1960"))
ggplot(df3, aes(nY, cy)) +
geom_bar(stat = "identity") +
facet_wrap(~ PeriodF) +
ylab("Chance (%) Any Year Is In a String\nof At Least N Years Without A Florida Hurricane") +
xlab("String of N Years Without a Florida Hurricane")
cy = numeric()
J = 100
for(j in 1:J){
ww = rep(0, 100000)
tp = t1[, 2]
for(i in 2:100000){
ww[i] <- rbinom(1, prob = tp[ww[i - 1] + 1], size = 1)
}
tmp = rle(ww)
tmp2 = tmp$lengths[tmp$values == 0]
for(k in 5:16){
cy = c(cy, sum(tmp2[tmp2 >= k])/length(ww) * 100)
}
}
df = data.frame(j = rep(1:J, each = 12), nY = rep(as.factor(5:16), J), cy)
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df2 = df %>%
group_by(nY) %>%
summarize(cyM = mean(cy),
cySE = sd(cy))
ggplot(df2, aes(nY, cyM)) +
geom_bar(stat = "identity", fill = "blue", alpha = .3) +
geom_errorbar(aes(ymin = cyM - 2 * cySE, ymax = cyM + 2 * cySE), width = .05) +
ylab("Chance (%) Any Year Is In a Major Hurricane 'Drought'\n of At Least N Years in the United States sans Hawaii") +
xlab("'Drought' of N Years Without a Major U.S. Hurricane")