Climatic events in the US have severe economic and health-related consequences. This report identifies those events associated with the highest levels of damage to persons and property nationally.
Hurricanes/Typhoons were found to be the costliest event overall in terms of property damage. Storms, floods and tornados also resulted in high costs. Together these events accounted for 78% of all financial costs.
require(dplyr)
require(ggplot2)
require(ggthemes)
require(reshape2)
require(lubridate)
require(readr)
#download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
#"StormData.csv.bz2")
#dat <- read_csv("StormData.csv.bz2")
load("StormData.RData")
str(dat)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
The sparsity of earlier records suggests that many events went unrecorded in early years. This potentially makes comparisons between event types since certain types of events may have gone unreported in years with fewer records. From 1973 onward the number of events per year increase exponentially. For that reason, this year has been chosen as the starting point of the analysis. Data recorded before 1973 is disregarded. The red vertical line in the histogram above marks 1973.
dat <- dat[dat$EventYear >= 1973, ]
One outlier was deemed to be an error: The January 1st 2006 flood in Napa county (ref 605943) was reported to have cost $115B. According to public records, the estimated property damage did not exceed $300M (source: http://pubs.usgs.gov/of/2006/1182/pdf/ofr2006-1182.pdf). The number has therefore been revised to $115M.
dat[dat$REFNUM==605943, "PROPDMGEXP"] <- "M"
propMultiplier = sapply(dat$PROPDMGEXP, function(x) switch(x, "K"=0.000001, "M"=0.001, "B"=1, 0))
dat$propCost <- dat$PROPDMG * unlist(propMultiplier, use.names = FALSE)
cropMultiplier = sapply(dat$CROPDMGEXP, function(x) switch(x, "K"=0.000001, "M"=0.001, "B"=1, 0))
dat$cropCost <- dat$CROPDMG * unlist(cropMultiplier, use.names = FALSE)
dat$totalCost <- dat$propCost + dat$cropCost
categoriseEvent <- function(event) {
if (grepl("*HURRICANE*|*TYPHOON*", event, ignore.case = TRUE)) return("HURRICANE-TYPHOON")
if (grepl("*TORNADO*", event, ignore.case = TRUE)) return("TORNADO")
if (grepl("*WIND*", event, ignore.case = TRUE)) return("WIND")
if (grepl("*STORM*", event, ignore.case = TRUE)) return("STORM")
if (grepl("*FLOOD*", event, ignore.case = TRUE)) return("FLOOD")
if (grepl("*RAIN*", event, ignore.case = TRUE)) return("RAIN")
if (grepl("*RIP CURRENT*", event, ignore.case = TRUE)) return("RIP CURRENT")
if (grepl("*WARM*|*HEAT*", event, ignore.case = TRUE)) return("WARM-HEAT")
if (grepl("*COLD*|*FREEZE*", event, ignore.case = TRUE)) return("COLD-FREEZE")
#If none of the above, return the original event type
return(event)
}
dat$category <- sapply(dat$EVTYPE, function(x) categoriseEvent(x))
#Dollars damage
CostSummary <- dat %>%
group_by(category) %>%
summarise(EventCount = n(), TotalCost = sum(totalCost),
PropCost = sum(propCost), CropCost = sum(cropCost)) %>%
filter(TotalCost >= 0.01) %>%
arrange(desc(TotalCost)) %>%
mutate(CumCost = cumsum(TotalCost), CumPct = CumCost/sum(TotalCost)) %>%
#Calculations complete, now fix formatting:
mutate(TotalCost = round(TotalCost, 1), CumCost = round(CumCost),
PropCost = round(PropCost, 1), CropCost = round(CropCost, 1),
CumPct = paste(round(CumPct, 2) * 100, "%")) %>%
head(n=15) %>%
data.frame()
#Prep data for plot
plotData <- melt(CostSummary, id.vars = "category", measure.vars = c("PropCost", "CropCost"))
plotData$category <- reorder(plotData$category, plotData$value)
#Generate Plot
#g <- ggplot(CostSummary, aes(x=reorder(category, TotalCost), y=TotalCost))
g <- ggplot(plotData, aes(x=category, y=value, fill=variable))
g <- g + geom_bar(stat = "identity") + coord_flip()
g <- g + labs(y="Total Cost USD Billions", x="Event Type")
g <- g + ggtitle("US Top 15 most costly\nclimatic events 1973-2011")
g <- g + theme_economist()
g <- g + scale_y_continuous(expand = c(0, 0))
g <- g + scale_fill_economist(labels = c("Property", "Crops"))
g <- g + guides(fill=guide_legend(title="Cost type"))
print(g)
CostSummary
## category EventCount TotalCost PropCost CropCost CumCost CumPct
## 1 HURRICANE-TYPHOON 299 90.8 85.3 5.5 91 26 %
## 2 STORM 3691 66.8 61.1 5.7 158 44 %
## 3 FLOOD 82712 64.9 52.6 12.3 223 63 %
## 4 TORNADO 45755 52.7 52.3 0.4 275 78 %
## 5 WIND 366959 24.9 22.8 2.2 300 85 %
## 6 HAIL 276163 18.8 15.7 3.0 319 90 %
## 7 DROUGHT 2488 15.0 1.0 14.0 334 94 %
## 8 WILDFIRE 2761 5.1 4.8 0.3 339 95 %
## 9 RAIN 12196 4.1 3.2 0.8 343 97 %
## 10 COLD-FREEZE 2424 3.4 0.1 3.3 347 98 %
## 11 WILD/FOREST FIRE 1457 3.1 3.0 0.1 350 98 %
## 12 WARM-HEAT 19115 2.0 1.0 1.0 352 99 %
## 13 LIGHTNING 15754 0.9 0.9 0.0 353 99 %
## 14 BLIZZARD 2719 0.8 0.7 0.1 353 100 %
## 15 WILD FIRES 4 0.6 0.6 0.0 354 100 %
The table above shows which events caused the highest economic cost. The CumPct column shows the cumulative total cost caused by events.
#Health cost
HealthSummary <- dat %>%
group_by(category) %>%
summarise(EventCount = n(), Fatalities = sum(FATALITIES), Injuries = sum(INJURIES)) %>%
filter(Fatalities + Injuries >= 1) %>%
arrange(desc(Fatalities)) %>%
mutate(CumFatalities = cumsum(Fatalities), CumPct = CumFatalities/sum(Fatalities)) %>%
#Calculations complete, now fix formatting:
mutate(CumPct = paste(round(CumPct, 2) * 100, "%")) %>%
head(n=15) %>%
data.frame()
#Prep data for plot
plotData <- melt(HealthSummary, id.vars = "category", measure.vars = c("Fatalities", "Injuries"))
plotData$category <- reorder(plotData$category, plotData$value)
#Generate plot
g <- ggplot(plotData, aes(x=category, y=value, fill=variable))
g <- g + geom_bar(stat = "identity") + coord_flip()
g <- g + labs(y="Number of people", x="Event Type")
g <- g + ggtitle("US Top 15 most deadly\nclimatic events 1973-2011")
g <- g + theme_economist()
g <- g + scale_y_continuous(expand = c(0, 0))
g <- g + scale_fill_economist()
g <- g + guides(fill=guide_legend(title="Incident type"))
print(g)
HealthSummary
## category EventCount Fatalities Injuries CumFatalities CumPct
## 1 WARM-HEAT 19115 3358 10365 3358 27 %
## 2 TORNADO 45755 3041 54618 6399 51 %
## 3 WIND 366959 1704 13466 8103 65 %
## 4 FLOOD 82712 1525 8602 9628 77 %
## 5 LIGHTNING 15754 816 5230 10444 83 %
## 6 RIP CURRENT 777 577 529 11021 88 %
## 7 COLD-FREEZE 2424 234 295 11255 90 %
## 8 AVALANCHE 386 224 170 11479 92 %
## 9 STORM 3691 206 2899 11685 93 %
## 10 HURRICANE-TYPHOON 299 135 1333 11820 94 %
## 11 RAIN 12196 112 305 11932 95 %
## 12 BLIZZARD 2719 101 805 12033 96 %
## 13 HIGH SURF 725 101 152 12134 97 %
## 14 WILDFIRE 2761 75 911 12209 97 %
## 15 FOG 538 62 734 12271 98 %
The table above shows which events caused the highest number of fatalities. The CumPct column shows the cumulative fatalities from events.