This analyis investigates what types of storm events are most harmful in the US in terms of deaths and injuries (health), and property and crop damages (economic).

The analysis uses data on storm events as entered by NOAA’s National Weather Service (NWS), available from National Climatic Data Center (http://www.ncdc.noaa.gov/stormevents/details.jsp).

Data Processing

Load packages

library(ggplot2)
library(plyr)
library(reshape2)

Read in data

storm <- read.csv("./repdata-data-StormData.csv.bz2")

Since consistent recording of all event type did not start until 1996 and after, we will look only at entries after that date.

dates <- strptime(storm$BGN_DATE, "%m/%d/%Y %H:%M:%S")  
storm.96 <- storm[dates$year >= 96,]

Since this analysis concerns events causing most damage, we remove all observations with 0 casualties and damage

zeroes <- storm.96$FATALITIES == 0 & storm.96$INJURIES == 0 & 
        storm.96$PROPDMG == 0 & storm.96$CROPDMG == 0  

storm.good <- storm.96[!zeroes,]

There are many inconsistencies in how events have been entered. First convert all event types to upper case.

storm.good$EVTYPE <- as.character( sapply(storm.good$EVTYPE, function(x) {
        return(toupper(as.character(x)))
}))

Group together similar events in to major categories

storm.good$EVTYPE[grep("THUNDER|TSTM", storm.good$EVTYPE)] <- "THUNDER STORM"
storm.good$EVTYPE[grep("SURF", storm.good$EVTYPE)] <- "HIGH SURF"
storm.good$EVTYPE[grep("FLOOD|STORM SURGE", storm.good$EVTYPE)] <- "FLOOD"
storm.good$EVTYPE[grep("FIRE", storm.good$EVTYPE)] <- "FIRE"
storm.good$EVTYPE[grep("FROST|FREEZE", storm.good$EVTYPE)] <- "FROST/FREEZE"
storm.good$EVTYPE[grep("SNOW|WINT|IC.*ROAD", storm.good$EVTYPE)] <- "WINTER WEATHER"
storm.good$EVTYPE[grep("RAIN", storm.good$EVTYPE)] <- "HEAVY RAIN"
storm.good$EVTYPE[grep("GUSTY WIND|HIGH WIND|STRONG WIND|^WIND", storm.good$EVTYPE)] <- "STRONG WIND"
storm.good$EVTYPE[grep("HEAT", storm.good$EVTYPE)] <- "EXCESSIVE HEAT"
storm.good$EVTYPE[grep("HURRICANE|TYPHOON", storm.good$EVTYPE)] <- "HURRICANE (TYPHOON)"
storm.good$EVTYPE[grep("CURRENT", storm.good$EVTYPE)] <- "RIP CURRENT"

For crop and property damage, two exponent columns indicate a multiplier that is to be applied to the damage numbers. First we remove the issue of having blank multipliers and replace with 0.

storm.good[!storm.good$PROPDMGEXP %in% c("K", "M", "B"),]$PROPDMGEXP <- 0
storm.good[!storm.good$CROPDMGEXP %in% c("K", "M", "B"),]$CROPDMGEXP <- 0

Then multiply property and crop damages with exponents and add as new columns as respective totals

expkey <- c("0" = 1, "K" = 1000, "M" = 1E6, "B" = 1E9)
TOT.PROPDMG <-  storm.good$PROPDMG*expkey[as.vector(storm.good$PROPDMGEXP)]
TOT.CROPDMG <-  storm.good$CROPDMG*expkey[as.vector(storm.good$CROPDMGEXP)]
storm.dmg <- mutate(storm.good, TOT.PROPDMG = TOT.PROPDMG, TOT.CROPDMG = TOT.CROPDMG)

We perform a number of structural transformation to the data frame to perpare for further analysis

# Create a data frame that looks at the sum of fatalaties and injuries by event tupe
health.melt <- melt(storm.good, id="EVTYPE",  measure.vars=c("FATALITIES", "INJURIES"))

# List one observation for each evtype for total fatalities, and one for total injuries
health.counts <- ddply(health.melt, .(EVTYPE, variable), summarize, total = sum(value))

# Add upp injuries and deaths for each evtype
health.agg <- aggregate(total ~ EVTYPE, health.counts, sum)



# Break down EVTYPE by total property and crop damage
econ.melt <- melt(storm.dmg, id="EVTYPE",  measure.vars=c("TOT.PROPDMG", "TOT.CROPDMG"))


# Summarize each evtype over prop dmg and crop dmg
econ.counts <- ddply(econ.melt, .(EVTYPE, variable), summarize, total = sum(value))

# Sum up prop and crop dmg for each event type
econ.agg <- aggregate(total ~ EVTYPE, econ.counts, sum)

Results

Across the United States, which types of events are most harmful with respect to population health?

First we limit our analysis to event with 1000 or more casualties

health.agg.sorted <- health.agg[with(health.agg, order(-total, EVTYPE))  ,]
health.types <- health.agg.sorted[health.agg.sorted$total > 1000,]$EVTYPE
health.top <- health.counts[health.counts$EVTYPE %in% health.types, ]
health.top <- health.top[with(health.top, order(-total, EVTYPE)),]
health.top$EVTYPE <- factor(health.top$EVTYPE, levels = health.top$EVTYPE, ordered = T)
health.agg.sorted[health.agg.sorted$total > 1000,]
##                 EVTYPE total
## 73             TORNADO 22178
## 30               FLOOD  9805
## 24      EXCESSIVE HEAT  9719
## 72       THUNDER STORM  5562
## 54           LIGHTNING  4792
## 86      WINTER WEATHER  3022
## 71         STRONG WIND  1884
## 29                FIRE  1545
## 46 HURRICANE (TYPHOON)  1453
## 65         RIP CURRENT  1045

The following graph shows all even types with total casualties over 1000, with color indicating portion of injuries and deaths.

g <- ggplot(health.top, aes(EVTYPE, total))
g + geom_bar(stat = "identity", aes(fill = variable )) +
        theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 10)) +
        labs(title = "Types of events most harmful to population health")

plot of chunk unnamed-chunk-12

The graph shows all even types with total casualties over 1000, with color indicating portion of injuries and deaths.

Across the United States, which types of events have the greatest economic consequences?

Look at the top 5 event types in terms of total damgaes

econ.types <- econ.agg[with(econ.agg, order(-total, EVTYPE)),][1:5,]$EVTYPE
econ.top <- econ.counts[econ.counts$EVTYPE %in% econ.types, ]
econ.top <- econ.top[with(econ.top, order(-total, EVTYPE)),]
econ.top$EVTYPE <- factor(econ.top$EVTYPE, levels = econ.top$EVTYPE, ordered = T)
econ.agg[with(econ.agg, order(-total, EVTYPE)),][1:5,]
##                 EVTYPE     total
## 30               FLOOD 2.139e+11
## 46 HURRICANE (TYPHOON) 8.707e+10
## 73             TORNADO 2.490e+10
## 39                HAIL 1.707e+10
## 19             DROUGHT 1.441e+10
g <- ggplot(econ.top, aes(EVTYPE, total))
g + geom_bar(stat = "identity", aes(fill = variable )) +
        theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 10)) +
labs(title = "Types of events with greatest economic consequences")

plot of chunk unnamed-chunk-14

The graph shows the five top even types in terms of total damage, with colors indicating what portion of damage is property and crop damage.