This analyis investigates what types of storm events are most harmful in the US in terms of deaths and injuries (health), and property and crop damages (economic).
The analysis uses data on storm events as entered by NOAA’s National Weather Service (NWS), available from National Climatic Data Center (http://www.ncdc.noaa.gov/stormevents/details.jsp).
Load packages
library(ggplot2)
library(plyr)
library(reshape2)
Read in data
storm <- read.csv("./repdata-data-StormData.csv.bz2")
Since consistent recording of all event type did not start until 1996 and after, we will look only at entries after that date.
dates <- strptime(storm$BGN_DATE, "%m/%d/%Y %H:%M:%S")
storm.96 <- storm[dates$year >= 96,]
Since this analysis concerns events causing most damage, we remove all observations with 0 casualties and damage
zeroes <- storm.96$FATALITIES == 0 & storm.96$INJURIES == 0 &
storm.96$PROPDMG == 0 & storm.96$CROPDMG == 0
storm.good <- storm.96[!zeroes,]
There are many inconsistencies in how events have been entered. First convert all event types to upper case.
storm.good$EVTYPE <- as.character( sapply(storm.good$EVTYPE, function(x) {
return(toupper(as.character(x)))
}))
Group together similar events in to major categories
storm.good$EVTYPE[grep("THUNDER|TSTM", storm.good$EVTYPE)] <- "THUNDER STORM"
storm.good$EVTYPE[grep("SURF", storm.good$EVTYPE)] <- "HIGH SURF"
storm.good$EVTYPE[grep("FLOOD|STORM SURGE", storm.good$EVTYPE)] <- "FLOOD"
storm.good$EVTYPE[grep("FIRE", storm.good$EVTYPE)] <- "FIRE"
storm.good$EVTYPE[grep("FROST|FREEZE", storm.good$EVTYPE)] <- "FROST/FREEZE"
storm.good$EVTYPE[grep("SNOW|WINT|IC.*ROAD", storm.good$EVTYPE)] <- "WINTER WEATHER"
storm.good$EVTYPE[grep("RAIN", storm.good$EVTYPE)] <- "HEAVY RAIN"
storm.good$EVTYPE[grep("GUSTY WIND|HIGH WIND|STRONG WIND|^WIND", storm.good$EVTYPE)] <- "STRONG WIND"
storm.good$EVTYPE[grep("HEAT", storm.good$EVTYPE)] <- "EXCESSIVE HEAT"
storm.good$EVTYPE[grep("HURRICANE|TYPHOON", storm.good$EVTYPE)] <- "HURRICANE (TYPHOON)"
storm.good$EVTYPE[grep("CURRENT", storm.good$EVTYPE)] <- "RIP CURRENT"
For crop and property damage, two exponent columns indicate a multiplier that is to be applied to the damage numbers. First we remove the issue of having blank multipliers and replace with 0.
storm.good[!storm.good$PROPDMGEXP %in% c("K", "M", "B"),]$PROPDMGEXP <- 0
storm.good[!storm.good$CROPDMGEXP %in% c("K", "M", "B"),]$CROPDMGEXP <- 0
Then multiply property and crop damages with exponents and add as new columns as respective totals
expkey <- c("0" = 1, "K" = 1000, "M" = 1E6, "B" = 1E9)
TOT.PROPDMG <- storm.good$PROPDMG*expkey[as.vector(storm.good$PROPDMGEXP)]
TOT.CROPDMG <- storm.good$CROPDMG*expkey[as.vector(storm.good$CROPDMGEXP)]
storm.dmg <- mutate(storm.good, TOT.PROPDMG = TOT.PROPDMG, TOT.CROPDMG = TOT.CROPDMG)
We perform a number of structural transformation to the data frame to perpare for further analysis
# Create a data frame that looks at the sum of fatalaties and injuries by event tupe
health.melt <- melt(storm.good, id="EVTYPE", measure.vars=c("FATALITIES", "INJURIES"))
# List one observation for each evtype for total fatalities, and one for total injuries
health.counts <- ddply(health.melt, .(EVTYPE, variable), summarize, total = sum(value))
# Add upp injuries and deaths for each evtype
health.agg <- aggregate(total ~ EVTYPE, health.counts, sum)
# Break down EVTYPE by total property and crop damage
econ.melt <- melt(storm.dmg, id="EVTYPE", measure.vars=c("TOT.PROPDMG", "TOT.CROPDMG"))
# Summarize each evtype over prop dmg and crop dmg
econ.counts <- ddply(econ.melt, .(EVTYPE, variable), summarize, total = sum(value))
# Sum up prop and crop dmg for each event type
econ.agg <- aggregate(total ~ EVTYPE, econ.counts, sum)
First we limit our analysis to event with 1000 or more casualties
health.agg.sorted <- health.agg[with(health.agg, order(-total, EVTYPE)) ,]
health.types <- health.agg.sorted[health.agg.sorted$total > 1000,]$EVTYPE
health.top <- health.counts[health.counts$EVTYPE %in% health.types, ]
health.top <- health.top[with(health.top, order(-total, EVTYPE)),]
health.top$EVTYPE <- factor(health.top$EVTYPE, levels = health.top$EVTYPE, ordered = T)
health.agg.sorted[health.agg.sorted$total > 1000,]
## EVTYPE total
## 73 TORNADO 22178
## 30 FLOOD 9805
## 24 EXCESSIVE HEAT 9719
## 72 THUNDER STORM 5562
## 54 LIGHTNING 4792
## 86 WINTER WEATHER 3022
## 71 STRONG WIND 1884
## 29 FIRE 1545
## 46 HURRICANE (TYPHOON) 1453
## 65 RIP CURRENT 1045
The following graph shows all even types with total casualties over 1000, with color indicating portion of injuries and deaths.
g <- ggplot(health.top, aes(EVTYPE, total))
g + geom_bar(stat = "identity", aes(fill = variable )) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 10)) +
labs(title = "Types of events most harmful to population health")
The graph shows all even types with total casualties over 1000, with color indicating portion of injuries and deaths.
Look at the top 5 event types in terms of total damgaes
econ.types <- econ.agg[with(econ.agg, order(-total, EVTYPE)),][1:5,]$EVTYPE
econ.top <- econ.counts[econ.counts$EVTYPE %in% econ.types, ]
econ.top <- econ.top[with(econ.top, order(-total, EVTYPE)),]
econ.top$EVTYPE <- factor(econ.top$EVTYPE, levels = econ.top$EVTYPE, ordered = T)
econ.agg[with(econ.agg, order(-total, EVTYPE)),][1:5,]
## EVTYPE total
## 30 FLOOD 2.139e+11
## 46 HURRICANE (TYPHOON) 8.707e+10
## 73 TORNADO 2.490e+10
## 39 HAIL 1.707e+10
## 19 DROUGHT 1.441e+10
g <- ggplot(econ.top, aes(EVTYPE, total))
g + geom_bar(stat = "identity", aes(fill = variable )) +
theme(axis.text.x = element_text(angle = 30, hjust = 1, size = 10)) +
labs(title = "Types of events with greatest economic consequences")
The graph shows the five top even types in terms of total damage, with colors indicating what portion of damage is property and crop damage.