Synopsis

We examine National Weather Service Storm Data to discover what types of extreme weather events are responsible for the most economic damage (measured in property damage and crop damage done), and what types are the most dangerous (measured in fatalities and injuries attributed to the storm). We find that floods are clearly responsible for the most economic damage, followed by hurricanes, tornados, and storm surges. Tornados, meanwhile, are the most dangerous natural weather events, followed by heat, floods, wind, and lightning.

Processing

We first clean the data to prepare for analysis.

We begin by discarding unwanted columns, as well as rows that report nonzero damage but have a nonsensical entry in the corresponding “exponent” column.

# Column import mask for read.csv: a colClasses entry of "NULL" skips that
# column, while NA lets read.csv infer its type. Of the 37 columns, only
# column 8 and columns 23 through 28 are read.
importCols <- rep("NULL", 37)
importCols[c(8,23:28)] <- NA

# Fetch the compressed data set only when no local copy exists, so that
# re-running the analysis does not download the archive again.
if (!file.exists("weather_data.csv.bz2")) {
  download.file(url = 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2',
                destfile = "weather_data.csv.bz2", method="curl")
}
weatherData <- read.csv("weather_data.csv.bz2", colClasses = importCols)

# Exponent codes accepted as valid in the PROPDMGEXP / CROPDMGEXP columns.
exps <- c('h','H','k','K','m','M','b','B')

# Keep a row only when each damage figure is either zero (its exponent is
# then irrelevant) or is paired with one of the accepted exponent codes.
cleanData <- weatherData[((weatherData$PROPDMG == 0 | weatherData$PROPDMGEXP %in% exps)
                    & (weatherData$CROPDMG == 0 | weatherData$CROPDMGEXP %in% exps)), ]

# Exponent codes and the multiplier each one stands for. The ninth entry
# ("NOMATCH", multiplier 0) is the fallback slot used for any code that is
# not in the table.
multUnits <- c("h", "H", "k", "K", "m", "M", "b", "B", "NOMATCH")
multVals <- c(rep(c(100, 1000, 1000000, 1000000000), each = 2), 0)
multLookup <- cbind(multUnits, multVals)

# Scale a damage amount by the multiplier of its exponent code and express
# the result in thousands of dollars.
scaleToThousands <- function(amount, expCode) {
  slot <- match(expCode, multUnits, nomatch = 9)
  as.numeric(amount) * multVals[slot] / 1000
}

cleanData$PROPDMG <- scaleToThousands(cleanData$PROPDMG, cleanData$PROPDMGEXP)
cleanData$CROPDMG <- scaleToThousands(cleanData$CROPDMG, cleanData$CROPDMGEXP)

# Casualty counts as plain numerics; event types as upper-case strings.
for (countCol in c("FATALITIES", "INJURIES")) {
  cleanData[[countCol]] <- as.numeric(cleanData[[countCol]])
}
cleanData$EVTYPE <- toupper(as.character(cleanData$EVTYPE))

# Total economic damage per record: property damage plus crop damage.
cleanData$DMG <- with(cleanData, PROPDMG + CROPDMG)

Note that at this point, dollar values are in units of thousands of dollars.

It will be useful for us to combine many event types that mean essentially the same types of events. Note that we do not combine ALL similar event types, as we will throw many out at a later stage. We simply combine similar event types that might end up appearing at the last stage of our analysis.

# Canonical event type (list name) -> the raw EVTYPE spellings folded into it.
# Groups are applied in the order listed.
eventGroups <- list(
  "BLIZZARD" = c("BLIZZARD"),
  "FLOOD" = c("COASTAL FLOOD", "FLASH FLOOD", "FLASH FLOOD/FLOOD",
              "FLASH FLOODING", "RIVER FLOOD", "FLOOD/FLASH FLOOD",
              "URBAN/SML STREAM FLD", "FLOODING"),
  "FREEZE" = c("DAMAGING FREEZE", "FREEZE", "FROST/FREEZE"),
  "DROUGHT" = c("DROUGHT"),
  "HEAT" = c("EXCESSIVE HEAT", "HEAT", "HEAT WAVE", "EXTREME HEAT",
             "UNSEASONABLY WARM AND DRY", "RECORD/EXCESSIVE HEAT",
             "UNSEASONABLY WARM"),
  "COLD" = c("EXTREME COLD", "EXTREME COLD/WIND CHILL", "COLD/WIND CHILL",
             "EXTREME WINDCHILL", "COLD AND SNOW", "HYPOTHERMIA/EXPOSURE",
             "LOW TEMPERATURE"),
  "HAIL" = c("HAIL", "HAILSTORM"),
  "RAIN" = c("HEAVY RAIN", "HEAVY RAIN/SEVERE WEATHER", "FREEZING RAIN"),
  "SNOW" = c("HEAVY SNOW"),
  "WIND" = c("HIGH WIND", "HIGH WINDS", "STRONG WIND", "THUNDERSTORM WIND",
             "THUNDERSTORM WINDS", "TSTM WIND", "TSTM WIND/HAIL",
             "MARINE STRONG WIND", "MARINE THUNDERSTORM WIND",
             "MARINE TSTM WIND", "STRONG WINDS"),
  "HURRICANE" = c("HURRICANE", "HURRICANE ERIN", "HURRICANE OPAL",
                  "HURRICANE/TYPHOON"),
  "ICE" = c("ICE STORM"),
  "LANDSLIDE" = c("LANDSLIDE"),
  "LIGHTNING" = c("LIGHTNING"),
  "THUNDERSTORM" = c("SEVERE THUNDERSTORM"),
  "STORM SURGE" = c("STORM SURGE", "STORM SURGE/TIDE"),
  "TORNADO" = c("TORNADO", "TORNADOES, TSTM WIND, HAIL"),
  "TROPICAL STORM" = c("TROPICAL STORM", "TROPICAL STORM GORDON"),
  "TYPHOON" = c("TYPHOON"),
  "WILDFIRE" = c("WILDFIRE", "WILD FIRES", "WILD/FOREST FIRE"),
  "WINTER STORM" = c("WINTER STORM", "WINTER WEATHER", "WINTER WEATHER/MIX",
                     "WINTER STORMS"),
  "FOG" = c("DENSE FOG", "FOG"),
  "RIP CURRENTS" = c("RIP CURRENT", "RIP CURRENTS"),
  "HIGH SURF" = c("HEAVY SURF/HIGH SURF", "HIGH SURF", "HEAVY SURF")
)

# Relabel every record whose EVTYPE is one of a group's spellings with that
# group's canonical name.
for (canonicalType in names(eventGroups)) {
  isAlias <- cleanData$EVTYPE %in% eventGroups[[canonicalType]]
  cleanData$EVTYPE[isAlias] <- canonicalType
}

Results

Economic Costs of Weather Events

For damage, we will sum up property damage and crop damage and simply compare the total amount of damage caused by each event type.

# Total damage (property + crop) per event type, ignoring missing values.
# Naming the grouping list and passing a one-column data frame yields the
# EVTYPE / DMG column names directly.
dmgData <- aggregate(cleanData["DMG"],
                     by = list(EVTYPE = cleanData$EVTYPE),
                     FUN = sum, na.rm = TRUE)

dmgData[["DMG"]] <- as.numeric(dmgData[["DMG"]])

We are interested in the events that cause the most damage. Simply finding the max might be good enough, but looking at the different event types shows that the same types of event are sometimes coded differently due to typos or simply variation in style. We will therefore try to add together events that are actually essentially the same.

Instead of examining every event type, we discard those types which have done total damage less than one one-thousandth of the maximum. These event types will not contribute enough to change the final analysis, as we shall see.

# Keep only the event types whose total damage reaches one one-thousandth of
# the largest total.
dmgCutoff <- max(dmgData$DMG) / 1000
maxDmgData <- dmgData[dmgData$DMG >= dmgCutoff, ]

Before we examine the results, we add the damage that we just dropped, all added together under the “DROPPED” label, to make sure that we did not remove pertinent information.

# Combined damage of every event type that fell below the cutoff of one
# one-thousandth of the largest total.
dmgCutoff <- max(dmgData$DMG) / 1000
missingEventsDmg <- sum(dmgData$DMG[dmgData$DMG < dmgCutoff])

# Append the 'DROPPED' total as a one-row data frame. Appending the character
# vector c('DROPPED', missingEventsDmg) instead would coerce the whole DMG
# column to character.
maxDmgData <- rbind(maxDmgData,
                    data.frame(EVTYPE = 'DROPPED', DMG = missingEventsDmg,
                               stringsAsFactors = FALSE))

# Largest total damage first.
maxDmgSortedData <- maxDmgData[order(maxDmgData$DMG, decreasing = TRUE), ]
maxDmgSortedData
##             EVTYPE          DMG
## 133          FLOOD 179337997.82
## 329      HURRICANE  90109897.81
## 707        TORNADO  58904440.59
## 553    STORM SURGE     47965579
## 186           HAIL  18974211.67
## 820           WIND  17787622.56
## 71         DROUGHT     15018672
## 339            ICE   8979692.81
## 817       WILDFIRE   8793313.13
## 720 TROPICAL STORM   8383236.55
## 832   WINTER STORM   6758679.25
## 443           RAIN   3941844.44
## 22         DROPPED   2238313.77
## 150         FREEZE      1865826
## 48            COLD    1410307.4
## 630   THUNDERSTORM   1207480.55
## 517           SNOW   1082079.79
## 374      LIGHTNING    940751.37
## 216           HEAT    924549.25
## 20        BLIZZARD    771273.95
## 749        TYPHOON       601055
## 360      LANDSLIDE       344613

Note that the total amount of damage done by dropped events is much smaller than the damage done by the most economically impactful events, and therefore will not affect the analysis of which events cause the most economic damage.

We now plot the damage done by event types that do the most damage. Flooding clearly causes the most damage, with hurricanes, tornados, and storm surges following. Beyond those four types of events, the relative economic impacts of weather types become less clear from our analysis.

# Enlarge the bottom and left margins so the perpendicular (las = 2) event
# labels and the y-axis title fit.
par(mar = c(10, 8, 4, 2))

# First 15 rows of the sorted table. DMG is in thousands of dollars, so
# dividing by 1000000 expresses it in billions.
topDmg <- maxDmgSortedData[seq_len(15), ]
barplot(height = as.numeric(topDmg$DMG) / 1000000,
        names.arg = topDmg$EVTYPE,
        main = "Economic Damage done by Weather Events",
        ylab = "Economic Damage (in billions of dollars)",
        ylim = c(0, 200),
        las = 2, ps = 7)

plot of chunk unnamed-chunk-5

Human Costs of Weather Events

We cannot justify simply summing deaths and injuries caused by extreme weather events, so we will analyze these statistics separately.

# Sum one casualty column of cleanData per event type, ignoring missing
# values. Returns a data frame with columns EVTYPE and <measure>.
sumByEventType <- function(measure) {
  totals <- aggregate(cleanData[measure],
                      by = list(EVTYPE = cleanData$EVTYPE),
                      FUN = sum, na.rm = TRUE)
  totals[[measure]] <- as.numeric(totals[[measure]])
  totals
}

fatalityData <- sumByEventType("FATALITIES")
injuryData <- sumByEventType("INJURIES")

As with economic information, we are interested in which type of event is most dangerous to our health. We can therefore discard events as in the analysis of the economic question.

# An event type is kept only if its total reaches one one-thousandth of the
# largest total for that measure.
injCutoff <- max(injuryData$INJURIES) / 1000
fatCutoff <- max(fatalityData$FATALITIES) / 1000

maxInjData <- injuryData[injuryData$INJURIES >= injCutoff, ]
maxFatData <- fatalityData[fatalityData$FATALITIES >= fatCutoff, ]

# Combined totals of the event types that fell below the cutoffs.
missingEventsInj <- sum(injuryData$INJURIES[injuryData$INJURIES < injCutoff])
missingEventsFat <- sum(fatalityData$FATALITIES[fatalityData$FATALITIES <
                                                fatCutoff])

# Append the 'DROPPED' totals as one-row data frames. Appending a character
# vector such as c('DROPPED', missingEventsInj) instead would coerce the
# whole count column to character.
maxInjData <- rbind(maxInjData,
                    data.frame(EVTYPE = 'DROPPED', INJURIES = missingEventsInj,
                               stringsAsFactors = FALSE))
maxFatData <- rbind(maxFatData,
                    data.frame(EVTYPE = 'DROPPED', FATALITIES = missingEventsFat,
                               stringsAsFactors = FALSE))

# Largest totals first.
maxInjSortedData <- maxInjData[order(maxInjData$INJURIES, decreasing = TRUE), ]
maxFatSortedData <- maxFatData[order(maxFatData$FATALITIES,
                                     decreasing = TRUE), ]
maxInjSortedData
##             EVTYPE INJURIES
## 707        TORNADO    91322
## 820           WIND    11327
## 216           HEAT     9176
## 133          FLOOD     8674
## 374      LIGHTNING     5228
## 339            ICE     2112
## 832   WINTER STORM     1808
## 817       WILDFIRE     1606
## 186           HAIL     1360
## 329      HURRICANE     1323
## 147            FOG     1076
## 517           SNOW     1052
## 23         DROPPED      924
## 20        BLIZZARD      805
## 481   RIP CURRENTS      529
## 93      DUST STORM      440
## 720 TROPICAL STORM      383
## 48            COLD      320
## 443           RAIN      274
## 277      HIGH SURF      244
## 169          GLAZE      216
## 11       AVALANCHE      170
## 747        TSUNAMI      129
# Show the fatality totals, largest first.
print(maxFatSortedData)
##             EVTYPE FATALITIES
## 707        TORNADO       5655
## 216           HEAT       3165
## 133          FLOOD       1537
## 820           WIND       1154
## 374      LIGHTNING        816
## 481   RIP CURRENTS        572
## 48            COLD        465
## 832   WINTER STORM        277
## 11       AVALANCHE        224
## 27         DROPPED        166
## 277      HIGH SURF        154
## 329      HURRICANE        132
## 517           SNOW        130
## 443           RAIN        105
## 20        BLIZZARD        101
## 339            ICE         95
## 817       WILDFIRE         90
## 147            FOG         80
## 720 TROPICAL STORM         66
## 360      LANDSLIDE         38
## 747        TSUNAMI         33
## 553    STORM SURGE         24
## 93      DUST STORM         22
## 186           HAIL         15
## 489     ROUGH SEAS          8
## 169          GLAZE          7
## 402  MARINE MISHAP          7
# Enlarge the bottom and left margins so the perpendicular (las = 2) event
# labels and the y-axis title fit.
par(mar = c(10, 8, 4, 2))

# First 15 rows of the sorted injury table.
topInj <- maxInjSortedData[seq_len(15), ]
barplot(height = as.numeric(topInj$INJURIES),
        names.arg = topInj$EVTYPE,
        main = "Injuries by Weather Events",
        ylab = "Injuries",
        ylim = c(0, 100000),
        las = 2, ps = 7)

plot of chunk unnamed-chunk-9

# First 15 rows of the sorted fatality table, drawn with perpendicular
# (las = 2) event labels.
topFat <- maxFatSortedData[seq_len(15), ]
barplot(height = as.numeric(topFat$FATALITIES),
        names.arg = topFat$EVTYPE,
        main = "Fatalities by Weather Events",
        ylab = "Fatalities",
        ylim = c(0, 6000),
        las = 2, ps = 7)

plot of chunk unnamed-chunk-9

We see from the above graphs that tornados are by far the most dangerous natural weather events, followed by heat, floods, wind, and lightning, in approximately that order. (Although that order does not quite match the injury statistics, those four events cause similar numbers of injuries, and since fatalities are more serious, the fatality statistics ought to be given more weight in the analysis.) After these five types of events, analysis becomes difficult with our methods.