Title

Top 10 Health and Economic Impacts of Storm and Severe Weather Events, in the USA, from 1950 to 2011

Synopsis

This report presents the top ten (10) health and economic impacts of storm and severe weather events, in the USA, during the period 1950 to 2011. Health impacts are measured as the total number of injuries and fatalities by event, and economic impact is presented as the total cost of property and crop damages by event. The data comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

The following preprocessing steps were employed:-

Event label preprocessing: Event labels are entered as free text and the same event is described by multiple labels. Additionally both upper and lower case letters are used to create lables.

Property and crop damage exponent transformation: The damages exponent fields - (PROPDMGEXP,CROPDMGEXP) - are character fields that catagorizes the magnitude of damages credited to an event. The exponent rule takes the following format:-

A transformation will be done to convert this exponent into a numeric value for calculation purposes. However, some of the categories in the data are invalid as per the above conversion rule. Records for these invalid categories will be excluded from the analysis.

download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2","stormdata.bz2")

storms <- read.csv(bzfile("stormdata.bz2"))

data <- subset(storms, 
               storms$INJURIES > 0 | storms$FATALITIES > 0 | storms$PROPDMG > 0 | storms$CROPDMG > 0,
               select = c(EVTYPE,INJURIES,FATALITIES,PROPDMG,PROPDMGEXP, CROPDMG, CROPDMGEXP))

data$EVTYPE <- toupper(data$EVTYPE)
data$PROPDMGEXP <- toupper(data$PROPDMGEXP)
data$CROPDMGEXP <- toupper(data$CROPDMGEXP)

# Build the phrase substitution list, where we replace :-
# word or pharse on the old list with 
# word or pharse on the new list
x <- data.frame(
    old = c("^AVALANCE$"), 
    new = c("AVALANCHE"), 
    stringsAsFactors = FALSE )

x[nrow(x)+1,1] <- c("^COASTAL FLOODING/EROSION$|^COASTAL FLOODING$|^COASTAL FLOOD$")
x[nrow(x)  ,2] <- c("COASTAL FLOOD/EROSION")

x[nrow(x)+1,1] <- c("^SNOW/ BITTER COLD$")
x[nrow(x)  ,2] <- c("COLD AND SNOW")

x[nrow(x)+1,1] <- c("^COLD$|^COLD TEMPERATURE$|^COLD WAVE$|^COLD WEATHER$|^COLD/WINDS$|^EXTENDED COLD$|^EXTREME COLD$|^EXTREME COLD/WIND CHILL$|^HIGH WINDS/COLD$|^RECORD COLD$|^UNSEASONABLY COLD$|^EXTREME WINDCHILL$|^LOW TEMPERATURE$")
x[nrow(x)  ,2] <- c("COLD/WIND CHILL")

x[nrow(x)+1,1] <- c("^COASTALSTORM$")
x[nrow(x)  ,2] <- c("COASTAL STORM")

x[nrow(x)+1,1] <- c("^LANDSLIDES$|^LANDSLIDE$|^MUDSLIDE$|^MUDSLIDES$")
x[nrow(x)  ,2] <- c("DEBRIS FLOW")

x[nrow(x)+1,1] <- c("^FLASH FLOOD LANDSLIDES$")
x[nrow(x)  ,2] <- c("FLASH FLOOD/DEBRIS FLOW")

x[nrow(x)+1,1] <- c("^DRY MICROBURST$")
x[nrow(x)  ,2] <- c("DRY MIRCOBURST WINDS")

x[nrow(x)+1,1] <- c("^HEAT WAVE DROUGHT$")
x[nrow(x)  ,2] <- c("DROUGHT/EXCESSIVE HEAT")

x[nrow(x)+1,1] <- c("^EXTREME HEAT$|^RECORD/EXCESSIVE HEAT$|^HEAT$|^HEAT WAVE$|^HEAT WAVES$|^RECORD HEAT$")
x[nrow(x)  ,2] <- c("EXCESSIVE HEAT")

x[nrow(x)+1,1] <- c("^FLASH FLOODING$|^( )?FLASH FLOOD(S)?(/)?( STREET)?( FROM ICE JAMS)?$|^RAPIDLY RISING WATER$|^DAM BREAK$")
x[nrow(x)  ,2] <- c("FLASH FLOOD")

x[nrow(x)+1,1] <- c("^FLASH FLOODING/FLOOD$|^FLOOD/FLASHFLOOD$|^FLOOD/FLASH/FLOOD$|^FLOOD/FLASH FLOODING$|^FLASH FLOOD/FLOOD$")
x[nrow(x)  ,2] <- c("FLOOD/FLASH FLOOD")

x[nrow(x)+1,1] <- c("^FLOODING$|^( )?HIGH WATER$|^ RAPIDLY RISING WATER$|^URBAN AND SMALL STREAM FLOODING$|^URBAN/SML STREAM FLD$|^URBAN AND SMALL STREAM FLOODIN$|^TIDAL FLOOD$|^RIVER FLOOD$|^MINOR FLOODING$|^FLOOD/RIVER FLOOD$|^FLOOD/HEAVY RAIN$")
x[nrow(x)  ,2] <- c("FLOOD")

x[nrow(x)+1,1] <- c("^FLOOD & HEAVY RAIN$")
x[nrow(x)  ,2] <- c("FLOOD/HEAVY RAIN")

x[nrow(x)+1,1] <- c("^FOG AND COLD TEMPERATURES$")
x[nrow(x)  ,2] <- c("FOG AND COLD/WIND CHILL")

x[nrow(x)+1,1] <- c("^FOG$|^PATCHY DENSE FOG$")
x[nrow(x)  ,2] <- c("DENSE FOG")

x[nrow(x)+1,1] <- c("^EXCESSIVE RAINFALL$|^HEAVY RAINS$")
x[nrow(x)  ,2] <- c("HEAVY RAIN")

x[nrow(x)+1,1] <- c("^EXCESSIVE SNOW$|^HEAVY SNOW SHOWER$")
x[nrow(x)  ,2] <- c("HEAVY SNOW")

x[nrow(x)+1,1] <- c("^HEAVY SNOW AND HIGH WINDS$|^HIGH WIND/HEAVY SNOW$")
x[nrow(x)  ,2] <- c("HEAVY SNOW/HIGH WINDS")

x[nrow(x)+1,1] <- c("^HURRICANE$|^HURRICANE EDOUARD$|^HURRICANE EMILY$|^HURRICANE ERIN$|^HURRICANE FELIX$|^HURRICANE GORDON$|^HURRICANE OPAL$|^HURRICANE OPAL/HIGH WINDS$")
x[nrow(x)  ,2] <- c("HURRICANE/TYPHOON")

x[nrow(x)+1,1] <- c("^BLACK ICE$|^GLAZE/ICE STORM$|^ICE$|^ICE ROADS$|^ICE STORM$|^SNOW AND ICE$|^GLAZE$|^ICE ON ROAD$")
x[nrow(x)  ,2] <- c("GLAZE/ICE STORM/ICE ROADS")

x[nrow(x)+1,1] <- c("^MARINE (THUNDERSTORM|TSTM) WIND$")
x[nrow(x)  ,2] <- c("MARINE THUNDERSTORM WINDS")

x[nrow(x)+1,1] <- c("^MARINE MISHAP$")
x[nrow(x)  ,2] <- c("MARINE ACCIDENT")

x[nrow(x)+1,1] <- c("^FREEZING RAIN/SNOW$")
x[nrow(x)  ,2] <- c("RAIN/SNOW")

x[nrow(x)+1,1] <- c("^RIVER FLOODING$")
x[nrow(x)  ,2] <- c("RIVER FLOOD")

x[nrow(x)+1,1] <- c("^FALLING SNOW/ICE$|^HEAVY SNOW/ICE$")
x[nrow(x)  ,2] <- c("SNOW AND ICE")

x[nrow(x)+1,1] <- c("^SNOW SQUALLS$")
x[nrow(x)  ,2] <- c("SNOW SQUALL")

x[nrow(x)+1,1] <- c("^BLOWING SNOW$|^HIGH WINDS/SNOW$")
x[nrow(x)  ,2] <- c("SNOW/HIGH WINDS")

x[nrow(x)+1,1] <- c("^GUSTY WINDS$|^GUSTY WIND$|^HIGH WIND$|^HIGH WIND 48$|^HIGH WINDS$|^STRONG WIND$|^STRONG WINDS$|^WIND$|^WIND STORM$|^WINDS$|^NON TSTM WIND$|^NON-TSTM WIND$")
x[nrow(x)  ,2] <- c("STRONG WIND")

x[nrow(x)+1,1] <- c("^RIP CURRENTS$")
x[nrow(x)  ,2] <- c("RIP CURRENT")

x[nrow(x)+1,1] <- c("^THUNDERSTORMS$")
x[nrow(x)  ,2] <- c("THUNDERSTORM")

x[nrow(x)+1,1] <- c("^(LIGHTNING AND )?THUNDER(S)?TORM(S)?( ){0,2}?W(IN)?(D)?(S){0,2}?( )?(\\()?(G)?[0-9]{0,2}?(\\))?( ){0,2}?(MPH)?$|^( )?TSTM( )?W(IND)?(S)?( )?(DAMAGE|AND LIGHTNING|\\(G45\\))?$")
x[nrow(x)  ,2] <- c("THUNDERSTORM WINDS")

x[nrow(x)+1,1] <- c("^LIGHTNING.$|^LIGHTNING INJURY$")
x[nrow(x)  ,2] <- c("LIGHTNING")

x[nrow(x)+1,1] <- c("^HAZARDOUS SURF$|^ROUGH SURF$|^HEAVY SURF$|^HIGH SURF$|^( ){0,3}?HIGH SURF ADVISORY$")
x[nrow(x)  ,2] <- c("HEAVY SURF/HIGH SURF")

x[nrow(x)+1,1] <- c("^TIDAL FLOODING$")
x[nrow(x)  ,2] <- c("TIDAL FLOOD")

x[nrow(x)+1,1] <- c("^TSTM WIND/HAIL$")
x[nrow(x)  ,2] <- c("THUNDERSTORM WINDS/HAIL")

x[nrow(x)+1,1] <- c("^SMALL HAIL$")
x[nrow(x)  ,2] <- c("HAIL")

x[nrow(x)+1,1] <- c("^HEAVY SEAS$|^ROUGH SEAS$|^HIGH SWELLS$|^HURRICANE-GENERATED SWELLS$|^HIGH WAVES$|^ROGUE WAVE$|^HIGH SEAS$")
x[nrow(x)  ,2] <- c("HIGH SEAS/HIGH SWELLS")

x[nrow(x)+1,1] <- c("^HIGH WIND AND SEAS$")
x[nrow(x)  ,2] <- c("HIGH WIND/SEAS")

x[nrow(x)+1,1] <- c("^TORNADO F2$|^TORNADO F3$")
x[nrow(x)  ,2] <- c("TORNADO")

x[nrow(x)+1,1] <- c("^TSUNAMI WIND \\(G35\\)$|^TSUNAMI WIND \\(G40\\)$|^TSUNAMI WIND \\(G45\\)$")
x[nrow(x)  ,2] <- c("TSUNAMI WIND")

x[nrow(x)+1,1] <- c("^TROPICAL STORM GORDON$")
x[nrow(x)  ,2] <- c("TROPICAL STORM")

x[nrow(x)+1,1] <- c("^WATERSPOUT$|^WATERSPOUT TORNADO$")
x[nrow(x)  ,2] <- c("WATERSPOUT/TORNADO")

x[nrow(x)+1,1] <- c("^BRUSH FIRE$|^WILD/FOREST FIRE$|^WILDFIRE$")
x[nrow(x)  ,2] <- c("WILD FIRES")

x[nrow(x)+1,1] <- c("^WIND$")
x[nrow(x)  ,2] <- c("WIND")

x[nrow(x)+1,1] <- c("^WINTER STORMS$")
x[nrow(x)  ,2] <- c("WINTER STORM")

x[nrow(x)+1,1] <- c("^WINTER WEATHER MIX$|^WINTER WEATHER/MIX$|^WINTRY MIX$")
x[nrow(x)  ,2] <- c("WINTER WEATHER")

# substitute the event lables
n <- nrow(x)
for(i in 1:n) {
    data$EVTYPE <- gsub(pattern = x[i,1], 
                        replacement = x[i,2], 
                        x = data$EVTYPE)
}

#Function to transform the damages exponents into numbers
exp_transform <- function(e) {
    # h -> hundred, k -> thousand, m -> million, b -> billion
    if (e %in% c('h', 'H'))
        return(2)
    else if (e %in% c('k', 'K'))
        return(3)
    else if (e %in% c('m', 'M'))
        return(6)
    else if (e %in% c('b', 'B'))
        return(9)
    else if (!is.na(as.numeric(e))) # if a digit
        return(as.numeric(e))
    else if (e %in% c('', '-', '?', '+'))
        return(0)
    else {
        stop("Invalid exponent value.")
    }
}

# subset the property with damage
property <- subset(data, 
                   data$PROPDMG >0, 
                   select = c(EVTYPE,PROPDMG,PROPDMGEXP))

#remove property damages where the exponent does not fit the required classification  critera
property <- property[property$PROPDMGEXP %in% c("H","K","M","B"),]


# recalculate the property damage using the numeric exponent value
pwer <- sapply(property$PROPDMGEXP, FUN = exp_transform)
property$PROPDMG <- property$PROPDMG * (10 ** pwer)

# subset the crop damage data
crop <- subset(data, 
                   data$CROPDMG >0, 
                   select = c(EVTYPE,CROPDMG,CROPDMGEXP))

#remove crop damages where the exponent does not fit the required classification  critera
crop <- crop[crop$CROPDMGEXP %in% c("H","K","M","B"),]


# recalculate the crop damage using the numeric exponent value
pwer <- sapply(crop$CROPDMGEXP, FUN = exp_transform)
crop$CROPDMG <- crop$CROPDMG * (10 ** pwer)

# summarize datasets for plotting
# people with injuries
injury <- ddply(data[data$INJURIES>0,], .(EVTYPE), 
                summarise, 
                INJURIES = sum(INJURIES))

# people who died
fatality <- ddply(data[data$FATALITIES>0,], .(EVTYPE), 
                  summarise, 
                  FATALITIES = sum(FATALITIES))

# property damages
property <- ddply(data[data$PROPDMG>0,], .(EVTYPE), 
                  summarise, 
                  PROPDMG = sum(PROPDMG))

# crop damages
crop <- ddply(data[data$CROPDMG>0,], .(EVTYPE), 
                  summarise, 
                  CROPDMG = sum(CROPDMG))

Results

The analysis is presented as three (3) graphs. Individual charts for injuries and fatalities and a two (2) panel graph showing crop and property damages. All graghs focus on the top 10 outcomes for the various data sets.

# Find events that caused most injury
injury <- head(injury[order(injury$INJURIES, decreasing = T), ], 10)

#Plot the graph
ggplot(data = injury) + 
    geom_col(mapping = aes(reorder(EVTYPE,INJURIES),y=INJURIES)) + 
    coord_flip() +
    labs(title = "Top 10 Causes of Injuries, 1950-2011",
         x = "Events" ,
         y = "Injuries",
         caption = "Fig1: Top 10 causes of injuries for the period 1950-2011, sorted in descending order of event")

# Find events that caused most death
fatality <- head(fatality[order(fatality$FATALITIES, decreasing = T), ], 10)

#Plot the graph
ggplot(data = fatality) + 
    geom_col(mapping = aes(reorder(EVTYPE,FATALITIES),y=FATALITIES)) + 
    coord_flip() +
    labs(title = "Top 10 Causes of Fatalites, 1950-2011",
         x = "Events" ,
         y = "Fatalities",
         caption = "Fig2: Top 10 causes of fatalities for the period 1950-2011, sorted in descending order of event")

# Find events that caused most property damage
property <- head(property[order(property$PROPDMG, decreasing = T), ], 10)

# Find events that caused most crop damage
crop <- head(crop[order(crop$CROPDMG, decreasing = T), ], 10)

#Plot the graphs
p1 <- ggplot(data = property) + 
    geom_col(mapping = aes(reorder(EVTYPE,PROPDMG),y=PROPDMG)) + 
    coord_flip() +
    labs(title = "Top 10 Causes of Property Damage, 1950-2011",
         x = "Events" ,
         y = "Value",
         caption = "Fig3: panel 1: Top 10 causes of property damages for the period 1950-2011, sorted in descending order of event")

p2 <- ggplot(data = crop) + 
    geom_col(mapping = aes(reorder(EVTYPE,CROPDMG),y=CROPDMG)) + 
    coord_flip() +
    labs(title = "Top 10 Causes of Crop Damage, 1950-2011",
         x = "Events" ,
         y = "Value",
         caption = "Fig3: panel 2: Top 10 causes of crop damages for the period 1950-2011, sorted in descending order of event")
    
grid.arrange(p1, p2)