Synopsis

Tornadoes and thunderstorms have the highest levels of health impact (defined as fatalities plus injuries) per occurrence. Floods and hurricanes cause the most economic damage (defined as property plus crop damage) per occurrence.

Data Processing

Load packages

library(plyr)
library(dplyr)
library(ggplot2)

Load Data

First we load the dataset - either from the URL or locally.

# Fetch and decompress the storm data only when the CSV is not already in the
# working directory, then read it into `stormData`.
if (!file.exists("StormData.csv")) {
  # mode = "wb" keeps the compressed archive byte-exact on every platform.
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "StormData.csv.bz2",
    mode = "wb"
  )
  # bunzip2() lives in the R.utils package, which the library() calls in this
  # script do not attach; qualifying the call lets it resolve regardless.
  R.utils::bunzip2(
    filename = "StormData.csv.bz2",
    destname = "StormData.csv",
    remove = TRUE
  )
}

stormData <- read.csv(file = "StormData.csv")

Data Transformations

Clean up the property and crop damage values, using the multipliers

# Codes that appear in the *DMGEXP columns and the power of ten each one is
# mapped to. Blank, "-", "?" and "+" map to 0, i.e. a multiplier of 1; digits
# map to themselves; B/H/K/M (either case) map to 9/2/3/6.
exponentLevel <- c("", "-", "?", "+", 0:9, "B", "b", "H", "h", "K", "k", "M", "m")
exponentLevel.Fixed <- c(0, 0, 0, 0, 0:9, 9, 9, 2, 2, 3, 3, 6, 6)

# Replace each exponent code with its power of ten.
stormData$CROPDMGEXP <- plyr::mapvalues(
  stormData$CROPDMGEXP,
  from = exponentLevel,
  to = exponentLevel.Fixed,
  warn_missing = FALSE
)
stormData$PROPDMGEXP <- plyr::mapvalues(
  stormData$PROPDMGEXP,
  from = exponentLevel,
  to = exponentLevel.Fixed,
  warn_missing = FALSE
)

# Go through as.character() first: when the column is a factor, a bare
# as.numeric() returns the internal level codes instead of the mapped
# exponents. For a character column the extra step is a no-op.
stormData$CROPDMGEXP <- as.numeric(as.character(stormData$CROPDMGEXP))
stormData$PROPDMGEXP <- as.numeric(as.character(stormData$PROPDMGEXP))

# Scale the damage figures by their power of ten, then drop the exponent
# columns, which are no longer needed.
stormData$PROPDMG <- stormData$PROPDMG * 10^stormData$PROPDMGEXP
stormData$CROPDMG <- stormData$CROPDMG * 10^stormData$CROPDMGEXP
stormData <- subset(stormData, select = -c(PROPDMGEXP, CROPDMGEXP))

Clean up event types (EVTYPE)

There are many anomalies in the event type (EVTYPE) in this dataset. This section will attempt to clean and group like events.

# Upper-case the event types and store them as a factor. The values themselves
# are upper-cased (rather than levels()), so this works whether read.csv()
# produced a character column or a factor; levels() of a character column is
# NULL, which would make the upper-casing a silent no-op.
stormData$EVTYPE <- factor(toupper(as.character(stormData$EVTYPE)))

# Ordered rewrite rules: every level matching `pattern` is replaced by
# `replacement`. Order matters: e.g. the FLASH rule must run before the
# generic FLOOD rule, and the HEAVY SNOW rule before the bare SNOW rule.
evtypeRules <- data.frame(
  pattern = c(
    "^TSTM.*|^THU.*|^\\sTSTM.*|.*THUNDER.*",
    "^TORN.*",
    "^HIGH\\sWIND.*",
    "^TROPICAL\\sS.*",
    "^HURRICANE.*|^TYPHOON.*",
    "^LIGHTNING.*|^LIGHTING.*",
    ".*HAIL.*",
    ".*FLASH.*",
    "^(?!FLASH).*FLOOD.*",
    "^HEAVY.*SNOW.*",
    "^HEAVY.*RAIN.*",
    "^SNOW.*",
    "^WILD.*",
    "^GUST.*",
    "^WATERSP.*"
  ),
  replacement = c(
    "THUNDERSTORM",
    "TORNADO",
    "HIGH WIND",
    "TROPICAL STORM",
    "HURRICANE (TYPHOON)",
    "LIGHTNING",
    "HAIL",
    "FLASH FLOOD",
    "FLOOD",
    "HEAVY SNOW",
    "HEAVY RAIN",
    "HEAVY SNOW",
    "WILDFIRE",
    "HIGH WIND",
    "WATERSPOUT"
  ),
  stringsAsFactors = FALSE
)
# Only the rule that uses a lookahead needs the PCRE engine; every other rule
# keeps the default regex engine.
evtypeRules$perl <- grepl("(?", evtypeRules$pattern, fixed = TRUE)

# Apply the rules to the level labels and assign them back once; levels that
# end up with the same label are merged by the assignment.
evtypeLevels <- levels(stormData$EVTYPE)
for (i in seq_len(nrow(evtypeRules))) {
  evtypeLevels <- gsub(
    evtypeRules$pattern[i],
    evtypeRules$replacement[i],
    evtypeLevels,
    perl = evtypeRules$perl[i]
  )
}
levels(stormData$EVTYPE) <- evtypeLevels

Calculate the health and economic impacts

This code creates separate data frames for economic damage and health damage. While creating those data sets we create new variables with total health and economic impacts.

By creating these datasets the code required to create the plots will be easier to read and understand.

# Per-record totals: economic damage is property plus crop damage, health
# damage is fatalities plus injuries.
stormData <- mutate(
  stormData,
  ECONOMICDMG = PROPDMG + CROPDMG,
  HEALTHDMG = FATALITIES + INJURIES
)

# Ten event types with the largest combined fatalities and injuries.
stormDataHealth <- stormData %>%
  select(EVTYPE, FATALITIES, INJURIES, HEALTHDMG) %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalityTotal = sum(FATALITIES),
    injuryTotall = sum(INJURIES),
    totalHealthImpact = sum(HEALTHDMG)
  ) %>%
  filter(totalHealthImpact > 0) %>%
  arrange(desc(totalHealthImpact)) %>%
  head(10)

# Ten event types with the largest combined property and crop damage.
# totalEconImpact is divided by 1e9; propTotal and cropTotal are not scaled.
stormDataEcon <- stormData %>%
  select(EVTYPE, PROPDMG, CROPDMG, ECONOMICDMG) %>%
  group_by(EVTYPE) %>%
  summarise(
    propTotal = sum(PROPDMG),
    cropTotal = sum(CROPDMG),
    totalEconImpact = sum(ECONOMICDMG) / 1e9
  ) %>%
  filter(totalEconImpact > 0) %>%
  arrange(desc(totalEconImpact)) %>%
  head(10)

Results

Health Impact

The results of the weather and health analysis are shown below. Health effects are defined as fatalities plus injuries and are grouped together under a new variable, totalHealthImpact. The weather events with the greatest health impacts for this data set were tornadoes and thunderstorms.

# Bar chart of the summarised health impact, bars ordered from largest to
# smallest total and event labels rotated to run vertically.
ggplot(
  stormDataHealth,
  aes(x = reorder(EVTYPE, -totalHealthImpact), y = totalHealthImpact)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 10 Weather Events\nWith Highest Impact To Human Health",
    x = "Event",
    y = "Number of Fatalities/Injuries"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))

Economic Impact

The results of the weather and economic analysis are shown below. Economic effects are defined as total property and crop damage per weather instance and are grouped together under a new variable, totalEconImpact. The weather events with the greatest economic damage were floods and hurricanes.

# Bar chart of the summarised economic damage, bars ordered from largest to
# smallest total and event labels rotated to run vertically.
ggplot(
  stormDataEcon,
  aes(x = reorder(EVTYPE, -totalEconImpact), y = totalEconImpact)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 10 Weather Events\nWith Highest Economic Damage",
    x = "Event",
    y = "Dollars (in Billions)"
  ) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0))