SYNOPSIS

Storms and severe weather create risks to public health and can have an economic impact on communities and municipalities. This analysis is a brief exploration of the NOAA Storm Database to derive some basic observations relating to severe weather events. Since this analysis is limited in scope, it will focus on types of events, costs of damage, injuries and fatalities, and storm locations.

DATA PROCESSING

Read in the dataset from url = "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Fetch the compressed NOAA storm data once into the working directory so the
# analysis does not depend on a machine-specific absolute path.
storm_url <- paste0(
  "http://d396qusza40orc.cloudfront.net/",
  "repdata%2Fdata%2FStormData.csv.bz2"
)
storm_file <- "StormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the bzip2 archive intact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}

# read.csv() reads the bzip2-compressed file directly; no manual unpacking.
StormData <- read.csv(
  file = storm_file, header = TRUE, sep = ",", na.strings = "NA"
)

Review the data elements that are available for the analysis

# List the column names available in the raw data set.
colnames(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Subset the data to include only the relevant fields

# Columns used downstream: event type, casualty counts, and the damage
# amounts together with their magnitude codes.
relevant_fields <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
StormDataSub <- StormData[, relevant_fields]

Initial observation of the data showed that dollar amounts were stored with a magnitude code of H, K, M or B (hundreds, thousands, millions, billions). Convert these codes to multipliers to calculate Property and Crop Damages.

# Property damage in dollars: PROPDMG scaled by the multiplier that its
# PROPDMGEXP magnitude code stands for.
# The code is upper-cased before the lookup so that lower-case variants
# (e.g. "k", "m") are scaled as well instead of being left at zero.
prop_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
prop_code <- toupper(as.character(StormDataSub$PROPDMGEXP))
prop_scale <- unname(prop_multiplier[prop_code])
prop_known <- !is.na(prop_scale)

# Rows with a missing or unrecognised code keep a damage value of 0.
StormDataSub$pd <- 0
StormDataSub$pd[prop_known] <-
  StormDataSub$PROPDMG[prop_known] * prop_scale[prop_known]

Convert the H, K, M, B codes into multipliers in the same way to calculate Crop Damage

# Crop damage in dollars: CROPDMG scaled by the multiplier that its
# CROPDMGEXP magnitude code stands for.
# The code is upper-cased before the lookup so that lower-case variants
# (e.g. "k", "m") are scaled as well instead of being left at zero.
crop_multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
crop_code <- toupper(as.character(StormDataSub$CROPDMGEXP))
crop_scale <- unname(crop_multiplier[crop_code])
crop_known <- !is.na(crop_scale)

# Rows with a missing or unrecognised code keep a damage value of 0.
StormDataSub$cd <- 0
StormDataSub$cd[crop_known] <-
  StormDataSub$CROPDMG[crop_known] * crop_scale[crop_known]

Conducted some data cleanup

# Fold "EXTREME HEAT" events into the general "HEAT" category.
# NOTE(review): this stands in for a rename.vars() call that could not be
# parsed (typographic quotes) and named a column that does not exist; it
# assumes the intent was to recode event labels -- confirm.
StormDataSub$EVTYPE[grepl("EXTREME HEAT", StormDataSub$EVTYPE)] <- "HEAT"

# Snapshot the raw labels; every pattern below is matched against this
# unmodified copy while EVTYPE itself is overwritten.
StormDataSub$CleanEV <- StormDataSub$EVTYPE

# Ordered (pattern, replacement) rules. Order matters: when a raw label
# matches several patterns, the rule listed last wins.
recode_rules <- rbind(
  c("EXCESSIVE HEAT", "HEAT"),
  c("HEAT WAVE", "HEAT"),
  c("FLASH FLOOD", "FLOOD"),
  c("TSTM WIND", "WIND"),
  c("HIGH WIND", "WIND"),
  c("STRONG WIND", "WIND"),
  c("RIP CURRENT", "RIP CURRENTS"),
  c("THUNDERSTORM WIND", "WIND"),
  c("HEAVY SNOW", "WINTER STORM"),
  c("BLIZZARD", "WINTER STORM"),
  c("EXTREME COLD", "EXTREME COLD/WIND CHILL")
)

for (rule in seq_len(nrow(recode_rules))) {
  matched <- grepl(recode_rules[rule, 1], StormDataSub$CleanEV)
  StormDataSub$EVTYPE[matched] <- recode_rules[rule, 2]
}

Created plots to visualize the data, starting with the Number of Fatalities by Event Type

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
# Total fatalities per event type, keeping the ten largest totals.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = StormDataSub, sum)
fatal <- fatal[order(-fatal$FATALITIES), ][1:10, ]
# Pin the factor levels to the sorted order so the bars are drawn from the
# highest to the lowest total. This must overwrite EVTYPE itself (the column
# mapped to x below); writing to a differently named column has no effect
# on the plot.
fatal$EVTYPE <- factor(fatal$EVTYPE, levels = fatal$EVTYPE)

ggplot(fatal, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("EVENT TYPE") + ylab("FATALITIES") +
  ggtitle("Number of Fatalities by Top 10 Weather Events")

Then the Total Damages Categorized by the Most Harmful Event Types

# Combined property and crop damage per event type.
damage <- setNames(
  aggregate(pd + cd ~ EVTYPE, data = StormDataSub, sum),
  c("EVTYPE", "TDAMAGE")
)

# Keep the ten costliest event types, largest first, and pin the factor
# levels to that order so the bars are drawn in the same sequence.
costliest <- order(-damage$TDAMAGE)[1:10]
damage <- damage[costliest, ]
damage$EVTYPE <- factor(damage$EVTYPE, levels = damage$EVTYPE)

ggplot(damage, aes(x = EVTYPE, y = TDAMAGE)) +
  geom_bar(stat = "identity", fill = "red") +
  scale_y_continuous(name = "Damages(US$)", labels = scales::comma) +
  labs(
    x = "EVENT TYPE",
    title = "Property & Crop Damages by Top 10 Weather Events"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

RESULTS

Based on the data analysis described above, it is clear that flooding is the most costly type of event in terms of damage to property and crops, exceeding the next closest event by more than $80B. Large storms are also very expensive, as the next three events are hurricanes/typhoons, tornadoes and storm surges.

This is not mirrored in the fatality data, where tornados claim the highest number of lives, followed by heat.