Synopsis

We identify the types of storms and other severe weather events that have the most adverse health and economic consequences. The data are provided by the National Weather Service. We report the top ten event types ranked by fatalities, by injuries, and by combined property and crop damage.

Data Processing

The raw data are downloaded and the date and time are noted.

# Download the raw storm data and record the download date and time
url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- "./data/stormData.csv.bz2"

# Create the directory that destfile actually lives in: download.file()
# fails when the target directory does not exist.
if (!dir.exists(dirname(destfile))) {
  dir.create(dirname(destfile), recursive = TRUE)
}

# mode = "wb" keeps the bzip2 archive byte-exact (text mode corrupts it on
# Windows).
download.file(url = url, destfile = destfile, mode = "wb")
downloaded <- format(Sys.time(), "%b %d, %Y %X")

# Load the data. Strings are kept as character on every R version so that
# later assignments of new event-type labels cannot hit a missing factor level.
dt <- read.csv(
  file = bzfile(destfile),
  header = TRUE,
  stringsAsFactors = FALSE
)

The file was downloaded from http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 on Mar 27, 2016 09:51:20 PM.

Fatalities, injuries, property damage, and crop damages are subsetted for subsequent processing.

# Keep only the event type, casualty, and damage columns, then give the first
# three columns readable names.
dt <- subset(
  dt,
  select = c(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  )
)
names(dt)[seq_len(3)] <- c("EventType", "Fatalities", "Injuries")

Damages are converted into dollars, and event types are standardized, with spelling variants corrected.

# Convert damage amounts to dollars using their magnitude codes.
#
# damage: numeric vector of damage amounts.
# unit:   character (or factor) vector of magnitude codes, one per amount.
#         "h"/"H" = hundreds, "k"/"K" = thousands, "m"/"M" = millions,
#         "b"/"B" = billions. Any other code (including "" and NA) leaves
#         the amount unchanged.
#
# Returns a numeric vector of dollar amounts, one per input element.
#
# The lookup is vectorized: a scalar `if` with `||` cannot be applied to a
# whole column (it errors on R >= 4.3 and previously used only the first
# element's code for every row).
dollars <- function(damage, unit) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  factor <- unname(multipliers[toupper(as.character(unit))])
  # Unrecognized codes index to NA; treat them as "no scaling".
  factor[is.na(factor)] <- 1
  factor * damage
}
# Express property and crop damage in dollars from each amount and its
# magnitude code.
dt[["PropertyDamage"]] <- dollars(
  damage = dt[["PROPDMG"]],
  unit = dt[["PROPDMGEXP"]]
)
dt[["CropDamage"]] <- dollars(
  damage = dt[["CROPDMG"]],
  unit = dt[["CROPDMGEXP"]]
)

# Event-type standardization rules, written as pattern = label.
# Every event type containing the pattern (case-insensitive) is replaced by
# the label. Rules run in the order listed, so a value relabelled by an
# earlier rule can still be matched and relabelled by a later one.
evtype_rules <- c(
  "avalanche" = "AVALANCHE",
  "THUNDERSTORM" = "THUNDERSTORM",
  "THUDERSTORM" = "THUNDERSTORM",
  "THUNDERESTORM" = "THUNDERSTORM",
  "THUNDERSTROM" = "THUNDERSTORM",
  "THUNDEERSTORM" = "THUNDERSTORM",
  "TSTM" = "THUNDERSTORM",
  "THUNERSTORM" = "THUNDERSTORM",
  "FLOOD" = "FLOOD",
  "TORNADO" = "TORNADO",
  "TORNDAO" = "TORNADO",
  "HAIL" = "HAIL",
  "HURRICANE" = "HURRICANE",
  "HEAT" = "HEAT",
  "FREEZ" = "FREEZE",
  "Frost" = "FREEZE",
  "dust" = "DUST STORM",
  "slide" = "LANDSLIDE",
  "wind" = "HIGH WIND",
  "wnd" = "HIGH WIND",
  "snow" = "SNOW",
  "COLD" = "COLD",
  "ice" = "ICE",
  "icy" = "ICE",
  "LIGHT" = "LIGHTNING",
  "Volca" = "VOLCANIC ERUPTION",
  "dry" = "DRY",
  "WATERSPOUT" = "WATERSPOUT",
  "Rain" = "RAIN"
)

for (pattern in names(evtype_rules)) {
  matched <- grepl(pattern, dt$EventType, ignore.case = TRUE)
  dt$EventType[matched] <- evtype_rules[[pattern]]
}

Fatalities, injuries, and damages are aggregated by storm event type.

library(dplyr)
# Total fatalities, injuries, and combined property + crop damage for each
# event type; the result has one row per event type.
dt <- dt %>%
  select(EventType, Fatalities, Injuries, PropertyDamage, CropDamage) %>%
  group_by(EventType) %>%
  summarise(
    Fatalities = sum(Fatalities),
    Injuries = sum(Injuries),
    Damage = sum(PropertyDamage) + sum(CropDamage)
  )
# fatal<-head(arrange(dt,desc(Fatalities)),10)

Top ten categories of event type are identified for each type of loss.

# Return the `n` rows of `totals` with the largest values in `column`,
# sorted from largest to smallest.
top_events <- function(totals, column, n = 10) {
  head(totals[order(totals[[column]], decreasing = TRUE), ], n)
}

# For each loss measure: the ten leading event types, and the share of the
# overall total that those ten account for.
fatal <- top_events(dt, "Fatalities")
topFatalities <- sum(fatal$Fatalities) / sum(dt$Fatalities)

injury <- top_events(dt, "Injuries")
topInjury <- sum(injury$Injuries) / sum(dt$Injuries)

damage <- top_events(dt, "Damage")
topDamage <- sum(damage$Damage) / sum(dt$Damage)

Results

Fatalities

The top ten categories of fatal storm events account for 89.7 percent of all fatalities in storm events. Tornadoes are the deadliest event type, with 37 percent of all storm fatalities.

library(ggplot2)
# Bar chart of the top ten event types by fatalities, with bars ordered by
# fatality count and x-axis labels rotated to stay readable.
g <- ggplot(fatal, aes(x = reorder(EventType, Fatalities), y = Fatalities)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event type",
    y = "Fatalities",
    title = "Top-10 Event Types for Fatalities"
  )
print(g)

Injuries

The top ten categories of storm events inflicting injuries account for 94.1 percent of all injuries in storm events. Tornadoes also produce the most injuries, with 65 percent of all storm-related injuries.

# Bar chart of the top ten event types by injuries, with bars ordered by
# injury count and x-axis labels rotated to stay readable.
g <- ggplot(injury, aes(x = reorder(EventType, Injuries), y = Injuries)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event type",
    y = "Injuries",
    title = "Top-10 Event Types for Injuries"
  )
print(g)

Property and Crop damage

The top ten categories of storm events for property and crop damage account for 96.7 percent of all damage in storm events. Together, tornadoes, thunderstorms, and floods account for 77 percent of storm damage.

# Bar chart of the top ten event types by combined property and crop damage,
# with bars ordered by damage and x-axis labels rotated to stay readable.
g <- ggplot(damage, aes(x = reorder(EventType, Damage), y = Damage)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event type",
    y = "Damage ($)",
    title = "Top-10 Event Types for Property and Crop Damage"
  )
print(g)