Synopsis

The purpose of this analysis is to figure out, which types of events are most harmful with respect to population health and which types of events have the greatest economic consequences ? These questions are limited to the United States.
The data comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
First, the maximum of event type are mapped to the “Storm Data Event Table”“, that counts 48 entries.
After, the fatalities and injuries are grouped by event type, sorted and plotted.
And the property and crop damage amounts are summed by event type, sorted and plotted.

Loading and Processing the Raw Data

library(R.utils)
library(dplyr)
library(ggplot2)
library(reshape2)

Load the NOAA Storm Database from repdata-data-StormData.csv.bz2 file
Available here

The variables kept for this analysis are :

filename <- "repdata-data-StormData.csv"
fileevent <- "event.csv"
if(!file.exists(filename)) {
  URL<-"https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(URL,destfile=paste0(filename,".bz2"),method="curl")
  bunzip2(paste0(filename,".bz2"), filename, remove = FALSE, skip = TRUE)
}

storm <- read.csv(filename)
event <- read.csv(fileevent)
stormTbl <- select(tbl_df(storm), BGN_DATE, EVTYPE, FATALITIES, INJURIES,PROPDMG,PROPDMGEXP,CROPDMG,CROPDMGEXP)

Cleaning the data

# put the event type to lower case
event$EVENT <- tolower(event$EVENT)
stormTbl$EVTYPE <- tolower(stormTbl$EVTYPE)
stormTbl$EVENT <- NA
# Get the year of the event
stormTbl$BGN_DATE <- as.POSIXct(strptime(stormTbl$BGN_DATE, "%m/%d/%Y %H:%M:%S"))
stormTbl$year <-  format(stormTbl$BGN_DATE,"%Y")

# convert the property damage amount - Put 0 if there is no value
stormTbl$DAMAGE_PROPERTY <- NA
stormTbl[stormTbl$PROPDMGEXP == "K",]$DAMAGE_PROPERTY <- stormTbl[stormTbl$PROPDMGEXP == "K",]$PROPDMG*1000
stormTbl[stormTbl$PROPDMGEXP == "M",]$DAMAGE_PROPERTY <- stormTbl[stormTbl$PROPDMGEXP == "M",]$PROPDMG*1000000
stormTbl[stormTbl$PROPDMGEXP == "B",]$DAMAGE_PROPERTY <- stormTbl[stormTbl$PROPDMGEXP == "B",]$PROPDMG*1000000000
stormTbl[which(is.na(stormTbl$DAMAGE_PROPERTY)),]$DAMAGE_PROPERTY <- 0

# convert the crop damage amount - Put 0 if there is no value
stormTbl$DAMAGE_CROP <- NA
stormTbl[stormTbl$CROPDMGEXP == "K",]$DAMAGE_CROP <- stormTbl[stormTbl$CROPDMGEXP == "K",]$CROPDMG*1000
stormTbl[stormTbl$CROPDMGEXP == "M",]$DAMAGE_CROP <- stormTbl[stormTbl$CROPDMGEXP == "M",]$CROPDMG*1000000
stormTbl[stormTbl$CROPDMGEXP == "B",]$DAMAGE_CROP <- stormTbl[stormTbl$CROPDMGEXP == "B",]$CROPDMG*1000000000
stormTbl[which(is.na(stormTbl$DAMAGE_CROP)),]$DAMAGE_CROP <- 0

Group the event type with the 48 standard event types

for (var in event$EVENT){
  test <- grepl(var, stormTbl$EVTYPE,ignore.case = TRUE)
  if (sum(test)>0) stormTbl[test,]$EVENT <- var
}

# Custom correction
stormTbl[grepl("TSTM WIND", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "thunderstorm wind"
stormTbl[grepl("FUNNEL|WATERSPOUT", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "thunderstorm wind"
stormTbl[grepl("COLD", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "cold"
stormTbl[grepl("FLD|FLOOD", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "flood"
stormTbl[grepl("FIRE", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "fire"
stormTbl[grepl("SNOW", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "snow"
stormTbl[grepl("Storm Surge|Storm Tide", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "storm surge/tide"
stormTbl[grepl("hurricane|typhoon", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "hurricane"
stormTbl[which(is.na(stormTbl$EVENT)),]$EVENT <- stormTbl[which(is.na(stormTbl$EVENT)),]$EVTYPE

Results

Group the fatalities and injuries by event types

result <- stormTbl %>%
  group_by(EVENT) %>%
  summarize(count=n(),
    fatalities=sum(FATALITIES),
    injuries=sum(INJURIES)) %>%
  select(EVENT, fatalities,injuries)
result <- arrange(result, -fatalities,-injuries)
result$EVENT <- factor(result$EVENT, levels=unique(result$EVENT))

Top 10 of the most harmful event types with respect to population health

In USA from 1950 to 2011

melted <- melt(result[1:10,], id.vars=c("EVENT"))
ggplot(melted, aes(x=EVENT, y= value , fill=variable)) + geom_bar(stat="identity",position="dodge") + ylab("Number of fatalities or injuries") + xlab("") + theme(axis.text.x=element_text(angle=90,hjust=0.9, vjust=0.2,size=14))

Sum the property and crop damage amounts by event type

resultDamage <- stormTbl %>%
  group_by(EVENT) %>%
  summarize(property=sum(DAMAGE_PROPERTY), crop=sum(DAMAGE_CROP),total = crop+property) %>%
  select(EVENT, property,crop,total)

resultDamage <- arrange(resultDamage, -total)
resultDamage$EVENT <- factor(resultDamage$EVENT, levels=unique(resultDamage$EVENT))

Top 10 of event type with the greatest economic consequences

In USA from 1950 to 2011

melted <- melt(resultDamage[1:10,1:3], id.vars=c("EVENT"))
ggplot(melted, aes(x=EVENT, y= value , fill=variable)) + geom_bar(stat="identity",position="dodge")+ ylab("Amount $") + xlab("") + theme(axis.text.x=element_text(angle=90,hjust=0.9, vjust=.2,size=14)) + ggtitle("Damage")