Synopsis

Data Processing

The data are first read into a data frame. We examine the structure of the dataset and determine the variables that we require. Examining the event types indicates that most events are classified as being one of a handful of types. We notice that “Thunderstorm Wind” and “Thunderstorm Winds” are classified as different event types. We assume that this is unintentional and combine them; the same is done for “High Winds”/“High Wind” and “Rip Currents”/“Rip Current”. It is possible that there are other such instances, but since there are very few events of those types, we do not make any other modifications. We check that the dataset is complete, i.e. without any missing variable values. The property damage and crop damage values are then converted from the units in the raw data to dollars. We then create a new data frame containing the event type, fatalities, injuries and economic damage.

# Load the raw storm records straight from the compressed archive.
Data <- read.csv(file = "repdata-data-StormData.csv.bz2")
# List the column names so the variables of interest can be picked out.
colnames(Data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Columns needed for the health and economic-impact analysis.
sel <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
RedData <- Data[sel]

# Fold three plural event-type spellings into their singular counterparts.
# EVTYPE is made a factor first; each replacement value must already be one of
# its levels for the assignment to succeed.
RedData$EVTYPE <- as.factor(RedData$EVTYPE)
RedData$EVTYPE[RedData$EVTYPE %in% "THUNDERSTORM WINDS"] <- "THUNDERSTORM WIND"
RedData$EVTYPE[RedData$EVTYPE %in% "HIGH WINDS"] <- "HIGH WIND"
RedData$EVTYPE[RedData$EVTYPE %in% "RIP CURRENTS"] <- "RIP CURRENT"

# Count the missing cells across every selected column (0 means none missing).
length(which(is.na(RedData)))
## [1] 0
# Convert crop and property damage units to dollars

# Translate damage-magnitude codes into powers of ten.
#
# code: vector of magnitude codes (character or factor).
# Returns a numeric vector, same length as `code`: 2 for H, 3 for K, 6 for M,
# 9 for B (case-insensitive); every other code, including blanks and NA,
# maps to 0, i.e. the amount is taken to be in plain dollars already.
#
# H has to be recognised here alongside K/M/B: resetting every non-K/M/B code
# to "0" first and only then looking for "H" means the H rule can never fire.
unit_exponent <- function(code) {
  code <- as.character(code)
  power <- rep(0, length(code))
  # Later assignments win, so should a code ever contain several letters the
  # precedence is K, then B, then M, then H.
  power[grepl("H", code, ignore.case = TRUE)] <- 2
  power[grepl("M", code, ignore.case = TRUE)] <- 6
  power[grepl("B", code, ignore.case = TRUE)] <- 9
  power[grepl("K", code, ignore.case = TRUE)] <- 3
  power
}

# NOTE(review): H-coded amounts used to be treated as plain dollars, so totals
# computed before this change may differ slightly; re-run the report.

# The *EXP columns are overwritten with the exponent as a character string
# ("0", "2", "3", "6" or "9"), and the damage columns are scaled to dollars.
RedData$PROPDMGEXP <- as.character(unit_exponent(RedData$PROPDMGEXP))
RedData$PROPDMG <- RedData$PROPDMG * 10^as.numeric(RedData$PROPDMGEXP)

RedData$CROPDMGEXP <- as.character(unit_exponent(RedData$CROPDMGEXP))
RedData$CROPDMG <- RedData$CROPDMG * 10^as.numeric(RedData$CROPDMGEXP)

# Build the analysis table: event type, fatalities, injuries and total
# economic damage (property damage plus crop damage).
# The column name "Fatalties" is kept as spelled because later code reads it.
NewData <- with(
  RedData,
  data.frame(
    Event = EVTYPE,
    Fatalties = FATALITIES,
    Injuries = INJURIES,
    Cost = PROPDMG + CROPDMG
  )
)

Results

# Sum every remaining column of NewData within each event type.
Result <- aggregate(. ~ Event, data = NewData, FUN = sum)

# First six event types when ranked by total fatalities, largest first.
head(Result[order(-Result[["Fatalties"]]), ], n = 6L)
##              Event Fatalties Injuries        Cost
## 831        TORNADO      5633    91346 57352114049
## 130 EXCESSIVE HEAT      1903     6525   500155700
## 153    FLASH FLOOD       978     1777 17562129167
## 275           HEAT       937     2100   403258500
## 463      LIGHTNING       816     5230   940751537
## 584    RIP CURRENT       572      529      163000
# First six event types when ranked by total injuries, largest first.
head(Result[order(-Result[["Injuries"]]), ], n = 6L)
##                 Event Fatalties Injuries         Cost
## 831           TORNADO      5633    91346  57352114049
## 853         TSTM WIND       504     6957   5038935845
## 170             FLOOD       470     6789 150319678257
## 130    EXCESSIVE HEAT      1903     6525    500155700
## 463         LIGHTNING       816     5230    940751537
## 758 THUNDERSTORM WIND       197     2396   5824578145
# First six event types when ranked by total economic damage, largest first.
head(Result[order(-Result[["Cost"]]), ], n = 6L)
##                 Event Fatalties Injuries         Cost
## 170             FLOOD       470     6789 150319678257
## 410 HURRICANE/TYPHOON        64     1275  71913712800
## 831           TORNADO      5633    91346  57352114049
## 668       STORM SURGE        13       38  43323541000
## 244              HAIL        15     1361  18758221521
## 153       FLASH FLOOD       978     1777  17562129167

Tornadoes are the most harmful type of event to human health in terms of both fatalities and injuries. However, floods cause the greatest economic damage. The top 5 types of events with the highest fatalities, injuries and economic cost are shown graphically below:

library(ggplot2)
# Bar chart of the five event types with the most fatalities.
# reorder() on the negated count places the tallest bar first on the x axis.
g <- ggplot(
  Result[order(-Result$Fatalties)[1:5], ],
  aes(x = reorder(Event, -Fatalties), y = Fatalties)
) +
  geom_bar(stat = "identity") +
  ggtitle("Top 5 Event Types with Highest Total Fatalities") +
  labs(x = "Event Type", y = "Fatalities") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g

# Bar chart of the five event types with the most injuries.
# reorder() on the negated count places the tallest bar first on the x axis.
g <- ggplot(
  Result[order(-Result$Injuries)[1:5], ],
  aes(x = reorder(Event, -Injuries), y = Injuries)
) +
  geom_bar(stat = "identity") +
  ggtitle("Top 5 Event Types with Highest Total Injuries") +
  labs(x = "Event Type", y = "Injuries") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g

# Bar chart of the five costliest event types; Cost is divided by 1e9 so the
# y axis reads in billions of dollars.
# reorder() on the negated cost places the tallest bar first on the x axis.
g <- ggplot(
  Result[order(-Result$Cost)[1:5], ],
  aes(x = reorder(Event, -Cost), y = Cost / 1e9)
) +
  geom_bar(stat = "identity") +
  ggtitle("Top 5 Event Types with Highest Total Economic Damage") +
  labs(x = "Event Type", y = "Cost of Economic Damage (Billion $)") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
g