Synopsis

The basic goal of this report is to explore the NOAA Storm Database and answer some basic questions about severe weather events.

The two main questions addressed are the following:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

Here is a summary of the results from this data analysis:

  1. Tornadoes are the event type that seems to cause the most fatalities, followed by excessive heat and flash floods. Tornadoes are also the leading cause of injuries.
  2. Floods seem to be the largest contributor to economic damage, followed by hurricanes/typhoons and tornadoes.

Data Processing 1

Read the csv file and create a Data Frame aggregating the health impact by Event Type

# Load the raw storm data; the path is resolved relative to the working
# directory.
stormdata <- read.csv("repdata%2Fdata%2FStormData.csv")

# Total fatalities and injuries per event type. tapply() names each total by
# EVTYPE, so the event types become the row names of the data frame.
health_impact_by_evtype <- data.frame(
  fatalities = tapply(stormdata$FATALITIES, stormdata$EVTYPE, sum),
  injuries = tapply(stormdata$INJURIES, stormdata$EVTYPE, sum)
)

# Keep every event type with any recorded health impact. Filtering on injuries
# alone would also discard event types that caused fatalities but no injuries,
# which would bias any fatality ranking derived from this data frame.
has_health_impact <- health_impact_by_evtype$injuries != 0 |
  health_impact_by_evtype$fatalities != 0
health_impact_by_evtype_filtered <- health_impact_by_evtype[has_health_impact, ]

#Order by injuries (ties broken by fatalities), largest first
health_impact_by_evtype_filtered_inj_desc <- health_impact_by_evtype_filtered[
  order(
    health_impact_by_evtype_filtered$injuries,
    health_impact_by_evtype_filtered$fatalities,
    decreasing = TRUE
  ),
]

#Reshape to long format: one row per (event type, measure) pair
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.3.3
# The event type only exists as row names so far; copy it into a real column
# so that melt() can use it as the identifier.
health_impact_by_evtype_filtered[["type"]] <-
  rownames(health_impact_by_evtype_filtered)
health_impact_by_evtype_filtered_melted <- melt(
  health_impact_by_evtype_filtered,
  id.vars = "type",
  value.name = "count"
)

Create a data frame with the top 10 event types by fatalities and the top 10 by injuries (plotted in the results section below)

# Split the long-format data by measure.
fatality_rows <- health_impact_by_evtype_filtered_melted[
  health_impact_by_evtype_filtered_melted$variable == "fatalities",
]
injury_rows <- health_impact_by_evtype_filtered_melted[
  health_impact_by_evtype_filtered_melted$variable == "injuries",
]

# Within each measure keep the 10 event types with the highest count.
top_fatalities <- head(
  fatality_rows[order(fatality_rows$count, decreasing = TRUE), ],
  10
)
top_injuries <- head(
  injury_rows[order(injury_rows$count, decreasing = TRUE), ],
  10
)

# Stack the fatalities rows on top of the injuries rows.
health_impact_by_evtype_filtered_melted_top_10 <- rbind(
  top_fatalities,
  top_injuries
)

#Show the top10 injuries and fatalities
health_impact_by_evtype_filtered_melted_top_10
##                  type   variable count
## 129           TORNADO fatalities  5633
## 20     EXCESSIVE HEAT fatalities  1903
## 28        FLASH FLOOD fatalities   978
## 47               HEAT fatalities   937
## 85          LIGHTNING fatalities   816
## 135         TSTM WIND fatalities   504
## 30              FLOOD fatalities   470
## 101       RIP CURRENT fatalities   368
## 63          HIGH WIND fatalities   248
## 1           AVALANCHE fatalities   224
## 287           TORNADO   injuries 91346
## 293         TSTM WIND   injuries  6957
## 188             FLOOD   injuries  6789
## 178    EXCESSIVE HEAT   injuries  6525
## 243         LIGHTNING   injuries  5230
## 205              HEAT   injuries  2100
## 237         ICE STORM   injuries  1975
## 186       FLASH FLOOD   injuries  1777
## 279 THUNDERSTORM WIND   injuries  1488
## 203              HAIL   injuries  1361

Results regarding most harmful events in terms of health impact

In the graph below, we can see that tornadoes are the events that seem to cause the most fatalities, followed by excessive heat and flash floods. Tornadoes are also the leading cause of injuries.

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
# Bar chart of the top-10 counts, one panel per measure (fatalities/injuries).
# Each panel gets its own y scale, and the event labels are rotated so they
# stay readable.
ggplot(
  data = health_impact_by_evtype_filtered_melted_top_10,
  mapping = aes(x = type, y = count)
) +
  geom_bar(stat = "identity") +
  facet_grid(variable ~ ., scales = "free_y") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Graph of Top 10 Events causing Fatalities and Injuries")

Data processing 2

Calculate the property damage in US dollars by applying the magnitude code in PROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions) to PROPDMG

# Property damage in US dollars: PROPDMG scaled by the magnitude code in
# PROPDMGEXP (H = 10^2, K = 10^3, M = 10^6, B = 10^9).
# Codes are matched case-insensitively so that lowercase entries such as "k"
# or "m" (presumably data-entry variants of the same codes) are not silently
# counted as zero damage. Any other code (blank, digits, symbols) has no
# multiplier defined here and keeps a damage of 0.
prop_multipliers <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
prop_codes <- toupper(as.character(stormdata$PROPDMGEXP))

# Look up each row's multiplier by name; codes without a multiplier yield NA.
prop_scale <- unname(prop_multipliers[prop_codes])
prop_known <- !is.na(prop_scale)

# A single vectorised column assignment avoids copying the whole data frame
# once per magnitude code.
stormdata$pd <- 0
stormdata$pd[prop_known] <-
  stormdata$PROPDMG[prop_known] * prop_scale[prop_known]

Calculate the crop damage in US dollars by applying the magnitude code in CROPDMGEXP (H = hundreds, K = thousands, M = millions, B = billions) to CROPDMG

# Crop damage in US dollars: CROPDMG scaled by the magnitude code in
# CROPDMGEXP (H = 10^2, K = 10^3, M = 10^6, B = 10^9).
# Codes are matched case-insensitively so that lowercase entries such as "k"
# or "m" (presumably data-entry variants of the same codes) are not silently
# counted as zero damage. Any other code (blank, digits, symbols) has no
# multiplier defined here and keeps a damage of 0.
crop_multipliers <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
crop_codes <- toupper(as.character(stormdata$CROPDMGEXP))

# Look up each row's multiplier by name; codes without a multiplier yield NA.
crop_scale <- unname(crop_multipliers[crop_codes])
crop_known <- !is.na(crop_scale)

# A single vectorised column assignment avoids copying the whole data frame
# once per magnitude code.
stormdata$cd <- 0
stormdata$cd[crop_known] <-
  stormdata$CROPDMG[crop_known] * crop_scale[crop_known]

Create a data frame of property, crop and total damage per event type, and keep the 10 event types with the largest total damage

# Sum the dollar damage per event type; tapply() names each total by EVTYPE,
# so the event types become the row names of the data frame.
property_by_event <- tapply(stormdata$pd, stormdata$EVTYPE, sum)
crop_by_event <- tapply(stormdata$cd, stormdata$EVTYPE, sum)
event_dmg <- data.frame(Property = property_by_event, Crop = crop_by_event)

# Combined damage, plus the event type as a regular column for plotting.
event_dmg[["Total"]] <- event_dmg[["Property"]] + event_dmg[["Crop"]]
event_dmg[["Event_Type"]] <- rownames(event_dmg)

# Rank by total damage, largest first, and keep the first 10 event types.
damage_ranking <- order(event_dmg[["Total"]], decreasing = TRUE)
event_dmg_top10 <- head(event_dmg[damage_ranking, ], 10)
event_dmg_top10
##                       Property        Crop        Total        Event_Type
## FLOOD             144657709800  5661968450 150319678250             FLOOD
## HURRICANE/TYPHOON  69305840000  2607872800  71913712800 HURRICANE/TYPHOON
## TORNADO            56925660480   414953110  57340613590           TORNADO
## STORM SURGE        43323536000        5000  43323541000       STORM SURGE
## HAIL               15727367220  3025537450  18752904670              HAIL
## FLASH FLOOD        16140811510  1421317100  17562128610       FLASH FLOOD
## DROUGHT             1046106000 13972566000  15018672000           DROUGHT
## HURRICANE          11868319010  2741910000  14610229010         HURRICANE
## RIVER FLOOD         5118945500  5029459000  10148404500       RIVER FLOOD
## ICE STORM           3944927810  5022113500   8967041310         ICE STORM

Results regarding events that have the greatest economic consequences

In the graph below, we can see that floods seem to be the largest contributor to economic damage, followed by hurricanes/typhoons and tornadoes.

# Bar chart of total (property + crop) damage for the top-10 event types,
# with rotated event labels and a centred title.
ggplot(data = event_dmg_top10, mapping = aes(x = Event_Type, y = Total)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90),
    plot.title = element_text(hjust = 0.5)
  ) +
  ylab("Damages in US $") +
  ggtitle("Graph of Top 10 Events causing Economical Damage")