Synopsis

The goal of this report is to identify the weather events and storms in the U.S. that are most damaging in terms of fatalities, injuries, and damage to property and crops.

The following two main questions are answered in this report:

1 - Which types of events are most harmful with respect to population health?

2 - Which types of events have the greatest economic consequences?

Data Processing

Loading R libraries needed

library(knitr)
library(markdown)
library(rmarkdown)
library(plyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stats)
library(xtable)  
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:xtable':
## 
##     label, label<-
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(ggplot2) 

Reading the NOAA Data Source from csv

# Load the raw storm data. as.is = TRUE keeps text columns as character
# vectors rather than converting them to factors.
df_temp <- read.csv(
  file = "StormData.csv",
  header = TRUE,
  as.is = TRUE
)

Reducing the dataset and adjusting the data

# Keep only the columns used below: the event type, the casualty counts, and
# the property/crop damage amounts together with their multiplier codes.
df <- df_temp[
  ,
  c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  ),
  drop = FALSE
]

# Collapse the free-text EVTYPE values into a few broad categories.
# Keywords are applied in the order listed and a later match overwrites an
# earlier one, so an EVTYPE containing both "WIND" and "STORM" ends up as
# "STORM". Rows that match no keyword stay "OTHER".
df$EVENT <- "OTHER"
df$EVENT <- local({
  category <- df$EVENT
  keywords <- c(
    "HAIL", "HEAT", "FLOOD", "WIND", "STORM",
    "SNOW", "TORNADO", "WINTER", "RAIN"
  )
  for (keyword in keywords) {
    category[grepl(keyword, df$EVTYPE, ignore.case = TRUE)] <- keyword
  }
  category
})

# Translate a single damage-exponent code into its numeric multiplier.
#
# d: one exponent code, e.g. "K", "m", "5" or "". Letter codes are matched
#    case-insensitively; digit codes 0-9 mean 10^digit.
# Returns the multiplier as a number, or 0 for any unrecognised code
# (including the empty string).
decode.units <- function(d) {
  # Coerce to an upper-case string first: switch() selects by *position* when
  # given a number, and lower-case codes such as "k" or "m" would otherwise
  # fall through to the default and zero out the damage.
  code <- toupper(as.character(d))
  switch(
    code,
    H = 100,
    K = 1000,
    M = 1e+06,
    B = 1e+09,
    `0` = 1,
    `1` = 10,
    `2` = 100,
    `3` = 1000,
    `4` = 10000,
    `5` = 1e+05,
    `6` = 1e+06,
    `7` = 1e+07,
    `8` = 1e+08,
    `9` = 1e+09,
    0
  )
}

Aggregating Data for the Graphs

# Total fatalities and injuries per event category.
events <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVENT,
  data = df,
  FUN = sum,
  na.rm = TRUE
)
# Keep the ten categories with the most fatalities. head() is used instead of
# indexing with [1:10] so that having fewer than ten categories does not
# introduce all-NA rows.
events.top <- head(events[order(-events$FATALITIES), ], 10)


# Bin the injury totals into groups with Hmisc's cut2() so they can be shown
# as a discrete fill colour on the fatalities bar chart.
events.top$INJURIES <- cut2(events.top$INJURIES, g = 10)
ggplot(
  events.top,
  aes(x = reorder(EVENT, -FATALITIES), y = FATALITIES, fill = INJURIES)
) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "RdYlGn") +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab(NULL) +
  ggtitle("Top 10 most harmful weather events in the US")

# Total damage per row: each amount is scaled by the multiplier that
# decode.units() returns for its exponent code, then property and crop damage
# are added. vapply() guarantees one plain number per row (sapply() could
# silently return a list) and USE.NAMES = FALSE avoids carrying the codes
# along as element names.
df$DAMAGE <-
  df$PROPDMG *
    vapply(df$PROPDMGEXP, decode.units, numeric(1), USE.NAMES = FALSE) +
  df$CROPDMG *
    vapply(df$CROPDMGEXP, decode.units, numeric(1), USE.NAMES = FALSE)

# Sum the damage per event category.
data.damage <- aggregate(DAMAGE ~ EVENT, data = df, FUN = sum, na.rm = TRUE)

# Keep the ten costliest categories. head() avoids the all-NA rows that
# [1:10] would produce if there were fewer than ten categories.
data.damage.top <- head(data.damage[order(-data.damage$DAMAGE), ], 10)

# Bar chart of total damage per event category, largest first.
ggplot(
  data.damage.top,
  aes(x = reorder(EVENT, -DAMAGE), y = DAMAGE)
) +
  geom_bar(stat = "identity", fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab(NULL) +
  ylab("Economic impact in USD") +
  ggtitle("Top 10 events in order of greatest impact")

Results

The two main findings from the analysis above are:

  1. Tornadoes cause the most deaths and injuries, while
  2. Floods have the greatest economic consequences.