The goal of this report is to identify which weather events and storms in the U.S. are most damaging in terms of fatalities, injuries, and damage to property and crops.
The following two main questions are answered in this report:
1 - Which types of events are most harmful with respect to population health?
2 - Which types of events have the greatest economic consequences?
library(knitr)
library(markdown)
library(rmarkdown)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stats)
library(xtable)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:xtable':
##
## label, label<-
## The following objects are masked from 'package:dplyr':
##
## combine, src, summarize
## The following objects are masked from 'package:plyr':
##
## is.discrete, summarize
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
library(ggplot2)
# Load the raw storm data. `as.is = TRUE` keeps text columns as character
# vectors instead of converting them to factors.
df_temp <- read.csv("StormData.csv", header = TRUE, as.is = TRUE)

# Keep only the columns the analysis below reads: event type, health impact,
# and property/crop damage amounts with their magnitude codes.
keep_cols <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
df <- df_temp[, keep_cols]

# Collapse the free-text EVTYPE values into a small set of EVENT categories.
# Order matters: keywords are applied in sequence, so when an EVTYPE contains
# several keywords the LAST matching one wins (e.g. "THUNDERSTORM WIND" ends up
# as "STORM", "WINTER STORM" as "WINTER"). Rows matching nothing stay "OTHER".
event_keywords <- c(
  "HAIL", "HEAT", "FLOOD", "WIND", "STORM",
  "SNOW", "TORNADO", "WINTER", "RAIN"
)
df$EVENT <- "OTHER"
for (keyword in event_keywords) {
  df$EVENT[grepl(keyword, df$EVTYPE, ignore.case = TRUE)] <- keyword
}
# Translate damage magnitude codes into numeric multipliers.
#
# Args:
#   d: character vector (or scalar) of magnitude codes. Letters H/K/M/B mean
#      hundred/thousand/million/billion; a digit "n" means 10^n. Matching is
#      case-insensitive, so "k" and "K" are treated alike.
#
# Returns:
#   An unnamed numeric vector the same length as `d`. Any code that is not
#   recognised (including "" and NA) maps to 0.
decode.units <- function(d) {
  multipliers <- c(
    H = 1e2, K = 1e3, M = 1e6, B = 1e9,
    `0` = 1, `1` = 1e1, `2` = 1e2, `3` = 1e3, `4` = 1e4,
    `5` = 1e5, `6` = 1e6, `7` = 1e7, `8` = 1e8, `9` = 1e9
  )
  # Upper-case first so lowercase codes are not silently scored as 0.
  scale <- unname(multipliers[toupper(as.character(d))])
  # Lookups that miss the table come back as NA; treat those as multiplier 0.
  scale[is.na(scale)] <- 0
  scale
}
# Total fatalities and injuries for each EVENT category.
events <- aggregate(cbind(FATALITIES, INJURIES) ~ EVENT, df, sum, na.rm = TRUE)

# Keep the (up to) ten categories with the most fatalities. head() is used
# instead of [1:10] so that fewer than ten categories does not produce
# all-NA rows.
events.top <- events[head(order(-events$FATALITIES), 10), ]

# Bin the injury totals into quantile groups (Hmisc::cut2) so they can be
# mapped onto a discrete fill palette.
events.top$INJURIES <- cut2(events.top$INJURIES, g = 10)

# Bar chart: bar height = fatalities, bar colour = injury bin.
ggplot(
  events.top,
  aes(x = reorder(EVENT, -FATALITIES), y = FATALITIES, fill = INJURIES)
) +
  geom_bar(stat = "identity") +
  scale_fill_brewer(palette = "RdYlGn") +
  guides(fill = guide_legend(reverse = TRUE)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab(NULL) +
  ggtitle("Top 10 most harmful weather events in the US")
# Combined property + crop damage per record: each amount is scaled by the
# multiplier decoded from its magnitude code. vapply() guarantees a numeric
# vector (and fails loudly otherwise) where sapply() could silently change
# its return type.
prop_scale <- vapply(df$PROPDMGEXP, decode.units, numeric(1), USE.NAMES = FALSE)
crop_scale <- vapply(df$CROPDMGEXP, decode.units, numeric(1), USE.NAMES = FALSE)
df$DAMAGE <- df$PROPDMG * prop_scale + df$CROPDMG * crop_scale

# Total damage for each EVENT category.
data.damage <- aggregate(DAMAGE ~ EVENT, df, sum, na.rm = TRUE)

# Keep the (up to) ten costliest categories; head() avoids the all-NA rows
# that [1:10] would add if fewer than ten categories exist.
data.damage.top <- data.damage[head(order(-data.damage$DAMAGE), 10), ]

# Bar chart of total damage per category, costliest first.
ggplot(data.damage.top, aes(x = reorder(EVENT, -DAMAGE), y = DAMAGE)) +
  geom_bar(stat = "identity", fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab(NULL) +
  ylab("Economic impact in USD") +
  ggtitle("Top 10 events in order of greatest impact")
The two main findings based on the analysis above are: