Synopsis

This paper explores and analyzes storms and other severe weather events and their effects on public health and property. Such events cause fatalities, injuries and property damage, so analysing their nature is very important. Based on the data provided by the U.S. National Oceanic and Atmospheric Administration (NOAA), this document shows the top 10 event types causing the most fatalities, injuries and economic consequences.

Data Processing

Source: The data was downloaded from the URL used in the code below.

Loading Data

Here we load the data and keep only the required columns.

library(dplyr)
# Location of the compressed NOAA storm data set and the local file name it is
# stored under.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
archive_path <- "repdata-data-StormData.csv.bz2"

# Download only when the archive is not already present, so re-running the
# analysis does not fetch the file again. The default download method is used
# (no dependency on an external curl binary) and mode = "wb" keeps the
# compressed file intact on Windows.
if (!file.exists(archive_path)) {
  download.file(url, destfile = archive_path, mode = "wb", quiet = TRUE)
}

# Read the bzip2-compressed CSV, then keep only the event type and the
# FATALITIES..CROPDMGEXP column range used by the rest of the analysis.
stromData <- read.csv(bzfile(archive_path), sep = ",", header = TRUE)
subStromData <- select(stromData, EVTYPE, FATALITIES:CROPDMGEXP)

Transforming Data

The storm dataset encodes damage amounts with various unit codes (e.g. K = thousand, M = million); these need to be converted to numeric multipliers (e.g. K = 1000, M = 1000000). The following code performs this data transformation.

# Convert damage-exponent codes (PROPDMGEXP / CROPDMGEXP) into numeric
# multipliers.
#
# exp_code: character or factor vector of exponent codes.
# Returns a numeric vector of the same length:
#   H/h = 100, K/k = 1000, M/m = 1e+06, B/b = 1e+09,
#   digits 0-8 = 10, "+" = 1, and "", "-", "?" = 0.
# Letter codes are matched case-insensitively so both damage columns are
# treated the same way. Any code that is not listed (or is NA) is set to 0
# with a warning, instead of silently becoming NA and turning the per-event
# sums into NA later on.
damage_multiplier <- function(exp_code) {
  exp_code <- as.character(exp_code)
  lookup <- c(
    H = 100, K = 1000, M = 1e+06, B = 1e+09,
    "0" = 10, "1" = 10, "2" = 10, "3" = 10, "4" = 10,
    "5" = 10, "6" = 10, "7" = 10, "8" = 10,
    "+" = 1, "-" = 0, "?" = 0
  )
  multiplier <- unname(lookup[toupper(exp_code)])
  # An empty string never matches a name in a character subscript, so the
  # blank code has to be handled explicitly.
  multiplier[!is.na(exp_code) & exp_code == ""] <- 0
  unknown <- is.na(multiplier)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s) treated as 0: ",
      paste(unique(exp_code[unknown]), collapse = ", "),
      call. = FALSE
    )
    multiplier[unknown] <- 0
  }
  multiplier
}

#possible values for PROPDMGEXP
unique(subStromData$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
#Add new column PROPDMGUNIT and assign value based on the value of PROPDMGEXP
subStromData$PROPDMGUNIT <- damage_multiplier(subStromData$PROPDMGEXP)
#possible values for CROPDMGEXP
unique(subStromData$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
#Add new column CROPDMGUNIT and assign value based on the value of CROPDMGEXP
subStromData$CROPDMGUNIT <- damage_multiplier(subStromData$CROPDMGEXP)

Calculate total Economic Expenses

Sum up the property damage expenses and crop damage expenses to get the total expenses caused by the events.

# Total economic damage per record: property damage plus crop damage, each
# scaled by its unit multiplier.
subStromData$TOTALDMGEXP <- with(
  subStromData,
  PROPDMG * PROPDMGUNIT + CROPDMG * CROPDMGUNIT
)

Calculate top 10 Events

Top 10 Events are calculated based on Fatalities,Injuries and Total Economic expenses caused by the Event.

# Group by event type (EVTYPE) and total the fatalities, injuries and economic
# damage per event type. Plain summarise() replaces the deprecated
# summarise_each()/funs() pair and yields the same three summed columns.
group_by_evtype <- group_by(subStromData, EVTYPE)
resultPerEvent <- summarise(
  group_by_evtype,
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  TOTALDMGEXP = sum(TOTALDMGEXP)
)

# For each measure, keep the ten highest-ranked event types. head() is used
# instead of [1:10, ] so that fewer than ten event types does not create
# all-NA rows. The rows are already sorted in descending order, so using the
# EVTYPE values themselves as factor levels keeps that order on the plot axis.

# Top 10 events causing maximum fatalities
top10FatalitiesPE <- head(arrange(resultPerEvent, desc(FATALITIES)), 10)
top10FatalitiesPE$EVTYPE <- factor(
  top10FatalitiesPE$EVTYPE,
  levels = top10FatalitiesPE$EVTYPE
)

# Top 10 events causing maximum injuries
top10InjuriesPE <- head(arrange(resultPerEvent, desc(INJURIES)), 10)
top10InjuriesPE$EVTYPE <- factor(
  top10InjuriesPE$EVTYPE,
  levels = top10InjuriesPE$EVTYPE
)

# Top 10 events causing maximum expenses
top10DMGExp <- head(arrange(resultPerEvent, desc(TOTALDMGEXP)), 10)
top10DMGExp$EVTYPE <- factor(
  top10DMGExp$EVTYPE,
  levels = top10DMGExp$EVTYPE
)

Result

This section presents the output of the analysis. Three plots are presented, showing the top 10 event types causing the most fatalities, injuries and economic expenses.

Top 10 Events causing maximum Fatalities

library(ggplot2)
# Bar chart of the ten event types with the most fatalities.
# `fill` has to be inside aes() to be mapped to EVTYPE; passed directly to
# ggplot() it was silently ignored. The legend is hidden because it would only
# repeat the x-axis labels.
ggplot(top10FatalitiesPE, aes(x = EVTYPE, y = FATALITIES, fill = EVTYPE)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    x = "Event",
    y = "Fatalities",
    title = "Top 10 Events causing maximum Fatalities"
  )

Top 10 Events causing maximum Injuries

# Bar chart of the ten event types with the most injuries.
# `fill` has to be inside aes() to be mapped to EVTYPE; passed directly to
# ggplot() it was silently ignored. The legend is hidden because it would only
# repeat the x-axis labels. The title typo ("maximun") is corrected.
ggplot(top10InjuriesPE, aes(x = EVTYPE, y = INJURIES, fill = EVTYPE)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    x = "Event",
    y = "Injuries",
    title = "Top 10 Events causing maximum Injuries"
  )

Top 10 Events causing maximum Economic Expenses

# Bar chart of the ten event types with the highest total economic damage,
# with the y-axis scaled to billions.
# `fill` has to be inside aes() to be mapped to EVTYPE; passed directly to
# ggplot() it was silently ignored. The legend is hidden because it would only
# repeat the x-axis labels. The title typo ("maximun") is corrected.
ggplot(top10DMGExp, aes(x = EVTYPE, y = TOTALDMGEXP / 10^9, fill = EVTYPE)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  labs(
    x = "Event",
    y = "Total Expenses (In Billion)",
    title = "Top 10 Events causing maximum economic expenses"
  )