Reproducible Research: Peer Assessment 2

This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

library("ggplot2")
## Warning: package 'ggplot2' was built under R version 3.2.3
library("plyr")
## Warning: package 'plyr' was built under R version 3.2.3
library("reshape2")
## Warning: package 'reshape2' was built under R version 3.2.3
library("scales")
## Warning: package 'scales' was built under R version 3.2.3
# Load the raw storm data from the bzip2-compressed CSV, keeping text columns
# as character vectors.
mydata <- read.csv(
  bzfile("repdata_data_StormData.csv.bz2"),
  stringsAsFactors = FALSE
)

# Keep only the columns used by the analysis: event type, casualty counts,
# and the damage amounts with their exponent codes.
Stormdata <- subset(
  mydata,
  select = c(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  )
)

# Total fatalities and total injuries per event type.
Fatality <- aggregate(FATALITIES ~ EVTYPE, data = Stormdata, sum, na.rm = TRUE)
Injury <- aggregate(INJURIES ~ EVTYPE, data = Stormdata, sum, na.rm = TRUE)

# Rank event types from the highest total to the lowest. Columns are reached
# through their own data frame rather than via attach(), which would leave
# both frames on the search path and mask EVTYPE.
FatalitiesSort <- Fatality[order(-Fatality$FATALITIES), ]
InjuriesSort <- Injury[order(-Injury$INJURIES), ]
# Convert a vector of damage exponent codes into numeric multipliers.
#
# Mapping applied, in this order:
#   "-", "+", "?"  -> 0
#   "B" / "b"      -> 1000000000
#   "M" / "m"      -> 1000000
#   "K" / "k"      -> 1000
#   "H" / "h"      -> 100
# Whatever is still not numeric after substitution (for example an empty
# string or NA) becomes a multiplier of 1.
#
# NOTE(review): codes that are already digits are used as literal multipliers
# (e.g. "5" multiplies by 5, not by 10^5) -- confirm this is the intended
# reading of the exponent columns.
#
# code: vector of exponent codes (any type coercible to character).
# Returns a numeric vector of the same length as `code`.
exp_to_multiplier <- function(code) {
  code <- as.character(code)
  code <- gsub("\\-|\\+|\\?", "0", code)
  code <- gsub("B|b", "1000000000", code)
  code <- gsub("M|m", "1000000", code)
  code <- gsub("K|k", "1000", code)
  code <- gsub("H|h", "100", code)
  multiplier <- as.numeric(code)
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Property damage: replace the exponent code by its multiplier, scale the
# recorded amount, then total and rank the result per event type.
Stormdata$PROPDMGEXP <- exp_to_multiplier(Stormdata$PROPDMGEXP)
Stormdata$PROPDMG_full <- Stormdata$PROPDMG * Stormdata$PROPDMGEXP
PropDamage <- aggregate(PROPDMG_full ~ EVTYPE, data = Stormdata, sum)
PropDamageSort <- PropDamage[order(-PropDamage$PROPDMG_full), ]

# Crop damage: same conversion and scaling as for property damage.
Stormdata$CROPDMGEXP <- exp_to_multiplier(Stormdata$CROPDMGEXP)
Stormdata$CROPDMG_full <- Stormdata$CROPDMG * Stormdata$CROPDMGEXP

Results

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Total the scaled crop damage (CROPDMG_full) for every event type, then
# order the event types from the largest total down to the smallest.
CropDamage <- aggregate(
  CROPDMG_full ~ EVTYPE,
  data = Stormdata,
  FUN = sum
)
CropDamageSort <- CropDamage[order(-CropDamage[["CROPDMG_full"]]), ]

# Horizontal bar chart of the five event types with the most fatalities.
# - head() takes at most five rows, so a shorter table does not produce NA rows.
# - reorder() sorts the bars by fatality count instead of alphabetically.
# - No fill scale is added: no fill aesthetic is mapped, so one would have no
#   effect on the plot.
FatalitiesGraph <- ggplot(
  head(FatalitiesSort, 5),
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  coord_flip() +
  labs(
    title = "Top Fatalities by Events",
    x = "Weather Event Type",
    y = "Number of Fatalities"
  )

FatalitiesGraph

Question 2: Across the United States, which types of events have the greatest economic consequences?

# Horizontal bar charts of the five event types with the largest total
# property damage and the largest total crop damage.
# - head() takes at most five rows, so a shorter table does not produce NA rows.
# - reorder() sorts the bars by damage total instead of alphabetically.
# - No fill scale is added: no fill aesthetic is mapped, so one would have no
#   effect on the plots.
PropDmgGraph <- ggplot(
  head(PropDamageSort, 5),
  aes(x = reorder(EVTYPE, PROPDMG_full), y = PROPDMG_full)
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(
    title = "Top Property Damage by Events",
    x = "Weather Event Type",
    y = "Damage"
  ) +
  coord_flip()

CropDmgGraph <- ggplot(
  head(CropDamageSort, 5),
  aes(x = reorder(EVTYPE, CROPDMG_full), y = CROPDMG_full)
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  labs(
    title = "Top Crop Damage by Events",
    x = "Weather Event Type",
    y = "Damage"
  ) +
  coord_flip()

PropDmgGraph

CropDmgGraph