This data analysis report involves exploring the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database.
This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The raw data consists of 902297 records, each measuring 37 variables, covering the years 1950 to 2011.
Seven variables, those that seemed most relevant to the Population Health Impact and Economic Impact of Storm Events, have been used for the analysis.
By aggregating the data on Event Type, plots have been created to display the Population Health Impact and Economic Impact of the top 6 most significant Event Types in terms of economic damage, fatalities, and injuries caused.
Overall, it was inferred that Tornadoes caused the greatest number of fatalities and injuries (5633 reported fatalities and 91346 reported injuries) and Floods caused the highest economic damage (150.3 billion $).
# Move into the project directory that holds the compressed storm data.
setwd("~/RepData_Assignment2")
# Read the CSV through an explicit bzfile connection.
data <- read.csv(file = bzfile(description = "repdata-data-StormData.csv.bz2"))
# Keep only the seven columns used by the analysis: the event type, the two
# casualty counts, and the property/crop damage amounts with their exponent
# codes.
reqdata <- data[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
It was important to explore the data and understand the values entered in the variables.
It was noted that extraneous values like “?”, “-”, and “” were present in the columns PROPDMGEXP and CROPDMGEXP, which have to be modified before the analysis code can run.
Also, 'B','M','H' express the exponents of the data specified in the PROPDMG,CROPDMG variables. That is, 'B' stands for Billion,'M' stands for Million and so on.
Therefore, these character variables are converted to their integer exponents to simplify data processing.
# Convert a column of damage-exponent codes into integer powers of ten.
#
# x: vector (character or factor) of exponent codes.
#
# Returns an integer vector the same length as `x`:
#   - "", "-", "?", "+"  -> 0
#   - "H" -> 2, "K" -> 3, "M" -> 6, "B" -> 9 (letters matched case-insensitively)
#   - digit strings      -> the digit value itself
#   - anything else      -> NA, with a warning naming the offending codes
#
# The same mapping is applied to both exponent columns so that a code valid in
# one column is never silently mishandled in the other.
exp_code_to_power <- function(x) {
  code <- toupper(trimws(as.character(x)))

  known_codes <- c("", "-", "?", "+", "H", "K", "M", "B")
  known_powers <- c(0L, 0L, 0L, 0L, 2L, 3L, 6L, 9L)
  power <- known_powers[match(code, known_codes)]

  # Codes that are already numeric are taken as the exponent directly.
  is_digit <- grepl("^[0-9]+$", code)
  power[is_digit] <- as.integer(code[is_digit])

  # Surface unexpected codes instead of letting NA slip through unnoticed.
  unknown <- is.na(power) & !is.na(code)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s): ",
      paste(unique(code[unknown]), collapse = ", "),
      call. = FALSE
    )
  }

  power
}

reqdata$PROPDMGEXP <- exp_code_to_power(reqdata$PROPDMGEXP)
reqdata$CROPDMGEXP <- exp_code_to_power(reqdata$CROPDMGEXP)
# Total damage per record: each damage amount is scaled by ten raised to its
# exponent column, then property and crop damage are added together.
tot_damage <- with(
  reqdata,
  (PROPDMG * 10^PROPDMGEXP) + (CROPDMG * 10^CROPDMGEXP)
)
reqdata[["TOT_DMG"]] <- tot_damage
library(ggplot2)
# Aggregating Fatalities By Event Type: sum FATALITIES within each EVTYPE,
# then keep the six event types with the largest totals.
temp <- with(
  reqdata,
  aggregate(FATALITIES, by = list(EVTYPE = EVTYPE), FUN = sum)
)
plotdata1 <- head(temp[order(temp[, 2], decreasing = TRUE), ], n = 6)
# aggregate() names the summed column "x"; give it a meaningful name.
names(plotdata1)[2] <- "FATALITIES"
# Creating ggplot object: one bar per event type, with the total printed
# just above each bar.
p <- ggplot(plotdata1, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = FATALITIES), vjust = -0.5) +
  xlab("Event Type") +
  ylab("Fatalities") +
  ggtitle("Plot Showing Event Type Vs Fatalities")
print(plotdata1, row.names = FALSE)
## EVTYPE FATALITIES
## TORNADO 5633
## EXCESSIVE HEAT 1903
## FLASH FLOOD 978
## HEAT 937
## LIGHTNING 816
## TSTM WIND 504
print(p)
# Aggregating Injuries by Event Type: sum INJURIES within each EVTYPE,
# then keep the six event types with the largest totals.
temp <- with(
  reqdata,
  aggregate(INJURIES, by = list(EVTYPE = EVTYPE), FUN = sum)
)
plotdata2 <- head(temp[order(temp[, 2], decreasing = TRUE), ], n = 6)
# aggregate() names the summed column "x"; give it a meaningful name.
names(plotdata2)[2] <- "INJURIES"
# Creating ggplot object: one bar per event type, with the total printed
# just above each bar.
p <- ggplot(plotdata2, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = INJURIES), vjust = -0.5) +
  xlab("Event Type") +
  ylab("Injuries") +
  ggtitle("Plot Showing Event Type Vs Injuries")
print(plotdata2, row.names = FALSE)
## EVTYPE INJURIES
## TORNADO 91346
## TSTM WIND 6957
## FLOOD 6789
## EXCESSIVE HEAT 6525
## LIGHTNING 5230
## HEAT 2100
print(p)
# Aggregating Total Damage by Event Type: sum TOT_DMG within each EVTYPE,
# then keep the six event types with the largest totals.
temp <- with(
  reqdata,
  aggregate(TOT_DMG, by = list(EVTYPE = EVTYPE), FUN = sum)
)
plotdata3 <- head(temp[order(temp[, 2], decreasing = TRUE), ], n = 6)
# aggregate() names the summed column "x"; give it a meaningful name.
names(plotdata3)[2] <- "TOT_DMG"
# Representing Total Damage in Billions (10^9)
plotdata3$TOT_DMG <- plotdata3$TOT_DMG / 10^9
# Creating ggplot object: one bar per event type, labelled with the total
# rounded to four significant digits. vjust is a fixed layer parameter, not a
# data mapping, so it is passed outside aes().
p <- ggplot(plotdata3, aes(x = EVTYPE, y = TOT_DMG)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = signif(TOT_DMG, digits = 4)), vjust = -0.5) +
  xlab("Event Type") +
  ylab("Total Economic Damage (in Billions)") +
  ggtitle("Plot Showing Event Type Vs Total Economic Damage")
print(plotdata3, row.names = FALSE)
## EVTYPE TOT_DMG
## FLOOD 150.32
## HURRICANE/TYPHOON 71.91
## TORNADO 57.36
## STORM SURGE 43.32
## HAIL 18.76
## FLASH FLOOD 18.24
print(p)