Synopsis

The goal of the assignment is to explore the NOAA Storm Database and assess the effects of severe weather events on both population health and the economy.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

We will use the estimates of fatalities, injuries, property and crop damage to decide which types of event are most harmful to the population health and economy.

We concluded the following:

1) For both injuries and fatalities, tornadoes are the most devastating events in the given time period.

2) Flood causes the highest property damage, whereas drought causes the highest crop damage, and flood results in the highest total damage.

Loading necessary libraries

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.5.0 (2016-11-07) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(ggplot2)
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
## 
##     here
library(gridExtra)
library(grid)

Data Processing

Analysing Event Trackdown Data using Histogram

To analyse this, we need to convert the column ‘BGN_DATE’ into numeric year.

# Derive the numeric calendar year of every event from its BGN_DATE string.
storm[["year"]] <- year(
  strptime(storm$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
)

# Show how many events were recorded in each year.
hist(
  storm$year,
  breaks = 30,
  col = "orange",
  border = "black",
  main = "Histogram of Yearwise Storm Data",
  xlab = "Year"
)

It can be inferred from the above histogram that the number of events tracked increases significantly from the year 1995 onwards.

As the questions asked only pertain to health & economic consequences, we can further trim down “storm” to the columns needed to answer them.

# Keep only the event type, casualty, damage and year columns.
stormrefined <- storm[
  ,
  c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP",
    "year"
  )
]
dim(stormrefined)
## [1] 902297      8

Code for answering question ‘Does the analysis address the question of which types of events are most harmful to population health ?’

# Total fatalities and injuries for each event type.
harmpophealth <- ddply(
  stormrefined,
  .(EVTYPE),
  summarise,
  fatalities = sum(FATALITIES),
  injuries = sum(INJURIES)
)

# Two rankings of the same table: deadliest first, then most injurious first.
fatal <- harmpophealth[with(harmpophealth, order(fatalities, decreasing = TRUE)), ]
injury <- harmpophealth[with(harmpophealth, order(injuries, decreasing = TRUE)), ]

As columns ‘PROPDMGEXP’ & ‘CROPDMGEXP’ are expressed with letters such as K (i.e. thousands), L (i.e. lacs), M (i.e. millions) and B (i.e. billions), the letter value of the exponent must be converted to a usable number with the following function. The function recognises K, M and B (case-insensitively) and returns 0 for any other exponent.

# Convert a damage figure and its exponent code into a plain amount.
#
# amount:    numeric damage figure (the PROPDMG / CROPDMG value).
# magnitude: exponent code (character or factor). Only its first element is
#            inspected and the comparison is case-insensitive: "K" scales by
#            1e3, "M" by 1e6 and "B" by 1e9.
#
# Returns `amount` multiplied by the matching factor, or 0 when the code is
# missing (NA), zero-length, empty, or anything other than K / M / B.
damageamount <- function(amount, magnitude) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)

  # as.character() keeps factor input working; `[1]` keeps the scalar contract
  # and turns zero-length input into NA.
  unit <- toupper(as.character(magnitude)[1])

  # An NA code would make a bare `if (unit == "K")` stop with
  # "missing value where TRUE/FALSE needed", so treat it like any other
  # unrecognised code.
  if (is.na(unit) || !(unit %in% names(multipliers))) {
    return(0)
  }

  amount * multipliers[[unit]]
}

Using the above function, property and crop damage amounts can be calculated.

Code for answering the question ‘Does the analysis address the question of which types of events have the greatest economic consequences ?’

# Keep only the events that reported some property or crop damage.
damagedata <- subset(stormrefined, PROPDMG > 0 | CROPDMG > 0)

# Turn each figure/exponent pair into a plain amount, row by row.
damagedata$PropDamageamount <- mapply(
  damageamount, damagedata$PROPDMG, damagedata$PROPDMGEXP
)
damagedata$CropDamageamount <- mapply(
  damageamount, damagedata$CROPDMG, damagedata$CROPDMGEXP
)

# The total is the sum of the two columns just computed; re-running both
# row-wise conversions would only repeat the slowest step of the analysis.
damagedata$Damageamount <-
  damagedata$PropDamageamount + damagedata$CropDamageamount

# Aggregate each damage measure per event type.
Proptotaldamage <- ddply(
  damagedata, .(EVTYPE), summarise,
  Totalpropdamageamount = sum(PropDamageamount)
)
Croptotaldamage <- ddply(
  damagedata, .(EVTYPE), summarise,
  Totalcropdamageamount = sum(CropDamageamount)
)
totaldamage <- ddply(
  damagedata, .(EVTYPE), summarise,
  Totalamount = sum(Damageamount)
)

# Rank every table from the costliest event type downwards. TRUE is spelled
# out because T is an ordinary variable that can be reassigned.
Proptotaldamage <- Proptotaldamage[
  order(Proptotaldamage$Totalpropdamageamount, decreasing = TRUE),
]
Croptotaldamage <- Croptotaldamage[
  order(Croptotaldamage$Totalcropdamageamount, decreasing = TRUE),
]
totaldamage <- totaldamage[
  order(totaldamage$Totalamount, decreasing = TRUE),
]

RESULTS

Question 1 – Does the analysis address the question of which types of events are most harmful to population health?

# Six deadliest event types.
head(fatal[, c("EVTYPE", "fatalities")], n = 6L)
##             EVTYPE fatalities
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504
# Six event types with the most injuries.
head(injury[, c("EVTYPE", "injuries")], n = 6L)
##             EVTYPE injuries
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230
## 275           HEAT     2100

Answer for Question 1

For both injuries and fatalities, tornadoes are the most devastating events in the given time period.

Question 2 – Does the analysis address the question of which types of events have the greatest economic consequences ?

# Six event types with the highest property damage.
head(Proptotaldamage, n = 6L)
##                EVTYPE Totalpropdamageamount
## 72              FLOOD          144657709800
## 197 HURRICANE/TYPHOON           69305840000
## 354           TORNADO           56937160480
## 299       STORM SURGE           43323536000
## 59        FLASH FLOOD           16140811510
## 116              HAIL           15732266720
# Six event types with the highest crop damage.
head(Croptotaldamage, n = 6L)
##          EVTYPE Totalcropdamageamount
## 39      DROUGHT           13972566000
## 72        FLOOD            5661968450
## 262 RIVER FLOOD            5029459000
## 206   ICE STORM            5022113500
## 116        HAIL            3025954450
## 189   HURRICANE            2741910000
# Six event types with the highest combined damage.
head(totaldamage, n = 6L)
##                EVTYPE  Totalamount
## 72              FLOOD 150319678250
## 197 HURRICANE/TYPHOON  71913712800
## 354           TORNADO  57352113590
## 299       STORM SURGE  43323541000
## 116              HAIL  18758221170
## 59        FLASH FLOOD  17562128610

Answer for Question 2

From the above data, one can conclude that flood causes the highest property damage, whereas drought causes the highest crop damage, and flood results in the highest total damage.

Figures pertaining to Question 1

# The casualty totals are summed over every year present in stormrefined (no
# year filter is applied before aggregating), so the titles report the range
# actually found in the data instead of a hard-coded "1995 - 2011".
casualtyyears <- paste(range(stormrefined$year, na.rm = TRUE), collapse = " - ")

# Ten event types with the most injuries, bars ordered by the injury count.
injuriesPlot <- ggplot(
  data = head(injury, 10),
  aes(x = reorder(EVTYPE, injuries), y = injuries)
) +
  geom_bar(fill = "orange", stat = "identity") +
  ylab("Total number of injuries") +
  xlab("Event type") +
  ggtitle(paste0(
    "Total Injuries by Severe\n Weather Events in the U.S.\n from ",
    casualtyyears
  )) +
  # Event names are long; tilt them so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Ten event types with the most fatalities, bars ordered by the fatality count.
fatalitiesPlot <- ggplot(
  data = head(fatal, 10),
  aes(x = reorder(EVTYPE, fatalities), y = fatalities)
) +
  geom_bar(fill = "blue", stat = "identity") +
  ylab("Total number of fatalities") +
  xlab("Event type") +
  ggtitle(paste0(
    "Total Fatalities by Severe\n Weather Events in the U.S.\n from ",
    casualtyyears
  )) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Show both rankings side by side.
grid.arrange(fatalitiesPlot, injuriesPlot, ncol = 2)

Figures pertaining to Question 2

# The damage totals are summed over every year present in damagedata (no year
# filter is applied before aggregating), so the titles report the range
# actually found in the data instead of a hard-coded "1995-2011".
damageyears <- paste(range(damagedata$year, na.rm = TRUE), collapse = "-")

# Ten event types with the highest property damage, bars ordered by amount.
PropertyDamagePlot <- ggplot(
  data = head(Proptotaldamage, 10),
  aes(x = reorder(EVTYPE, Totalpropdamageamount), y = Totalpropdamageamount)
) +
  geom_bar(fill = "orange", stat = "identity") +
  ylab("Total Amount due to Property Damage") +
  xlab("Event type") +
  ggtitle(paste("Property Damage", damageyears)) +
  # Event names are long; tilt them so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Ten event types with the highest crop damage, bars ordered by amount.
CropDamagePlot <- ggplot(
  data = head(Croptotaldamage, 10),
  aes(x = reorder(EVTYPE, Totalcropdamageamount), y = Totalcropdamageamount)
) +
  geom_bar(fill = "blue", stat = "identity") +
  ylab("Total Amount due to Crop Damage") +
  xlab("Event type") +
  ggtitle(paste("Crop Damage from", damageyears)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Show both rankings side by side.
grid.arrange(PropertyDamagePlot, CropDamagePlot, ncol = 2)