Synopsis

The goal of this project is to explore the NOAA Storm Database and answer some questions about the events it records. The events in the database start in the year 1950 and end in November 2011.

This analysis addresses the following questions:

  1. Across the United States, which types of events are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

Data

The data for this analysis come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size.

Storm Data

To learn about the variables in the database, please see the links below.

Data Processing

a) Data Loading

Download the file from its source URL (unless a local copy already exists) and load it into a data.table.

# data.table is loaded up front so it is available before the data is read.
library(data.table)

# Source URL and local file name of the bzip2-compressed CSV.
dataurl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
datafile <- "StormData.csv.bz2"

# Download only when no local copy exists. mode = "wb" requests a binary
# transfer so the compressed archive cannot be altered by a text-mode
# download.
if (!file.exists(datafile)) {
  download.file(dataurl, destfile = datafile, mode = "wb")
}

# Read the compressed CSV and convert the result to a data.table, which the
# rest of the analysis operates on.
storm <- read.csv(datafile)
storm <- as.data.table(storm)

b) Examine the variable names and decide which variables to keep for the analysis

# List every column available in the raw data set.
colnames(storm)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Columns retained for the analysis: the event type, the two health
# measures, and the damage amounts together with their exponent codes.
keepcols <- c(
  "EVTYPE",
  "INJURIES", "FATALITIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)

c) Data Subsetting

Subset the data to the rows where a proper event type is recorded and at least one of the measures used in the analysis (injuries, fatalities, property damage, crop damage) is greater than 0.

# Keep only the columns selected for the analysis.
storm <- storm[, .SD, .SDcols = keepcols]
cols <- c("PROPDMGEXP", "CROPDMGEXP")

# Convert the two exponent columns to character (they may have been read as
# factors) so they can be edited as text in the transformation step.
stormDT <- storm[
  , (cols) := lapply(.SD, as.character),
  .SDcols = cols
]

# Drop rows whose event type is the placeholder "?", then keep only rows in
# which at least one health or damage measure is positive.
stormDT <- stormDT[EVTYPE != "?"][
  INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0
]

d) Data Transformation

Transform the crop damage and property damage exponent columns. Blank values are converted to 0, all alphabetic codes are converted to upper case, and every code is then mapped to its numeric multiplier.

# Inspect the raw exponent codes before cleaning.
table(stormDT$CROPDMGEXP)
## 
##             ?      0      B      k      K      m      M 
## 152663      6     17      7     21  99932      1   1985
table(stormDT$PROPDMGEXP)
## 
##             -      +      0      2      3      4      5      6      7      B 
##  11585      1      5    210      1      1      4     18      3      3     40 
##      h      H      K      m      M 
##      1      6 231427      7  11320
# Blank codes become "0". The replacement is written as a string because the
# exponent columns hold character data at this point; assigning the number 0
# would rely on an implicit type coercion.
stormDT[CROPDMGEXP == "", CROPDMGEXP := "0"]
stormDT[PROPDMGEXP == "", PROPDMGEXP := "0"]

# Upper-case the codes so that, for example, "k" and "K" are counted together.
stormDT[, (cols) := lapply(.SD, toupper), .SDcols = cols]
table(stormDT$CROPDMGEXP)
## 
##      ?      0      B      K      M 
##      6 152680      7  99953   1986
table(stormDT$PROPDMGEXP)
## 
##      -      +      0      2      3      4      5      6      7      B      H 
##      1      5  11795      1      1      4     18      3      3     40      7 
##      K      M 
## 231427  11327
# Multipliers for the crop-damage exponent codes; "?" and "0" leave the
# recorded amount unchanged.
cropDmgExpKey <- c("?" = 1, "0" = 1, "K" = 1e3, "M" = 1e6, "B" = 1e9)

# Multipliers for the property-damage exponent codes: "-", "+" and "0" leave
# the recorded amount unchanged, a digit d maps to 10^d, and H/K/M/B map to
# 10^2, 10^3, 10^6 and 10^9.
propDmgExpKey <- c(
  "-" = 1,
  "+" = 1,
  setNames(10^(0:9), 0:9),
  "H" = 1e2,
  "K" = 1e3,
  "M" = 1e6,
  "B" = 1e9
)


# Replace each exponent code with its numeric multiplier. The column is
# referenced directly inside j instead of re-querying stormDT, and unname()
# stops the lookup names from being carried into the column.
stormDT[, CROPDMGEXP := unname(cropDmgExpKey[as.character(CROPDMGEXP)])]
stormDT[, PROPDMGEXP := unname(propDmgExpKey[as.character(PROPDMGEXP)])]

# A code that is absent from the lookup vectors comes back as NA, and an NA
# multiplier yields an NA cost; report that instead of letting it pass
# silently.
if (anyNA(stormDT$CROPDMGEXP) || anyNA(stormDT$PROPDMGEXP)) {
  warning(
    "Unrecognised damage exponent code(s) were mapped to NA",
    call. = FALSE
  )
}
table(stormDT$CROPDMGEXP)
## 
##      1   1000  1e+06  1e+09 
## 152686  99953   1986      7
table(stormDT$PROPDMGEXP)
## 
##      1    100   1000  10000  1e+05  1e+06  1e+07  1e+09 
##  11801      8 231428      4     18  11330      3     40

e) Calculate Cost

The cost of property and crop damage is calculated as the damage amount multiplied by its exponent multiplier, e.g. PROPCOST = PROPDMG * PROPDMGEXP.

# Add the cost columns: each damage amount multiplied by its multiplier.
# All original columns are kept, in the same order as before.
stormDT <- stormDT[, list(
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  PROPCOST = PROPDMGEXP * PROPDMG,
  CROPDMG,
  CROPDMGEXP,
  CROPCOST = CROPDMGEXP * CROPDMG
)]

f) Calculate Total Fatalities and Injuries

In this step, total fatalities and injuries are calculated per event type, and the top 10 event types (ranked by fatalities) are used for the results.

# Total fatalities and injuries per event type; TOTALS is the sum of both.
totalhealthDT <- stormDT[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  TOTALS = sum(FATALITIES) + sum(INJURIES)
), by = .(EVTYPE)]

# Rank by fatalities (descending) and keep at most the first ten event types.
# head() returns fewer rows when fewer exist, whereas indexing with 1:10
# would pad the result with NA rows.
totalhealthDT <- head(totalhealthDT[order(-FATALITIES), ], 10)
head(totalhealthDT)
##            EVTYPE FATALITIES INJURIES TOTALS
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755
## 4:           HEAT        937     2100   3037
## 5:      LIGHTNING        816     5230   6046
## 6:      TSTM WIND        504     6957   7461

g) Calculate Total Property and Crop Damage Cost

In this step, total property and crop damage costs are calculated per event type, and the top 10 event types (ranked by total cost) are used for the results.

# Total property and crop damage cost per event type; TOTALCOST is the sum
# of both.
totalCostDT <- stormDT[, .(
  PROPCOST = sum(PROPCOST),
  CROPCOST = sum(CROPCOST),
  TOTALCOST = sum(PROPCOST) + sum(CROPCOST)
), by = .(EVTYPE)]

# Rank by total cost (descending) and keep at most the first ten event types.
# head() returns fewer rows when fewer exist, whereas indexing with 1:10
# would pad the result with NA rows.
totalCostDT <- head(totalCostDT[order(-TOTALCOST), ], 10)
head(totalCostDT)
##               EVTYPE     PROPCOST   CROPCOST    TOTALCOST
## 1:             FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON  69305840000 2607872800  71913712800
## 3:           TORNADO  56947380677  414953270  57362333947
## 4:       STORM SURGE  43323536000       5000  43323541000
## 5:              HAIL  15735267513 3025954473  18761221986
## 6:       FLASH FLOOD  16822673979 1421317100  18243991079

Results

Events that are most harmful with respect to population health

Melt the data into long format for plotting.

# Reshape to long format: one row per event type and health measure.
health_consequences <- melt(
  totalhealthDT,
  id.vars = "EVTYPE",
  variable.name = "HEALTHTYPE"
)
head(health_consequences)
##            EVTYPE HEALTHTYPE value
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
## 4:           HEAT FATALITIES   937
## 5:      LIGHTNING FATALITIES   816
## 6:      TSTM WIND FATALITIES   504
library(ggplot2)
# Horizontal bar chart with one dodged bar per health measure; the event
# types are ordered along the axis by reorder(EVTYPE, value).
ggplot(
  health_consequences,
  aes(x = reorder(EVTYPE, value), y = value, fill = HEALTHTYPE)
) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Top 10 Event Types for Fatalaties/Injuries") +
  xlab("Event Type") +
  ylab("Number of Health Consequences") +
  coord_flip()

Events that have greatest economic consequences

Melt the data into long format for plotting.

# Reshape to long format: one row per event type and cost measure.
econ_consequences <- melt(
  totalCostDT,
  id.vars = "EVTYPE",
  variable.name = "DAMAGETYPE"
)
# Fix the order of the cost measures (crop, property, total) for the plot.
econ_consequences[, DAMAGETYPE := ordered(
  DAMAGETYPE,
  levels = c("CROPCOST", "PROPCOST", "TOTALCOST")
)]
head(econ_consequences)
##               EVTYPE DAMAGETYPE        value
## 1:             FLOOD   PROPCOST 144657709807
## 2: HURRICANE/TYPHOON   PROPCOST  69305840000
## 3:           TORNADO   PROPCOST  56947380677
## 4:       STORM SURGE   PROPCOST  43323536000
## 5:              HAIL   PROPCOST  15735267513
## 6:       FLASH FLOOD   PROPCOST  16822673979
library(ggplot2)
# Prefer fixed over scientific notation while the plot is drawn. options()
# returns the previous settings, which are kept so that the caller's value
# can be restored afterwards rather than being reset to a hard-coded 0.
old_opts <- options(scipen = 999)
ggplot(econ_consequences, aes(x = reorder(EVTYPE, value), y = value)) +
  geom_bar(stat = "identity", aes(fill = DAMAGETYPE), position = "dodge") +
  labs(
    title = "Top 10 Event Types impacts economy",
    x = "Event Type",
    y = "Cost of Impact"
  ) +
  coord_flip()

# Put back whatever scipen value was in effect before this block.
options(old_opts)