Synopsis

This is an analysis of the NOAA storm data for the course project. To find which event types are most harmful to population health, we sum two variables, FATALITIES and INJURIES, for each event type.

For economic damage, we will look at the variables PROPDMG and CROPDMG.

We will remove the event types that caused no health impact or damage, to narrow down the results.

We will also plot the most significant data points for clarity.

Data Processing

1. Read the data

# Load the raw storm data from the bzip2-compressed CSV into a data frame.
SD <- read.csv(file = "./data/repdata%2Fdata%2FStormData.csv.bz2")

2. We will normalize the event type (EVTYPE): convert every value to lower case and remove spaces, "-" and "/" characters, so that similar event types are grouped together

# Normalize EVTYPE so spelling variants of the same event fall into one group:
# lower-case every value, then strip spaces, "/" and "-" in a single pass.
# tolower() and gsub() are vectorized, so no per-element sapply() is needed;
# tolower() also coerces a factor column to character.
evtype_mod <- gsub("[ /-]", "", tolower(SD$EVTYPE))
# Assign by name (rather than cbind) so re-running this chunk overwrites the
# column instead of appending a duplicate one.
SD$evtype_mod <- evtype_mod
  • NOTE: Further processing is needed for accurate results, but it was skipped due to time constraints

For Health

3. We will first find the sum of fatalities for each event type using the plyr package. Then we sort the result in descending order

library(plyr)
# Total FATALITIES for each normalized event type (one row per evtype_mod).
# ddply() already returns a data frame, so no extra wrapper is needed.
sd_fatalities <- ddply(
  SD, "evtype_mod", summarise,
  fatality_sum = sum(FATALITIES)
)
# Keep only the event types with at least one recorded fatality.
fatal_rows <- which(sd_fatalities$fatality_sum > 0)
sd_fatalities_nz <- sd_fatalities[fatal_rows, ]
# Show the six event types with the highest totals, largest first.
fatality_rank <- order(-sd_fatalities_nz$fatality_sum)
head(sd_fatalities_nz[fatality_rank, ])
##        evtype_mod fatality_sum
## 708       tornado         5633
## 104 excessiveheat         1903
## 125    flashflood          978
## 223          heat          937
## 383     lightning          816
## 730      tstmwind          504

4. We do the same for injuries

# Total INJURIES for each normalized event type (one row per evtype_mod).
sd_injuries <- ddply(
  SD, "evtype_mod", summarise,
  injury_sum = sum(INJURIES)
)
# Keep only the event types with at least one recorded injury.
injury_rows <- which(sd_injuries$injury_sum > 0)
sd_injuries_nz <- sd_injuries[injury_rows, ]
# Show the six event types with the highest totals, largest first.
injury_rank <- order(-sd_injuries_nz$injury_sum)
head(sd_injuries_nz[injury_rank, ])
##        evtype_mod injury_sum
## 708       tornado      91346
## 730      tstmwind       6957
## 138         flood       6789
## 104 excessiveheat       6525
## 383     lightning       5230
## 223          heat       2100

Economic Impact

5. We first look at property damage

library(plyr)
library(ggplot2)
# Sum of PROPDMG for each normalized event type (one row per evtype_mod).
sd_propdmg <- ddply(
  SD, "evtype_mod", summarise,
  propertydamage_sum = sum(PROPDMG)
)
# Drop the event types whose summed property damage is zero.
propdmg_rows <- which(sd_propdmg$propertydamage_sum > 0)
sd_propdmg_nz <- sd_propdmg[propdmg_rows, ]
# d holds the six event types with the largest sums; it is plotted below.
propdmg_rank <- order(-sd_propdmg_nz$propertydamage_sum)
d <- head(sd_propdmg_nz[propdmg_rank, ])
d
##           evtype_mod propertydamage_sum
## 708          tornado          3212258.2
## 125       flashflood          1420674.6
## 730         tstmwind          1336103.6
## 138            flood           899938.5
## 649 thunderstormwind           876844.2
## 193             hail           688693.4
# Bar chart of the six event types in d, tallest bar first.
# A density geom cannot show one total per category, and a bare
# color = "red" inside qplot() is treated as a mapped variable (it yields a
# legend entry, not the colour red), so the bars are filled explicitly.
# NOTE(review): the axis label says USD, but PROPDMG is summed as-is above;
# confirm that no unit/multiplier column has to be applied first.
ggplot(d, aes(x = reorder(evtype_mod, -propertydamage_sum),
              y = propertydamage_sum)) +
  geom_col(fill = "red") +
  labs(x = "Event Type", y = "Property Damage (in USD)")

6. Then we look at crop damage

library(plyr)
# Sum of CROPDMG for each normalized event type (one row per evtype_mod).
sd_cropdmg <- ddply(
  SD, "evtype_mod", summarise,
  cropdamage_sum = sum(CROPDMG)
)
# Drop the event types whose summed crop damage is zero.
cropdmg_rows <- which(sd_cropdmg$cropdamage_sum > 0)
sd_cropdmg_nz <- sd_cropdmg[cropdmg_rows, ]
# cd holds the six event types with the largest sums; it is plotted below.
cropdmg_rank <- order(-sd_cropdmg_nz$cropdamage_sum)
cd <- head(sd_cropdmg_nz[cropdmg_rank, ])
cd
##           evtype_mod cropdamage_sum
## 193             hail      579596.28
## 125       flashflood      179200.46
## 138            flood      168037.88
## 730         tstmwind      109202.60
## 708          tornado      100018.52
## 649 thunderstormwind       66791.45
# Bar chart of the six event types in cd, tallest bar first.
# The plot is built from cd (the crop-damage table); the original call passed
# the property-damage table d as its data argument by mistake.
# A density geom cannot show one total per category, and a bare
# color = "red" inside qplot() is treated as a mapped variable (it yields a
# legend entry, not the colour red), so the bars are filled explicitly.
# NOTE(review): the axis label says USD, but CROPDMG is summed as-is above;
# confirm that no unit/multiplier column has to be applied first.
ggplot(cd, aes(x = reorder(evtype_mod, -cropdamage_sum),
               y = cropdamage_sum)) +
  geom_col(fill = "red") +
  labs(x = "Event Type", y = "Crop Damage (in USD)")

Results

Following are the results