Looking at the storm data from the National Weather Service from 1950 to 2011, it is clear that, across all natural disasters recorded in the US, tornadoes are likely the most damaging – both to population health and to the economy.
When we count injuries and lives lost as an indicator of harm to population health across all states, tornadoes account for almost 100K injuries and fatalities, exceeding the next most damaging natural disaster – excessive heat – by more than 10 times.
Similarly, tornadoes contribute the greatest economic loss among all natural events in the US, accounting for more than 3 billion USD in damages (property and crops combined) from 1950 to 2011 – roughly twice that of flash floods, the second most damaging natural disaster.
# Download storm data
# The archive is fetched only when no local copy exists, so re-running the
# script does not hit the network again.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "./stormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the bzip2 archive byte-exact on every platform; the
  # default download method is used so no external curl binary is required.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}

# Read and clean data
# The connection is passed unopened so that read.csv() opens it and closes it
# again itself; handing over an already-open connection would leave it open.
data <- read.csv(bzfile(storm_file))

# Drop the rows whose EVTYPE matches "Summary.*".
# A logical mask is used instead of negative grep() indices: when nothing
# matches, -integer(0) would select zero rows and silently empty the data set.
data2 <- data[!grepl("Summary.*", data$EVTYPE), ]
# Per-record totals used by the rankings further down:
#   pop_health = FATALITIES + INJURIES
#   econ_dmg   = PROPDMG + CROPDMG
# NOTE(review): the damage figures are summed as-is. If the data set scales
# them through separate magnitude columns (e.g. PROPDMGEXP / CROPDMGEXP),
# those multipliers are not applied here -- confirm against the data
# dictionary before trusting the economic ranking.
health_cols <- c("FATALITIES", "INJURIES")
damage_cols <- c("PROPDMG", "CROPDMG")
data2$pop_health <- rowSums(data2[, health_cols])
data2$econ_dmg <- rowSums(data2[, damage_cols])
# Rank event types by their total harm to population health and plot the
# five worst.

# ggplot2 is attached here because the plot below runs before any other
# visible load of the package; library() stops with an error if it is missing.
library(ggplot2)

# Sum of pop_health per event type, largest first.
harmCount <- sort(
  tapply(data2$pop_health, data2$EVTYPE, sum, na.rm = TRUE),
  decreasing = TRUE
)

# Keep only the event types that caused any harm at all.
harmCount_nonZero <- harmCount[harmCount != 0]
harmCount_final <- data.frame(
  event = names(harmCount_nonZero),
  harms = harmCount_nonZero,
  row.names = NULL
)

# Fix the factor level order to descending harm so the x axis follows the
# ranking rather than alphabetical order. The column is spelled out in full
# ("harms") instead of relying on partial matching of `$`.
harmCount_final$event <- factor(
  harmCount_final$event,
  levels = harmCount_final$event[
    order(harmCount_final$harms, decreasing = TRUE)
  ]
)

# head(, 5) returns fewer rows when fewer exist, where [1:5, ] would pad the
# plot data with NA rows. The totals are already computed, so the bars are
# drawn from the values directly rather than through a binning geom.
ggplot(head(harmCount_final, 5), aes(x = event, y = harms)) +
  geom_bar(stat = "identity") +
  ylab("Population Harms Count") +
  xlab("Natural Disaster Event") +
  ggtitle("Top 5 Harmful Events to Human Health")

head(harmCount_final)
## event harms
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
# Rank event types by their total economic damage and plot the five worst.

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with an obscure error.
library(ggplot2)
library(scales)

# Sum of econ_dmg per event type, largest first.
dmgCount <- sort(
  tapply(data2$econ_dmg, data2$EVTYPE, sum, na.rm = TRUE),
  decreasing = TRUE
)

# Keep only the event types with any recorded damage.
dmgCount_nonZero <- dmgCount[dmgCount != 0]

# Totals are divided by 1000 for display.
# NOTE(review): the axis label below reads "Million USD", which presumes the
# raw figures are in thousands of dollars -- confirm against the data set.
dmgCount_final <- data.frame(
  event = names(dmgCount_nonZero),
  dmg = dmgCount_nonZero / 1000,
  row.names = NULL
)

# Fix the factor level order to descending damage so the x axis follows the
# ranking rather than alphabetical order.
dmgCount_final$event <- factor(
  dmgCount_final$event,
  levels = dmgCount_final$event[
    order(dmgCount_final$dmg, decreasing = TRUE)
  ]
)

# head(, 5) returns fewer rows when fewer exist, where [1:5, ] would pad the
# plot data with NA rows. The totals are already computed, so the bars are
# drawn from the values directly rather than through a binning geom.
ggplot(head(dmgCount_final, 5), aes(x = event, y = dmg)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = dollar) +
  ylab("Sum of Economic Loss (Million USD)") +
  xlab("Natural Disaster Event") +
  ggtitle("Top 5 Harmful Events to Economy")

head(dmgCount_final)
## event dmg
## 1 TORNADO 3312.3
## 2 FLASH FLOOD 1599.3
## 3 TSTM WIND 1445.2
## 4 HAIL 1268.3
## 5 FLOOD 1068.0
## 6 THUNDERSTORM WIND 943.6