Synopsis

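The goal of this project is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database and answer some basic questions about severe weather events, for example: which types of events are most harmful to population health, and which types of events have the greatest economic consequences?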

The database tracks characteristics of major storms and weather events in the United States from 1950 to 2011, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data and Documentation

Data Processing

# Fetch the compressed storm data only when no local copy exists, so that
# re-running the report does not re-download the archive every time.
if (!file.exists("stormData.csv.bz2")) {
  download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "stormData.csv.bz2", method = "curl")
}
# read.csv() reads the bz2 file directly; its defaults (comma separator,
# double-quote-only quoting, no comment character) fit CSV with free text.
storm <- read.csv("stormData.csv.bz2", header = TRUE, stringsAsFactors = FALSE)

# Translate a damage magnitude code into a numeric multiplier.
# "K"/"M"/"B" (any case) mean thousands/millions/billions; every other code,
# including blank or NA, leaves the recorded amount unscaled.
damage_multiplier <- function(exp_code) {
  known <- c(k = 1e3, m = 1e6, b = 1e9)
  multiplier <- unname(known[tolower(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# PROPDMG / CROPDMG hold only the mantissa of each estimate; summing them
# without their magnitude codes would count 2.5K and 2.5B as the same amount.
# NOTE(review): assumes the raw file has PROPDMGEXP / CROPDMGEXP columns as
# described in the NOAA Storm Data documentation -- confirm with names(storm).
storm$PROPDMG <- storm$PROPDMG * damage_multiplier(storm$PROPDMGEXP)
storm$CROPDMG <- storm$CROPDMG * damage_multiplier(storm$CROPDMGEXP)
# NOTE(review): any previously rendered economic-damage table or plot in this
# report was produced from unscaled values and must be regenerated.

# Keep only the event type and the four impact measures used in the analysis.
stormReduce <- subset(storm, select = c(EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG))
# Drop events that caused no recorded harm or damage at all.
stormReduce <- subset(stormReduce, PROPDMG != 0 | CROPDMG != 0 | FATALITIES != 0 | INJURIES != 0)
library(stringr)
# Normalise event labels so that case and surrounding whitespace variants of
# the same label are grouped together.
stormReduce$EVTYPE <- tolower(str_trim(stormReduce$EVTYPE))
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total fatalities and injuries per event type.
storm_HealthDamage <- stormReduce %>%
  group_by(EVTYPE) %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES))
# Combined casualty count (fatalities + injuries) used to rank event types.
storm_HealthDamage$DAMAGE <- storm_HealthDamage$FATALITIES + storm_HealthDamage$INJURIES
# Keep the ten event types with the highest combined count.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
top10HealthDamage <- head(
  storm_HealthDamage[order(storm_HealthDamage$DAMAGE, decreasing = TRUE), ],
  10
)
# Turn EVTYPE into a factor whose levels are ordered by DAMAGE, so that plots
# show the event types in ranked order rather than alphabetically.
top10HealthDamage$EVTYPE <- reorder(
  as.factor(top10HealthDamage$EVTYPE),
  top10HealthDamage$DAMAGE,
  sum
)
top10HealthDamage
## Source: local data frame [10 x 4]
## 
##               EVTYPE FATALITIES INJURIES DAMAGE
## 1            tornado       5633    91346  96979
## 2     excessive heat       1903     6525   8428
## 3          tstm wind        504     6957   7461
## 4              flood        470     6789   7259
## 5          lightning        816     5230   6046
## 6               heat        937     2100   3037
## 7        flash flood        978     1777   2755
## 8          ice storm         89     1975   2064
## 9  thunderstorm wind        133     1488   1621
## 10      winter storm        206     1321   1527
# Total property and crop damage per event type, summing the PROPDMG and
# CROPDMG values as they are stored in stormReduce.
storm_EcoDamage <- stormReduce %>%
  group_by(EVTYPE) %>%
  summarise(PROPDMG = sum(PROPDMG), CROPDMG = sum(CROPDMG))
# Combined damage (property + crop) used to rank event types.
storm_EcoDamage$DAMAGE <- storm_EcoDamage$PROPDMG + storm_EcoDamage$CROPDMG
# Keep the ten event types with the highest combined damage.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
top10EcoDamage <- head(
  storm_EcoDamage[order(storm_EcoDamage$DAMAGE, decreasing = TRUE), ],
  10
)
# Turn EVTYPE into a factor whose levels are ordered by DAMAGE, so that plots
# show the event types in ranked order rather than alphabetically.
top10EcoDamage$EVTYPE <- reorder(
  factor(top10EcoDamage$EVTYPE),
  top10EcoDamage$DAMAGE,
  sum
)
top10EcoDamage
## Source: local data frame [10 x 4]
## 
##                EVTYPE   PROPDMG   CROPDMG    DAMAGE
## 1             tornado 3212258.2 100018.52 3312276.7
## 2         flash flood 1420174.6 179200.46 1599375.1
## 3           tstm wind 1336103.6 109202.60 1445306.2
## 4                hail  688693.4 579596.28 1268289.7
## 5               flood  899938.5 168037.88 1067976.4
## 6   thunderstorm wind  876844.2  66791.45  943635.6
## 7           lightning  603351.8   3580.61  606932.4
## 8  thunderstorm winds  446293.2  18684.93  464978.1
## 9           high wind  324731.6  17283.21  342014.8
## 10       winter storm  132720.6   1978.99  134699.6
library(reshape2)
# Reshape both top-10 tables to long format: one row per event type and
# measure. The combined DAMAGE column is left out; it was only for ranking.
top10HealthDamage <- melt(
  top10HealthDamage[, c("EVTYPE", "FATALITIES", "INJURIES")],
  id.vars = "EVTYPE",
  variable.name = "DAMAGE_TYPE",
  value.name = "DAMAGE_COUNT"
)
top10EcoDamage <- melt(
  top10EcoDamage[, c("EVTYPE", "PROPDMG", "CROPDMG")],
  id.vars = "EVTYPE",
  variable.name = "DAMAGE_TYPE",
  value.name = "DAMAGE_COUNT"
)
# Replace the terse column-derived names with readable legend labels.
top10EcoDamage$DAMAGE_TYPE <- factor(
  top10EcoDamage$DAMAGE_TYPE,
  labels = c("PROPERTY_DAMAGE", "CROP_DAMAGE")
)

Result

Top 10 Harmful Events to Population Health

library(ggplot2)
# Horizontal bar chart with one bar per damage type, side by side, for each
# of the top-10 event types.
ggplot(
  top10HealthDamage,
  aes(x = EVTYPE, y = DAMAGE_COUNT, fill = DAMAGE_TYPE)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip()

Top 10 Harmful Events to Economy

# Horizontal bar chart with one bar per damage type, side by side, for each
# of the top-10 event types.
ggplot(
  top10EcoDamage,
  aes(x = EVTYPE, y = DAMAGE_COUNT, fill = DAMAGE_TYPE)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip()