Synopsis:

This report analyses the economic and population health consequences of major storms and weather events in the US. The goal of this research is to explore the NOAA Storm Database and answer two questions: 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? 2. Across the United States, which types of events have the greatest economic consequences? To measure economic consequences, the property and crop damage from each event type was summed (in US dollars). To measure population health consequences, the total numbers of fatalities and injuries for each event type were used (in number of people).


Data processing:

# Load the raw storm data from the bzip2-compressed CSV file.
data <- read.csv("repdata-data-StormData.csv.bz2")
library(dplyr)
library(ggplot2)
# Summarising data by Event type, total fatalities and injuries for Q1.
# The result has one row per EVTYPE with columns, in this order:
#   EVTYPE, fatalities, injuries, fatalities.p, injuries.p
# where the *.p columns are each event type's percentage share of the
# overall total, rounded to two decimals. Later chunks select these
# columns by position, so the order must not change.
# `%>%` replaces the old `%.%` chain operator, which dplyr has removed.
health.data <- data %>%
    group_by(EVTYPE) %>%
    summarise(fatalities = sum(FATALITIES, na.rm = TRUE),
              injuries = sum(INJURIES, na.rm = TRUE)) %>%
    mutate(fatalities.p = round(fatalities / sum(fatalities) * 100, 2),
           injuries.p = round(injuries / sum(injuries) * 100, 2))

# Cleaning and summarizing data for Q2

# Convert damage-exponent codes into numeric powers of ten.
#
# code: vector (character or factor) of exponent codes.
# Returns a numeric vector of the same length:
#   - "H", "K", "M", "B" (any case) -> 2, 3, 6, 9
#   - a string of digits            -> that number, used directly as the power
#   - "", "-", "?", "+"             -> 0 (damage value is taken as-is)
#   - anything else                 -> NA
# Using one helper for both columns keeps the property and crop rules
# identical; previously lower-case "b"/"k" in PROPDMGEXP and "h", "-", "+"
# in CROPDMGEXP were not recognised and turned into NA.
damage_exponent <- function(code) {
    code <- toupper(as.character(code))
    letter.power <- c(H = 2, K = 3, M = 6, B = 9)
    power <- rep(NA_real_, length(code))

    # Only convert pure digit strings, so as.numeric() never has to warn.
    digit.rows <- grepl("^[0-9]+$", code)
    power[digit.rows] <- as.numeric(code[digit.rows])

    letter.rows <- code %in% names(letter.power)
    power[letter.rows] <- unname(letter.power[code[letter.rows]])

    power[code %in% c("", "-", "?", "+")] <- 0
    power
}

# The *EXP columns are overwritten with their numeric power, then combined
# with the damage mantissa to get a dollar amount per record.
data$PROPDMGEXP <- damage_exponent(data$PROPDMGEXP)
data$propertydamage <- data$PROPDMG * 10 ^ data$PROPDMGEXP

data$CROPDMGEXP <- damage_exponent(data$CROPDMGEXP)
data$cropdamage <- data$CROPDMG * 10 ^ data$CROPDMGEXP

data$totaldamage <- data$propertydamage + data$cropdamage

# Total damage per event type, plus each type's percentage share of the
# overall damage (rounded to two decimals). Records whose totaldamage is NA
# are ignored in both sums.
# `%>%` replaces the old `%.%` chain operator, which dplyr has removed.
damage.data <- data %>%
    group_by(EVTYPE) %>%
    summarise(damage = sum(totaldamage, na.rm = TRUE)) %>%
    mutate(damage.p = round(damage / sum(damage, na.rm = TRUE) * 100, 2))

Results:

Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Scatter plot of the ten event types with the most fatalities (ties broken
# by injuries), on log scales for both axes. Built with ggplot() directly
# because qplot() is deprecated in current ggplot2.
top.health <- arrange(health.data, -fatalities, -injuries)[1:10, ]
ggplot(top.health,
       aes(x = log(fatalities), y = log(injuries), color = EVTYPE)) +
    geom_point(alpha = 0.5, size = 5) +
    geom_text(aes(label = EVTYPE), size = 2, hjust = 1, vjust = -1,
              angle = 45) +
    ggtitle("Injuries and fatalities from different events")

plot of chunk healthplot

Top 10 events by number of fatalities:

# Ten event types with the most fatalities, showing the count and its
# percentage share; columns are picked by name rather than by position.
select(arrange(health.data, desc(fatalities)),
       EVTYPE, fatalities, fatalities.p)[1:10, ]
## Source: local data frame [10 x 3]
## 
##            EVTYPE fatalities fatalities.p
## 1         TORNADO       5633        37.19
## 2  EXCESSIVE HEAT       1903        12.57
## 3     FLASH FLOOD        978         6.46
## 4            HEAT        937         6.19
## 5       LIGHTNING        816         5.39
## 6       TSTM WIND        504         3.33
## 7           FLOOD        470         3.10
## 8     RIP CURRENT        368         2.43
## 9       HIGH WIND        248         1.64
## 10      AVALANCHE        224         1.48

EVTYPE - type of event, fatalities - number of fatalities, fatalities.p - percent of fatalities from each type

Top 10 events by number of injuries:

# Ten event types with the most injuries, showing the count and its
# percentage share; columns are picked by name rather than by position.
select(arrange(health.data, desc(injuries)),
       EVTYPE, injuries, injuries.p)[1:10, ]
## Source: local data frame [10 x 3]
## 
##               EVTYPE injuries injuries.p
## 1            TORNADO    91346      65.00
## 2          TSTM WIND     6957       4.95
## 3              FLOOD     6789       4.83
## 4     EXCESSIVE HEAT     6525       4.64
## 5          LIGHTNING     5230       3.72
## 6               HEAT     2100       1.49
## 7          ICE STORM     1975       1.41
## 8        FLASH FLOOD     1777       1.26
## 9  THUNDERSTORM WIND     1488       1.06
## 10              HAIL     1361       0.97

EVTYPE - type of event, injuries - number of injuries, injuries.p - percent of injuries from each type


Q2. Across the United States, which types of events have the greatest economic consequences?

# Ten event types with the largest total damage (all three columns kept:
# EVTYPE, damage, damage.p). Reused below for the printed table.
damage.data2 <- arrange(damage.data, -damage)[1:10, ]

# Bar chart of total damage per event type. The fill legend is suppressed
# with guide = "none" (a logical guide value is deprecated in ggplot2, and
# `F` can be reassigned); each bar is labelled with its event name instead.
ggplot(data = damage.data2, aes(x = EVTYPE, y = damage)) +
    geom_bar(stat = "identity", aes(fill = EVTYPE)) +
    labs(title = "Top 10 events with most damage",
         x = "Events", y = "Damage, USD") +
    scale_fill_hue(guide = "none") +
    geom_text(aes(label = EVTYPE), size = 2.5, hjust = 0, vjust = -1,
              angle = 90)

plot of chunk damageplot

Top 10 events with most damage:

# Show the top-10 damage table built for the plot above.
print(damage.data2)
## Source: local data frame [10 x 3]
## 
##               EVTYPE    damage damage.p
## 1              FLOOD 1.503e+11    31.49
## 2  HURRICANE/TYPHOON 7.191e+10    15.07
## 3            TORNADO 5.736e+10    12.02
## 4        STORM SURGE 4.332e+10     9.08
## 5               HAIL 1.876e+10     3.93
## 6        FLASH FLOOD 1.824e+10     3.82
## 7            DROUGHT 1.502e+10     3.15
## 8          HURRICANE 1.461e+10     3.06
## 9        RIVER FLOOD 1.015e+10     2.13
## 10         ICE STORM 8.967e+09     1.88

EVTYPE - type of event, damage - total amount of damage in USD, damage.p - percent of damage from each type