Synopsis

The basic goal of this study is to explore what problems storms and other severe weather events can cause for communities and municipalities. It is based on the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm data which includes information about characteristics of major storms and weather events in the United States, when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

We found that tornadoes and excessive heat are the most harmful events with respect to population health, while floods and hurricanes/typhoons have the greatest economic consequences.

# load libraries
library(knitr)
## Warning: package 'knitr' was built under R version 3.2.5
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
library(plyr)

Data Processing

# Load the raw storm data from the bzip2-compressed CSV file.
# NOTE: the path is machine-specific; adjust storm_data_path to wherever the
# archive is stored locally.
storm_data_path <- "~/datasciencecoursera/RepData_PeerAssessment2/stormData.csv.bz2"
# bzfile() opens the archive as a connection, so read.csv() parses it without a
# separate decompression step. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
data_raw <- read.csv(bzfile(storm_data_path), sep = ",", header = TRUE)
# Keep only the columns used by the analysis: event type, fatalities, injuries,
# and the property/crop damage amounts together with their exponent codes.
data_sub <- data_raw[, c("EVTYPE", "FATALITIES", "INJURIES",
                         "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Multiply damage amounts by the magnitude given by their exponent code.
#
# amount:   numeric vector of damage amounts.
# exponent: vector of exponent codes, same length as amount (character or
#           factor). H/h = 10^2, K/k = 10^3, M/m = 10^6, B/b = 10^9.
# Returns amount scaled by the matching multiplier; entries whose code is not
# one of the four letters above are returned unchanged.
scale_by_exponent <- function(amount, exponent) {
  multipliers <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
  # match() yields NA for any code that is not H, K, M or B (case-insensitive).
  idx <- match(toupper(as.character(exponent)), names(multipliers))
  scale <- unname(multipliers[idx])
  # Unrecognised codes leave the amount as it is.
  scale[is.na(scale)] <- 1
  amount * scale
}

# Convert property and crop damage to plain dollar amounts.
data_sub$PROPDMG <- scale_by_exponent(data_sub$PROPDMG, data_sub$PROPDMGEXP)
data_sub$CROPDMG <- scale_by_exponent(data_sub$CROPDMG, data_sub$CROPDMGEXP)

Results

The data analysis addresses the following questions:

  1. Across the United States, which types of events are most harmful with respect to population health?

The figures below show that tornadoes and excessive heat are the most harmful events for population health, based on the number of fatalities and injuries.

# Total number of fatalities for each event type.
fatal_per_event <- aggregate(FATALITIES ~ EVTYPE, data = data_sub, sum)
# Keep only the 10 event types with the highest number of fatalities.
max_fatal_per_event <- head(fatal_per_event[order(fatal_per_event$FATALITIES, decreasing = TRUE), ], 10)
# Bar chart of fatalities for those 10 event types. The counts are already
# summed, so stat = "identity" plots them as-is. The x-axis labels are rotated
# so the long event names stay readable.
ggplot(max_fatal_per_event, aes(x = EVTYPE, y = FATALITIES)) +
    geom_bar(stat = "identity", fill = "blue") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    xlab("Event Type") + ylab("Fatalities") + ggtitle("Fatalities by Top 10 Weather Events")

# Total number of injuries for each event type.
inj_per_event <- aggregate(INJURIES ~ EVTYPE, data = data_sub, FUN = sum)
# Row order from most to fewest injuries; keep the first 10 event types.
inj_rank <- order(inj_per_event$INJURIES, decreasing = TRUE)
max_inj_per_event <- head(inj_per_event[inj_rank, ], n = 10)
# Bar chart of injuries for those 10 event types. The counts are already
# summed, so stat = "identity" plots them as-is.
ggplot(data = max_inj_per_event, mapping = aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Event Type", y = "Injuries",
       title = "Injuries by Top 10 Weather Events") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

  2. Across the United States, which types of events have the greatest economic consequences?

Most economic damage is caused by floods, followed by hurricanes/typhoons and then tornadoes, as shown in the figure below.

# Sum property and crop damage together for each event type; the summed column
# is named TotalDamage.
damage_per_type <- setNames(
  aggregate(PROPDMG + CROPDMG ~ EVTYPE, data = data_sub, FUN = sum),
  c("EVTYPE", "TotalDamage")
)
# Row order from highest to lowest total damage; keep the first 10 event types.
damage_rank <- order(damage_per_type$TotalDamage, decreasing = TRUE)
max_damage_per_type <- head(damage_per_type[damage_rank, ], n = 10)
# Bar chart of total damage for those 10 event types. The totals are already
# summed, so stat = "identity" plots them as-is.
ggplot(data = max_damage_per_type, mapping = aes(x = EVTYPE, y = TotalDamage)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(x = "Event Type", y = "Damages, $",
       title = "Property and Crop Damages by Top 10 Weather Events") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))