This report studies the impact of storm-related events in the United States from 1955 to 2011. The events are assessed in terms of their impact on population health and on the economy.
The following questions are addressed in this study.
This study shows that:
The data in this study were collected by the U.S. National Oceanic and Atmospheric Administration (NOAA). The NOAA storm database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The NOAA storm data in this study is stored in a compressed comma-separated-value file. The data is loaded into R using the read.table() command.
# Download the compressed NOAA storm data set once and cache it in the working
# directory; later runs reuse the cached copy instead of downloading again.
destfile <- "./repdata-data-StormData.csv.bz2"
if (!file.exists(destfile)) {
  URL <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # No empty placeholder is created before the download, and a partial file
  # left behind by a failed download is removed. Otherwise the next run would
  # find the file, skip the download, and try to read an empty/corrupt archive.
  # mode = "wb" because the bzip2 archive is binary.
  status <- tryCatch(
    download.file(URL, destfile, mode = "wb"),
    error = function(e) 1L
  )
  if (status != 0L) {
    unlink(destfile)
    stop("Could not download the storm data from ", URL, call. = FALSE)
  }
}
# The file is comma-separated with double-quoted text fields. Restrict quoting
# to '"' and disable comment handling so that apostrophes and '#' characters
# in free-text fields cannot be interpreted as quotes or comments.
stormData <- read.table(destfile, header = TRUE, sep = ",",
                        quote = "\"", comment.char = "")
# Transformation of Data
# The values in PROPDMG and CROPDMG columns should be multiplied by a multiplier based on the
# values in PROPDMGEXP, and CROPDMGEXP columns respectively. The multiplier codes are defined
# below:
#
# PROPDMGEXP, and CROPDMGEXP column codes
# ------------------------------------------
# H,h = hundreds = 100
# K,k = kilos = thousands = 1,000
# M,m = millions = 1,000,000
# B,b = billions = 1,000,000,000
# (+) = 1
# (-) = 0
# (?) = 0
# blank/empty character = 0
# numeric 0..8 = 10
# ------------------------------------------

# Translate a vector of exponent codes into numeric multipliers using the table
# above.
#
# expCodes: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length. Every code that is not listed
# in the table -- including "-", "?", blank and NA -- maps to 0.
damageMultiplier <- function(expCodes) {
  # Letter codes are case-insensitive; digits and symbols are unaffected.
  codes <- toupper(as.character(expCodes))
  # Start from 0 so that unlisted and missing codes need no special handling.
  multiplier <- rep(0.0, length(codes))
  # %in% never yields NA, so missing codes cannot break the assignments.
  multiplier[codes %in% "+"] <- 1.0
  multiplier[codes %in% as.character(0:8)] <- 10.0
  multiplier[codes %in% "H"] <- 100.0
  multiplier[codes %in% "K"] <- 1000.0
  multiplier[codes %in% "M"] <- 1000000.0
  multiplier[codes %in% "B"] <- 1000000000.0
  multiplier
}

# Adding a column as multiplier to PROPDMG column and populating it based on the PROPDMGEXP
stormData$PROPDMG_MULTIPLIER <- damageMultiplier(stormData$PROPDMGEXP)
# Adding a column as multiplier to CROPDMG column and populating it based on the CROPDMGEXP
stormData$CROPDMG_MULTIPLIER <- damageMultiplier(stormData$CROPDMGEXP)
Fatalities, injuries, property damage, and crop damage caused by storm events are studied in this section. The totals for each event type are computed by the R code below.
library(ggplot2)
library(dplyr)
library(gridExtra)
# Totals per event type (EVTYPE) for each impact measure. Every result has one
# row per event type with its sum in `total`, sorted from largest to smallest.
stormByEvent <- group_by(stormData, EVTYPE)

# Population health: fatalities
eventFatalities <- arrange(
  summarize(stormByEvent, total = sum(FATALITIES)),
  desc(total)
)

# Population health: injuries
eventInjuries <- arrange(
  summarize(stormByEvent, total = sum(INJURIES)),
  desc(total)
)

# Economy: property damage, scaled by its exponent multiplier
eventPropertyDamage <- arrange(
  summarize(stormByEvent, total = sum(PROPDMG * PROPDMG_MULTIPLIER)),
  desc(total)
)

# Economy: crop damage, scaled by its exponent multiplier
eventCropDamage <- arrange(
  summarize(stormByEvent, total = sum(CROPDMG * CROPDMG_MULTIPLIER)),
  desc(total)
)
Bar plots of the ten event types with the highest fatalities, injuries, property damage, and crop damage are prepared in the following R code block.
# Build a bar chart of the first ten rows of a per-event summary table.
#
# eventTotals: table with EVTYPE and total columns, already sorted by total in
#              descending order.
# yLabel:      y-axis label.
# divisor:     value the totals are divided by before plotting (1e9 for the
#              damage plots, which are labelled in $ Billion).
plotTopEvents <- function(eventTotals, yLabel, divisor = 1) {
  topTen <- eventTotals[1:10, ]
  # reorder() on -total keeps the bars in descending order along the x axis.
  ggplot(topTen, aes(reorder(EVTYPE, -total), total / divisor)) +
    geom_bar(stat = "identity", fill = "blue") +
    theme_bw() +
    labs(x = "Event", y = yLabel) +
    theme(axis.text.x = element_text(angle = 90))
}

plotFatalities <- plotTopEvents(eventFatalities, "Fatalities")
plotInjuries <- plotTopEvents(eventInjuries, "Injuries")
plotProperty <- plotTopEvents(eventPropertyDamage, "Property Damage - $ Billion", 1e9)
plotCrop <- plotTopEvents(eventCropDamage, "Crop Damage - $ Billion", 1e9)

# Population health impact: fatalities and injuries side by side
grid.arrange(plotFatalities, plotInjuries, ncol = 2)
# Economic impact: property and crop damage side by side
grid.arrange(plotProperty, plotCrop, ncol = 2)
Based on the above graphs we conclude: