In the last 60 years there have been countless natural disasters that have affected the lives of American citizens to a greater or lesser degree. This analysis will classify events by type and show which ones are the most dangerous. Natural disasters may affect the country in several ways, and it is considered here that population health and the economy are two of the most important factors to take into account when preparing for an event, so this analysis will focus mainly on those two factors. This analysis will use the storm database from the U.S. National Oceanic and Atmospheric Administration to summarize injuries, fatalities, property and crop damage, and to classify events by significance and impact on civilians and the economy.

Data Processing

The information used in this report was obtained from the U.S. National Oceanic and Atmospheric Administration’s storm database.

Get the data and load it

# setInternet2() exists only on Windows builds of R and is unnecessary from
# R 3.3.0 on, so it is called only where it is both available and needed.
# An unconditional call stops the script on every other platform.
if (.Platform$OS.type == "windows" && getRversion() < "3.3.0") {
  setInternet2(use = TRUE)
}
# Download data from NOAA, skipping the transfer when the archive is already
# present. mode = "wb" keeps the compressed file byte-exact on Windows.
if (!file.exists("StormData.csv.bz2")) {
  download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                "StormData.csv.bz2", mode = "wb")
}
# Load data. read.csv() uses CSV-appropriate defaults (comma separator, header
# row, only double quotes delimit strings, '#' is not a comment character,
# short rows are filled), whereas read.table() defaults treat apostrophes as
# quotes and '#' as the start of a comment. Empty fields are read as NA.
storms <- read.csv(bzfile("StormData.csv.bz2"), na.strings = "",
                   check.names = TRUE, blank.lines.skip = TRUE)

Packages and options used

# dplyr: arrange(), desc(), mutate(), filter() and select() used in the processing steps
library(dplyr)
# reshape2: melt() used to reshape the top-10 tables into long form for plotting
library(reshape2)
# ggplot2: plotting functions used for the two figures
library(ggplot2)
#Disable scientific notation
# (a large scipen penalty makes R prefer fixed over exponential notation when printing numbers)
options(scipen=999)

Format column names

# Rename the first column to "STATE.ID", then normalise every header in one
# pass: make.names(allow_ = FALSE) replaces underscores with dots and
# tolower() lower-cases the result (e.g. BGN_DATE becomes bgn.date).
names(storms) <- tolower(
  make.names(replace(names(storms), 1, "STATE.ID"), allow_ = FALSE)
)

Format dates

# Parse the event start date as month/day/year. as.Date() reads only the part
# of each string described by the format, so anything after the year is ignored.
storms[["bgn.date"]] <- as.Date(as.character(storms[["bgn.date"]]),
                                format = "%m/%d/%Y")

Rename and merge event types

# Collapse the free-text event types into a small set of categories.
# Each rule maps "any label containing one of these whole words" to a single
# category name. Rules are applied strictly in the order listed, and a label
# that has been rewritten no longer contains its original words, so the first
# rule that matches decides the category (e.g. a label mentioning both
# thunderstorm and wind becomes THUNDERSTORMS, not WIND).
storms$evtype <- local({
  # name = replacement category, value = case-insensitive pattern
  merge_rules <- c(
    HEAT          = ".*\\b(heat)\\b.*",
    THUNDERSTORMS = ".*\\b(thunderstorm|thunderstorms|tstm|lightning)\\b.*",
    SNOW          = ".*\\b(snow|snowfall)\\b.*",
    RAIN          = ".*\\b(rain|rains)\\b.*",
    HURRICANES    = ".*\\b(hurricane|hurricanes)\\b.*",
    TORNADOS      = ".*\\b(tornado|tornados|tornadoes)\\b.*",
    COLD          = ".*\\b(cold)\\b.*",
    WIND          = ".*\\b(wind|winds)\\b.*",
    FLOODS        = ".*\\b(flood|floods|flooding)\\b.*",
    VOLCANOES     = ".*\\b(volcano|volcanoes|volcanic|lava)\\b.*",
    STORMS        = ".*\\b(storm|storms)\\b.*",
    CURRENTS      = ".*\\b(current|currents)\\b.*",
    AVALANCHES    = ".*\\b(avalanche|avalanches)\\b.*",
    HAIL          = ".*\\b(hail)\\b.*",
    FIRE          = ".*\\b(fire|fires|wildfire|wildfires)\\b.*"
  )

  labels <- storms$evtype
  for (category in names(merge_rules)) {
    labels <- gsub(merge_rules[[category]], category, labels,
                   ignore.case = TRUE)
  }
  # Store the merged labels as a factor
  factor(labels)
})

Aggregate fatalities and injuries by event type

# Sum fatalities and injuries per event type, rank by fatalities (injuries
# break ties), and add a combined casualty column.
fatals <- aggregate(cbind(fatalities, injuries) ~ evtype,
                    data = storms, FUN = sum, na.rm = TRUE) %>%
  arrange(desc(fatalities), desc(injuries)) %>%
  mutate(total = fatalities + injuries)

# Take the ten highest-ranked event types and reshape them to long form
# (one row per event type and measure), ordered by measure and then by count.
meltfatals <- fatals %>%
  head(10) %>%
  melt(id.vars = "evtype", measure.vars = c("fatalities", "injuries")) %>%
  arrange(desc(variable), desc(value))

Process data to calculate economic damage

# Keep only the events that reported some property or crop damage
dmg <- filter(storms, propdmg != 0 | cropdmg != 0)
# Narrow to the columns needed for the damage calculation. This must read from
# dmg, not storms: selecting from storms again would discard the filter above.
dmg <- select(dmg, evtype, propdmg, propdmgexp, cropdmg, cropdmgexp)
# Filter out incomplete economic damage info: keep a row only when at least one
# of its magnitude codes is K, M or B
dmg <- filter(dmg, (propdmgexp %in% c("K", "M", "B")) | (cropdmgexp %in% c("K", "M", "B")))

Calculate economic damage by event type

# Convert the reported damage figures to dollars using their magnitude codes:
# K = thousands, M = millions, B = billions. Any other or missing code, and any
# missing amount, counts as 0 cost.
dmg <- local({
  magnitude <- c(K = 1000, M = 1000000, B = 1000000000)

  # Dollar value of `amount` scaled by `code`; unrecognised combinations give 0
  to_dollars <- function(amount, code) {
    dollars <- amount * unname(magnitude[as.character(code)])
    dollars[is.na(dollars)] <- 0
    dollars
  }

  mutate(dmg,
         properties = to_dollars(propdmg, propdmgexp),
         crops = to_dollars(cropdmg, cropdmgexp))
})

Aggregate material damage

# Sum property and crop damage (in dollars) per event type, add the combined
# total, and sort with the costliest event types first.
econo <- aggregate(cbind(properties, crops) ~ evtype,
                   data = dmg, FUN = sum, na.rm = TRUE) %>%
  mutate(total = properties + crops) %>%
  arrange(desc(total))

# Long form of the ten costliest event types, one row per event type and
# damage kind.
economelt <- econo %>%
  head(10) %>%
  melt(id.vars = "evtype", measure.vars = c("properties", "crops"))
# Express the amounts in millions of dollars instead of dollars
economelt[["value"]] <- economelt[["value"]] / 1e6

Results

Injuries and Deaths

Classification of the relevance of natural events with respect to population health:

# Bar chart of fatalities and injuries for the ten selected event types.
# Bars are ordered from the largest to the smallest value and coloured by
# measure; x labels are rotated so the event names stay readable.
ggplot(data = meltfatals,
       mapping = aes(x = reorder(evtype, -value), y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(breaks = seq(from = 0, to = 100000, by = 10000)) +
  labs(title = "Figure 1: Civil Impact", x = "Event", y = "Civilians impacted") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Figure 1. Top 10 natural events causing the largest number of deaths and injuries from 1960 to 2011

Economic Impact

Classification of the relevance of natural events with respect to economic consequences:

# Bar chart of property and crop damage (in millions of dollars) for the ten
# costliest event types. Bars are ordered from the largest to the smallest
# value and coloured by damage kind; x labels are rotated for readability.
ggplot(data = economelt,
       mapping = aes(x = reorder(evtype, -value), y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(breaks = seq(from = 0, to = 200000, by = 20000)) +
  labs(title = "Figure 2: Economic Damage", x = "Event", y = "Damages (Million $)") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Figure 2. Top 10 natural events causing the biggest impact in terms of property and crop damage from 1960 to 2011

Summary

In summary, we can see Tornados, Floods, Thunderstorms, Hurricanes and Storms as some of the most destructive and deadliest events in nature.