In this report, we will explore the effect of severe weather events on communities and municipalities. We will examine the economic and social impact of these events using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. We will aim to answer the following questions: (1) which types of events are most harmful with respect to population health, and (2) which types of events cause the most property and crop damage?
We found that tornado-like events are by far the most harmful to a community’s well-being, whereas flooding, followed by tornado-like events, is the most damaging to infrastructure. As for damage to crops, arid temperatures, followed by flooding and then winter-like events, are the most harmful.
Let’s start by first downloading the database:
# Local path of the compressed NOAA storm database.
dest.file <- "data/StormData.csv.bz2"

# download.file() does not create missing directories, so make sure the
# target folder exists before fetching.
if (!dir.exists(dirname(dest.file))) {
  dir.create(dirname(dest.file), recursive = TRUE)
}

# Fetch the archive only when it is not already on disk, so re-running the
# report does not download it again.
if (!file.exists(dest.file)) {
  download.file(url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", method = "curl", destfile = dest.file)
}
We will now read in the database. We will replace any blank fields with NA.
# Read the CSV straight out of the bzip2 archive. Empty fields become NA and
# text columns are kept as character vectors rather than factors.
storm.df <- read.csv(
  file = bzfile(dest.file),
  na.strings = "",
  stringsAsFactors = FALSE,
  header = TRUE
)
The following is a function we will use to collapse the damage cost and damage exponent fields into a single dollar amount.
# Convert a damage figure and its magnitude code into a dollar amount.
#
# Args:
#   cost:     Numeric damage figure(s).
#   exponent: Magnitude code(s): 'H' (hundreds), 'K' (thousands),
#             'M' (millions) or 'B' (billions). Matching is case-sensitive.
#
# Returns:
#   `cost` multiplied by the factor for its code; NA where the code is
#   missing or is not one of the four listed above.
#
# The function is vectorised: it works element-wise on whole columns as well
# as on the single values that mapply() hands it.
createCost <- function(cost, exponent) {
  multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # match() gives NA for missing or unrecognised codes, and indexing with NA
  # gives NA, so those entries come out as NA without special-casing.
  cost * unname(multiplier[match(exponent, names(multiplier))])
}
Before we can begin analysis, we must first clean up the data. The EVTYPE variable was not standardized, so there are a lot of duplicated and overlapping categories. We will first attempt to collapse similar categories into a single category. There isn’t an exact science to this, but I attempted to come up with a parent category for each ‘sibling’ category. Here are the parent categories that I came up with: fire, flood, rain, wind, tornado, hurricane, landevent, winter, hail, water, arid, thunderstorm and volcano, plus a catch-all category, other, for anything that matches none of them.
Additionally, we will convert the date field of when the event occurred into a date object.
library(lubridate)

# Parse the event start timestamp (month/day/year hour:minute:second text).
storm.df$BGN_DATE <- mdy_hms(storm.df$BGN_DATE)

# createCost() matches exponent codes case-sensitively, so normalise them to
# upper case before converting.
storm.df$PROPDMGEXP <- toupper(storm.df$PROPDMGEXP)
storm.df$CROPDMGEXP <- toupper(storm.df$CROPDMGEXP)

# Dollar cost = damage figure scaled by its exponent code (NA when the code
# is missing or unrecognised).
storm.df$PROPDMGEXPCOST <- mapply(createCost, storm.df$PROPDMG,
                                  storm.df$PROPDMGEXP)
storm.df$CROPDMGEXPCOST <- mapply(createCost, storm.df$CROPDMG,
                                  storm.df$CROPDMGEXP)

# Combined casualties per event.
storm.df$HUMANCOST <- storm.df$FATALITIES + storm.df$INJURIES

# Keyword -> parent category, applied top to bottom. A label is replaced by
# the category of the FIRST keyword (regular expression) it contains, so the
# order of this table decides ties between categories.
# Odd spellings such as 'floooding', 'torndao', 'ligntning' and 'lighting'
# are presumably misspellings present in the raw labels - verify against the
# data before removing them.
category.rules <- c(
  "fire"           = "fire",
  "flood"          = "flood",
  "floooding"      = "flood",
  "rain"           = "rain",
  "precipitation"  = "rain",
  # 'whirlwind' contains 'wind', so it must be tested before the wind
  # keywords or it can never reach the tornado category.
  "whirlwind"      = "tornado",
  "wind"           = "wind",
  "wnd"            = "wind",
  "tornado"        = "tornado",
  "funnel"         = "tornado",
  "spout"          = "tornado",
  # NOTE(review): because this runs before the 'hurricane' rule, any label
  # containing both words (e.g. a combined "hurricane/typhoon" label) is
  # filed under 'tornado' - confirm that this is intended.
  "typhoon"        = "tornado",
  "torndao"        = "tornado",
  "gustnado"       = "tornado",
  "hurricane"      = "hurricane",
  "avalanc"        = "landevent",
  "landslide"      = "landevent",
  "mud"            = "landevent",
  "rock slide"     = "landevent",
  "blizzard"       = "winter",
  "ice"            = "winter",
  "cold"           = "winter",
  "freez"          = "winter",
  "sleet"          = "winter",
  "wintry"         = "winter",
  "winter"         = "winter",
  "icy"            = "winter",
  "hail"           = "hail",
  # The arid keywords run before the water keywords so that a label such as
  # "heat wave" is decided by 'heat' rather than by 'wave'.
  "arid"           = "arid",
  "hot"            = "arid",
  "dry"            = "arid",
  "heat"           = "arid",
  "drought"        = "arid",
  "driest"         = "arid",
  "ocean"          = "water",
  "surf"           = "water",
  # Whole word only: a bare 'sea' substring also matches "unseasonably ..."
  # labels, which have nothing to do with the sea.
  "\\bseas?\\b"    = "water",
  "swell"          = "water",
  "tide"           = "water",
  "water"          = "water",
  "wave"           = "water",
  "tsunami"        = "water",
  "thunderstorm"   = "thunderstorm",
  "lightning"      = "thunderstorm",
  "tropical storm" = "thunderstorm",
  "ligntning"      = "thunderstorm",
  "lighting"       = "thunderstorm",
  "volcanic"       = "volcano"
)

event.label <- tolower(storm.df$EVTYPE)
for (keyword in names(category.rules)) {
  # grepl() returns FALSE for NA, so missing labels stay NA.
  hits <- grepl(keyword, event.label, perl = TRUE)
  event.label[hits] <- category.rules[[keyword]]
}

# Any label that still mentions none of the category names becomes 'other'.
known.category <- paste(unique(category.rules), collapse = "|")
unmatched <- !is.na(event.label) & !grepl(known.category, event.label)
event.label[unmatched] <- "other"

storm.df$CAT <- factor(event.label)
In order to answer question 1, that is, “which types of events are most harmful with respect to population health”, we will plot the total number of injuries and fatalities associated with each type of weather event in a bar plot.
library(ggplot2)
library(dplyr)

# Per-category totals: casualties plus property and crop damage in dollars.
# Damage rows whose dollar cost is NA are left out of the two cost sums.
grouped <- summarise(
  group_by(storm.df, CAT),
  human.cost = sum(HUMANCOST),
  property.cost = sum(PROPDMGEXPCOST, na.rm = TRUE),
  crop.cost = sum(CROPDMGEXPCOST, na.rm = TRUE)
)

# Bar chart of combined injuries and fatalities for each event category,
# with the category labels rotated so they do not overlap.
ggplot(data = grouped, mapping = aes(x = CAT, y = human.cost)) +
  geom_col() +
  labs(
    title = 'Effects of Weather Events on Human Well-Being',
    x = 'Weather Event',
    y = 'Injuries and Fatalities'
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
It appears that tornados are by far the most damaging to humans.
Let’s plot property cost damages by weather event.
# Bar chart of total property damage (in dollars) for each event category,
# with the category labels rotated so they do not overlap.
ggplot(data = grouped, mapping = aes(x = CAT, y = property.cost)) +
  geom_col() +
  labs(
    title = 'Property Damage Costs Caused By Weather Events',
    x = 'Weather Event',
    y = 'Cost ($)'
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Bar chart of total crop damage (in dollars) for each event category,
# with the category labels rotated so they do not overlap.
ggplot(data = grouped, mapping = aes(x = CAT, y = crop.cost)) +
  geom_col() +
  labs(
    title = 'Crop Damage Costs Caused By Weather Events',
    x = 'Weather Event',
    y = 'Cost ($)'
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
We found that tornado-like events are by far the most harmful to a community’s well-being, whereas flooding, followed by tornado-like events, is the most damaging to infrastructure. As for damage to crops, arid temperatures, followed by flooding and then winter-like events, are the most harmful.