Harmful Events and Economics

Synopsis

The basic goal of this analysis is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database and help answer the following questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

The NOAA Storm Database contains records of events from 1950 to 2011. Records from more recent years should be more complete.

Data Processing

This section describes (in words and code) how the data were loaded into R and processed for analysis. The analysis starts from the raw CSV file containing the data; no preprocessing is done outside this document. If preprocessing proves time-consuming, the cache = TRUE option can be used for the relevant code chunks.

Loading and preprocessing the data

Load necessary libraries:

library(R.utils)
library(car)
library(ggplot2)

Get data set from the link given in the assignment:

# Fetch and unpack the raw storm data only when no local CSV copy exists,
# then load it into `data`.
if (!file.exists("data.csv")) {
  fileurl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # download.file() returns a non-zero status on failure; stop here instead of
  # letting bunzip2() fail later on a missing or truncated archive.
  status <- download.file(fileurl, destfile = "data.bz2", method = "curl")
  if (status != 0) {
    stop("Download of the storm data failed (status ", status, ")",
         call. = FALSE)
  }
  bunzip2("data.bz2", destname = "data.csv")
}
# Keep text columns as character regardless of the R version's default:
# EVTYPE is overwritten with new group labels later, and assigning a label
# that is not an existing level to a factor produces NA.
data <- read.csv("data.csv", stringsAsFactors = FALSE)

Convert the damage exponent codes into multipliers and calculate the dollar amount of property and crop damage for each record:

# Multiplier for each damage exponent code found in PROPDMGEXP / CROPDMGEXP.
# Digits are read as powers of ten, letters as hundred / thousand / million /
# billion, and the symbols "-", "?" and "+" as "no usable amount" (0).
exp_multipliers <- c(
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000, "5" = 100000,
  "6" = 1000000, "7" = 10000000, "8" = 100000000,
  "H" = 100, "K" = 1000, "M" = 1000000, "B" = 1000000000,
  "-" = 0, "?" = 0, "+" = 0
)

# Translate a vector of exponent codes into numeric multipliers.
#   exp_code: character or factor vector of exponent codes.
# Returns a numeric vector of the same length. Letter codes are matched
# case-insensitively (so "k" is handled like "K"), a blank code means the
# amount is taken at face value (multiplier 1), and any other unknown code
# yields NA.
exp_to_multiplier <- function(exp_code) {
  key <- toupper(as.character(exp_code))
  multiplier <- unname(exp_multipliers[key])
  multiplier[!is.na(key) & key == ""] <- 1
  multiplier
}

# Dollar value of the property and crop damage of every record.
data$PROPDMGDOL <- exp_to_multiplier(data$PROPDMGEXP) * as.numeric(data$PROPDMG)
data$CROPDMGDOL <- exp_to_multiplier(data$CROPDMGEXP) * as.numeric(data$CROPDMG)

Clean up event types by grouping them together

# Collapse the free-text EVTYPE values into a small set of event groups.
#
# Groups are applied in the order listed. Once a record has been relabelled it
# no longer matches the patterns of a later group, so the first matching group
# wins (e.g. "WINTER STORM" lands in the wind/storm group because "storm" is
# tested before "winter").
#
# NOTE(review): the patterns are plain case-insensitive substrings, so short
# ones can over-match (e.g. "lig" also matches "light", "sea" also matches
# "unseasonably") -- confirm against the distinct EVTYPE values.

# Group labels
windGroupLabel <- "Tornado/Wind/Storm"
heatGroupLabel <- "Heat"
lightningGroupLabel <- "Lightning"
floodGroupLabel <- "Flood"
fireGroupLabel <- "Fire"
snowGroupLabel <- "Snow"
rcGroupLabel <- "Rip currents/Surf/Sea"
fogGroupLabel <- "Fog"
coldGroupLabel <- "Cold"
landslideGroupLabel <- "Landslide"

# Ordered table of groups and the substrings that assign a record to them.
event_groups <- list(
  list(label = windGroupLabel,
       patterns = c("torn", "dust", "wind", "storm", "rain", "hurricane",
                    "hail", "precip")),
  list(label = heatGroupLabel, patterns = "heat"),
  list(label = lightningGroupLabel, patterns = "lig"),
  list(label = floodGroupLabel,
       patterns = c("flood", "tsunami", "fld", "drizzle")),
  list(label = fireGroupLabel, patterns = "fire"),
  list(label = snowGroupLabel,
       patterns = c("snow", "avalanche", "blizzard")),
  list(label = rcGroupLabel,
       patterns = c("current", "astro", "surf", "sea", "marine", "water",
                    "high s")),
  list(label = fogGroupLabel, patterns = "fog"),
  list(label = coldGroupLabel,
       patterns = c("cold", "winter", "glaze", "icy", "ice", "frost", "freez",
                    "wint")),
  list(label = landslideGroupLabel, patterns = c("land", "mud"))
)

# Work on a character copy: if EVTYPE was read as a factor, assigning a label
# that is not an existing level would silently turn the record into NA.
evtype <- as.character(data$EVTYPE)
for (group in event_groups) {
  # One alternation per group is equivalent to testing its patterns one by
  # one, because they all assign the same label.
  group_regex <- paste(group$patterns, collapse = "|")
  evtype[grepl(group_regex, evtype, ignore.case = TRUE)] <- group$label
}
data$EVTYPE <- evtype

Compare the damage caused by each type of event

# Total economic cost (property + crop damage, in USD) per event type.
#
# dmgdata:  one row per event type with columns Type and Cost.
# mostcost: the same rows ordered by increasing Cost.
prop_cost <- as.numeric(data$PROPDMGDOL)
crop_cost <- as.numeric(data$CROPDMGDOL)

# A record with a dollar value for only one of the two damage kinds still
# counts; only records with neither value are left out of the totals.
has_cost <- !(is.na(prop_cost) & is.na(crop_cost))
prop_cost[is.na(prop_cost)] <- 0
crop_cost[is.na(crop_cost)] <- 0

cost_by_record <- data.frame(
  Type = data$EVTYPE[has_cost],
  Cost = (prop_cost + crop_cost)[has_cost],
  stringsAsFactors = FALSE
)
dmgdata <- aggregate(Cost ~ Type, data = cost_by_record, FUN = sum)

# Index the column directly instead of attach()/detach(), which puts the data
# frame's columns on the search path for the duration.
mostcost <- dmgdata[order(dmgdata$Cost), ]

Results

Plot the economic cost of each type of event. The costs are shown on a natural-log scale.

# Bar chart of the natural log of the total cost per event type; the bars are
# coloured by the untransformed cost and the x-axis labels are rotated so the
# event type names stay readable.
ggplot(dmgdata, aes(x = Type, y = log(Cost), fill = Cost)) +
  geom_bar(stat = "identity") +
  labs(title = "Most costly disasters", x = "", y = "log(USD) in damage") +
  theme(axis.text.x = element_text(angle = 90))
## Warning: Stacking not well defined when ymin != 0