Reproducible Research: Peer Assessment 2. NOAA. The economic and health impact caused by major storms and weather events

Abstract

The use of metrics for the economic and helth implications of major storms and weather events is crucial.In this study we analize data from the U.S. National Oceanic and Atmospheric Administration's (NOAA) to find the major types of disaster with the highest economic and health impact.

Loading and preprocessing the data

Reading the data

We first read the data inlcuded in the zip file. The data is in csv format

library(ggplot2)
library(stringdist)

## Loading required package: parallel


setInternet2(TRUE)
f <- tempfile()
download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", 
    f)
data <- read.csv(bzfile(f))

Preprocessing the data

We used the categories in NOAA directives (http://www.nws.noaa.gov/directives/.) to transform the 985 event types (EVTYPE) to a more comprehensible set of 48 categories

categories <- c("astronomical low tide", "avalanche", "blizzard", "coastal flood", 
    "cold/wind chill", "debris flow", "dense fog", "dense smoke", "drought", 
    "dust devil", "dust storm", "excessive heat", "extreme cold/wind chill", 
    "flash flood", "flood", "frost/freeze", "funnel cloud", "freezing fog", 
    "hail", "heat", "heavy rain", "heavy snow", "high surf", "high wind", "hurricane (typhoon)", 
    "ice storm", "lake-effect snow", "lakeshore flood", "lightning", "marine hail", 
    "marine high wind", "marine strong wind", "marine thunderstorm wind", "rip current", 
    "seiche", "sleet", "storm surge/tide", "strong wind", "thunderstorm wind", 
    "tornado", "tropical depression", "tropical storm", "tsunami", "volcanic ash", 
    "waterspout", "wildfire", "winter storm", "winter weather")

The types of disaster (EVTYPE) need to be cleaned. We subset only the top types of disaster where fatalities>100 or injuries>100

fatalities <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)

topfatal <- fatalities[fatalities$FATALITIES > 100, ]
# topfatal$FATALITIES
topinjuries <- injuries[injuries$INJURIES > 100, ]

topfatal <- topfatal[order(topfatal$FATALITIES, decreasing = TRUE), ]
topfatal$EVTYPE <- tolower(as.character(topfatal$EVTYPE))

topinjuries <- topinjuries[order(topinjuries$INJURIES, decreasing = TRUE), ]
topinjuries$EVTYPE <- tolower(as.character(topinjuries$EVTYPE))

We found three types of disaster without clear correspondence, we use the following values from NOAAA directives

topinjuries$EVTYPE[topinjuries$EVTYPE == "tstm wind"] <- "thunderstorm wind"
topinjuries$EVTYPE[topinjuries$EVTYPE == "fog"] <- "dense fog"
topinjuries$EVTYPE[topinjuries$EVTYPE == "glaze"] <- "ice storm"

types <- unique(c(topfatal$EVTYPE, topinjuries$EVTYPE))

Then we perform a substitution of the values of EVTYPE in stormdata with those sugested in NOAA directives. We used two different methods for string similiraty in order to have complementary results.

correspond <- data.frame(original = types, recommended = NA)
for (i in 1:length(types)) {
    if (length(grep(types[i], categories)) == TRUE) {
        correspond$recommended[i] <- grep(types[i], categories, perl = TRUE, 
            value = TRUE)
    } else {
        correspond$recommended[i] <- categories[amatch(types[i], categories, 
            maxDist = ceiling(nchar(types[i])/2))]
    }

}

for (i in 1:nrow(topfatal)) {
    topfatal$EVTYPE[i] <- correspond$recommended[which(topfatal$EVTYPE[i] == 
        correspond$original)]
}
for (i in 1:nrow(topinjuries)) {
    topinjuries$EVTYPE[i] <- correspond$recommended[which(topinjuries$EVTYPE[i] == 
        correspond$original)]
}

After we aggregate EVTYPE to form our two final sets

aggfatalities <- aggregate(FATALITIES ~ EVTYPE, data = topfatal, FUN = sum)
aggfatalities$type <- "fatal"
agginjuries <- aggregate(INJURIES ~ EVTYPE, data = topinjuries, FUN = sum)
agginjuries$type <- "injury"

events <- merge(aggfatalities, agginjuries, all.x = T, all.y = T)
events[is.na(events)] <- 0
events$incidents <- events$FATALITIES + events$INJURIES
events <- events[, c(1, 2, 5)]

Economic impact

We transform all the values in PROPDMGEXP and CROPDMGEXP, if we are not sure about the meaning of the symbol we assume is a value of one in order to no perturb the original value in PROPDMG and CROPDMG

data2 <- data
data2$PROPDMGEXP <- as.character(data2$PROPDMGEXP)
data2$CROPDMGEXP <- as.character(data2$CROPDMGEXP)
data2$PROPDMGEXP[data2$PROPDMGEXP == "k" | data2$PROPDMGEXP == "K"] <- 1000
data2$PROPDMGEXP[data2$PROPDMGEXP == "m" | data2$PROPDMGEXP == "M"] <- 1e+06
data2$PROPDMGEXP[data2$PROPDMGEXP == "b" | data2$PROPDMGEXP == "B"] <- 1e+09
data2$PROPDMGEXP <- as.numeric(data2$PROPDMGEXP)

## Warning: NAs introduced by coercion

data2$PROPDMGEXP[is.na(data2$PROPDMGEXP) | data2$PROPDMGEXP == 0] <- 1
data2$CROPDMGEXP[data2$CROPDMGEXP == "k" | data2$CROPDMGEXP == "K"] <- 1000
data2$CROPDMGEXP[data2$CROPDMGEXP == "m" | data2$CROPDMGEXP == "M"] <- 1e+06
data2$CROPDMGEXP[data2$CROPDMGEXP == "b" | data2$CROPDMGEXP == "B"] <- 1e+09
data2$CROPDMGEXP <- as.numeric(data2$CROPDMGEXP)

## Warning: NAs introduced by coercion

data2$CROPDMGEXP[is.na(data2$CROPDMGEXP) | data2$CROPDMGEXP == 0] <- 1

We updated PROPDMG and CROPDMG multiplying by their corresponding PROPDMG and CROPDMG

data2$PROPDMG <- data2$PROPDMG * data2$PROPDMGEXP
data2$CROPDMG <- data2$CROPDMG * data2$CROPDMGEXP

We join the two cost damages (PROPDMG and CROPDMG) to have a single metric of damage.

data2$damage <- data2$CROPDMG + data2$PROPDMG

Aggregation of the data

damages <- aggregate(damage ~ EVTYPE, data = data2, FUN = sum)

Results

We plotted only the top 10 events with the highest number of incidents

topevents <- head(events[order(events$incidents, decreasing = TRUE), ], 10)

p <- qplot(factor(EVTYPE), data = topevents, geom = "bar", fill = type, weight = incidents, 
    position = "dodge", xlab = "Disaster type", ylab = "Individuals", main = "Top 10 events with the greatest health impact (fatalities and injuries)")
p + theme(axis.text.x = element_text(angle = -90), plot.title = element_text(face = "bold"))

plot of chunk unnamed-chunk-11

Figure 1(caption). Top ten disaster events with highest health impact divided in a fatal or injury outcome.

We plot the top 20 events with greatest economic consequences

topdamages <- head(damages[order(damages$damage, decreasing = TRUE), ], 20)
topdamages$damage <- topdamages$damage/1e+06
q <- qplot(x = EVTYPE, y = damage, data = topdamages, geom = "bar", stat = "identity", 
    xlab = "Disaster type", ylab = "damage Millons Dllrs", main = "Top 20 events with the greatest economic impact")
q + theme(axis.text.x = element_text(angle = -90), plot.title = element_text(face = "bold"))

plot of chunk unnamed-chunk-12

Figure 2(caption). Top twenty disaster events with highest economic impact.