The use of metrics for the economic and helth implications of major storms and weather events is crucial.In this study we analize data from the U.S. National Oceanic and Atmospheric Administration's (NOAA) to find the major types of disaster with the highest economic and health impact.
We first read the data inlcuded in the zip file. The data is in csv format
library(ggplot2)
library(stringdist)
## Loading required package: parallel
setInternet2(TRUE)
f <- tempfile()
download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
f)
data <- read.csv(bzfile(f))
We used the categories in NOAA directives (http://www.nws.noaa.gov/directives/.) to transform the 985 event types (EVTYPE) to a more comprehensible set of 48 categories
categories <- c("astronomical low tide", "avalanche", "blizzard", "coastal flood",
"cold/wind chill", "debris flow", "dense fog", "dense smoke", "drought",
"dust devil", "dust storm", "excessive heat", "extreme cold/wind chill",
"flash flood", "flood", "frost/freeze", "funnel cloud", "freezing fog",
"hail", "heat", "heavy rain", "heavy snow", "high surf", "high wind", "hurricane (typhoon)",
"ice storm", "lake-effect snow", "lakeshore flood", "lightning", "marine hail",
"marine high wind", "marine strong wind", "marine thunderstorm wind", "rip current",
"seiche", "sleet", "storm surge/tide", "strong wind", "thunderstorm wind",
"tornado", "tropical depression", "tropical storm", "tsunami", "volcanic ash",
"waterspout", "wildfire", "winter storm", "winter weather")
The types of disaster (EVTYPE) need to be cleaned. We subset only the top types of disaster where fatalities>100 or injuries>100
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
topfatal <- fatalities[fatalities$FATALITIES > 100, ]
# topfatal$FATALITIES
topinjuries <- injuries[injuries$INJURIES > 100, ]
topfatal <- topfatal[order(topfatal$FATALITIES, decreasing = TRUE), ]
topfatal$EVTYPE <- tolower(as.character(topfatal$EVTYPE))
topinjuries <- topinjuries[order(topinjuries$INJURIES, decreasing = TRUE), ]
topinjuries$EVTYPE <- tolower(as.character(topinjuries$EVTYPE))
We found three types of disaster without clear correspondence, we use the following values from NOAAA directives
topinjuries$EVTYPE[topinjuries$EVTYPE == "tstm wind"] <- "thunderstorm wind"
topinjuries$EVTYPE[topinjuries$EVTYPE == "fog"] <- "dense fog"
topinjuries$EVTYPE[topinjuries$EVTYPE == "glaze"] <- "ice storm"
types <- unique(c(topfatal$EVTYPE, topinjuries$EVTYPE))
Then we perform a substitution of the values of EVTYPE in stormdata with those sugested in NOAA directives. We used two different methods for string similiraty in order to have complementary results.
correspond <- data.frame(original = types, recommended = NA)
for (i in 1:length(types)) {
if (length(grep(types[i], categories)) == TRUE) {
correspond$recommended[i] <- grep(types[i], categories, perl = TRUE,
value = TRUE)
} else {
correspond$recommended[i] <- categories[amatch(types[i], categories,
maxDist = ceiling(nchar(types[i])/2))]
}
}
for (i in 1:nrow(topfatal)) {
topfatal$EVTYPE[i] <- correspond$recommended[which(topfatal$EVTYPE[i] ==
correspond$original)]
}
for (i in 1:nrow(topinjuries)) {
topinjuries$EVTYPE[i] <- correspond$recommended[which(topinjuries$EVTYPE[i] ==
correspond$original)]
}
After we aggregate EVTYPE to form our two final sets
aggfatalities <- aggregate(FATALITIES ~ EVTYPE, data = topfatal, FUN = sum)
aggfatalities$type <- "fatal"
agginjuries <- aggregate(INJURIES ~ EVTYPE, data = topinjuries, FUN = sum)
agginjuries$type <- "injury"
events <- merge(aggfatalities, agginjuries, all.x = T, all.y = T)
events[is.na(events)] <- 0
events$incidents <- events$FATALITIES + events$INJURIES
events <- events[, c(1, 2, 5)]
We transform all the values in PROPDMGEXP and CROPDMGEXP, if we are not sure about the meaning of the symbol we assume is a value of one in order to no perturb the original value in PROPDMG and CROPDMG
data2 <- data
data2$PROPDMGEXP <- as.character(data2$PROPDMGEXP)
data2$CROPDMGEXP <- as.character(data2$CROPDMGEXP)
data2$PROPDMGEXP[data2$PROPDMGEXP == "k" | data2$PROPDMGEXP == "K"] <- 1000
data2$PROPDMGEXP[data2$PROPDMGEXP == "m" | data2$PROPDMGEXP == "M"] <- 1e+06
data2$PROPDMGEXP[data2$PROPDMGEXP == "b" | data2$PROPDMGEXP == "B"] <- 1e+09
data2$PROPDMGEXP <- as.numeric(data2$PROPDMGEXP)
## Warning: NAs introduced by coercion
data2$PROPDMGEXP[is.na(data2$PROPDMGEXP) | data2$PROPDMGEXP == 0] <- 1
data2$CROPDMGEXP[data2$CROPDMGEXP == "k" | data2$CROPDMGEXP == "K"] <- 1000
data2$CROPDMGEXP[data2$CROPDMGEXP == "m" | data2$CROPDMGEXP == "M"] <- 1e+06
data2$CROPDMGEXP[data2$CROPDMGEXP == "b" | data2$CROPDMGEXP == "B"] <- 1e+09
data2$CROPDMGEXP <- as.numeric(data2$CROPDMGEXP)
## Warning: NAs introduced by coercion
data2$CROPDMGEXP[is.na(data2$CROPDMGEXP) | data2$CROPDMGEXP == 0] <- 1
We updated PROPDMG and CROPDMG multiplying by their corresponding PROPDMG and CROPDMG
data2$PROPDMG <- data2$PROPDMG * data2$PROPDMGEXP
data2$CROPDMG <- data2$CROPDMG * data2$CROPDMGEXP
We join the two cost damages (PROPDMG and CROPDMG) to have a single metric of damage.
data2$damage <- data2$CROPDMG + data2$PROPDMG
Aggregation of the data
damages <- aggregate(damage ~ EVTYPE, data = data2, FUN = sum)
topevents <- head(events[order(events$incidents, decreasing = TRUE), ], 10)
p <- qplot(factor(EVTYPE), data = topevents, geom = "bar", fill = type, weight = incidents,
position = "dodge", xlab = "Disaster type", ylab = "Individuals", main = "Top 10 events with the greatest health impact (fatalities and injuries)")
p + theme(axis.text.x = element_text(angle = -90), plot.title = element_text(face = "bold"))
Figure 1(caption). Top ten disaster events with highest health impact divided in a fatal or injury outcome.
topdamages <- head(damages[order(damages$damage, decreasing = TRUE), ], 20)
topdamages$damage <- topdamages$damage/1e+06
q <- qplot(x = EVTYPE, y = damage, data = topdamages, geom = "bar", stat = "identity",
xlab = "Disaster type", ylab = "damage Millons Dllrs", main = "Top 20 events with the greatest economic impact")
q + theme(axis.text.x = element_text(angle = -90), plot.title = element_text(face = "bold"))
Figure 2(caption). Top twenty disaster events with highest economic impact.
.