This report contains an analysis of the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database [1] and tries to answer the following two questions:
The data come as a comma-separated-value (CSV) file compressed with bzip2. The following fields were found to be important for answering the above questions:
# Select only the columns the analysis needs: state, event type, casualty
# counts, and the damage amounts together with their exponent codes.
rcols <- c(
  "STATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
# Read the bzip2-compressed CSV through a bzfile() connection and keep only
# the required columns. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned. Strings stay character (not
# factor) so the exponent codes can be compared as text later on.
storm.data <- read.csv(
  bzfile("repdata-data-StormData.csv.bz2"),
  header = TRUE,
  stringsAsFactors = FALSE
)[, rcols]
# Convert a single damage exponent code into the power of ten it stands for.
#
# u: a length-one value; either a letter code (h/H = 2, k/K = 3, m/M = 6,
#    b/B = 9), one of the symbols '+', '-', '?' (treated as 0), a blank or
#    missing value (treated as 0), or a numeric exponent, which is kept as-is.
# Returns the exponent as an integer. A code that is none of the above yields
# NA (as.integer() emits its usual coercion warning).
unit2num <- function(u) {
  stopifnot(length(u) == 1L)
  # Always dispatch on a trimmed string: a numeric argument would make
  # switch() pick an alternative by position instead of matching by name.
  code <- trimws(as.character(u))
  # Blank or missing exponent means "no scaling".
  if (is.na(code) || !nzchar(code)) {
    return(0L)
  }
  switch(
    code,
    "+" = , "-" = , "?" = 0L,
    "h" = , "H" = 2L,
    "k" = , "K" = 3L,
    "m" = , "M" = 6L,
    "b" = , "B" = 9L,
    # anything else is expected to be a numeric exponent already
    as.integer(code)
  )
}
# Convert a single value to a number, mapping "nothing there" to zero.
#
# x: a length-one value, typically a string holding a number (surrounding
#    whitespace is ignored) or an already numeric value.
# Returns 0 for a missing value or a string that is empty or whitespace
# only; otherwise the result of as.numeric() on the (trimmed) value.
sanitize_nums <- function(x) {
  stopifnot(length(x) == 1L)
  if (is.character(x)) {
    x <- trimws(x)
  }
  # `||` keeps the test scalar; is.na() is checked first so that a missing
  # value never reaches the string comparison.
  if (is.na(x) || (is.character(x) && !nzchar(x))) {
    return(0)
  }
  as.numeric(x)
}
# Element-wise amount * 10^exponent for a pair of equally long columns,
# cleaning each element with sanitize_nums() / unit2num().
# The columns are walked directly rather than through apply() on a data
# frame: apply() first converts the frame to a character matrix, which
# passes the numeric amounts through format() and can round them.
scale_by_exponent <- function(amount, exponent) {
  vapply(
    seq_along(amount),
    function(i) sanitize_nums(amount[[i]]) * 10^unit2num(exponent[[i]]),
    numeric(1)
  )
}
# calculate the actual property damage by multiplying PROPDMG with 10^PROPDMGEXP
storm.data$PROPDMG <- scale_by_exponent(storm.data$PROPDMG, storm.data$PROPDMGEXP)
# calculate the actual crop damage by multiplying CROPDMG with 10^CROPDMGEXP
storm.data$CROPDMG <- scale_by_exponent(storm.data$CROPDMG, storm.data$CROPDMGEXP)
# map abbreviation to state name; keep the names as character vectors
# regardless of the R version's stringsAsFactors default
state.data <- data.frame(state.abb, state.name, stringsAsFactors = FALSE)
The following code draws a map of the U.S. states, colouring each state by the event type that has the greatest impact on public health (injuries and deaths) in that state.
library(plyr)
library(maps)
library(mapproj)
library(RColorBrewer)
# Total casualties (fatalities + injuries) per event type within each state
aggr <- aggregate(FATALITIES + INJURIES ~ EVTYPE + STATE, data = storm.data, sum)
names(aggr) <- c('EVTYPE', 'STATE', 'CASUALITIES')
# prepare colors: a shuffled palette with one entry per event type in the
# raw data (fixed seed so the colors are reproducible)
unique_causes <- unique(storm.data$EVTYPE)
set.seed(19619)
colors <- sample(colorRampPalette(c("red", "yellow", "green", "orange", "blue"))(n = length(unique_causes)))
# select the event which causes the most population health impact in each
# state (first one wins on ties); ddply() stores it in column V1
saggr <- ddply(aggr, 'STATE', function(X) {
  X$EVTYPE[which.max(X$CASUALITIES)]
})
unique_causes <- unique(saggr$V1)
# map color to the event: the i-th top event gets the i-th palette entry
saggr$color <- colors[match(saggr$V1, unique_causes)]
# Get actual state name needed by map function (NA for codes that are not
# one of the state abbreviations)
saggr$STATE_NAME <- as.character(state.data$state.name[match(saggr$STATE, state.data$state.abb)])
# map() hands out `col` polygon by polygon in database order, not region by
# region in the order supplied, and several states consist of more than one
# polygon. match.map() returns, for every polygon of the "state" database,
# the index of the matching region, which lines the colors up correctly.
# warn = FALSE: names without a polygon in this database are expected.
has_name <- !is.na(saggr$STATE_NAME)
poly_row <- match.map("state", saggr$STATE_NAME[has_name], warn = FALSE)
poly_color <- saggr$color[has_name][poly_row]
# use state map to display the storm information; polygons without a
# matching row get an NA color and stay unfilled
map("state", col = poly_color, fill = TRUE, lty = 0, resolution = 0, projection = "polyconic")
map("state", col = "black", fill = FALSE, add = TRUE, lty = 1, lwd = 1, projection = "polyconic")
# only the first length(unique_causes) palette entries are in use
legend("bottomleft", unique_causes, fill = colors[seq_along(unique_causes)], cex = .56, bty = 'n')
The following code determines the event type that causes the most damage to public health across the entire USA.
# Nationwide casualties (fatalities + injuries) summed per event type
usa_aggr <- setNames(
  aggregate(FATALITIES + INJURIES ~ EVTYPE, data = storm.data, FUN = sum),
  c("EVTYPE", "CASUALITIES")
)
# Highest casualty total, and the event type(s) that reach it
max_casualties <- max(usa_aggr[["CASUALITIES"]])
ph_event <- with(usa_aggr, EVTYPE[CASUALITIES == max_casualties])
From the above data, TORNADO causes the most damage to public health.
The following code draws a map of the U.S. states, colouring each state by the event type that has the greatest impact in terms of property damage and crop damage in that state.
library(plyr)
library(maps)
library(mapproj)
library(RColorBrewer)
# Total economic impact (crop + property damage) per event type within each state
aggr <- aggregate(CROPDMG + PROPDMG ~ EVTYPE + STATE, data = storm.data, sum)
names(aggr) <- c('EVTYPE', 'STATE', 'ECON_IMPACT')
# prepare colors: a shuffled palette with one entry per event type in the
# raw data (fixed seed so the colors are reproducible)
unique_causes <- unique(storm.data$EVTYPE)
set.seed(8080)
colors <- sample(colorRampPalette(c("red", "yellow", "green", "orange", "blue"))(n = length(unique_causes)))
# select the event which causes the most economic impact in each state
# (first one wins on ties); ddply() stores it in column V1
saggr <- ddply(aggr, 'STATE', function(X) {
  X$EVTYPE[which.max(X$ECON_IMPACT)]
})
unique_causes <- unique(saggr$V1)
# map color to the event: the i-th top event gets the i-th palette entry
saggr$color <- colors[match(saggr$V1, unique_causes)]
# Get actual state name needed by map function (NA for codes that are not
# one of the state abbreviations)
saggr$STATE_NAME <- as.character(state.data$state.name[match(saggr$STATE, state.data$state.abb)])
# map() hands out `col` polygon by polygon in database order, not region by
# region in the order supplied, and several states consist of more than one
# polygon. match.map() returns, for every polygon of the "state" database,
# the index of the matching region, which lines the colors up correctly.
# warn = FALSE: names without a polygon in this database are expected.
has_name <- !is.na(saggr$STATE_NAME)
poly_row <- match.map("state", saggr$STATE_NAME[has_name], warn = FALSE)
poly_color <- saggr$color[has_name][poly_row]
# use state map to display the storm information; polygons without a
# matching row get an NA color and stay unfilled
map("state", col = poly_color, fill = TRUE, lty = 0, resolution = 0, projection = "polyconic")
map("state", col = "black", fill = FALSE, add = TRUE, lty = 1, lwd = 1, projection = "polyconic")
# only the first length(unique_causes) palette entries are in use
legend("bottomleft", unique_causes, fill = colors[seq_along(unique_causes)], cex = .56, bty = 'n')
The following code determines the event type that causes the most damage to property and crops across the USA.
# Nationwide economic impact (crop + property damage) summed per event type
usa_aggr <- setNames(
  aggregate(CROPDMG + PROPDMG ~ EVTYPE, data = storm.data, FUN = sum),
  c("EVTYPE", "ECON_IMPACT")
)
# Highest damage total, and the event type(s) that reach it
max_econ_impact <- max(usa_aggr[["ECON_IMPACT"]])
econ_event <- with(usa_aggr, EVTYPE[ECON_IMPACT == max_econ_impact])
From the above data, FLOOD causes the most damage to the economy.
From the above data:
TORNADO causes the most harm to population health, accounting for almost 96979 deaths or injuries. FLOOD causes the most harm to property and crops, accounting for almost $150319678257 worth of damage. [1] U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database