Synopsis

The National Weather Service provides data for severe and abnormal weather events dating back to the 1950s. Using this dataset we were able to identify the top 19 weather events in terms of the total number of events, fatalities, injuries, property damage, and crop damage. These 19 event types were classified into one of four groups: winter, wind, flood related, and other event types. The impact of individual events in these four groups since 1995, in terms of fatalities, injuries, property damage, and crop damage, was similar. However, the trend in the total impact over time differs by type of impact and by group. The differences in impact over time for the different event groups should be considered when developing strategies to mitigate the impact of the high impact events.

Data Processing

Loading the required packages

library(ggplot2)
library(reshape2)
library(plyr)
library(stringr)

Loading the source data

# Fetch the compressed storm data only when it is not already present in the
# working directory, so repeated runs do not download it again.
if (!file.exists("StormData.csv.bz2")) {
    # mode = "wb" keeps the bzip2 archive byte-exact on every platform, and
    # the default download method is used so that no external curl binary
    # has to be installed.
    download.file(
        url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
        destfile = "StormData.csv.bz2",
        mode = "wb"
    )
}
# read.csv() reads the bzip2-compressed file directly.
storm_data <- read.csv("StormData.csv.bz2")
# Parse the event start date as month/day/year into a Date column.
storm_data$Date <- as.Date(storm_data$BGN_DATE, format = "%m/%d/%Y")

Identifying the highest impact event types

# Total each impact measure, and count the records, for every event type.
ev_summary <- ddply(storm_data, .(EVTYPE), summarize,
                    FATALITIES = sum(FATALITIES),
                    INJURIES = sum(INJURIES),
                    PROPDMG = sum(PROPDMG),
                    CROPDMG = sum(CROPDMG),
                    number = length(EVTYPE))
# Long format: one row per (event type, measure). The id column is named
# explicitly instead of letting melt() guess it.
ev_melt <- melt(ev_summary, id.vars = "EVTYPE")
# 95th percentile of the per-event-type totals, computed separately for each
# measure.
cutoffs <- ddply(ev_melt, .(variable), summarize,
                 quant95 = quantile(value, probs = 0.95))
# Attach each measure's cutoff to its rows; the join key is stated explicitly
# instead of being inferred from the shared column names.
ev_cutoff <- join(ev_melt, cutoffs, by = "variable")
# Keep only the (event type, measure) pairs that exceed the cutoff.
ev_cutoff <- ev_cutoff[ev_cutoff$value > ev_cutoff$quant95, ]
# Re-create the factor so event types with no remaining rows are dropped.
ev_cutoff$EVTYPE <- factor(ev_cutoff$EVTYPE)
# Count how many measures each remaining event type exceeded.
ev_cutoff_counts <- ddply(ev_cutoff, .(EVTYPE), summarize,
                          count = length(variable))
# High-impact event types are those above the cutoff on all 5 measures
# (fatalities, injuries, property damage, crop damage, number of events).
events <- as.character(ev_cutoff_counts$EVTYPE[ev_cutoff_counts$count == 5])

Filtering dataset for high impact events since 1995

# Keep only the records whose event type is one of the high-impact types.
data_top20 <- storm_data[storm_data$EVTYPE %in% events, ]
# Restrict to events since 1995. ">=" keeps events dated 1 January 1995
# itself, and which() skips records whose date is NA instead of turning them
# into all-NA rows.
data_top20_filt <- data_top20[which(data_top20$Date >= as.Date("1995-01-01")), ]

Grouping events

# Assign every high-impact event type to one of four groups.
# Wind: every selected type whose name contains "WIND", plus tornadoes and
# tropical storms.
wind <- c(grep("WIND", events, value = TRUE), "TORNADO", "TROPICAL STORM")
winter <- c("HEAVY SNOW", "ICE STORM", "EXTREME COLD", "HAIL", "WINTER STORM")
flood <- c("FLOOD", "FLASH FLOOD", "URBAN/SML STREAM FLD")
# Everything not claimed by the three groups above.
others <- events[!(events %in% c(wind, winter, flood))]

# Create the group column explicitly before filling it, rather than relying
# on subassignment into a column that does not exist yet.
data_top20_filt$group <- NA_character_
data_top20_filt$group[data_top20_filt$EVTYPE %in% flood] <- "flood"
data_top20_filt$group[data_top20_filt$EVTYPE %in% wind] <- "wind"
data_top20_filt$group[data_top20_filt$EVTYPE %in% winter] <- "winter"
data_top20_filt$group[data_top20_filt$EVTYPE %in% others] <- "others"

Preparing dataframe for figures

# Long-format table of the four impact measures: one row per record and
# measure, identified by event type, group and date.
top20m <- melt(
    data_top20_filt[, c("EVTYPE", "Date", "FATALITIES", "INJURIES",
                        "PROPDMG", "CROPDMG", "group")],
    id.vars = c("EVTYPE", "group", "Date")
)
# Total of each measure per group and date.
top20m_total <- ddply(top20m, .(group, Date, variable), summarize,
                      total_value = sum(value))
# Shift every total by a small constant; presumably so that zero totals stay
# finite on the log10 axis used when plotting this table.
top20m_total <- transform(top20m_total, total_value = total_value + 0.001)

Results

There are a total of 985 different types of events. We will focus on the event types that were above the 95th percentile in crop and property damage caused, in fatalities and injuries, and in total number of events.

19 events were above the 95th percentile for all categories. Note that THUNDERSTORM WIND and THUNDERSTORM WINDS appear as two separate categories, so the list below has 20 entries.

# Show the event types that exceeded the 95th percentile on all five measures.
print(events)
##  [1] "DUST STORM"           "EXTREME COLD"         "FLASH FLOOD"         
##  [4] "FLOOD"                "HAIL"                 "HEAVY RAIN"          
##  [7] "HEAVY SNOW"           "HIGH WIND"            "HIGH WINDS"          
## [10] "ICE STORM"            "LIGHTNING"            "STRONG WIND"         
## [13] "THUNDERSTORM WIND"    "THUNDERSTORM WINDS"   "TORNADO"             
## [16] "TROPICAL STORM"       "TSTM WIND"            "URBAN/SML STREAM FLD"
## [19] "WILDFIRE"             "WINTER STORM"

Impact of events by group since 1995

# Box plots of the per-event impact for each event group, one panel per
# impact measure with its own y range. The y axis is log10, so records with
# a value of zero are not finite on that scale; ggplot2 drops them and
# reports the count in a "non-finite values" warning for each panel.
ggplot(top20m) + geom_boxplot(aes(x = group, y = value)) +
    # Argument spelled out in full (`scales`) rather than relying on partial
    # matching of `scale`.
    facet_wrap(~variable, scales = "free_y") +
    scale_y_log10() +
    theme_bw() +
    labs(x = "Event Type Group", y = "Impact For Individual Events")
## Warning: Removed 621215 rows containing non-finite values (stat_boxplot).
## Warning: Removed 615720 rows containing non-finite values (stat_boxplot).
## Warning: Removed 429944 rows containing non-finite values (stat_boxplot).
## Warning: Removed 605562 rows containing non-finite values (stat_boxplot).

plot of chunk unnamed-chunk-8

Impact of event type groups over time since 1995.

# Loess-smoothed trend of the daily total impact for each event group, one
# panel per impact measure with its own log10 y range.
ggplot(top20m_total) +
    geom_smooth(aes(x = Date, y = total_value, color = group),
                method = "loess") +
    # Argument spelled out in full (`scales`) rather than relying on partial
    # matching of `scale`.
    facet_wrap(~variable, scales = "free_y") +
    scale_y_log10() +
    theme_bw() +
    # The plotted totals are summed per event type group and date, so the
    # axis label names the group rather than the individual event type.
    labs(y = "Total Impact per Event Type Group Each Day",
         color = "Event Type Group")

plot of chunk unnamed-chunk-9