The National Weather Service provides data for severe and abnormal weather events dating back to the 1950s. Using this dataset we were able to identify the top 19 weather events in terms of the total number of events, fatalities, injuries, property damage, and crop damage. These 19 event types were classified into one of four groups: winter, wind, flood-related, and other event types. The impact of individual events in these four groups since 1995 in terms of fatalities, injuries, property damage, and crop damage was similar. However, the trend in the total impact by event type for each of the groups differs by type of impact and by group. The differences in impact over time for the different event groups should be considered when developing strategies to mitigate the impact of the high-impact events.
Loading the required packages
library(ggplot2)
library(reshape2)
library(plyr)
library(stringr)
Loading the source data
# Fetch the compressed storm database once and cache it in the working
# directory so later runs skip the download.
storm_file <- "StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the bz2 archive byte-exact on Windows; the default
  # download method is used so no external curl binary is required.
  download.file(url = storm_url, destfile = storm_file, mode = "wb")
}

# read.csv() reads the bz2-compressed file directly.
storm_data <- read.csv(storm_file)

# Parse BGN_DATE as month/day/year into a proper Date column used for the
# time filter and the trend plot.
storm_data$Date <- as.Date(storm_data$BGN_DATE, format = "%m/%d/%Y")
Identifying the highest impact event types
# Total every impact measure, plus the raw record count, per EVTYPE label.
# NOTE(review): PROPDMG and CROPDMG are summed exactly as stored; if the
# dataset carries a separate magnitude/exponent column for them, these totals
# mix units -- confirm against the Storm Data documentation.
ev_summary <- ddply(
  storm_data, "EVTYPE", summarize,
  FATALITIES = sum(FATALITIES),
  INJURIES   = sum(INJURIES),
  PROPDMG    = sum(PROPDMG),
  CROPDMG    = sum(CROPDMG),
  number     = length(EVTYPE)
)

# Long format: one row per (EVTYPE, measure). The id column is left for
# melt() to infer, which it reports in a message.
ev_melt <- melt(data = ev_summary)
## Using EVTYPE as id variables
# 95th-percentile threshold of the per-EVTYPE totals, one per measure.
cutoffs <- ddply(
  ev_melt, "variable", summarize,
  quant95 = quantile(value, probs = 0.95)
)

# Attach each measure's threshold to its rows; join() matches on the columns
# the two frames share and reports them in a message.
ev_cutoff <- join(x = ev_melt, y = cutoffs)
## Joining by: variable
# Keep only the (EVTYPE, measure) pairs strictly above that measure's cutoff.
# which() drops any NA comparison instead of injecting all-NA rows.
above_cutoff <- ev_cutoff$value > ev_cutoff$quant95
ev_cutoff <- ev_cutoff[which(above_cutoff), ]

# Re-create the factor so levels with no remaining rows are dropped.
ev_cutoff$EVTYPE <- factor(ev_cutoff$EVTYPE)

# Count, per EVTYPE, how many measures exceeded their cutoff.
ev_cutoff_counts <- ddply(ev_cutoff, .(EVTYPE), summarize,
                          count = length(variable))

# An event type qualifies only if it exceeded the cutoff on every measure.
# The number of measures is derived from the melted summary rather than
# hard-coded, so adding or removing a measure upstream stays consistent.
n_measures <- length(unique(ev_melt$variable))
events <- as.character(
  ev_cutoff_counts$EVTYPE[ev_cutoff_counts$count == n_measures]
)
Filtering dataset for high impact events since 1995
# Restrict the raw records to the selected high-impact event types.
data_top20 <- storm_data[storm_data$EVTYPE %in% events, ]

# Keep events since 1995. The comparison is inclusive so that events dated
# 1995-01-01 are retained, and rows whose date failed to parse are dropped
# rather than turning into all-NA rows.
start_date <- as.Date("1995-01-01")
since_start <- !is.na(data_top20$Date) & data_top20$Date >= start_date
data_top20_filt <- data_top20[since_start, ]
Grouping events
# Hand-assigned event groups. Wind picks up every selected label containing
# "WIND" plus two wind-driven storm types; "others" is whatever is left.
wind <- c(grep("WIND", events, value = TRUE), "TORNADO", "TROPICAL STORM")
winter <- c("HEAVY SNOW", "ICE STORM", "EXTREME COLD", "HAIL", "WINTER STORM")
flood <- c("FLOOD", "FLASH FLOOD", "URBAN/SML STREAM FLD")
others <- events[!(events %in% c(wind, winter, flood))]

# Create the column explicitly before filling it, instead of relying on
# sub-assignment into a column that does not exist yet.
data_top20_filt$group <- rep(NA_character_, nrow(data_top20_filt))

# Assignment order is kept: if a label ever matched two groups, the later
# assignment wins.
data_top20_filt$group[data_top20_filt$EVTYPE %in% flood] <- "flood"
data_top20_filt$group[data_top20_filt$EVTYPE %in% wind] <- "wind"
data_top20_filt$group[data_top20_filt$EVTYPE %in% winter] <- "winter"
data_top20_filt$group[data_top20_filt$EVTYPE %in% others] <- "others"
Preparing dataframe for figures
# Long format of the per-record impacts: one row per (record, measure),
# keeping the event type, its group and the date as identifiers.
keep_cols <- c("EVTYPE", "Date", "FATALITIES", "INJURIES",
               "PROPDMG", "CROPDMG", "group")
top20m <- melt(
  data_top20_filt[, keep_cols, drop = FALSE],
  id.vars = c("EVTYPE", "group", "Date")
)

# Total of each measure per group and day.
top20m_total <- ddply(
  top20m, c("group", "Date", "variable"), summarize,
  total_value = sum(value)
)

# Small offset so that zero totals stay finite when plotted on a log10 axis.
top20m_total$total_value <- 0.001 + top20m_total$total_value
There are a total of 985 different types of events. We will focus on the event types that were above the 95th percentile in crop and property damage caused, in fatalities and injuries, and in the total number of events.
19 events were above the 95th percentile for all categories. Note that THUNDERSTORM WIND and THUNDERSTORM WINDS were recorded as two separate categories, which is why 20 labels are printed below.
# Show the EVTYPE labels selected as high-impact events.
print(events)
## [1] "DUST STORM" "EXTREME COLD" "FLASH FLOOD"
## [4] "FLOOD" "HAIL" "HEAVY RAIN"
## [7] "HEAVY SNOW" "HIGH WIND" "HIGH WINDS"
## [10] "ICE STORM" "LIGHTNING" "STRONG WIND"
## [13] "THUNDERSTORM WIND" "THUNDERSTORM WINDS" "TORNADO"
## [16] "TROPICAL STORM" "TSTM WIND" "URBAN/SML STREAM FLD"
## [19] "WILDFIRE" "WINTER STORM"
Impact of events by group since 1995
# Distribution of per-record impact for each event group, one panel per
# measure. The y axis is log10, so zero-valued records become non-finite and
# are dropped by ggplot2 with a warning.
ggplot(top20m, aes(x = group, y = value)) +
  geom_boxplot() +
  # `scales` spelled out in full; the abbreviated `scale` only worked through
  # partial argument matching.
  facet_wrap(~variable, scales = "free_y") +
  scale_y_log10() +
  theme_bw() +
  labs(x = "Event Type Group", y = "Impact For Individual Events")
## Warning: Removed 621215 rows containing non-finite values (stat_boxplot).
## Warning: Removed 615720 rows containing non-finite values (stat_boxplot).
## Warning: Removed 429944 rows containing non-finite values (stat_boxplot).
## Warning: Removed 605562 rows containing non-finite values (stat_boxplot).
Impact of event type groups over time since 1995.
# Loess-smoothed trend of the daily total impact for each event group, one
# panel per measure, on a log10 y axis.
ggplot(top20m_total, aes(x = Date, y = total_value, color = group)) +
  geom_smooth(method = "loess") +
  # `scales` spelled out in full; the abbreviated `scale` only worked through
  # partial argument matching.
  facet_wrap(~variable, scales = "free_y") +
  scale_y_log10() +
  theme_bw() +
  # The totals are aggregated per group and day, so the label says "Group".
  labs(y = "Total Impact per Event Type Group Each Day",
       color = "Event Type Group")