This data analysis answers the following questions.
Across the United States, which types of events are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
The five storm event types with the most casualties are extracted from the NOAA storm data, where a casualty is either a fatality or an injury. The five storm event types with the greatest economic damage are extracted in the same way, with economic damage measured as the sum of property and crop damage.
library(dplyr)
library(lattice)
The data come from the NOAA Storm Database and cover the years 1950 to 2011.
# Read the bzip2-compressed NOAA storm CSV. The connection is passed unopened
# so that read.csv() opens it and closes it again when it is done; a
# connection opened up front (open = "r") is left open by read.csv() and leaks.
stormdata <- read.csv(bzfile("repdata-data-StormData.csv.bz2"))
# Wrap the data frame in a tibble; tbl_df() is deprecated in favour of
# as_tibble(), which dplyr re-exports.
stormdata <- as_tibble(stormdata)
# Lower-case the column names so the rest of the analysis can refer to them
# uniformly.
names(stormdata) <- tolower(names(stormdata))
The following assumptions were made for the propdmgexp and cropdmgexp variables.
| Symbol | Exponent (10^x) |
|---|---|
| (blank) or - or + or ? or 0 | 0 |
| 1 | 1 |
| 2 or h or H | 2 |
| 3 or k or K | 3 |
| 4 | 4 |
| 5 | 5 |
| m or M or 6 | 6 |
| 7 | 7 |
| 8 | 8 |
| b or B | 9 |
# Translate a vector of NOAA damage-exponent symbols into numeric multipliers
# (10^exponent). Every symbol is looked up exactly once by whole-value match,
# so the value produced for one symbol can never be re-matched by a later
# rule. (Chained gsub() calls do suffer from that: a blank or "0" becomes "1",
# which the rule for "1" then turns into "10".) Symbols that are not listed
# yield NA.
exponent_multiplier <- function(symbols) {
  known_symbols <- c(
    "", "-", "+", "?", "0",
    "1",
    "2", "h", "H",
    "3", "k", "K",
    "4",
    "5",
    "6", "m", "M",
    "7",
    "8",
    "b", "B"
  )
  # exponents[i] is the power of ten assigned to known_symbols[i]
  exponents <- c(
    0, 0, 0, 0, 0,
    1,
    2, 2, 2,
    3, 3, 3,
    4,
    5,
    6, 6, 6,
    7,
    8,
    9, 9
  )
  # as.character() so the lookup also works if the column was read as a factor
  10^(exponents[match(as.character(symbols), known_symbols)])
}

# convert property damage exponents to numeric multipliers
stormdata$propdmgexp <- exponent_multiplier(stormdata$propdmgexp)
# apply exponents to damage estimates
stormdata$propdmg <- stormdata$propdmg * stormdata$propdmgexp

# convert crop damage exponents to numeric multipliers
stormdata$cropdmgexp <- exponent_multiplier(stormdata$cropdmgexp)
# apply exponents to damage estimates
stormdata$cropdmg <- stormdata$cropdmg * stormdata$cropdmgexp
A casualty is defined as a fatality or injury for this analysis.
# Total fatalities, injuries and combined casualties for each event type,
# ordered from the most to the fewest casualties.
total_casualties_by_evtype <-
  stormdata %>%
  group_by(evtype) %>%
  summarise(total_casualties = sum(fatalities) + sum(injuries),
            total_fatalities = sum(fatalities),
            total_injuries = sum(injuries)) %>%
  arrange(desc(total_casualties))
# get top five events with the most casualties
# head() returns at most five rows, whereas [1:5, ] would pad the result with
# NA rows if there were fewer than five event types.
top5_casualties_by_evtype <- head(total_casualties_by_evtype, 5)
# Stacked bar chart: one bar per event type, split into fatalities and
# injuries. The legend labels follow the order of the terms in the formula.
barchart(total_fatalities + total_injuries ~ evtype,
         data = top5_casualties_by_evtype,
         stack = TRUE,
         xlab = "Storm Event Type",
         ylab = "Total Casualties (fatalities + injuries)",
         main = "Top Five Human Casualties by Storm Event Type",
         auto.key = list(text = c("fatalities", "injuries")))
The chart shows the five storm event types that have been the most harmful to human health.
# Total property damage, crop damage and combined damage for each event type,
# ordered from the greatest to the smallest combined damage.
total_damage_by_evtype <-
  stormdata %>%
  group_by(evtype) %>%
  summarise(total_dmg = sum(propdmg) + sum(cropdmg),
            total_prop_dmg = sum(propdmg),
            total_crop_dmg = sum(cropdmg)) %>%
  arrange(desc(total_dmg))
# get top five events with the greatest total damage
# Note: this deliberately overwrites the full per-event table with its first
# five rows. head() returns at most five rows, whereas [1:5, ] would pad the
# result with NA rows if there were fewer than five event types.
total_damage_by_evtype <- head(total_damage_by_evtype, 5)
# Stacked bar chart: one bar per event type, split into property and crop
# damage. The legend labels follow the order of the terms in the formula.
barchart(total_prop_dmg + total_crop_dmg ~ evtype,
         data = total_damage_by_evtype,
         stack = TRUE,
         xlab = "Storm Event Type",
         ylab = "Total Damage (property + crop)",
         main = "Top Five Storm Types with Greatest Economic Impact",
         auto.key = list(text = c("property damage", "crop damage")))
The top five storm events that have had the greatest