Tornado events do the most damage to population health, causing both the most deaths and the highest number of weather-related injuries. Heat, wind, flood and lightning events are also among the leading causes of deaths and injuries. Only floods and hurricanes cause more property and crop damage than tornadoes, likely because of the larger areas and longer timeframes these events cover.
# Get Data
# Download the compressed NOAA storm data only when no local copy exists.
# The archive is saved under the same name that the existence check and
# read.csv() use, so later runs reuse it instead of downloading again.
data_file <- "StormData.csv.bz2"
if (!file.exists(data_file)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = data_file,
    mode = "wb"  # write the bzip2 archive in binary mode
  )
}
# bzfile() decompresses while reading, so no separate bunzip2 step is needed.
noaa <- read.csv(bzfile(data_file))
# Minimize data columns
# Keep only the event type, the casualty counts, and the damage
# amount/exponent pairs; every later step works on this narrower frame.
col <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
storm <- noaa[, col, drop = FALSE]
# Tidy event types
# Collapse the free-text EVTYPE values into broad categories. Each row below
# is a case-insensitive substring pattern followed by the label that replaces
# every value containing it. Rules run top to bottom and later rules see the
# output of earlier ones, so the order matters: for example, a value that
# contains both WIND and THUNDERSTORM is recoded to "WIND" before the
# thunderstorm rules run.
evtype_rules <- matrix(
  c(
    "TORNADO",  "TORNADO",
    "WIND",     "WIND",
    "FLOOD",    "FLOOD",
    "ICE",      "SNOW/ICE",
    "SNOW",     "SNOW/ICE",
    "HEAT",     "HEAT",
    "wint",     "COLD",
    "freez",    "COLD",
    "hail",     "HAIL",
    "TST",      "THUNDERSTORM",
    "thund",    "THUNDERSTORM",
    "hurri",    "HURRICANE",
    "warm",     "HEAT",
    "HoT",      "HEAT",
    "thermia",  "COLD",
    "red flag", "WIND",
    # NOTE(review): this rule can never match, because the "wint" rule above
    # has already recoded every value containing WINTER to "COLD". Confirm
    # whether winter events were meant to be COLD or SNOW/ICE.
    "WINTER",   "SNOW/ICE",
    "SPOUT",    "TORNADO",
    "RAIN",     "RAIN",
    "DRY",      "DROUGHT",
    "DAM ",     "FLOOD",
    # "AVALANC" matches both the correct spelling and the "AVALANCE"
    # misspelling, so the two are merged under the correctly spelled label.
    "AVALANC",  "AVALANCHE",
    "rip ",     "RIP CURRENT",
    "cold",     "COLD",
    "preci",    "RAIN",
    "shower",   "RAIN",
    "frost",    "COLD",
    "tropical", "HURRICANE",
    "ICy",      "SNOW/ICE",
    "FUNNEL",   "TORNADO"
  ),
  ncol = 2, byrow = TRUE,
  dimnames = list(NULL, c("pattern", "label"))
)
# Work on a character vector: a factor column would turn any label that is not
# already one of its levels into NA.
storm$EVTYPE <- as.character(storm$EVTYPE)
for (i in seq_len(nrow(evtype_rules))) {
  hits <- grepl(evtype_rules[[i, "pattern"]], storm$EVTYPE, ignore.case = TRUE)
  storm$EVTYPE[hits] <- evtype_rules[[i, "label"]]
}
# Tidy the $ Damage and Exponent data
# Translate the PROPDMGEXP / CROPDMGEXP codes into numeric multipliers using
# one shared lookup table, so property and crop damage are scaled by the same
# rules. Codes are upper-cased before the lookup, which makes "k"/"K",
# "m"/"M", "h"/"H" and "b"/"B" equivalent.
#   "", "?", "-"  -> 0
#   "+"           -> 1
#   "0" .. "8"    -> 10
#   "H"           -> 100
#   "K"           -> 1,000
#   "M"           -> 1,000,000
#   "B"           -> 1,000,000,000
# The digit "0" has to be in the table: a row whose multiplier is NA gets an
# NA dollar value and is then discarded by the formula-based aggregate() that
# sums propDollars.
exp_codes <- c("", "?", "-", "+", as.character(0:8), "H", "K", "M", "B")
exp_multipliers <- c(0, 0, 0, 1, rep(10, 9), 1e2, 1e3, 1e6, 1e9)
storm$PROPEXP <- exp_multipliers[
  match(toupper(as.character(storm$PROPDMGEXP)), exp_codes)
]
# tidy crop data
storm$CROPEXP <- exp_multipliers[
  match(toupper(as.character(storm$CROPDMGEXP)), exp_codes)
]
# Any code that is not in the table leaves the multiplier as NA.
# prep damage
# Coerce both multiplier columns to numeric before using them in arithmetic.
storm[c("PROPEXP", "CROPEXP")] <- lapply(
  storm[c("PROPEXP", "CROPEXP")],
  as.numeric
)
# Dollar value = reported amount times its multiplier.
storm$propDamage <- with(storm, PROPDMG * PROPEXP)
storm$CropDamage <- with(storm, CROPDMG * CROPEXP)
# since Crops are property, I've added CropDamage to propDamage
storm$propDollars <- with(storm, propDamage + CropDamage)
# Across the United States, which types of events
# (as indicated in the EVTYPE variable) are most harmful
# with respect to population health?
# "health" will be defined as fatality and injuries
# prep deaths
# Total the fatalities for each event type, rank the totals from largest to
# smallest, and keep the first five rows.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = storm, FUN = sum)
fatal_rank <- order(fatal$FATALITIES, decreasing = TRUE)
deaths <- fatal[fatal_rank[seq_len(5)], ]
deaths
## EVTYPE FATALITIES
## 211 TORNADO 5664
## 54 HEAT 3178
## 43 FLOOD 1525
## 244 WIND 1426
## 80 LIGHTNING 816
# plot deaths
# Bar chart of the top fatality totals, with x-axis labels rotated 90 degrees.
library(ggplot2)
g <- ggplot(deaths, aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "Event Type",
    y = "Fatalities",
    title = "Top Weather-Related Deaths by Event Types"
  )
g
# prep injuries
# Total the injuries for each event type and keep the five largest totals.
ouch <- aggregate(INJURIES ~ EVTYPE, data = storm, FUN = sum)
ouch_rank <- order(ouch$INJURIES, decreasing = TRUE)
injured <- ouch[ouch_rank[seq_len(5)], ]
# plot injuries
# Bar chart of the top injury totals, with x-axis labels rotated 90 degrees.
g <- ggplot(injured, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "Event Type",
    y = "# of Injuries",
    title = "Top Weather-Related Injuries by Event Types"
  )
g
# Across the United States, which types of events have
# the greatest economic consequences?
# prep damages
# Total the combined property and crop dollars for each event type and keep
# the ten largest totals.
money <- aggregate(propDollars ~ EVTYPE, data = storm, FUN = sum)
money_rank <- order(money$propDollars, decreasing = TRUE)
damage <- money[money_rank[seq_len(10)], ]
# plot damages
# Bar chart of the top damage totals, with x-axis labels rotated 90 degrees.
g <- ggplot(damage, aes(x = EVTYPE, y = propDollars)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "Event Type",
    y = "Damage in Dollars",
    title = "Top Weather-Related Damages by Event Types in US Dollars"
  )
g