Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This report presents an analysis whose goal was to identify the weather events that are most hazardous to population health and those with the greatest economic impact in the U.S., based on data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
The storm database covers weather events from 1950 through 2011 and contains, for each event, estimates of the number of fatalities and injuries as well as the estimated cost of damage to property and crops.
The estimates for fatalities and injuries were used to determine weather events with the most harmful impact to population health. Property damage and crop damage cost estimates were used to determine weather events with the greatest economic consequences.
Load packages used for this project
# Attach the packages used by this analysis, installing any that are missing.
# requireNamespace() only checks availability; library() then attaches the
# package and fails loudly if the installation did not succeed.
for (pkg in c("ggplot2", "dplyr", "xtable")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE, warn.conflicts = FALSE)
}

# Record the R version and attached package versions for reproducibility.
sessionInfo()
# Read the raw storm data from the bzip2-compressed CSV in the working
# directory (read.csv handles the compressed file directly).
stormData <- read.csv("repdata_data_StormData.csv.bz2", sep = ",", header = TRUE)

# Quick look at the raw data: column names, structure, and first rows.
names(stormData)
str(stormData)
head(stormData)
# Keep only the rows relevant to the analysis: events with a known type
# (EVTYPE other than "?") that caused at least one fatality or injury, or
# some property or crop damage. Only the columns used downstream are kept.
stormDataTidy <- subset(
  stormData,
  EVTYPE != "?" &
    (FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  select = c(
    "EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP", "BGN_DATE", "END_DATE", "STATE"
  )
)

# Size of the reduced data set.
dim(stormDataTidy)

# Total count of missing values across all retained columns.
sum(is.na(stormDataTidy))

# Number of distinct event type labels before normalization.
length(unique(stormDataTidy$EVTYPE))
# Normalize event type labels so that spelling and wording variants collapse
# into a small set of categories.

# Upper-case first so that every pattern below matches regardless of case.
stormDataTidy$EVTYPE <- toupper(stormDataTidy$EVTYPE)

# Ordered rewrite rules: name = regular expression, value = replacement label.
# Rules are applied top to bottom and ORDER MATTERS: once a label has been
# rewritten, later rules see the rewritten value (e.g. a label containing both
# "ICE" and "STORM" becomes "COLD" because the ICE rule runs before STORM).
eventTypeRules <- c(
  ".*AVALANCE.*"                 = "AVALANCHE",
  ".*BLIZZARD.*"                 = "BLIZZARD",
  ".*CLOUD.*"                    = "CLOUD",
  ".*COLD.*"                     = "COLD",
  ".*FREEZ.*"                    = "COLD",
  ".*FROST.*"                    = "COLD",
  ".*ICE.*"                      = "COLD",
  ".*LOW TEMPERATURE RECORD.*"   = "COLD",
  ".*LO.*TEMP.*"                 = "COLD",
  ".*DRY.*"                      = "DRY",
  ".*DUST.*"                     = "DUST",
  ".*FIRE.*"                     = "FIRE",
  ".*FLOOD.*"                    = "FLOOD",
  ".*FOG.*"                      = "FOG",
  ".*HAIL.*"                     = "HAIL",
  ".*HEAT.*"                     = "HEAT",
  ".*WARM.*"                     = "HEAT",
  ".*HIGH.*TEMP.*"               = "HEAT",
  ".*RECORD HIGH TEMPERATURES.*" = "HEAT",
  ".*HYPOTHERMIA.*"              = "HYPOTHERMIA/EXPOSURE",
  ".*LANDSLIDE.*"                = "LANDSLIDE",
  "^LIGHTNING.*"                 = "LIGHTNING",
  "^LIGNTNING.*"                 = "LIGHTNING",
  "^LIGHTING.*"                  = "LIGHTNING",
  ".*MICROBURST.*"               = "MICROBURST",
  ".*MUDSLIDE.*"                 = "MUDSLIDE",
  ".*MUD SLIDE.*"                = "MUDSLIDE",
  ".*RAIN.*"                     = "RAIN",
  ".*RIP CURRENT.*"              = "RIP CURRENT",
  ".*STORM.*"                    = "STORM",
  ".*SUMMARY.*"                  = "SUMMARY",
  ".*TORNADO.*"                  = "TORNADO",
  ".*TORNDAO.*"                  = "TORNADO",
  ".*LANDSPOUT.*"                = "TORNADO",
  ".*WATERSPOUT.*"               = "TORNADO",
  ".*SURF.*"                     = "SURF",
  ".*VOLCANIC.*"                 = "VOLCANIC",
  ".*WET.*"                      = "WET",
  ".*WIND.*"                     = "WIND",
  ".*WINTER.*"                   = "WINTER",
  ".*WINTRY.*"                   = "WINTER",
  ".*SNOW.*"                     = "WINTER"
)

for (i in seq_along(eventTypeRules)) {
  stormDataTidy$EVTYPE <- gsub(
    names(eventTypeRules)[i],
    eventTypeRules[[i]],
    stormDataTidy$EVTYPE
  )
}
# Frequency of each damage-exponent code (case-insensitive), shown for the
# property column first and the crop column second.
with(stormDataTidy, table(toupper(PROPDMGEXP)))
with(stormDataTidy, table(toupper(CROPDMGEXP)))
# Translate damage-exponent codes into numeric multipliers.
#
# Args:
#   exp: Exponent code(s) as found in PROPDMGEXP / CROPDMGEXP. May be a single
#        value or a vector; matching is case-insensitive.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     ""  "-"  "?"  "+"  -> 1
#     "0" .. "9"         -> 10^digit
#     "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9
#   Any other code (including NA) yields NA.
getMultiplier <- function(exp) {
  codes <- c(
    "", "-", "?", "+",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    "H", "K", "M", "B"
  )
  powers <- c(
    0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
    2, 3, 6, 9
  )

  # match() returns NA for unknown codes, which propagates to an NA multiplier.
  10^powers[match(toupper(as.character(exp)), codes)]
}
# Convert the damage figures to dollar amounts by applying the multiplier for
# each row's exponent code, then express the result in billions (divide by
# 10^9). Rows whose exponent code is not recognized get NA.
stormDataTidy$PROP_COST <- with(
  stormDataTidy,
  as.numeric(PROPDMG) *
    vapply(PROPDMGEXP, getMultiplier, numeric(1), USE.NAMES = FALSE)
) / 10^9

stormDataTidy$CROP_COST <- with(
  stormDataTidy,
  as.numeric(CROPDMG) *
    vapply(CROPDMGEXP, getMultiplier, numeric(1), USE.NAMES = FALSE)
) / 10^9
# Total health impact (fatalities + injuries) per normalized event type.
healthImpactData <- aggregate(
  x = list(HEALTH_IMPACT = stormDataTidy$FATALITIES + stormDataTidy$INJURIES),
  by = list(EVENT_TYPE = stormDataTidy$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)

# Most harmful event types first.
healthImpactData <- healthImpactData[
  order(healthImpactData$HEALTH_IMPACT, decreasing = TRUE),
]
# Total economic impact (property + crop cost, in billions) per normalized
# event type. Rows with an NA cost are dropped from the sum via na.rm.
damageCostImpactData <- aggregate(
  x = list(DAMAGE_IMPACT = stormDataTidy$PROP_COST + stormDataTidy$CROP_COST),
  by = list(EVENT_TYPE = stormDataTidy$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)

# Costliest event types first.
damageCostImpactData <- damageCostImpactData[
  order(damageCostImpactData$DAMAGE_IMPACT, decreasing = TRUE),
]
# HTML table of the ten event types with the highest combined fatalities and
# injuries.
print(
  xtable(
    head(healthImpactData, 10),
    caption = "Top 10 Weather Events Most Harmful to Population Health"
  ),
  caption.placement = "top",
  type = "html",
  include.rownames = FALSE,
  html.table.attributes = 'class="table-bordered", width="100%"'
)

# Horizontal bar chart of the same top ten; reorder() sorts the bars by impact
# so the most harmful event type appears at the top after coord_flip().
healthImpactChart <- ggplot(
  head(healthImpactData, 10),
  aes(
    x = reorder(EVENT_TYPE, HEALTH_IMPACT),
    y = HEALTH_IMPACT,
    fill = EVENT_TYPE
  )
) +
  coord_flip() +
  geom_bar(stat = "identity") +
  xlab("Event Type") +
  ylab("Total Fatalities and Injuries") +
  theme(plot.title = element_text(size = 14, hjust = 0.5)) +
  ggtitle("Top 10 Weather Events Most Harmful to Health")

print(healthImpactChart)
# HTML table of the ten event types with the highest combined property and
# crop damage cost (in billions).
print(
  xtable(
    head(damageCostImpactData, 10),
    caption = "Top 10 Weather Events with Greatest Economic Consequences"
  ),
  caption.placement = "top",
  type = "html",
  include.rownames = FALSE,
  html.table.attributes = 'class="table-bordered", width="100%"'
)

# Horizontal bar chart of the same top ten; reorder() sorts the bars by cost
# so the costliest event type appears at the top after coord_flip().
damageCostImpactChart <- ggplot(
  head(damageCostImpactData, 10),
  aes(
    x = reorder(EVENT_TYPE, DAMAGE_IMPACT),
    y = DAMAGE_IMPACT,
    fill = EVENT_TYPE
  )
) +
  coord_flip() +
  geom_bar(stat = "identity") +
  xlab("Event Type") +
  ylab("Total Property / Crop Damage Cost (in Billions)") +
  theme(plot.title = element_text(size = 14, hjust = 0.5)) +
  ggtitle("Top 10 Weather Events with Economic Consequences")

print(damageCostImpactChart)
Tornados
Floods