Analysis of U.S. Storm Event Data and the Impact on Population Health and the Economy

Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This report contains the results of an analysis whose goal was to identify the weather events most hazardous to population health and those with the greatest economic impact in the U.S., based on data collected in the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database.

The storm database includes weather events from 1950 through 2011 and contains estimates such as the number of fatalities and injuries for each weather event, as well as the estimated cost of damage to property and crops for each weather event.

The estimates for fatalities and injuries were used to determine weather events with the most harmful impact to population health. Property damage and crop damage cost estimates were used to determine weather events with the greatest economic consequences.

Environment Setup

Load packages used for this project

# Attach every package the report needs, installing any that is missing.
# requireNamespace() only checks availability; library() then attaches the
# package and fails loudly if the installation did not succeed.
for (pkg in c("ggplot2", "dplyr", "xtable")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE, warn.conflicts = FALSE)
}

# Record the R version and attached packages for reproducibility.
sessionInfo()

Load Data

# Read the compressed storm data file; read.csv() decompresses .bz2 files
# transparently.
stormData <- read.csv(
  "repdata_data_StormData.csv.bz2",
  sep = ",",
  header = TRUE
)

# Quick look at the column names, structure and first rows.
names(stormData)
str(stormData)
head(stormData)

Data Processing

Create subset of data

# Keep only events with a known type and at least one non-zero health or
# damage figure, and only the columns used later in the analysis.
stormDataTidy <- subset(
  stormData,
  EVTYPE != "?" &
    (FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  select = c(
    "EVTYPE",
    "FATALITIES",
    "INJURIES",
    "PROPDMG",
    "PROPDMGEXP",
    "CROPDMG",
    "CROPDMGEXP",
    "BGN_DATE",
    "END_DATE",
    "STATE"
  )
)
dim(stormDataTidy)

# Count missing values remaining in the subset.
sum(is.na(stormDataTidy))

Clean Event Type Data

# Number of distinct event type labels before cleaning.
length(unique(stormDataTidy$EVTYPE))

# Normalise case first so the patterns below only need upper-case forms.
stormDataTidy$EVTYPE <- toupper(stormDataTidy$EVTYPE)

# Ordered consolidation rules, each c(pattern, replacement). Any label that
# matches a pattern is replaced wholesale by the category name. The order is
# significant: a label rewritten by an earlier rule no longer matches later
# ones (for example "WINTER STORM" becomes "STORM" before the WINTER rules
# run), so do not reorder these entries.
eventTypeRules <- list(
  # AVALANCHE
  c(".*AVALANCE.*", "AVALANCHE"),
  # BLIZZARD
  c(".*BLIZZARD.*", "BLIZZARD"),
  # CLOUD
  c(".*CLOUD.*", "CLOUD"),
  # COLD
  c(".*COLD.*", "COLD"),
  c(".*FREEZ.*", "COLD"),
  c(".*FROST.*", "COLD"),
  c(".*ICE.*", "COLD"),
  c(".*LOW TEMPERATURE RECORD.*", "COLD"),
  c(".*LO.*TEMP.*", "COLD"),
  # DRY
  c(".*DRY.*", "DRY"),
  # DUST
  c(".*DUST.*", "DUST"),
  # FIRE
  c(".*FIRE.*", "FIRE"),
  # FLOOD
  c(".*FLOOD.*", "FLOOD"),
  # FOG
  c(".*FOG.*", "FOG"),
  # HAIL
  c(".*HAIL.*", "HAIL"),
  # HEAT
  c(".*HEAT.*", "HEAT"),
  c(".*WARM.*", "HEAT"),
  c(".*HIGH.*TEMP.*", "HEAT"),
  c(".*RECORD HIGH TEMPERATURES.*", "HEAT"),
  # HYPOTHERMIA/EXPOSURE
  c(".*HYPOTHERMIA.*", "HYPOTHERMIA/EXPOSURE"),
  # LANDSLIDE
  c(".*LANDSLIDE.*", "LANDSLIDE"),
  # LIGHTNING (anchored at the start, including common misspellings)
  c("^LIGHTNING.*", "LIGHTNING"),
  c("^LIGNTNING.*", "LIGHTNING"),
  c("^LIGHTING.*", "LIGHTNING"),
  # MICROBURST
  c(".*MICROBURST.*", "MICROBURST"),
  # MUDSLIDE
  c(".*MUDSLIDE.*", "MUDSLIDE"),
  c(".*MUD SLIDE.*", "MUDSLIDE"),
  # RAIN
  c(".*RAIN.*", "RAIN"),
  # RIP CURRENT
  c(".*RIP CURRENT.*", "RIP CURRENT"),
  # STORM
  c(".*STORM.*", "STORM"),
  # SUMMARY
  c(".*SUMMARY.*", "SUMMARY"),
  # TORNADO
  c(".*TORNADO.*", "TORNADO"),
  c(".*TORNDAO.*", "TORNADO"),
  c(".*LANDSPOUT.*", "TORNADO"),
  c(".*WATERSPOUT.*", "TORNADO"),
  # SURF
  c(".*SURF.*", "SURF"),
  # VOLCANIC
  c(".*VOLCANIC.*", "VOLCANIC"),
  # WET
  c(".*WET.*", "WET"),
  # WIND
  c(".*WIND.*", "WIND"),
  # WINTER
  c(".*WINTER.*", "WINTER"),
  c(".*WINTRY.*", "WINTER"),
  c(".*SNOW.*", "WINTER")
)

# Apply the rules one after another, in the order listed above.
for (rule in eventTypeRules) {
  stormDataTidy$EVTYPE <- gsub(rule[1], rule[2], stormDataTidy$EVTYPE)
}

Clean Economic Data

# Show how often each (upper-cased) exponent code occurs, first for property
# damage and then for crop damage.
invisible(lapply(
  c("PROPDMGEXP", "CROPDMGEXP"),
  function(expColumn) print(table(toupper(stormDataTidy[[expColumn]])))
))

Function to get multiplier factor

# Translate a damage exponent code into its numeric multiplier.
#
# Arguments:
#   exp  Exponent code(s) as character (or factor); matched case-insensitively.
#        Accepts a single code or a vector of codes.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "", "-", "?", "+", "0"  -> 1
#     "1" .. "9"              -> 10^1 .. 10^9
#     "H", "K", "M", "B"      -> 10^2, 10^3, 10^6, 10^9
#   Any other code (including NA) yields NA.
getMultiplier <- function(exp) {
  codes <- c("", "-", "?", "+", as.character(0:9), "H", "K", "M", "B")
  powers <- c(0, 0, 0, 0, 0:9, 2, 3, 6, 9)
  # match() is used instead of indexing a named vector because the empty
  # string cannot be looked up by name in R.
  10^powers[match(toupper(exp), codes)]
}

calculate property damage and crop damage costs

# Property damage in billions of dollars: reported amount times the multiplier
# for its exponent code. as.numeric() around getMultiplier() keeps vapply()
# type-stable even if an unknown code yields a logical NA.
stormDataTidy$PROP_COST <- with(
  stormDataTidy,
  as.numeric(PROPDMG) * vapply(
    as.character(PROPDMGEXP),
    function(code) as.numeric(getMultiplier(code)),
    numeric(1),
    USE.NAMES = FALSE
  )
) / 10^9

# Crop damage in billions of dollars, computed the same way.
stormDataTidy$CROP_COST <- with(
  stormDataTidy,
  as.numeric(CROPDMG) * vapply(
    as.character(CROPDMGEXP),
    function(code) as.numeric(getMultiplier(code)),
    numeric(1),
    USE.NAMES = FALSE
  )
) / 10^9

Summarize Data

# Total fatalities plus injuries per cleaned event type, largest first.
healthImpactData <- aggregate(
  x = list(
    HEALTH_IMPACT = stormDataTidy$FATALITIES + stormDataTidy$INJURIES
  ),
  by = list(EVENT_TYPE = stormDataTidy$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
healthImpactData <- healthImpactData[
  order(healthImpactData$HEALTH_IMPACT, decreasing = TRUE),
]

# Total property plus crop damage cost per cleaned event type, largest first.
damageCostImpactData <- aggregate(
  x = list(
    DAMAGE_IMPACT = stormDataTidy$PROP_COST + stormDataTidy$CROP_COST
  ),
  by = list(EVENT_TYPE = stormDataTidy$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
damageCostImpactData <- damageCostImpactData[
  order(damageCostImpactData$DAMAGE_IMPACT, decreasing = TRUE),
]

Results

Event Types Most Harmful to Population

# HTML table of the ten event types with the highest combined fatalities and
# injuries.
print(
  xtable(
    head(healthImpactData, 10),
    caption = "Top 10 Weather Events Most Harmful to Population Health"
  ),
  caption.placement = "top",
  type = "html",
  include.rownames = FALSE,
  html.table.attributes = 'class="table-bordered", width="100%"'
)

# Horizontal bar chart of the same ten event types; reorder() sorts the bars
# by impact so the largest appears at the top after coord_flip().
healthImpactChart <- ggplot(
  head(healthImpactData, 10),
  aes(
    x = reorder(EVENT_TYPE, HEALTH_IMPACT),
    y = HEALTH_IMPACT,
    fill = EVENT_TYPE
  )
) +
  coord_flip() +
  geom_bar(stat = "identity") +
  xlab("Event Type") +
  ylab("Total Fatalities and Injuries") +
  theme(plot.title = element_text(size = 14, hjust = 0.5)) +
  ggtitle("Top 10 Weather Events Most Harmful to\nHealth")
print(healthImpactChart)

Event Types with Greatest Economic Consequences

# HTML table of the ten event types with the highest combined property and
# crop damage cost.
print(
  xtable(
    head(damageCostImpactData, 10),
    caption = "Top 10 Weather Events with Greatest Economic Consequences"
  ),
  caption.placement = "top",
  type = "html",
  include.rownames = FALSE,
  html.table.attributes = 'class="table-bordered", width="100%"'
)

# Horizontal bar chart of the same ten event types; reorder() sorts the bars
# by cost so the largest appears at the top after coord_flip().
damageCostImpactChart <- ggplot(
  head(damageCostImpactData, 10),
  aes(
    x = reorder(EVENT_TYPE, DAMAGE_IMPACT),
    y = DAMAGE_IMPACT,
    fill = EVENT_TYPE
  )
) +
  coord_flip() +
  geom_bar(stat = "identity") +
  xlab("Event Type") +
  ylab("Total Property / Crop Damage Cost\n(in Billions)") +
  theme(plot.title = element_text(size = 14, hjust = 0.5)) +
  ggtitle("Top 10 Weather Events with\nEconomic Consequences")
print(damageCostImpactChart)

Conclusion

Which types of weather events are most harmful to population health?

Tornados

Which types of weather events have the greatest economic consequences?

Floods