This report analyzes the NOAA Storm Database (1950–2011) to identify which event types are most harmful to population health and which have the greatest economic consequences.
Population health impact is measured using total fatalities and injuries.
Economic impact is measured using combined property and crop damage after converting damage exponents into numeric multipliers.
Results are summarized by event type and shown in a small number of figures and tables to support prioritization decisions for emergency preparedness.
The raw storm data are read directly from the bzip2-compressed CSV file (.bz2).
Only variables needed for event type, health outcomes and damages are kept.
Property and crop damages are converted to dollars using the exponent codes (H, K, M, B) and then summed.
#packages
library(dplyr)
library(ggplot2)
library(readr)
# raw data
fn <- "repdata-data-StormData.csv.bz2"
# The steps below call toupper()/trimws() on EVTYPE and on the two damage
# exponent columns and compare them against letter codes, so those columns
# must be character. Declaring them explicitly keeps the result independent
# of readr's type guessing; every other column is still guessed.
d <- read_csv(
  fn,
  col_types = cols(
    EVTYPE = col_character(),
    PROPDMGEXP = col_character(),
    CROPDMGEXP = col_character(),
    .default = col_guess()
  ),
  show_col_types = FALSE
)
dim(d)
## [1] 902297 37
names(d)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep only the seven columns the analysis uses, under short names, then
# normalise the three text columns (event type and both damage exponents)
# by trimming surrounding whitespace and upper-casing.
d2 <- d %>%
  select(
    e = EVTYPE,
    fat = FATALITIES,
    inj = INJURIES,
    pd = PROPDMG,
    pexp = PROPDMGEXP,
    cd = CROPDMG,
    cexp = CROPDMGEXP
  ) %>%
  mutate(across(c(e, pexp, cexp), ~ toupper(trimws(.x))))
# Map damage exponent codes to numeric multipliers.
#
# x: character vector of exponent codes, expected already trimmed and
#    upper-cased by the caller.
# Returns a numeric vector the same length as x:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#   "" or NA -> 1 (no exponent: the amount is taken at face value),
#   any other code -> NA.
xmul <- function(x) {
  codes <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  x <- as.character(x)
  # read_csv() turns blank fields into NA, so a missing exponent arrives
  # here as NA rather than "". Both spellings must map to 1; otherwise
  # every record without an exponent ends up with NA damage.
  blank <- is.na(x) | x == ""
  # Name lookup yields NA for unrecognised codes; unname() drops the codes
  # that indexing would otherwise carry along as element names.
  mult <- unname(codes[x])
  mult[blank] <- 1
  mult
}
# Compute total damage in dollars (property + crop).
# pm / cm are the multipliers for the property and crop exponent codes.
d3 <- d2 %>%
  mutate(
    pm = xmul(pexp),
    cm = xmul(cexp),
    # A zero amount is zero dollars whatever its exponent is. Without this
    # guard an NA multiplier on a zero-valued side turns the whole sum into
    # NA and discards the other side's valid damage. A non-zero amount with
    # an NA multiplier still yields NA.
    dmg = if_else(pd == 0, 0, pd * pm) + if_else(cd == 0, 0, cd * cm)
  )
# Health impact per event type: total fatalities, total injuries and their
# sum, sorted so the most harmful event types come first.
h <- d3 %>%
  group_by(e) %>%
  summarise(
    across(c(fat, inj), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  mutate(tot = fat + inj) %>%
  arrange(desc(tot))
# Top 10 health impacts.
# head() returns at most 10 rows; h[1:10, ] would pad the result with
# all-NA rows if fewer than 10 event types were present.
h10 <- head(h, 10)
# Horizontal bar chart, bars ordered by total casualties.
ggplot(h10, aes(x = reorder(e, tot), y = tot)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Total fatalities + Injuries",
    caption = "figure 1: Top 10 event types by total fatalities and injuries (NOAA STORM DATABASE, 1950–2011)."
  )
# Economic impact per event type (property + crop damage, in dollars),
# largest first. Rows whose damage could not be computed are dropped before
# aggregating, so the per-group sum needs no NA handling of its own.
eco <- d3 %>%
  filter(!is.na(dmg)) %>%
  group_by(e) %>%
  summarise(dmg = sum(dmg), .groups = "drop") %>%
  arrange(desc(dmg))
# Top 10 event types by damage, with an extra column in billions of dollars.
# head() returns at most 10 rows; eco[1:10, ] would pad the result with
# all-NA rows if fewer than 10 event types were present.
eco10 <- head(eco, 10) %>%
  mutate(bil = dmg / 1e9)
eco10
## # A tibble: 10 × 3
## e dmg bil
## <chr> <dbl> <dbl>
## 1 FLOOD 138007444500 138.
## 2 HURRICANE/TYPHOON 29348167800 29.3
## 3 TORNADO 16520148150 16.5
## 4 HURRICANE 12405268000 12.4
## 5 RIVER FLOOD 10108369000 10.1
## 6 HAIL 10020591590 10.0
## 7 FLASH FLOOD 8715295130 8.72
## 8 ICE STORM 5925147300 5.93
## 9 STORM SURGE/TIDE 4641493000 4.64
## 10 THUNDERSTORM WIND 3813647990 3.81
# Figure 2: horizontal bar chart of the ten costliest event types, with
# bars ordered by damage in billions of dollars.
ggplot(eco10, aes(x = reorder(e, bil), y = bil)) +
  geom_bar(stat = "identity") +
  xlab("Event type") +
  ylab("Total damage(usd, billions)") +
  labs(
    caption = "figure 2: Top 10 event types by combined property and crop damage (NOAA STORM DATABASE, 1950–2011)."
  ) +
  coord_flip()