Synopsis

This report analyzes the NOAA Storm Database (1950–2011) to identify which event types are most harmful to population health and which have the greatest economic consequences.

Population health impact is measured using total fatalities and injuries.

Economic impact is measured using combined property and crop damage after converting damage exponents into numeric multipliers.

Results are summarized by event type and shown in a small number of figures and tables to support prioritization decisions for emergency preparedness.

Data processing

The raw storm data file is read directly from the bzip2-compressed CSV file (.bz2), without a separate decompression step.

Only variables needed for event type, health outcomes and damages are kept.

Property and crop damages are converted to dollars using the exponent codes (H = hundreds, K = thousands, M = millions, B = billions) and then summed.

#packages
library(dplyr)
library(ggplot2)
library(readr)

#raw data
# Path to the bz2-compressed storm CSV; read_csv() decompresses .bz2 files
# on the fly, so the archive is read as-is.
fn <- "repdata-data-StormData.csv.bz2"

# Fail early with a clear message if the file is not in the working directory.
if (!file.exists(fn)) {
  stop("Storm data file not found: ", fn, call. = FALSE)
}

# The damage-exponent columns are pinned to character instead of relying on
# type guessing: they are sparsely populated, and a column that looks empty
# in the rows sampled for guessing may be typed as logical, which would turn
# codes such as "K" or "M" into NA. All other columns are still guessed.
d <- read_csv(
  fn,
  col_types = cols(
    PROPDMGEXP = col_character(),
    CROPDMGEXP = col_character(),
    .default = col_guess()
  ),
  show_col_types = FALSE
)

# Structural check of the raw import: number of rows and columns.
# The `##` lines are the values printed when the report was rendered.
dim(d)
## [1] 902297     37
# Column names of the raw table. Only EVTYPE, FATALITIES, INJURIES, PROPDMG,
# PROPDMGEXP, CROPDMG and CROPDMGEXP are used by the processing steps below.
names(d)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Reduce the raw table to the seven analysis columns under short names:
#   e    event type          fat  fatalities        inj  injuries
#   pd   property damage     pexp property exponent code
#   cd   crop damage         cexp crop exponent code
# The three text columns are then trimmed and upper-cased so that spellings
# differing only in case or surrounding whitespace compare equal.
d2 <- d %>%
  select(
    e = EVTYPE,
    fat = FATALITIES,
    inj = INJURIES,
    pd = PROPDMG,
    pexp = PROPDMGEXP,
    cd = CROPDMG,
    cexp = CROPDMGEXP
  ) %>%
  mutate(across(c(e, pexp, cexp), function(v) toupper(trimws(v))))

#map exponents to multipliers
# xmul(x): translate damage-exponent codes into numeric multipliers.
#   x        vector of exponent codes, expected to be trimmed and upper-cased
#            by the caller.
#   returns  double vector of the same length as x:
#              "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9
#              "" or NA (no exponent recorded) -> 1, i.e. the amount is
#              taken at face value
#              any other code -> NA, so unrecognized codes stay visible to
#              the caller instead of being silently scaled
xmul <- function(x) {
  # Coerce first: an all-NA logical vector would otherwise act as a logical
  # subscript (and be recycled) in the lookup below.
  x <- as.character(x)
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  out <- unname(known[x])
  # read_csv() turns empty fields into NA by default, so a missing exponent
  # usually arrives as NA rather than ""; both must mean "no scaling".
  out[is.na(x) | x == ""] <- 1
  out
}

#compute total damages in dollars
# pm / cm  multipliers derived from the property / crop exponent codes
# dmg      property damage plus crop damage, in dollars
# xmul() can return NA. Each component is coalesced to 0 on its own so that
# an unusable multiplier on one side (typically the side with no damage
# recorded) does not turn the whole row's damage into NA and discard the
# valid amount on the other side.
d3 <- d2 %>%
  mutate(
    pm = xmul(pexp),
    cm = xmul(cexp),
    dmg = coalesce(pd * pm, 0) + coalesce(cd * cm, 0)
  )

Results

# Health impact per event type: one row per `e` with total fatalities
# (`fat`), total injuries (`inj`) and their sum (`tot`), ordered from the
# most to the least harmful. Missing counts are ignored when summing.
h <- d3 %>%
  group_by(e) %>%
  summarise(
    across(c(fat, inj), function(v) sum(v, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  mutate(tot = fat + inj) %>%
  arrange(desc(tot))

#top 10 health impacts
# `h` is already sorted by descending `tot`, so the first rows are the top
# event types. head() returns the same rows as h[1:10, ] when at least ten
# event types exist, and simply returns fewer rows otherwise instead of
# indexing past the end of the table.
h10 <- head(h, 10)

# Figure 1: horizontal bar chart of the top event types by combined
# fatalities and injuries. Event types are ordered by `tot`, and the axes
# are flipped so the long event names are readable on the vertical axis.
h10 %>%
  ggplot(aes(x = reorder(e, tot), y = tot)) +
  geom_col() +
  xlab("Event type") +
  ylab("Total fatalities + Injuries") +
  labs(caption = "figure 1: Top 10 event types by total fatalities and injuries (NOAA STORM DATABASE, 1950–2011).") +
  coord_flip()

# Economic impact per event type: rows without a usable damage value are
# removed, the remaining property + crop damage is totalled per `e`, and
# the result is ordered from the costliest event type downwards.
eco <- d3[!is.na(d3$dmg), ] %>%
  count(e, wt = dmg, name = "dmg") %>%
  arrange(desc(dmg))

#top 10 (in billions)
# `eco` is already sorted by descending damage. head() returns the same rows
# as eco[1:10, ] when at least ten event types exist, and returns fewer rows
# otherwise instead of indexing past the end of the table.
# `bil` expresses the dollar total in billions for the table and the plot.
eco10 <- head(eco, 10) %>%
  mutate(bil = dmg / 1e9)

# Print the top-10 table: event type (`e`), total damage in dollars (`dmg`)
# and the same total in billions (`bil`).
# The `##` lines are the output captured when the report was rendered; they
# reflect the data and processing code of that run and need to be
# regenerated whenever the processing steps change.
eco10
## # A tibble: 10 × 3
##    e                          dmg    bil
##    <chr>                    <dbl>  <dbl>
##  1 FLOOD             138007444500 138.  
##  2 HURRICANE/TYPHOON  29348167800  29.3 
##  3 TORNADO            16520148150  16.5 
##  4 HURRICANE          12405268000  12.4 
##  5 RIVER FLOOD        10108369000  10.1 
##  6 HAIL               10020591590  10.0 
##  7 FLASH FLOOD         8715295130   8.72
##  8 ICE STORM           5925147300   5.93
##  9 STORM SURGE/TIDE    4641493000   4.64
## 10 THUNDERSTORM WIND   3813647990   3.81
# Figure 2: horizontal bar chart of the top event types by combined
# property and crop damage, in billions of dollars. Event types are ordered
# by `bil`, and the axes are flipped so the event names stay readable.
eco10 %>%
  ggplot(aes(x = reorder(e, bil), y = bil)) +
  geom_col() +
  xlab("Event type") +
  ylab("Total damage(usd, billions)") +
  labs(caption = "figure 2: Top 10 event types by combined property and crop damage (NOAA STORM DATABASE, 1950–2011).") +
  coord_flip()