Synopsis

This report analyzes the NOAA Storm Database to identify which weather event types are most harmful to population health and which have the greatest economic impact across the United States. The analysis focuses on fatalities and injuries to represent population health impacts, and on combined property and crop damage to represent economic consequences. The raw compressed dataset is downloaded (if needed), read directly from the .csv.bz2 file, and processed entirely within this document for reproducibility. Two transformations are required: event names are standardized (uppercased, with leading, trailing, and repeated internal whitespace removed) to reduce inconsistencies, and damage amounts are converted to numeric dollar values using the database's exponent codes (H, K, M, B, and single digits). Impacts are then aggregated by event type, and the highest-impact event categories are ranked. Results are presented using two figures. These findings can help municipal or emergency management stakeholders understand which event types historically caused the greatest harm and losses. The conclusions are based on the available database records and should be read with the caveat that reporting is more complete in later years.

Data Processing

library(dplyr)
library(ggplot2)
library(readr)
library(stringr)
library(tidyr)
library(scales)

# Obtain the raw data reproducibly. An already-extracted local CSV is used
# when present; otherwise the compressed archive is downloaded once and
# read directly (readr decompresses .bz2 files transparently).
storm_csv <- "repdata_data_StormData.csv"
storm_bz2 <- "repdata_data_StormData.csv.bz2"
storm_url <- paste0(
  "https://d396qusza40orc.cloudfront.net/",
  "repdata%2Fdata%2FStormData.csv.bz2"
)

storm_path <- if (file.exists(storm_csv)) storm_csv else storm_bz2
if (!file.exists(storm_path)) {
  # mode = "wb" keeps the binary archive intact on Windows.
  download.file(storm_url, destfile = storm_path, mode = "wb")
}

storm_raw <- read_csv(storm_path)


# Keep only the columns this analysis uses and normalise them: event labels
# are uppercased with whitespace squished, counts and damage figures are
# coerced to numeric, and the damage exponent codes are uppercased.
storm <- storm_raw %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    EVTYPE = str_squish(str_to_upper(EVTYPE)),
    across(c(FATALITIES, INJURIES, PROPDMG, CROPDMG), as.numeric),
    across(c(PROPDMGEXP, CROPDMGEXP), str_to_upper)
  )

# Ten most frequent standardized event types. (summary() on a character
# vector only reports Length/Class/Mode, so slicing it with [1:10] produced
# NA entries instead of an overview of the event labels.)
storm %>%
  count(EVTYPE, sort = TRUE) %>%
  slice_head(n = 10)
# Translate damage exponent codes into numeric multipliers.
#
# exp_code: character vector of (already uppercased) exponent codes.
# Returns a double vector of the same length:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#   a single digit d -> 10^d,
#   anything else (empty string, NA, "+", "-", "?", ...) -> 1.
#
# Only the digit codes are passed to as.numeric(), so letter and symbol
# codes no longer trigger "NAs introduced by coercion" warnings.
# NOTE(review): reading digit codes as powers of ten is this report's
# convention; confirm against the NOAA documentation if exact digit
# semantics matter.
exp_multiplier <- function(exp_code) {
  code <- as.character(exp_code)
  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  # Default multiplier for blank, missing, or unrecognised codes.
  mult <- rep(1, length(code))

  is_letter <- code %in% names(letter_mult)
  mult[is_letter] <- unname(letter_mult[code[is_letter]])

  is_digit <- code %in% as.character(0:9)
  mult[is_digit] <- 10^as.numeric(code[is_digit])

  mult
}

# Attach dollar-valued damage columns and a combined casualty count.
# Step 1 resolves each exponent code to a multiplier; step 2 scales the raw
# damage figures and adds the per-record totals.
storm <- storm %>%
  mutate(
    PROP_MULT = exp_multiplier(PROPDMGEXP),
    CROP_MULT = exp_multiplier(CROPDMGEXP)
  ) %>%
  mutate(
    PROP_DAMAGE = PROP_MULT * PROPDMG,
    CROP_DAMAGE = CROP_MULT * CROPDMG,
    ECON_DAMAGE = PROP_DAMAGE + CROP_DAMAGE,
    HEALTH_IMPACT = FATALITIES + INJURIES
  )

# Distribution of per-record total economic damage (USD).
summary(storm[["ECON_DAMAGE"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.00e+00 0.00e+00 0.00e+00 5.29e+05 1.00e+03 1.15e+11
# Casualty totals per event type, sorted so the most harmful types come first.
health_by_event <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    across(
      c(FATALITIES, INJURIES, HEALTH_IMPACT),
      function(x) sum(x, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  rename(
    fatalities = FATALITIES,
    injuries = INJURIES,
    health_total = HEALTH_IMPACT
  ) %>%
  arrange(desc(health_total))

# Damage totals (USD) per event type, sorted so the costliest types come first.
econ_by_event <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    across(
      c(PROP_DAMAGE, CROP_DAMAGE, ECON_DAMAGE),
      function(x) sum(x, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  rename(
    prop_damage = PROP_DAMAGE,
    crop_damage = CROP_DAMAGE,
    econ_total = ECON_DAMAGE
  ) %>%
  arrange(desc(econ_total))

# Preview the ten event types with the highest combined casualties.
health_by_event %>% head(n = 10)
## # A tibble: 10 × 4
##    EVTYPE            fatalities injuries health_total
##    <chr>                  <dbl>    <dbl>        <dbl>
##  1 TORNADO                 5633    91346        96979
##  2 EXCESSIVE HEAT          1903     6525         8428
##  3 TSTM WIND                504     6957         7461
##  4 FLOOD                    470     6789         7259
##  5 LIGHTNING                816     5230         6046
##  6 HEAT                     937     2100         3037
##  7 FLASH FLOOD              978     1777         2755
##  8 ICE STORM                 89     1975         2064
##  9 THUNDERSTORM WIND        133     1488         1621
## 10 WINTER STORM             206     1321         1527
# Preview the ten event types with the highest total damage.
econ_by_event %>% head(n = 10)
## # A tibble: 10 × 4
##    EVTYPE              prop_damage crop_damage    econ_total
##    <chr>                     <dbl>       <dbl>         <dbl>
##  1 FLOOD             144657709807   5661968450 150319678257 
##  2 HURRICANE/TYPHOON  69305840000   2607872800  71913712800 
##  3 TORNADO            56947380676.   414953270  57362333946.
##  4 STORM SURGE        43323536000         5000  43323541000 
##  5 HAIL               15735267513.  3025954473  18761221986.
##  6 FLASH FLOOD        16822723978.  1421317100  18244041078.
##  7 DROUGHT             1046106000  13972566000  15018672000 
##  8 HURRICANE          11868319010   2741910000  14610229010 
##  9 RIVER FLOOD         5118945500   5029459000  10148404500 
## 10 ICE STORM           3944927860   5022113500   8967041360

Results

1) Which event types are most harmful to population health?

Event types are ranked by total harm to population health defined as fatalities + injuries.

# health_by_event is already sorted by descending health_total, so the first
# ten rows are the ten most harmful event types.
health_top10 <- head(health_by_event, n = 10)
health_top10
## # A tibble: 10 × 4
##    EVTYPE            fatalities injuries health_total
##    <chr>                  <dbl>    <dbl>        <dbl>
##  1 TORNADO                 5633    91346        96979
##  2 EXCESSIVE HEAT          1903     6525         8428
##  3 TSTM WIND                504     6957         7461
##  4 FLOOD                    470     6789         7259
##  5 LIGHTNING                816     5230         6046
##  6 HEAT                     937     2100         3037
##  7 FLASH FLOOD              978     1777         2755
##  8 ICE STORM                 89     1975         2064
##  9 THUNDERSTORM WIND        133     1488         1621
## 10 WINTER STORM             206     1321         1527
# Reshape to one row per (event type, metric) so that fatalities and injuries
# can be drawn as stacked bar segments.
health_long <- pivot_longer(
  health_top10[, c("EVTYPE", "fatalities", "injuries")],
  cols = -EVTYPE,
  names_to = "metric",
  values_to = "count"
)

# Figure 1: stacked horizontal bars of fatalities and injuries per event type.
# reorder() ranks event types by their mean count across the two metrics,
# which gives the same order as ranking by the combined total.
ggplot(health_long) +
  geom_col(
    aes(
      x = reorder(EVTYPE, count, FUN = mean),
      y = count,
      fill = metric
    )
  ) +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Population Health Impact",
    subtitle = "Counts of fatalities and injuries aggregated across the U.S. (1950–Nov 2011)",
    x = "Event type (standardized)",
    y = "Number of people affected",
    caption = "Figure 1. Bars show injuries and fatalities by the 10 event types with the highest combined impact."
  )

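Tornadoes account for by far the largest combined burden of fatalities and injuries over the period covered by the database (96,979 people in total: 5,633 fatalities and 91,346 injuries), more than ten times the total for the next event type, excessive heat (8,428).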

2) Which types of events have the greatest economic consequences?

Event types are ranked by total economic impact defined as property damage + crop damage (USD).

# econ_by_event is already sorted by descending econ_total, so the first ten
# rows are the ten costliest event types.
econ_top10 <- head(econ_by_event, n = 10)
econ_top10
## # A tibble: 10 × 4
##    EVTYPE              prop_damage crop_damage    econ_total
##    <chr>                     <dbl>       <dbl>         <dbl>
##  1 FLOOD             144657709807   5661968450 150319678257 
##  2 HURRICANE/TYPHOON  69305840000   2607872800  71913712800 
##  3 TORNADO            56947380676.   414953270  57362333946.
##  4 STORM SURGE        43323536000         5000  43323541000 
##  5 HAIL               15735267513.  3025954473  18761221986.
##  6 FLASH FLOOD        16822723978.  1421317100  18244041078.
##  7 DROUGHT             1046106000  13972566000  15018672000 
##  8 HURRICANE          11868319010   2741910000  14610229010 
##  9 RIVER FLOOD         5118945500   5029459000  10148404500 
## 10 ICE STORM           3944927860   5022113500   8967041360
# Figure 2: horizontal bars of total (property + crop) damage per event type,
# ordered by total damage, with the value axis formatted as dollars.
ggplot(econ_top10) +
  geom_col(aes(x = reorder(EVTYPE, econ_total), y = econ_total)) +
  scale_y_continuous(labels = scales::dollar_format()) +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Total Economic Damage",
    subtitle = "Property + crop damage aggregated across the U.S. (1950–Nov 2011)",
    x = "Event type (standardized)",
    y = "Total damage (USD)",
    caption = "Figure 2. Bars show combined crop and property damages for the 10 event types with the highest total losses."
  )

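Floods account for the largest reported combined losses (about $150 billion), driven almost entirely by property damage (about $145 billion). Among the ten costliest event types, drought causes the largest crop losses (about $14 billion), so agricultural planning should weigh drought separately. Floods would therefore be a key category for resource prioritization.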