Synopsis

This report explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to identify which types of weather events are most harmful to population health and which have the greatest economic consequences. The analysis covers data from 1950 to November 2011. We process the raw data, convert the coded damage magnitudes into dollar amounts, and visualize the top ten event types by combined fatalities and injuries and by total economic damage (property plus crop damage). The goal is to inform resource prioritization for emergency preparedness.

Data Processing

# Load the packages the analysis relies on: dplyr supplies %>%, select(),
# mutate(), group_by(), summarize(), arrange(), and coalesce(); ggplot2 draws
# the plots in the Results section. Loading an already-attached package is a
# no-op, so this is safe even if they are loaded elsewhere.
library(dplyr)
library(ggplot2)

# Load raw data (bzip2-compressed CSV, decompressed while reading)
storm_data <- read.csv(
  bzfile("repdata_data_StormData.csv.bz2"),
  stringsAsFactors = FALSE
)

# Select relevant columns
storm <- storm_data %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Convert damage exponents to numeric multipliers
exp_map <- c("K" = 1e3, "M" = 1e6, "B" = 1e9)

storm <- storm %>%
  mutate(
    # Raw event labels may differ only by letter case or surrounding
    # whitespace; normalizing keeps such variants in a single group when the
    # data are later aggregated by EVTYPE.
    EVTYPE = toupper(trimws(EVTYPE)),
    # Exponent codes are matched case-insensitively ("k" is treated as "K").
    PROPDMGEXP = toupper(PROPDMGEXP),
    CROPDMGEXP = toupper(CROPDMGEXP),
    # Look up the multiplier by code. unname() drops the per-row names that
    # named-vector indexing would otherwise attach to the new columns.
    # Codes missing from exp_map (including blank) look up as NA and are
    # replaced by 0, so those rows contribute no damage.
    # NOTE(review): confirm that zero is the intended treatment for blank or
    # unrecognized exponent codes rather than some other multiplier.
    prop_mult = coalesce(unname(exp_map[PROPDMGEXP]), 0),
    crop_mult = coalesce(unname(exp_map[CROPDMGEXP]), 0),
    # Total economic damage per record: property plus crop damage, in dollars
    total_damage = PROPDMG * prop_mult + CROPDMG * crop_mult
  )

Results

# Aggregate health outcomes per event type: total fatalities, total injuries,
# and their sum. Keep the ten event types with the largest combined count.
health_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarize(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    total_harm = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_harm)) %>%
  head(n = 10)

# Horizontal bar chart of the ten event types, ordered by combined harm
ggplot(
  data = health_impact,
  mapping = aes(x = reorder(EVTYPE, total_harm), y = total_harm)
) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Total Harm to Population Health",
    x = "Event Type",
    y = "Fatalities + Injuries"
  )

# Add up the per-record damage totals within each event type and keep the ten
# event types with the largest sums.
economic_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarize(total_damage = sum(total_damage, na.rm = TRUE)) %>%
  arrange(desc(total_damage)) %>%
  head(n = 10)

# Horizontal bar chart of the ten event types; damage is divided by 1e9 so the
# axis reads in billions.
ggplot(
  data = economic_impact,
  mapping = aes(x = reorder(EVTYPE, total_damage), y = total_damage / 1e9)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Economic Damage",
    x = "Event Type",
    y = "Total Damage (Billions USD)"
  )