Synopsis

This analysis explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database covering weather events from 1950 to November 2011. The goal is to identify which types of severe weather events are most harmful to population health and which have the greatest economic consequences. We examine fatalities, injuries, property damage, and crop damage across all recorded event types. Tornadoes are found to be the most harmful to population health, causing the highest number of both fatalities and injuries. Floods cause the greatest overall economic damage when property and crop damage are combined. These findings can help government and municipal managers prioritize disaster preparedness resources.

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Read the storm records straight from the compressed archive; character
# columns are kept as plain strings rather than converted to factors.
storm <- read.csv(
  file = "repdata-data-StormData.csv.bz2",
  stringsAsFactors = FALSE
)
dim(storm)
## [1] 902297     37
# Preview the event-type, casualty, and damage columns
head(
  storm[c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  )]
)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0
# Sum fatalities and injuries within each event type, add their combined
# count, and rank event types by that combined count (largest first)
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(across(c(FATALITIES, INJURIES), ~ sum(.x, na.rm = TRUE))) %>%
  mutate(TOTAL = FATALITIES + INJURIES) %>%
  arrange(desc(TOTAL))

# The ten event types with the largest combined casualty count
top_health <- health %>%
  slice_head(n = 10)
top_health
## # A tibble: 10 × 4
##    EVTYPE            FATALITIES INJURIES TOTAL
##    <chr>                  <dbl>    <dbl> <dbl>
##  1 TORNADO                 5633    91346 96979
##  2 EXCESSIVE HEAT          1903     6525  8428
##  3 TSTM WIND                504     6957  7461
##  4 FLOOD                    470     6789  7259
##  5 LIGHTNING                816     5230  6046
##  6 HEAT                     937     2100  3037
##  7 FLASH FLOOD              978     1777  2755
##  8 ICE STORM                 89     1975  2064
##  9 THUNDERSTORM WIND        133     1488  1621
## 10 WINTER STORM             206     1321  1527
# Translate PROPDMGEXP / CROPDMGEXP exponent codes into numeric multipliers.
#
# exp: vector of exponent codes; matching is case-insensitive.
# Returns a numeric vector the same length as `exp`:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9.
# Every other value (blank, digits, symbols, NA) maps to 1, i.e. the
# accompanying damage figure is left unscaled.
exp_convert <- function(exp) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9, H = 1e2)
  hit <- match(toupper(exp), names(multipliers))
  scale_by <- unname(multipliers[hit])
  # Codes that are not in the lookup table fall back to a multiplier of 1
  scale_by[is.na(hit)] <- 1
  scale_by
}

# Scale the recorded property and crop damage figures by their exponent
# codes, then add the combined per-record damage
storm[["PROP_DAMAGE"]] <- storm[["PROPDMG"]] * exp_convert(storm[["PROPDMGEXP"]])
storm[["CROP_DAMAGE"]] <- storm[["CROPDMG"]] * exp_convert(storm[["CROPDMGEXP"]])
storm[["TOTAL_DAMAGE"]] <- storm[["PROP_DAMAGE"]] + storm[["CROP_DAMAGE"]]

# Total combined (property + crop) damage per event type, largest first
economic <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    TOTAL_DAMAGE = sum(TOTAL_DAMAGE, na.rm = TRUE)
  ) %>%
  arrange(desc(TOTAL_DAMAGE))

# The ten costliest event types
top_economic <- economic %>%
  slice_head(n = 10)
top_economic
## # A tibble: 10 × 2
##    EVTYPE             TOTAL_DAMAGE
##    <chr>                     <dbl>
##  1 FLOOD             150319678257 
##  2 HURRICANE/TYPHOON  71913712800 
##  3 TORNADO            57352114049.
##  4 STORM SURGE        43323541000 
##  5 HAIL               18758222016.
##  6 FLASH FLOOD        17562129167.
##  7 DROUGHT            15018672000 
##  8 HURRICANE          14610229010 
##  9 RIVER FLOOD        10148404500 
## 10 ICE STORM           8967041360

Results

Question 1: Which events are most harmful to population health?

# Long format: one row per (event type, harm type) pair, so fatalities and
# injuries can be drawn as side-by-side bars
library(tidyr)
top_health_long <- top_health %>%
  select(EVTYPE, FATALITIES, INJURIES) %>%
  pivot_longer(
    cols = -EVTYPE,
    names_to = "Type",
    values_to = "Count"
  )

# Event types on the x axis are ordered by decreasing Count via reorder()
ggplot(
  top_health_long,
  aes(x = reorder(EVTYPE, -Count), y = Count, fill = Type)
) +
  geom_col(position = "dodge") +
  ggtitle("Figure 1: Top 10 Weather Events Most Harmful to Population Health") +
  xlab("Event Type") +
  ylab("Number of People Affected") +
  labs(fill = "Harm Type") +
  theme_minimal() +
  # Slant the event labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

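Figure 1 shows the 10 weather event types with the highest combined number of fatalities and injuries across the United States. Tornadoes are by far the most harmful event type, causing over 5,000 fatalities and more than 90,000 injuries in the recorded period. Event types are used exactly as recorded in the database, so closely related labels (for example, TSTM WIND and THUNDERSTORM WIND) appear as separate entries.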

Question 2: Which events have the greatest economic consequences?

# Bar chart of total damage per event type, costliest first; the y axis is
# rescaled to billions to match its label
ggplot(
  top_economic,
  aes(x = reorder(EVTYPE, -TOTAL_DAMAGE), y = TOTAL_DAMAGE / 1e9)
) +
  geom_col(fill = "steelblue") +
  ggtitle("Figure 2: Top 10 Weather Events with Greatest Economic Consequences") +
  xlab("Event Type") +
  ylab("Total Economic Damage (Billions USD)") +
  theme_minimal() +
  # Slant the event labels so long names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

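Figure 2 shows the top 10 weather event types with the greatest combined property and crop damage. Floods have the greatest economic impact with over $150 billion in total damage, followed by hurricanes/typhoons and tornadoes. Event types are used exactly as recorded in the database, so related labels such as HURRICANE/TYPHOON and HURRICANE are counted separately. Damage amounts are scaled using the H, K, M, and B exponent codes; any other exponent code leaves the recorded figure unscaled.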