Synopsis

This report analyzes the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to identify weather events most harmful to public health and those with the greatest economic consequences. The data spans from 1950 to November 2011. We found that tornadoes cause the most fatalities and injuries combined, making them the most dangerous event for population health. For economic damage, floods cause the greatest total property and crop damage. These findings can help government officials prioritize disaster preparedness and resource allocation.

Data Processing

Loading the Data

# Load required libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the storm records from the bzip2-compressed CSV; the bzfile()
# connection is passed straight to read.csv(), and text columns are kept
# as character rather than converted to factors.
storm_data <- read.csv(
  file = bzfile("repdata_data_StormData.csv.bz2"),
  stringsAsFactors = FALSE
)

# Report the size of the table (rows, columns)
dim(storm_data)
## [1] 902297     37
# Show the first rows of the columns used in this analysis
head(
  storm_data[
    ,
    c(
      "EVTYPE", "FATALITIES", "INJURIES",
      "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
    )
  ]
)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Processing for Health Impact

We select columns related to event type, fatalities, and injuries, then aggregate totals by event type.

# Total fatalities and injuries per event type, plus their combined count,
# ranked from most to least harmful. summarise() keeps only the grouping
# column and the new totals, so no explicit column selection is needed.
health_data <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(Total_Harm = Total_Fatalities + Total_Injuries) %>%
  arrange(desc(Total_Harm))

# Keep and display the ten event types with the highest combined count
top10_health <- head(health_data, n = 10)
print(top10_health)
## # A tibble: 10 × 4
##    EVTYPE            Total_Fatalities Total_Injuries Total_Harm
##    <chr>                        <dbl>          <dbl>      <dbl>
##  1 TORNADO                       5633          91346      96979
##  2 EXCESSIVE HEAT                1903           6525       8428
##  3 TSTM WIND                      504           6957       7461
##  4 FLOOD                          470           6789       7259
##  5 LIGHTNING                      816           5230       6046
##  6 HEAT                           937           2100       3037
##  7 FLASH FLOOD                    978           1777       2755
##  8 ICE STORM                       89           1975       2064
##  9 THUNDERSTORM WIND              133           1488       1621
## 10 WINTER STORM                   206           1321       1527

Processing for Economic Impact

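The damage columns (PROPDMG, CROPDMG) are paired with exponent columns (PROPDMGEXP, CROPDMGEXP) that encode their magnitude: H = hundreds, K = thousands, M = millions, B = billions, and a digit 0–9 = that power of ten. Any other code (including a blank) is treated as a multiplier of 1. We convert these into actual dollar values.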

# Convert damage-exponent codes into numeric multipliers.
#
# Args:
#   exp: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns);
#        matching is case-insensitive.
# Returns:
#   An unnamed numeric vector the same length as `exp`:
#   H = 1e2, K = 1e3, M = 1e6, B = 1e9, a single digit d = 10^d,
#   and 1 for any other code (including blank or NA).
get_multiplier <- function(exp) {
  exp <- toupper(exp)

  # A lookup table is used instead of `10^as.numeric(exp)` inside
  # case_when(): that expression is evaluated for every element, so each
  # non-digit code was coerced and raised "NAs introduced by coercion".
  multipliers <- c(1e2, 1e3, 1e6, 1e9, 10^(0:9))
  names(multipliers) <- c("H", "K", "M", "B", as.character(0:9))

  # Character indexing matches names exactly; unknown codes come back as NA
  result <- unname(multipliers[exp])
  result[is.na(result)] <- 1
  result
}

# Scale each damage figure by its exponent code to get dollar amounts, add
# property and crop damage per record, then total the result per event type
# and rank from costliest to least costly.
economic_data <- storm_data %>%
  mutate(
    Prop_Damage = PROPDMG * get_multiplier(PROPDMGEXP),
    Crop_Damage = CROPDMG * get_multiplier(CROPDMGEXP),
    Total_Damage = Prop_Damage + Crop_Damage
  ) %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Economic_Damage = sum(Total_Damage, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Economic_Damage))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `Prop_Damage = PROPDMG * get_multiplier(PROPDMGEXP)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Keep and display the ten event types with the largest total damage
top10_economic <- economic_data %>%
  head(n = 10)
print(top10_economic)
## # A tibble: 10 × 2
##    EVTYPE            Total_Economic_Damage
##    <chr>                             <dbl>
##  1 FLOOD                     150319678257 
##  2 HURRICANE/TYPHOON          71913712800 
##  3 TORNADO                    57362333946.
##  4 STORM SURGE                43323541000 
##  5 HAIL                       18761221986.
##  6 FLASH FLOOD                18243991078.
##  7 DROUGHT                    15018672000 
##  8 HURRICANE                  14610229010 
##  9 RIVER FLOOD                10148404500 
## 10 ICE STORM                   8967041360

Results

Question 1: Which events are most harmful to population health?

# Reshape for plotting
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.3
# One row per (event type, harm type) pair so the two counts can be stacked;
# after dropping Total_Harm, every column except EVTYPE is pivoted.
top10_health_long <- top10_health %>%
  select(EVTYPE, Total_Fatalities, Total_Injuries) %>%
  pivot_longer(
    cols = -EVTYPE,
    names_to = "Type",
    values_to = "Count"
  )

# Figure 1: stacked horizontal bars of fatalities and injuries for the ten
# most harmful event types.
# Event types are ordered by their combined count (sum over both harm types)
# in ascending factor order, so after coord_flip() the most harmful event is
# at the top, matching the layout of the economic-damage figure. The previous
# reorder(EVTYPE, -Count) placed the largest bar at the bottom.
ggplot(
  top10_health_long,
  aes(x = reorder(EVTYPE, Count, FUN = sum), y = Count, fill = Type)
) +
  geom_col(position = "stack") +
  coord_flip() +
  labs(
    title = "Figure 1: Top 10 Weather Events Most Harmful to Population Health",
    subtitle = "Based on total fatalities and injuries (1950–2011)",
    x = "Event Type",
    y = "Total Count (Fatalities + Injuries)",
    fill = "Harm Type",
    caption = "Source: NOAA Storm Database"
  ) +
  scale_fill_manual(values = c("Total_Fatalities" = "#d73027", "Total_Injuries" = "#fc8d59"),
                    labels = c("Fatalities", "Injuries")) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

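Finding: Tornadoes are by far the most harmful weather event for public health, causing nearly 97,000 combined fatalities and injuries (5,633 deaths and 91,346 injuries) between 1950 and 2011. Excessive heat and thunderstorm winds are the next most dangerous events. Note that event types are used exactly as recorded, so related labels such as TSTM WIND and THUNDERSTORM WIND, or HEAT and EXCESSIVE HEAT, are counted separately.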

Question 2: Which events have the greatest economic consequences?

# Express total damage in billions of dollars so the axis is easy to read
top10_economic$Damage_Billions <- top10_economic$Total_Economic_Damage / 1e9

# Figure 2: horizontal bars ordered by damage, costliest event type at the top
ggplot(
  data = top10_economic,
  mapping = aes(x = reorder(EVTYPE, Damage_Billions), y = Damage_Billions)
) +
  geom_col(fill = "#2166ac") +
  coord_flip() +
  labs(
    title = "Figure 2: Top 10 Weather Events with Greatest Economic Consequences",
    subtitle = "Combined property and crop damage in billions USD (1950–2011)",
    x = "Event Type",
    y = "Total Damage (Billions USD)",
    caption = "Source: NOAA Storm Database"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold"))

Finding: Floods cause the greatest economic damage overall (~$150 billion), followed by hurricanes/typhoons (~$72 billion) and tornadoes (~$57 billion). Floods affect property and crops extensively due to their widespread geographic reach.