Synopsis

This analysis explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to identify the weather events that are most harmful in terms of public health and economic consequences. The data span 1950 to 2011 and contain records of fatalities, injuries, property damage, and crop damage. We determine which event types (e.g., tornado, flood) cause the most deaths and injuries, and which cause the highest economic losses (property plus crop damage). The dataset is processed entirely within R, and key findings are visualized with bar plots. Results show that tornadoes are the most dangerous to human life, while floods and hurricanes cause the greatest economic damage.


Data Processing

library(dplyr)
library(ggplot2)
library(readr)

# Load the data
# The compressed .bz2 file is passed to read.csv() directly; no separate
# extraction step is performed here.
data <- read.csv(file = "repdata_data_StormData.csv.bz2")

# Check the structure
# Prints the column names, types, and first few values of every variable.
str(object = data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Select needed columns
# Only the event type, the casualty counts, and the damage amounts with their
# exponent codes are used by the rest of the analysis.
storm <- data %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Convert PROPDMGEXP and CROPDMGEXP to multipliers
# Lookup table from exponent code to numeric multiplier. Only upper-case
# letter keys are listed because the codes are upper-cased before the lookup,
# so lower-case keys could never match.
exp_map <- c(
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9,
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9
)

storm$PROPDMGEXP <- toupper(as.character(storm$PROPDMGEXP))
storm$CROPDMGEXP <- toupper(as.character(storm$CROPDMGEXP))

# Indexing a named vector by name returns a named vector; unname() drops those
# per-row names so they are not stored on the multiplier columns or carried
# into the damage columns computed below.
storm$PROPDMG_MULTI <- unname(exp_map[storm$PROPDMGEXP])
storm$CROPDMG_MULTI <- unname(exp_map[storm$CROPDMGEXP])

# Codes with no entry in exp_map (including the empty string) look up as NA;
# treat them as a multiplier of 1, i.e. take the damage amount at face value.
storm$PROPDMG_MULTI[is.na(storm$PROPDMG_MULTI)] <- 1
storm$CROPDMG_MULTI[is.na(storm$CROPDMG_MULTI)] <- 1

# Calculate total damage
# Scale each damage amount by its multiplier, then add property and crop
# damage into a single figure per record.
storm <- storm %>%
  mutate(
    property_damage = PROPDMG * PROPDMG_MULTI,
    crop_damage = CROPDMG * CROPDMG_MULTI,
    total_damage = property_damage + crop_damage
  )
# Population health impact ----
# Sum fatalities and injuries per event type, rank event types by the combined
# count, and keep the ten highest.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE),
    total_harm = fatalities + injuries
  ) %>%
  arrange(-total_harm) %>%
  slice_head(n = 10)

# Plot
# Bars are ordered from the most to the least harmful event type; the x-axis
# labels are rotated so long event names stay readable.
ggplot(
  health,
  aes(x = reorder(EVTYPE, desc(total_harm)), y = total_harm)
) +
  geom_col(fill = "tomato") +
  ggtitle("Top 10 Weather Events by Total Harm to Population Health") +
  xlab("Event Type") +
  ylab("Fatalities + Injuries") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Economic impact ----
# Sum total_damage per event type, rank event types by that total, and keep
# the ten costliest.
economic <- storm %>%
  group_by(EVTYPE) %>%
  summarise(total_cost = sum(total_damage, na.rm = TRUE)) %>%
  arrange(-total_cost) %>%
  slice_head(n = 10)

# Plot
# Bars are ordered from the most to the least costly event type; costs are
# divided by 1e9 so the y axis reads in billions.
ggplot(
  economic,
  aes(x = reorder(EVTYPE, desc(total_cost)), y = total_cost / 1e9)
) +
  geom_col(fill = "steelblue") +
  ggtitle("Top 10 Weather Events by Economic Damage") +
  xlab("Event Type") +
  ylab("Total Damage (Billion USD)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))