Synopsis

This analysis explores the NOAA Storm Database to answer two questions: which types of weather events are most harmful to population health, and which have the greatest economic consequences. Aggregating the records from 1950 to 2011 by event type, we find that tornadoes cause the highest combined number of fatalities and injuries. In terms of economic impact, floods cause the greatest combined property and crop damage once the recorded damage amounts are scaled by their exponent codes (for example, K = thousands, M = millions, B = billions).

Data Processing

The following code loads the raw data directly from the bzip2-compressed CSV file and prepares the variables for analysis. We keep only the columns for event type, health impact (fatalities and injuries), and economic damage (property and crop damage amounts together with their exponent codes).

library(ggplot2)
library(dplyr)

# 1. Read the storm data from the bzip2 archive, which must sit in the working
#    directory. The read is skipped when an object named `stormData` already
#    exists, so re-running the script does not re-read the file.
if (!exists("stormData")) {
    stormData <- bzfile("repdata_data_StormData.csv.bz2") %>%
        read.csv()
}

# 2. Keep only the columns the analysis uses: the event type, the two health
#    counts, and the damage amounts with their exponent codes.
storm_sub <- select(
    stormData,
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
)

# 3. Clean the Economic Exponents

#' Convert damage exponent codes into numeric multipliers.
#'
#' Vectorized: accepts a single code or a whole column of codes.
#'
#' @param exp Character (or factor) vector of exponent codes. Matching is
#'   case-insensitive.
#' @return A numeric vector the same length as `exp`:
#'   "H" -> 100, "K" -> 1000, "M" -> 1e+06, "B" -> 1e+09;
#'   a code that parses as a number `n` -> 10^n;
#'   anything else (including "", "-", "?", "+" and NA) -> 1.
convert_exponent <- function(exp) {
    code <- toupper(as.character(exp))

    # Default of 1 covers blanks, punctuation, NA and unrecognised codes.
    multiplier <- rep(1, length(code))

    # Numeric codes are powers of ten. Non-numeric codes are expected here and
    # simply coerce to NA, so the coercion warning is suppressed for this one
    # call only.
    power <- suppressWarnings(as.numeric(code))
    is_power <- !is.na(power)
    multiplier[is_power] <- 10^power[is_power]

    # Letter codes are resolved through a lookup table.
    letter_values <- c(H = 100, K = 1000, M = 1e+06, B = 1e+09)
    letter_idx <- match(code, names(letter_values))
    is_letter <- !is.na(letter_idx)
    multiplier[is_letter] <- unname(letter_values[letter_idx[is_letter]])

    multiplier
}

# Apply the conversion to create per-row multipliers and total columns.
# vapply() always yields a plain numeric vector: sapply() would return a list
# for empty input and, for character columns, attach every exponent code as a
# name on the result. as.character() makes the call work for factor columns too.
storm_sub$prop_mult <- vapply(as.character(storm_sub$PROPDMGEXP),
                              convert_exponent, numeric(1), USE.NAMES = FALSE)
storm_sub$crop_mult <- vapply(as.character(storm_sub$CROPDMGEXP),
                              convert_exponent, numeric(1), USE.NAMES = FALSE)

# TOTAL_ECON_LOSS: property plus crop damage, each scaled by its multiplier.
# TOTAL_HEALTH_IMPACT: fatalities plus injuries.
storm_sub <- storm_sub %>%
    mutate(TOTAL_ECON_LOSS = (PROPDMG * prop_mult) + (CROPDMG * crop_mult),
           TOTAL_HEALTH_IMPACT = FATALITIES + INJURIES)

# Total economic loss per event type, keeping the ten costliest types.
econ_summary <- storm_sub %>%
    group_by(EVTYPE) %>%
    # na.rm = TRUE keeps one missing amount from turning a whole event type's
    # total into NA; .groups = "drop" states the ungrouped result explicitly.
    summarise(Total_Econ = sum(TOTAL_ECON_LOSS, na.rm = TRUE),
              .groups = "drop") %>%
    arrange(desc(Total_Econ)) %>%
    slice(1:10)

# Bar chart of the top ten, ordered from largest to smallest loss and scaled
# to billions of dollars.
ggplot(econ_summary, aes(x = reorder(EVTYPE, -Total_Econ), y = Total_Econ/1e+09)) +
    geom_col(fill = "darkblue") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Top 10 Weather Events with Greatest Economic Impact",
         subtitle = "Total Damage in Billions of USD",
         x = "Event Type",
         y = "Damage (Billions $)")

# Total fatalities plus injuries per event type, keeping the ten most harmful
# types.
health_summary <- storm_sub %>%
    group_by(EVTYPE) %>%
    # na.rm = TRUE keeps one missing count from turning a whole event type's
    # total into NA; .groups = "drop" states the ungrouped result explicitly.
    summarise(Total_Health = sum(TOTAL_HEALTH_IMPACT, na.rm = TRUE),
              .groups = "drop") %>%
    arrange(desc(Total_Health)) %>%
    slice(1:10)

# Bar chart of the top ten, ordered from largest to smallest combined count.
ggplot(health_summary, aes(x = reorder(EVTYPE, -Total_Health), y = Total_Health)) +
    geom_col(fill = "darkred") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Top 10 Most Harmful Weather Events to Population Health",
         subtitle = "Combined Fatalities and Injuries (1950-2011)",
         x = "Event Type",
         y = "Total Fatalities & Injuries")