Synopsis

This analysis explores the NOAA Storm Database to identify which weather events cause the most harm to population health and the greatest economic damage. We processed data from 1950 to 2011, focusing on fatalities and injuries for health impacts, and property/crop damage for economic impacts. Our findings show that tornadoes are the primary threat to health, while floods cause the most significant economic loss.

Data Processing

The analysis uses the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The data were loaded directly from the provided bzip2-compressed CSV file.

# Locate the raw storm data, downloading it only when no local copy exists.
file_name <- "stormData.csv.bz2"

if (!file.exists(file_name)) {
    # Plain URL string: download.file() needs the bare address, not
    # Markdown link syntax.
    url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
    # Explicit binary mode so the compressed archive is never written as text.
    download.file(url, file_name, method = "libcurl", mode = "wb")
}

# read.csv() decompresses .bz2 files transparently.
stormData <- read.csv(file_name)

# Keep only the columns used below: event type, the two health counts, and
# the damage amounts with their exponent codes.
subData <- stormData[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]


# Convert damage-exponent codes to numeric multipliers.
#
# Args:
#   e: Exponent code(s); character or factor, any letter case, any length.
#
# Returns:
#   An unnamed numeric vector the same length as `e`: 1000 for "K",
#   1000000 for "M", 1000000000 for "B", and 1 for every other value
#   (blank, digits, symbols, or NA).
convExp <- function(e) {
    codes <- toupper(as.character(e))
    multipliers <- c(1000, 1000000, 1000000000)[match(codes, c("K", "M", "B"))]
    # match() gives NA for unrecognised or missing codes; those rows keep
    # their recorded amount unchanged, i.e. a multiplier of 1.
    multipliers[is.na(multipliers)] <- 1
    multipliers
}

# Look up the multiplier for every row. vapply() enforces exactly one numeric
# value per element, and USE.NAMES = FALSE stops the exponent strings from
# being attached as names to each multiplier column.
subData$PropMult <- vapply(subData$PROPDMGEXP, convExp, numeric(1), USE.NAMES = FALSE)
subData$CropMult <- vapply(subData$CROPDMGEXP, convExp, numeric(1), USE.NAMES = FALSE)

# Total damage per event in dollars: property plus crop, each scaled by its
# own multiplier.
subData$TotalDamage <- (subData$PROPDMG * subData$PropMult) + (subData$CROPDMG * subData$CropMult)


library(ggplot2)

# Total health impact (fatalities plus injuries) summed per event type.
healthSum <- aggregate(FATALITIES + INJURIES ~ EVTYPE, data = subData, sum)
names(healthSum) <- c("EVTYPE", "TotalHealth")

# Ten most harmful event types; head() returns fewer rows instead of padding
# with NA rows when there are fewer than ten event types.
healthTop10 <- head(healthSum[order(-healthSum$TotalHealth), ], 10)

# Bar chart ordered from most to least harmful.
ggplot(healthTop10, aes(x = reorder(EVTYPE, -TotalHealth), y = TotalHealth)) +
    geom_col(fill = "firebrick") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Top 10 Events Harmful to Population Health",
         x = "Event Type", y = "Total Fatalities and Injuries")

# Total economic damage (property plus crop) summed per event type.
econSum <- aggregate(TotalDamage ~ EVTYPE, data = subData, sum)

# Ten costliest event types; head() returns fewer rows instead of padding
# with NA rows when there are fewer than ten event types.
econTop10 <- head(econSum[order(-econSum$TotalDamage), ], 10)

# Bar chart ordered from most to least costly.
ggplot(econTop10, aes(x = reorder(EVTYPE, -TotalDamage), y = TotalDamage)) +
    geom_col(fill = "darkblue") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Top 10 Events with Greatest Economic Consequences",
         x = "Event Type", y = "Total Economic Damage (USD)")