Synopsis

This analysis explores the NOAA Storm Database to identify which weather event types had the greatest impact on public health and the economy in the United States between 1950 and 2011. Using the recorded fatalities, injuries, property damage, and crop damage, we summarize and visualize the impact of each event type. Data transformations consist of converting the damage exponent codes (K, M, B) into numeric multipliers to obtain dollar amounts, and aggregating the results by event type. Tornadoes are found to be the most harmful to population health, while floods and hurricanes cause the highest economic damage. All data processing and plotting were performed in R, using packages such as dplyr, tidyr, ggplot2, and knitr. The analysis begins with the raw data file and is fully reproducible. Figures are limited to three and include appropriate captions. This report aims to inform stakeholders involved in disaster preparedness. The final figures illustrate the most harmful and most costly weather events. The code and results are included in this document.

Data Processing

# Load required packages again in case chunk order changes.
# tidyr supplies pivot_longer() and ggplot2 supplies the plotting functions
# used in the Results section, so both are attached here alongside dplyr.
library(dplyr)
library(readr)
library(tidyr)
library(ggplot2)

# Load the data. read.csv() reads the bzip2-compressed file directly, so the
# archive does not need to be unpacked first.
data_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(data_file)) {
  stop("Data file 'repdata_data_StormData.csv.bz2' not found.")
}

storm_data <- read.csv(data_file)

# Convert damage exponent codes to numeric multipliers.
#
# Arguments:
#   e  Exponent code(s); anything coercible to character (character, factor,
#      NA). May be a single value or a vector. Matching is case-insensitive.
#
# Returns:
#   An unnamed numeric vector the same length as `e`:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, and 1 for every other value,
#   including empty strings and missing values.
convert_exp <- function(e) {
  code <- toupper(as.character(e))
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)

  # Name-based lookup yields NA for unknown, empty, or missing codes; those
  # all fall back to the default multiplier of 1 instead of raising an error.
  out <- unname(multipliers[code])
  out[is.na(out)] <- 1
  out
}

# Clean and process the dataset.
# Keep only the columns needed for the health and economic summaries, replace
# each damage exponent code with its numeric multiplier, and derive the
# property and crop damage amounts (damage value times multiplier).
# vapply() is used instead of sapply() so the multiplier columns are always
# plain, unnamed numeric vectors: sapply() on a character column attaches the
# original codes as names and does not guarantee a numeric result.
storm_df <- storm_data %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    PROPDMGEXP = vapply(PROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE),
    CROPDMGEXP = vapply(CROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE),
    PROP_DAMAGE = PROPDMG * PROPDMGEXP,
    CROP_DAMAGE = CROPDMG * CROPDMGEXP
  )

Results

Health Impact: Fatalities and Injuries

# Total fatalities and injuries per event type, plus their sum; only the ten
# event types with the largest combined count are kept, largest first.
health_impact <- storm_df %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    Total_Impact = Fatalities + Injuries
  ) %>%
  arrange(desc(Total_Impact)) %>%
  head(10)

# Plot: fatalities and injuries side by side for the ten most harmful event
# types. The table is reshaped to one row per (event type, measure) so the
# two measures can be drawn as dodged bars.
# Event types are ordered by their summed count (fatalities + injuries), the
# same quantity the top ten were ranked on, so after coord_flip() the most
# harmful event type appears at the top, matching the economic damage plot.
health_impact %>%
  pivot_longer(
    cols = c(Fatalities, Injuries),
    names_to = "Type",
    values_to = "Count"
  ) %>%
  ggplot(aes(x = reorder(EVTYPE, Count, FUN = sum), y = Count, fill = Type)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Impacting Health",
    x = "Event Type",
    y = "Number of People"
  ) +
  theme_minimal()

Economic Impact

# Combined property and crop damage per event type; only the ten event types
# with the largest total are kept, largest first.
economic_impact <- storm_df %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Damage = sum(PROP_DAMAGE + CROP_DAMAGE, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Damage)) %>%
  head(10)

# Plot: horizontal bar chart of total damage for the ten costliest event
# types, ordered so the largest total is drawn at the top.
economic_impact %>%
  ggplot(aes(x = reorder(EVTYPE, Total_Damage), y = Total_Damage)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  ggtitle("Top 10 Weather Events by Economic Damage") +
  xlab("Event Type") +
  ylab("Total Economic Damage (USD)") +
  theme_minimal()

Combined Health and Economic Impact

# Event types present in both top-ten tables, with their total health impact
# and total economic damage side by side.
combined_impact <- inner_join(
  select(health_impact, EVTYPE, Total_Impact),
  economic_impact,
  by = "EVTYPE"
)

# Plot: scatter of total health impact against total economic damage, one
# labelled point per event type. Labels are shifted right by 5% of the
# largest health impact value so they do not sit on top of the points.
combined_impact %>%
  ggplot(aes(x = Total_Impact, y = Total_Damage)) +
  geom_point(size = 3, color = "steelblue") +
  geom_text(
    aes(label = EVTYPE),
    size = 3,
    hjust = 0,
    vjust = 0,
    nudge_x = 0.05 * max(combined_impact$Total_Impact)
  ) +
  ggtitle("Health vs. Economic Impact of Top Weather Events") +
  xlab("Total Health Impact (Fatalities + Injuries)") +
  ylab("Total Economic Damage (USD)") +
  theme_minimal()