Synopsis

This analysis investigates the NOAA Storm Database to identify the types of weather events that are most harmful to population health and those with the greatest economic consequences. The analysis involves processing the raw data set, summarizing the impact on health and the economy, and visualizing the results. Findings reveal the types of events that lead to the most fatalities and injuries, as well as the ones causing the most significant financial losses. This information is crucial for prioritizing resources and preparing for severe weather events.

Data Processing

First, the necessary packages and the data set are loaded. The data set is read from a bzip2-compressed CSV file, and initial data cleaning and transformation are performed.

# Load required libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(knitr)

# Load the data. read.csv() reads the bzip2-compressed file directly;
# strings are kept as character vectors rather than factors.
storm_data <- read.csv("repdata_data_StormData.csv.bz2", stringsAsFactors = FALSE)

# Normalise EVTYPE so that labels differing only by letter case or by
# leading/trailing whitespace are grouped as the same event type.
storm_data$EVTYPE <- trimws(tolower(storm_data$EVTYPE))

# Summarize the data

# Translate a vector of damage exponent codes into numeric multipliers.
# "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9; every other code
# (including empty strings and NA) maps to a multiplier of 1.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  # Name-based lookup yields NA for any code that is not K, M or B.
  multiplier <- unname(known[toupper(exp_code)])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# One row per event type with total fatalities, injuries, and property and
# crop damage (damage amount times the multiplier for its exponent code).
# Missing values are ignored in all four totals so that a single NA cannot
# turn an event type's total into NA.
summary_data <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    total_property_damage = sum(
      PROPDMG * damage_multiplier(PROPDMGEXP),
      na.rm = TRUE
    ),
    total_crop_damage = sum(
      CROPDMG * damage_multiplier(CROPDMGEXP),
      na.rm = TRUE
    )
  )

Results

The following figures show the top 10 severe weather events by total fatalities, injuries, property damage, and crop damage.

# Top 10 harmful events by total fatalities and injuries.
# slice_max() keeps the rows with the largest values of the given column and
# returns them in descending order, replacing the superseded top_n() and the
# separate arrange() step. Ties at the cutoff are kept, so more than 10 rows
# can be returned when several event types share the 10th-largest value.
top_fatalities <- summary_data %>%
  slice_max(total_fatalities, n = 10)

top_injuries <- summary_data %>%
  slice_max(total_injuries, n = 10)

# Top 10 events by property damage and crop damage
top_property_damage <- summary_data %>%
  slice_max(total_property_damage, n = 10)

top_crop_damage <- summary_data %>%
  slice_max(total_crop_damage, n = 10)

# Combined plot for health impact.
# Every event type that is in the top list for fatalities or for injuries is
# shown with BOTH metrics, as side-by-side (dodged) bars. Building one long
# data frame and a single layer avoids drawing one metric's bar on top of the
# other and gives the axis one well-defined ordering.
health_events <- union(top_fatalities$EVTYPE, top_injuries$EVTYPE)

health_totals <- summary_data %>%
  filter(EVTYPE %in% health_events)

# Long format: one row per (event type, impact kind).
health_impact <- bind_rows(
  health_totals %>%
    transmute(EVTYPE, impact = "Fatalities", count = total_fatalities),
  health_totals %>%
    transmute(EVTYPE, impact = "Injuries", count = total_injuries)
)

# Events are ordered by their combined fatalities + injuries.
ggplot(
  health_impact,
  aes(x = reorder(EVTYPE, count, FUN = sum), y = count, fill = impact)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(title = "Top 10 Weather Events by Fatalities and Injuries",
       x = "Event Type",
       y = "Count",
       fill = "Impact") +
  theme_minimal()

# Combined plot for economic impact.
# Every event type that is in the top list for property damage or for crop
# damage is shown with BOTH damage types, as side-by-side (dodged) bars.
# Building one long data frame and a single layer avoids drawing one damage
# type's bar on top of the other and gives the axis one well-defined ordering.
economic_events <- union(top_property_damage$EVTYPE, top_crop_damage$EVTYPE)

economic_totals <- summary_data %>%
  filter(EVTYPE %in% economic_events)

# Long format: one row per (event type, damage type).
economic_impact <- bind_rows(
  economic_totals %>%
    transmute(
      EVTYPE,
      damage_type = "Property Damage",
      damage = total_property_damage
    ),
  economic_totals %>%
    transmute(
      EVTYPE,
      damage_type = "Crop Damage",
      damage = total_crop_damage
    )
)

# Events are ordered by their combined property + crop damage.
ggplot(
  economic_impact,
  aes(x = reorder(EVTYPE, damage, FUN = sum), y = damage, fill = damage_type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(title = "Top 10 Weather Events by Property and Crop Damage",
       x = "Event Type",
       y = "Damage (in dollars)",
       fill = "Damage Type") +
  theme_minimal()