This report explores the NOAA Storm Database to identify the types of severe weather events in the U.S. that are most harmful to population health and have the greatest economic consequences. The dataset includes events recorded from 1950 through November 2011, with variables on fatalities, injuries, and damage estimates.
We use R to process the raw data, clean the event types, and calculate total health and economic impacts for each weather event type. The results indicate that tornadoes are the most harmful to human health, while floods and hurricanes cause the most significant financial losses.
All data processing and analysis are carried out in this document with reproducible code, and the results are presented in plots to support resource planning and policy decisions.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(stringr)
# Load dataset from compressed CSV
storm_data <- read.csv(
  "repdata_data_StormData.csv.bz2",
  stringsAsFactors = FALSE
)

# Select important columns and clean the event types.
# EVTYPE is upper-cased and its whitespace is squished (trimmed, inner runs
# collapsed to one space) so that labels differing only in case or spacing
# fall into the same group in the per-event summaries.
storm_df <- storm_data %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(EVTYPE = str_to_upper(str_squish(EVTYPE)))
# Helper function to convert damage exponents
#
# Translates damage-exponent codes into numeric multipliers. The function is
# vectorised: `e` may be a single code or a whole column of codes.
#
# Args:
#   e: Vector of exponent codes (character, or anything match() can compare
#      against character codes).
#
# Returns:
#   Unnamed numeric vector with one multiplier per element of `e`:
#     "h"/"H" -> 1e2, "k"/"K" -> 1e3, "m"/"M" -> 1e6, "b"/"B" -> 1e9,
#     "" or "0" -> 1, anything else (including NA) -> 0.
#
# NOTE(review): codes such as "1"-"8", "+", "-" and "?" fall into the
# "anything else" bucket and zero out the damage figure -- confirm against the
# NOAA Storm Data documentation that this is the intended treatment.
convert_exp <- function(e) {
  known_codes <- c("h", "H", "k", "K", "m", "M", "b", "B", "", "0")
  multipliers <- c(1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9, 1, 1)

  # match() yields NA for unrecognised (or missing) codes; those become 0.
  result <- multipliers[match(e, known_codes)]
  result[is.na(result)] <- 0
  result
}
# Apply exponent conversion
# vapply() is used instead of sapply() so the result is always a plain numeric
# vector (sapply() returns an empty list for zero-length input), and
# USE.NAMES = FALSE stops the code strings from being attached as element
# names to every row.
storm_df$PROPDMGEXP <- vapply(
  storm_df$PROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
storm_df$CROPDMGEXP <- vapply(
  storm_df$CROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)

# Calculate total damages: reported magnitude times its numeric multiplier.
storm_df <- storm_df %>%
  mutate(
    PROPDMG_TOTAL = PROPDMG * PROPDMGEXP,
    CROPDMG_TOTAL = CROPDMG * CROPDMGEXP
  )
# Health impact: sum fatalities and injuries per event type, add their
# combined total, and keep the ten event types with the largest total.
health_impact <- local({
  per_event <- summarise(
    group_by(storm_df, EVTYPE),
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE)
  )
  per_event <- mutate(per_event, total_health = fatalities + injuries)
  head(arrange(per_event, desc(total_health)), 10)
})

# Plot: horizontal bar chart of combined fatalities and injuries.
ggplot(
  data = health_impact,
  mapping = aes(x = reorder(EVTYPE, -total_health), y = total_health)
) +
  geom_col(fill = "darkred") +
  ggtitle("Top 10 Weather Events Affecting Population Health") +
  xlab("Event Type") +
  ylab("Total Fatalities and Injuries") +
  theme_minimal() +
  coord_flip()
# Economic impact: total property plus crop damage per event type, keeping
# the ten event types with the largest total.
# The two components are summed separately so that a missing value in one of
# them does not also discard the other component for that row (which is what
# sum(a + b, na.rm = TRUE) would do).
economic_impact <- storm_df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_damage = sum(PROPDMG_TOTAL, na.rm = TRUE) +
      sum(CROPDMG_TOTAL, na.rm = TRUE)
  ) %>%
  arrange(desc(total_damage)) %>%
  head(10)

# Plot: horizontal bar chart; damage is rescaled to billions for the axis.
ggplot(
  economic_impact,
  aes(x = reorder(EVTYPE, -total_damage), y = total_damage / 1e9)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type", y = "Damage (Billion USD)"
  ) +
  theme_minimal() +
  coord_flip()